001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030/** 031 * 032 */ 033package org.openimaj.image.annotation.evaluation.datasets; 034 035import java.io.BufferedReader; 036import java.io.File; 037import java.io.FileNotFoundException; 038import java.io.FileReader; 039import java.io.IOException; 040import java.net.MalformedURLException; 041import java.net.URL; 042import java.util.ArrayList; 043import java.util.Arrays; 044import java.util.HashMap; 045import java.util.List; 046import java.util.Map; 047 048import org.openimaj.citation.annotation.Reference; 049import org.openimaj.citation.annotation.ReferenceType; 050import org.openimaj.data.dataset.GroupedDataset; 051import org.openimaj.data.dataset.ListBackedDataset; 052import org.openimaj.data.dataset.ListDataset; 053import org.openimaj.data.dataset.MapBackedDataset; 054import org.openimaj.experiment.annotations.DatasetDescription; 055import org.openimaj.experiment.evaluation.agreement.CohensKappaInterraterAgreement; 056import org.openimaj.experiment.evaluation.agreement.MajorityVoting; 057import org.openimaj.ml.annotation.ScoredAnnotation; 058import org.openimaj.util.iterator.TextLineIterable; 059import org.openimaj.util.pair.ObjectFloatPair; 060import org.openimaj.web.flickr.FlickrImage; 061 062/** 063 * A wrapper dataset for the MMSys2013 Fashion-Focussed Creative Commons social 064 * dataset (Loni, et.al). 065 * 066 * TODO: Need to add the citation here. From 067 * http://dl.acm.org/citation.cfm?id=2483984 068 * 069 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 070 * @created 12 Aug 2013 071 * @version $Author$, $Revision$, $Date$ 072 */ 073@Reference( 074 type = ReferenceType.Inproceedings, 075 author = { "Loni, Babak", "Menendez, Maria", "Georgescu, Mihai", "Galli, Luca", "Massari, Claudio", "Altingovde, Ismail Sengor", "Martinenghi, Davide", "Melenhorst, Mark", "Vliegendhart, Raynor", "Larson, Martha" }, 076 title = "Fashion-focused creative commons social dataset", 077 year = "2013", 078 booktitle = "Proceedings of the 4th ACM Multimedia Systems Conference", 079 pages = { "72", "", "77" }, 080 url = "http://doi.acm.org/10.1145/2483977.2483984", 081 publisher = "ACM", 082 series = "MMSys '13", 083 customData = { 084 "isbn", "978-1-4503-1894-5", 085 "location", "Oslo, Norway", 086 "numpages", "6", 087 "doi", "10.1145/2483977.2483984", 088 "acmid", "2483984", 089 "address", "New York, NY, USA", 090 "keywords", "crowdsourcing, dataset, fashion, multimedia content analysis" 091 }) 092@DatasetDescription( 093 name = "Fashion-Focused Creative Commons Social Dataset", 094 description = "a fashion-focused Creative Commons dataset, which is " 095 + "designed to contain a mix of general images as well as a large " 096 + "component of images that are focused on fashion (i.e., relevant " 097 + "to particular clothing items or fashion accessories)", 098 creator = "Babak Loni, Maria Menendez, Mihai Georgescu, Luca Galli, " 099 + "Claudio Massari, Ismail Sengor Altingovde, Davide Martinenghi, " 100 + "Mark Melenhorst, Raynor Vliegendhart, Martha Larson", 101 downloadUrls = { 102 "http://skuld.cs.umass.edu/traces/mmsys/2013/fashion/Fashion Dataset.zip" }) 103public class MMSys2013 104{ 105 /** 106 * Allowable types of answer for each question. 107 * 108 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 109 * @created 12 Aug 2013 110 * @version $Author$, $Revision$, $Date$ 111 */ 112 public static enum QuestionResponse 113 { 114 /** No */ 115 NO, 116 /** Yes */ 117 YES, 118 /** Not sure */ 119 NOT_SURE, 120 /** Question was unanswered */ 121 UNANSWERED; 122 } 123 124 /** 125 * A response to a HIT 126 * 127 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 128 * @created 12 Aug 2013 129 * @version $Author$, $Revision$, $Date$ 130 */ 131 public static class Response 132 { 133 /** Whether the image contains a depicition of the category subject */ 134 public QuestionResponse containsCategoryDepiction; 135 136 /** Whether the image is in the correct category */ 137 public QuestionResponse isInCorrectCategory; 138 139 /** How familiar is the responder with the category */ 140 public int familiarityWithCategory; 141 142 /** 143 * Constructor 144 * 145 * @param r1 146 * contains category depiction 147 * @param r2 148 * is in correct category 149 * @param familiarity 150 * familiarity with subject 151 */ 152 public Response(final QuestionResponse r1, final QuestionResponse r2, final int familiarity) 153 { 154 this.containsCategoryDepiction = r1; 155 this.isInCorrectCategory = r2; 156 this.familiarityWithCategory = familiarity; 157 } 158 159 @Override 160 public String toString() 161 { 162 return "{" + this.containsCategoryDepiction + "," + 163 this.isInCorrectCategory + "," + this.familiarityWithCategory + "}"; 164 } 165 } 166 167 /** 168 * A record in the Fashion 10,000 dataset. 169 * 170 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 171 * @created 12 Aug 2013 172 * @version $Author$, $Revision$, $Date$ 173 */ 174 protected static class Record 175 { 176 /** The Flickr Photo */ 177 public FlickrImage image; 178 179 /** The category in which the image was found */ 180 public String category; 181 182 /** A set of responses for this image */ 183 public Response[] annotations; 184 185 @Override 186 public String toString() 187 { 188 return this.image.getId() + ":" + this.category + "[" + 189 Arrays.toString(this.annotations) + "]"; 190 } 191 } 192 193 protected String baseLocation = 194 "/data/degas/mediaeval/mediaeval-crowdsourcing/MMSys2013/"; 195 196 protected String expertDataFile = 197 "Annotations/Annotation_PerImage_Trusted.csv"; 198 199 protected String nonExpertDataFile = 200 "Annotations/Annotation_PerImage_NonExperts.csv"; 201 202 protected String groundTruthFile = 203 "Annotations/GroundTruth.csv"; 204 205 protected String queriesFile = 206 "Metadata/queries.csv"; 207 208 /** 209 * Returns the ground truth set. 210 * 211 * @return The grouped dataset 212 */ 213 public GroupedDataset<String, GroupedDataset<String, ListDataset<Response>, Response>, Response> 214 getGroundTruth() 215 { 216 final GroupedDataset<String, GroupedDataset<String, ListDataset<Response>, Response>, Response> results = new MapBackedDataset<String, GroupedDataset< 217 String, ListDataset<Response>, Response>, MMSys2013.Response>(); 218 219 // The ground truth dataset doesn't contain categories, sadly - just 220 // the filename and the results. So we need to go and get the categories 221 // for each of the images first. We'll do that from the queries file. 222 final HashMap<Long, String> categoryCache = new HashMap<Long, String>(); 223 boolean firstLine = true; 224 for (final String line : new TextLineIterable(new File(this.baseLocation, this.queriesFile))) 225 { 226 if (!firstLine) 227 { 228 final String[] parts = line.split(",", -1); 229 230 // The substrings remove the quotes either side of the value 231 categoryCache.put( 232 Long.parseLong(parts[3].substring(1).substring(0, parts[3].length() - 2)), 233 parts[0].substring(1).substring(0, parts[0].length() - 2)); 234 } 235 236 firstLine = false; 237 } 238 239 firstLine = true; 240 for (final String line : new TextLineIterable(new File(this.baseLocation, this.groundTruthFile))) 241 { 242 if (!firstLine) 243 { 244 try 245 { 246 final String[] parts = line.split(",", -1); 247 248 // Get the category for the given image. 249 final String url = parts[0]; 250 final FlickrImage fi = FlickrImage.create(new URL(url)); 251 final String cat = categoryCache.get(fi.getId()); 252 253 // Get the category list 254 GroupedDataset<String, ListDataset<Response>, Response> gds = results.get(cat); 255 256 // Check whether we already have a dataset for 257 // the image in this category 258 if (gds == null) 259 { 260 // Create a new dataset for images in this category 261 gds = new MapBackedDataset<String, 262 ListDataset<Response>, Response>(); 263 results.put(cat, gds); 264 } 265 266 // See if we have any responses for this image already 267 ListDataset<Response> ids = gds.get(url); 268 269 // If not, create the dataset for this image 270 if (ids == null) 271 { 272 ids = new ListBackedDataset<Response>(); 273 gds.put(url, ids); 274 } 275 276 // Get the response for this image and add it 277 final Response rr = new Response( 278 this.parseQR(parts[1]), 279 this.parseQR(parts[2]), 1); 280 ids.add(rr); 281 } catch (final MalformedURLException e) 282 { 283 e.printStackTrace(); 284 } 285 } 286 287 firstLine = false; 288 } 289 290 return results; 291 } 292 293 /** 294 * Returns the results from the non-expert turkers. 295 * 296 * @return The grouped dataset 297 */ 298 public GroupedDataset<String, GroupedDataset<String, 299 ListDataset<Response>, Response>, Response> getNonExpertData() 300 { 301 return this.parseMetadata(new File(this.baseLocation, this.nonExpertDataFile)); 302 } 303 304 /** 305 * Returns the results from the expert turkers. 306 * 307 * @return The grouped dataset 308 */ 309 public GroupedDataset<String, GroupedDataset<String, 310 ListDataset<Response>, Response>, Response> getExpertData() 311 { 312 return this.parseMetadata(new File(this.baseLocation, this.expertDataFile)); 313 } 314 315 /** 316 * @param metadataFile 317 * @return A grouped dataset 318 */ 319 public GroupedDataset<String, GroupedDataset<String, 320 ListDataset<Response>, Response>, Response> parseMetadata( 321 final File metadataFile) 322 { 323 final GroupedDataset<String, GroupedDataset<String, ListDataset<Response>, Response>, Response> results = new MapBackedDataset<String, GroupedDataset< 324 String, ListDataset<Response>, Response>, MMSys2013.Response>(); 325 326 BufferedReader br = null; 327 try 328 { 329 br = new BufferedReader(new FileReader(metadataFile)); 330 String line; 331 boolean firstLine = true; 332 int count = 1; 333 while ((line = br.readLine()) != null) 334 { 335 if (!firstLine) 336 { 337 try 338 { 339 final String[] parts = line.split(",", -1); 340 341 final Response[] r = new Response[3]; 342 r[0] = new Response(this.parseQR(parts[3]), 343 this.parseQR(parts[6]), this.parseF(parts[9])); 344 r[1] = new Response(this.parseQR(parts[4]), 345 this.parseQR(parts[7]), this.parseF(parts[10])); 346 r[2] = new Response(this.parseQR(parts[5]), 347 this.parseQR(parts[8]), parts.length > 11 ? 348 this.parseF(parts[11]) : -1); 349 350 GroupedDataset<String, ListDataset<Response>, Response> gds = results.get(parts[2]); 351 352 // Check whether we already have a dataset for 353 // the image in this category 354 if (gds == null) 355 { 356 // Create a new dataset for images in this category 357 gds = new MapBackedDataset<String, 358 ListDataset<Response>, Response>(); 359 results.put(parts[2], gds); 360 } 361 362 // See if we have any responses for this image already 363 ListDataset<Response> ids = gds.get(parts[1]); 364 365 // If not, create the dataset for this image 366 if (ids == null) 367 { 368 ids = new ListBackedDataset<Response>(); 369 gds.put(parts[1], ids); 370 } 371 372 // Add the each response for this image 373 for (final Response rr : r) 374 ids.add(rr); 375 } catch (final Exception e) 376 { 377 System.err.println("Error on line " + count); 378 e.printStackTrace(); 379 } 380 } 381 firstLine = false; 382 count++; 383 } 384 br.close(); 385 } catch (final FileNotFoundException e) 386 { 387 e.printStackTrace(); 388 } catch (final IOException e) 389 { 390 e.printStackTrace(); 391 } finally 392 { 393 if (br != null) 394 try 395 { 396 br.close(); 397 } catch (final IOException e) 398 { 399 e.printStackTrace(); 400 } 401 } 402 403 return results; 404 } 405 406 /** 407 * Given a string returns a question response. 408 * 409 * @param qr 410 * The string 411 * @return A {@link QuestionResponse} 412 */ 413 protected QuestionResponse parseQR(final String qr) 414 { 415 if (qr.toLowerCase().equals("yes")) 416 return QuestionResponse.YES; 417 if (qr.toLowerCase().equals("no")) 418 return QuestionResponse.NO; 419 if (qr.toLowerCase().equals("notsure")) 420 return QuestionResponse.NOT_SURE; 421 return QuestionResponse.UNANSWERED; 422 } 423 424 protected int parseF(final String f) 425 { 426 try 427 { 428 return Integer.parseInt(f); 429 } catch (final NumberFormatException e) 430 { 431 return -1; 432 } 433 } 434 435 /** 436 * For a given {@link GroupedDataset} that represents the results from a 437 * single category, returns a list of scored annotations for each group, for 438 * question 1 (contains depication of category). 439 * 440 * @param data 441 * The data 442 * @return a list of {@link ScoredAnnotation} linked to image URL 443 */ 444 public static Map<String, List<ScoredAnnotation<QuestionResponse>>> 445 getAnnotationsQ1( 446 final GroupedDataset<String, ListDataset<Response>, Response> data) 447 { 448 final Map<String, List<ScoredAnnotation<QuestionResponse>>> r = 449 new HashMap<String, List<ScoredAnnotation<QuestionResponse>>>(); 450 451 // Loop through the images in this dataset 452 for (final String imgUrl : data.getGroups()) 453 { 454 final ListDataset<Response> l = data.get(imgUrl); 455 456 final List<ScoredAnnotation<QuestionResponse>> l2 = 457 new ArrayList<ScoredAnnotation<QuestionResponse>>(); 458 r.put(imgUrl, l2); 459 460 // Loop through the responses for this image 461 for (final Response rr : l) 462 l2.add(new ScoredAnnotation<QuestionResponse>( 463 rr.containsCategoryDepiction, rr.familiarityWithCategory)); 464 } 465 466 return r; 467 } 468 469 /** 470 * For a given {@link GroupedDataset} that represents the results from a 471 * single category, returns a list of scored annotations for each group, for 472 * question 2 (is in category). 473 * 474 * @param data 475 * The group name to retrieve 476 * @return a list of {@link ScoredAnnotation} linked to image URL 477 */ 478 public static Map<String, List<ScoredAnnotation<QuestionResponse>>> 479 getAnnotationsQ2( 480 final GroupedDataset<String, ListDataset<Response>, Response> data) 481 { 482 final Map<String, List<ScoredAnnotation<QuestionResponse>>> r = 483 new HashMap<String, List<ScoredAnnotation<QuestionResponse>>>(); 484 485 // Loop through the images in this dataset 486 for (final String imgUrl : data.getGroups()) 487 { 488 final ListDataset<Response> l = data.get(imgUrl); 489 490 final List<ScoredAnnotation<QuestionResponse>> l2 = 491 new ArrayList<ScoredAnnotation<QuestionResponse>>(); 492 r.put(imgUrl, l2); 493 494 // Loop through the responses for this image 495 for (final Response rr : l) 496 l2.add(new ScoredAnnotation<QuestionResponse>( 497 rr.isInCorrectCategory, rr.familiarityWithCategory)); 498 } 499 500 return r; 501 } 502 503 /** 504 * @param args 505 */ 506 public static void main(final String[] args) 507 { 508 System.out.println(); 509 510 // Expert annotations for Q1 and Q2 511 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q1r1 = 512 MMSys2013.getAnnotationsQ1(new MMSys2013().getExpertData().get("Cowboy hat")); 513 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q2r1 = 514 MMSys2013.getAnnotationsQ2(new MMSys2013().getExpertData().get("Cowboy hat")); 515 516 // Non expert annotations for Q1 and Q2 517 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q1r2 = 518 MMSys2013.getAnnotationsQ1(new MMSys2013().getNonExpertData().get("Cowboy hat")); 519 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q2r2 = 520 MMSys2013.getAnnotationsQ2(new MMSys2013().getNonExpertData().get("Cowboy hat")); 521 522 // Ground truth data for Q1 and Q2 523 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q1gt = 524 MMSys2013.getAnnotationsQ1(new MMSys2013().getGroundTruth().get("Cowboy hat")); 525 final Map<String, List<ScoredAnnotation<QuestionResponse>>> q2gt = 526 MMSys2013.getAnnotationsQ2(new MMSys2013().getGroundTruth().get("Cowboy hat")); 527 528 // Majority voting on the data sets 529 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q1r1mv = 530 MajorityVoting.calculateBasicMajorityVote(q1r1); 531 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q2r1mv = 532 MajorityVoting.calculateBasicMajorityVote(q2r1); 533 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q1r2mv = 534 MajorityVoting.calculateBasicMajorityVote(q1r2); 535 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q2r2mv = 536 MajorityVoting.calculateBasicMajorityVote(q2r2); 537 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q1gtmv = 538 MajorityVoting.calculateBasicMajorityVote(q1gt); 539 final Map<String, ObjectFloatPair<ScoredAnnotation<QuestionResponse>>> q2gtmv = 540 MajorityVoting.calculateBasicMajorityVote(q2gt); 541 542 // Agreement output 543 System.out.println("Question 1 agreement between raters 1 and 2: " + 544 CohensKappaInterraterAgreement.calculate(q1r1mv, q1r2mv)); 545 System.out.println("Question 1 agreement between rater 1 and GT: " + 546 CohensKappaInterraterAgreement.calculate(q1r1mv, q1gtmv)); 547 System.out.println("Question 1 agreement between rater 2 and GT: " + 548 CohensKappaInterraterAgreement.calculate(q1r2mv, q1gtmv)); 549 550 System.out.println("Question 2 agreement between raters 1 and 2: " + 551 CohensKappaInterraterAgreement.calculate(q2r1mv, q2r2mv)); 552 System.out.println("Question 2 agreement between rater 1 and GT: " + 553 CohensKappaInterraterAgreement.calculate(q2r1mv, q2gtmv)); 554 System.out.println("Question 2 agreement between rater 2 and GT: " + 555 CohensKappaInterraterAgreement.calculate(q2r2mv, q2gtmv)); 556 } 557}