/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.demos.sandbox.video.gt;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileExistsException;
import org.openimaj.audio.AudioStream;
import org.openimaj.data.dataset.ListBackedDataset;
import org.openimaj.data.identity.Identifiable;
import org.openimaj.image.MBFImage;
import org.openimaj.video.Video;
import org.openimaj.video.VideoDisplay.EndAction;
import org.openimaj.video.VideoFrame;
import org.openimaj.video.VideoPlayer;
import org.openimaj.video.timecode.HrsMinSecFrameTimecode;
import org.openimaj.video.timecode.VideoTimecode;

/**
 * A video player that allows the recording of timestamps for creating a
 * ground-truth. The class uses an {@link IdentifierProducer} to tag objects
 * within the scene. Each {@link Identifiable} that the producer produces has a
 * set of states (annotations) which are given by the {@link StateProvider}. The
 * tool outputs a list of annotations each linked to a list of frames (
 * {@link IdentifiableVideoFrame}s) each of which contains a list of
 * {@link Identifiable}s whose state changed on that frame. For example,
 * <p>
 *
 * <pre>
 * <code>
 * 	00:00:01:23 Scene 1 - Face 1 SPEAKING
 * 	00:00:04:12 Scene 1 - Face 1 NOT_SPEAKING
 * 	00:00:06:05 Scene 1 - Face 2 SPEAKING
 * 	00:00:10:01 Scene 1 - Face 2 NOT_SPEAKING
 * 	00:00:11:15 Scene 2 - Face 1 SPEAKING
 * 	00:00:14:00 Scene 2 - Face 1 NOT_SPEAKING
 * </code>
 * </pre>
 *
 * In this example there are two faces identified (within the first scene). Face
 * 1 speaks between 00:00:01:23 (1 second 23 frames) and 00:00:04:12. Face 2
 * speaks between 00:00:06:05 and 00:00:10:01. The first face in another scene
 * begins speaking again at 00:00:11:15 until 00:00:14:00. Note that Face 1 in
 * Scenes 1 and 2 may be different people (they may not too). The identifier
 * provider does all it can in the situation to provide unique identifiers. The
 * meaning of Face 1 and 2 depends on the implementation of the identifier
 * provider. SPEAKING and NOT_SPEAKING is given by the {@link StateProvider} and
 * the timecodes are given by the {@link IdentifiableVideoFrame} (created by
 * default).
 * <p>
 * Annotations can also include end times. So the above could be represented as:
 * <p>
 *
 * <pre>
 * <code>
 * 	00:00:01:23-00:00:04:11 Scene 1 - Face 1 SPEAKING
 * 	00:00:04:12-00:00:06:04 Scene 1 - Face 1 NOT_SPEAKING
 * 	etc.
 * </code>
 * </pre>
 * <p>
 * The user interface provided is based on a {@link VideoPlayer}. The
 * {@link StateProvider} can provide its own user interface for selecting the
 * current state, or it can provide a key listener which will be automatically
 * added to the video player controls. The user should be in the state provider
 * loop.
 * <p>
 * The object for annotating is selected by the user of this class. Also, the
 * means for updating the state of any selected identifiable is also determined
 * by the user of this class. When the state of an identifiable changes, the
 * {@link #updateIdentifiable(Identifiable)} method should be called, which will
 * query the state provider for the current state of the identifiable. This will
 * then be added to the dataset for the current video timecode.
 * <p>
 * The class provides a {@link #writeDataset(File)} method for writing the
 * generated dataset to a text file. The file is formatted by the
 * {@link AnnotatedIdentifiable#toString()} method in the form:
 * <p>
 *
 * <pre>
 * <code>
 * 	id@start&gt;end:[comma-separated-tags]
 * </code>
 * </pre>
 *
 * This implies that the identifier cannot include @ symbols and that tags may
 * not contain commas. The timecodes are {@link HrsMinSecFrameTimecode} objects
 * represented as strings (Hrs:Mins:Secs:Frames).
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 10 Aug 2012
 * @version $Author$, $Revision$, $Date$
 */
public class VideoGroundTruth
{
    /**
     * An interface for objects which produce identifiers to stamp. So, if there
     * are multiple objects within a single video frame, the identifier producer
     * can produce identifiers that allow them to be tagged. For example, if
     * there are people in a video, they may be identified by their URI, or just
     * by a number. The task of the identifier is to ensure that the
     * identifiable is somehow tracked between frames. For example, if there
     * were faces in a shot and the faces moved around, moved off screen, etc.
     * the identifier producer should produce the same ID for each face within a
     * given series of frames, at least as far as possible.
     * <p>
     * The {@link #getIdentifiers()} method should only return identifiers for
     * objects that are in the current frame (not for the whole video).
     *
     * @author David Dupplaw (dpd@ecs.soton.ac.uk)
     * @created 10 Aug 2012
     * @version $Author$, $Revision$, $Date$
     */
    public static interface IdentifierProducer
    {
        /**
         * Returns the list of object identifiers that are being truthed in the
         * current video frame.
         *
         * @return The identifiers
         */
        public List<Identifiable> getIdentifiers();
    }

    /**
     * Provides states for identifiables.
     *
     * @author David Dupplaw (dpd@ecs.soton.ac.uk)
     * @created 28 Aug 2012
     * @version $Author$, $Revision$, $Date$
     */
    public static interface StateProvider
    {
        /**
         * Returns the current state for the given identifiable.
         *
         * @param id
         *            The identifiable
         * @return The states
         */
        public List<String> getCurrentState(Identifiable id);
    }

    /**
     * An identifiable video frame that uses the timecode of the frame to
     * identify the frame. The default implementation uses the timecode of the
     * frame to identify the frame.
     *
     * @author David Dupplaw (dpd@ecs.soton.ac.uk)
     * @created 10 Aug 2012
     * @version $Author$, $Revision$, $Date$
     */
    public static class IdentifiableVideoFrame extends VideoFrame<MBFImage>
            implements Identifiable
    {
        /**
         * @param frame
         *            The frame
         * @param timecode
         *            The timecode
         */
        public IdentifiableVideoFrame(final MBFImage frame, final VideoTimecode timecode)
        {
            super(frame, timecode);
        }

        /**
         *
         * @param frame
         *            The frame
         */
        public IdentifiableVideoFrame(final VideoFrame<MBFImage> frame)
        {
            super(frame.frame, frame.timecode);
        }

        /**
         * {@inheritDoc}
         *
         * @see org.openimaj.data.identity.Identifiable#getID()
         */
        @Override
        public String getID()
        {
            return "" + this.timecode.toString();
        }

        @Override
        public String toString()
        {
            return this.getID();
        }
    }

    /**
     * An identifiable that has been annotated with states (tags) between
     * specific times. If the end timestamp is null, then the start timestamp
     * gives the only time at which the tags are valid.
     *
     * @author David Dupplaw (dpd@ecs.soton.ac.uk)
     * @created 28 Aug 2012
     * @version $Author$, $Revision$, $Date$
     */
    public static class AnnotatedIdentifiable implements Identifiable
    {
        /** The timestamp at which the tags are valid for the identifiable */
        public VideoTimecode startTimestamp;

        /**
         * The timestamp at which the tags are no longer valid for the
         * identifiable
         */
        public VideoTimecode endTimestamp;

        /** This identifiable */
        public Identifiable id;

        /** Tags associated with this identifiable */
        public List<String> tags;

        /**
         * {@inheritDoc}
         *
         * @see org.openimaj.data.identity.Identifiable#getID()
         */
        @Override
        public String getID()
        {
            return this.id.getID();
        }

        @Override
        public String toString()
        {
            return this.id + "@" + this.startTimestamp + ">" + this.endTimestamp + ":" + this.tags;
        }
    }

    /**
     * The pattern that matches the output of
     * {@link AnnotatedIdentifiable#toString()}: id@start&gt;end:[tags].
     * Compiled once rather than on every call to {@link #loadDataset(File)}.
     */
    private static final Pattern LINE_PATTERN =
            Pattern.compile("(.*)@(.*)>(.*):\\[(.*)\\]");

    /** The display */
    private VideoPlayer<MBFImage> display = null;

    /** List of all the events */
    private ListBackedDataset<AnnotatedIdentifiable> dataset = null;

    /** The video */
    private Video<MBFImage> video = null;

    /** The state provider */
    private final StateProvider stateProvider;

    /**
     * Constructor that provides an identifier producer that returns the current
     * frame of the video.
     *
     * @param video
     *            The video to ground truth
     * @param sp
     *            The state provider
     */
    public VideoGroundTruth(final Video<MBFImage> video, final StateProvider sp)
    {
        this(video, null, sp);
    }

    /**
     * Constructor that provides an identifier producer that returns the current
     * frame of the video; that is the state provider tags frames of video
     * rather than objects within frames of video.
     *
     * @param video
     *            The video to ground truth
     * @param audio
     *            The audio to play
     * @param sp
     *            The state provider
     */
    public VideoGroundTruth(final Video<MBFImage> video, final AudioStream audio,
            final StateProvider sp)
    {
        // If no IdentifierProducer is given, we'll use one that
        // simply returns a SMTPE-like video timecode.
        this(video, audio, new IdentifierProducer()
        {
            @Override
            public List<Identifiable> getIdentifiers()
            {
                final List<Identifiable> r =
                        new ArrayList<Identifiable>();
                r.add(new IdentifiableVideoFrame(video.getCurrentFrame(),
                        new HrsMinSecFrameTimecode(video.getCurrentFrameIndex(),
                                video.getFPS())));
                return r;
            }
        }, sp);
    }

    /**
     * Constructor
     *
     * @param video
     *            The video to ground truth
     * @param audio
     *            The audio to play
     * @param idProd
     *            The identifier producer
     * @param sp
     *            The state provider
     */
    public VideoGroundTruth(final Video<MBFImage> video, final AudioStream audio,
            final IdentifierProducer idProd, final StateProvider sp)
    {
        this.video = video;
        this.stateProvider = sp;

        // Create a video player (with navigation controls)
        this.display = VideoPlayer.createVideoPlayer(video, audio);
        this.display.setEndAction(EndAction.STOP_AT_END);
        this.display.showFrame();

        this.dataset = new ListBackedDataset<VideoGroundTruth.AnnotatedIdentifiable>();
    }

    /**
     * Start the process.
     */
    public void run()
    {
        this.display.run();
    }

    /**
     * Returns the video player component being used to play the video.
     *
     * @return the video player component.
     */
    public VideoPlayer<MBFImage> getVideoPlayer()
    {
        return this.display;
    }

    /**
     * Force the given identifiable to be updated in the dataset for the current
     * time.
     *
     * @param i
     *            The identifiable
     */
    public void updateIdentifiable(final Identifiable i)
    {
        final List<String> tags = this.stateProvider.getCurrentState(i);

        // No state means nothing to record
        if (tags == null)
            return;

        final AnnotatedIdentifiable ai = new AnnotatedIdentifiable();
        ai.id = i;
        ai.startTimestamp = new HrsMinSecFrameTimecode(
                this.video.getCurrentFrameIndex(), this.video.getFPS());
        ai.tags = new ArrayList<String>(tags);

        this.addToDataset(ai);
    }

    /**
     * Add an identifiable time region that will be annotated with the current
     * state.
     *
     * @param i
     *            The identifiable
     * @param start
     *            The start timestamp
     * @param end
     *            The end timestamp
     */
    public void updateIdentifiableRegion(final Identifiable i,
            final VideoTimecode start, final VideoTimecode end)
    {
        final List<String> tags = this.stateProvider.getCurrentState(i);

        // No state means nothing to record
        if (tags == null)
            return;

        final AnnotatedIdentifiable ai = new AnnotatedIdentifiable();
        ai.id = i;
        ai.startTimestamp = start;
        ai.endTimestamp = end;
        ai.tags = new ArrayList<String>(tags);

        this.addToDataset(ai);
    }

    /**
     * Add the given annotated identifiable to the dataset
     *
     * @param ai
     *            The annotated identifiable
     */
    public void addToDataset(final AnnotatedIdentifiable ai)
    {
        this.dataset.add(ai);

        // NOTE(review): debug output retained for behavioural compatibility;
        // consider removing or replacing with a logger.
        System.out.println(this.dataset);
    }

    /**
     * Writes the created dataset to the given file
     *
     * @param file
     *            The file to write the dataset to.
     * @throws IOException
     *             If the file could not be written
     */
    public void writeDataset(final File file) throws IOException
    {
        // Check if the file already exists
        if (file.exists())
            throw new FileExistsException(file);

        // Ensure that the directory exists for the file. Use the absolute
        // file so that a bare filename (no parent path) doesn't NPE, and
        // only attempt creation when the directory is actually missing —
        // mkdirs() returns false for directories that already exist.
        final File parent = file.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.exists() && !parent.mkdirs())
            throw new IOException("Cannot create directory " + parent);

        // Write all the annotated identifiers, one per line, ensuring the
        // writer is closed even if an append fails.
        final FileWriter fw = new FileWriter(file);
        try
        {
            for (final AnnotatedIdentifiable ai : this.dataset)
                fw.append(ai.toString() + "\n");
        } finally
        {
            fw.close();
        }
    }

    /**
     * Loads a dataset created using {@link #writeDataset(File)} into this
     * class's dataset member so that annotation can be continued. Note that the
     * identifiables that are created when reading back in are anonymous
     * identifiable classes and are not necessarily of the same type as the
     * identifiable which was saved. It will only include the identifier of the
     * object retrievable with {@link Identifiable#getID()}.
     *
     * @param file
     *            The file to read from
     * @throws IOException
     *             If the file cannot be read
     */
    public void loadDataset(final File file) throws IOException
    {
        if (!file.exists())
            throw new FileNotFoundException(file.getName());

        // Read the file, line-by-line, closing the scanner when done.
        final Scanner scanner = new Scanner(file);
        try
        {
            scanner.useDelimiter("\n");

            // Read the file
            while (scanner.hasNext())
            {
                final String line = scanner.next();

                // Match the line against the AnnotatedIdentifiable#toString()
                // pattern: id@start>end:[tags]
                final Matcher m = LINE_PATTERN.matcher(line);
                if (m.find())
                {
                    // Get the parts of the line. Captured groups are numbered
                    // from 1; group(0) is the entire match, so using 0-3 here
                    // would shift every field by one.
                    final String id = m.group(1);
                    final String start = m.group(2);
                    final String end = m.group(3);

                    // Split on the comma plus any surrounding whitespace so
                    // that the ", "-separated List.toString() output written
                    // by writeDataset() round-trips without leading spaces.
                    final String[] tags = m.group(4).split("\\s*,\\s*");

                    // Create a new annotated identifiable object
                    final AnnotatedIdentifiable ai = new AnnotatedIdentifiable();
                    ai.id = new Identifiable() {
                        @Override
                        public String getID() {
                            return id;
                        }
                    };
                    ai.tags = new ArrayList<String>(Arrays.asList(tags));

                    // NOTE(review): a null endTimestamp is written as the
                    // string "null"; fromString is assumed to handle that —
                    // TODO confirm against HrsMinSecFrameTimecode.
                    ai.startTimestamp = HrsMinSecFrameTimecode.fromString(start);
                    ai.endTimestamp = HrsMinSecFrameTimecode.fromString(end);

                    // Add the annotated object to the dataset
                    this.dataset.add(ai);
                }
            }
        } finally
        {
            scanner.close();
        }
    }
}