/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.demos.sandbox.video.gt;

import java.awt.Color;
import java.awt.Dimension;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.swing.AbstractButton;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JToggleButton;

import org.openimaj.audio.AudioStream;
import org.openimaj.data.identity.Identifiable;
import org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.IdentifierProducer;
import org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.StateProvider;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.image.processing.face.tracking.clm.CLMFaceTracker;
import org.openimaj.image.processing.face.tracking.clm.MultiTracker.TrackedFace;
import org.openimaj.video.Video;
import org.openimaj.video.VideoDisplay;
import org.openimaj.video.VideoDisplayListener;
import org.openimaj.video.processing.shotdetector.HistogramVideoShotDetector;
import org.openimaj.video.xuggle.XuggleAudio;
import org.openimaj.video.xuggle.XuggleVideo;

/**
 * A tool for ground-truthing video data based around people. The tool wraps a
 * {@link VideoGroundTruth} video player and, for every frame, runs a shot
 * detector and a face tracker. Whenever a shot boundary is detected (or a
 * redetection is requested by the user) the faces are redetected, sorted
 * left-to-right, given scene-based identifiers and listed in a side panel
 * from which one face can be selected.
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 * @created 10 Aug 2012
 * @version $Author$, $Revision$, $Date$
 */
public class SpeakerDiarizationTool extends JPanel implements
		StateProvider, IdentifierProducer
{
	/**
	 * A panel that displays the faces the tracker is currently tracking as a
	 * column of toggle buttons, together with a button that forces the tracker
	 * to redetect faces.
	 *
	 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
	 * @created 17 Jul 2012
	 * @version $Author$, $Revision$, $Date$
	 */
	protected class TrackerInfo extends JPanel
	{
		/** */
		private static final long serialVersionUID = 1L;

		/** The panel holding one toggle button per tracked face */
		private final JPanel faceList = new JPanel();

		/** Map from each tracked face to the button that represents it */
		private final Map<TrackedFace, AbstractButton> map =
				new HashMap<TrackedFace, AbstractButton>();

		/** Only allow one face to be selected at a time */
		private final ButtonGroup faceGroup = new ButtonGroup();

		/**
		 * Default constructor. Sizes the panel to 600x300 and builds the
		 * widgets.
		 */
		public TrackerInfo()
		{
			super.setLayout(new GridBagLayout());
			super.setPreferredSize(new Dimension(600, 300));
			super.setSize(600, 300);
			this.init();
		}

		/**
		 * Initialises the widgets: the face list fills the available space and
		 * the redetection button sits beneath it.
		 */
		private void init()
		{
			final GridBagConstraints gbc = new GridBagConstraints();
			gbc.gridx = gbc.gridy = 1;
			gbc.weightx = gbc.weighty = 1;
			gbc.fill = GridBagConstraints.BOTH;

			// Add the list of faces (single column, as many rows as needed)
			this.faceList.setLayout(new GridLayout(-1, 1));
			this.faceList.setBackground(Color.black);
			this.add(this.faceList, gbc);

			// Add a button to force redetection. The request is picked up (and
			// cleared) by processFrame() on the next frame.
			final JButton b = new JButton("Force Redetection");
			b.addActionListener(new ActionListener()
			{
				@Override
				public void actionPerformed(final ActionEvent e)
				{
					SpeakerDiarizationTool.this.needsRedetect = true;
				}
			});
			gbc.gridy++;
			gbc.weighty = 0;
			this.add(b, gbc);
		}

		/**
		 * Set the list of faces being tracked. Buttons are created for faces
		 * that are not yet displayed, the labels of faces that are already
		 * displayed are refreshed, and buttons for faces that are no longer in
		 * the list are removed. If no face is selected afterwards, one of the
		 * remaining faces is selected.
		 *
		 * @param faces
		 *            The face list
		 */
		public void setFaceList(final List<SortableTrackedFace> faces)
		{
			// NOTE(review): this mutates Swing components on the caller's
			// thread; confirm whether callers are on the event dispatch thread.

			// Assume every known face has gone until we see it in the new list
			final List<TrackedFace> toRemove =
					new ArrayList<TrackedFace>(this.map.keySet());

			for (final SortableTrackedFace face : faces)
			{
				// The map is keyed on the wrapped TrackedFace, not the wrapper
				final AbstractButton existing = this.map.get(face.face);
				if (existing == null)
				{
					// Add the face to the list as a toggle button
					final JToggleButton b = new JToggleButton(face.toString(),
							new ImageIcon(ImageUtilities.createBufferedImage(
									face.face.templateImage)));

					// Store the map from the face to the button
					this.map.put(face.face, b);

					// Add the button to the panel
					this.faceGroup.add(b);
					this.faceList.add(b);
				}
				else
				{
					// The identifier may have changed since the button was made
					existing.setText(face.toString());
				}

				// Either the face is new or it's existing, so we don't want
				// to remove it
				toRemove.remove(face.face);
			}

			// Remove all the faces that have disappeared.
			for (final TrackedFace face : toRemove)
			{
				final AbstractButton gone = this.map.get(face);
				this.faceList.remove(gone);
				this.faceGroup.remove(gone);
				this.map.remove(face);
			}

			// Lay out and redraw once, covering both additions and removals
			this.faceList.revalidate();
			this.faceList.repaint();

			// If nothing's selected, select the first one.
			if (this.faceGroup.getSelected() == null && !this.map.isEmpty())
				this.faceGroup.setSelected(this.map.values().iterator().next());
		}

		/**
		 * Returns the face that is selected.
		 *
		 * @return The selected face, or <code>null</code> if no displayed face
		 *         is selected
		 */
		public TrackedFace getSelectedFace()
		{
			for (final Map.Entry<TrackedFace, AbstractButton> entry : this.map.entrySet())
				if (entry.getValue() == this.faceGroup.getSelected())
					return entry.getKey();
			return null;
		}
	}

	/**
	 * Provides a comparable interface for tracked faces such that they can be
	 * sorted in a left-to-right order over the frame.
	 *
	 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
	 * @created 28 Aug 2012
	 * @version $Author$, $Revision$, $Date$
	 */
	private static class SortableTrackedFace
			implements Comparable<SortableTrackedFace>, Identifiable
	{
		/** The tracked face */
		public final TrackedFace face;

		/** The identifier of the face */
		public String identifier = "";

		/**
		 * Construct a sortable face
		 *
		 * @param f
		 *            The face to wrap
		 */
		public SortableTrackedFace(final TrackedFace f)
		{
			this.face = f;
		}

		/**
		 * Orders faces by ascending x-translation, i.e. left-to-right in the
		 * frame.
		 *
		 * @see java.lang.Comparable#compareTo(java.lang.Object)
		 */
		@Override
		public int compareTo(final SortableTrackedFace o)
		{
			// (4,0) in the global matrix is the x-translation.
			// Double.compare yields negative/zero/positive as the Comparable
			// contract requires, so sorting is well defined.
			return Double.compare(
					this.face.clm._pglobl.get(4, 0),
					o.face.clm._pglobl.get(4, 0));
		}

		/**
		 * {@inheritDoc}
		 *
		 * @see org.openimaj.data.identity.Identifiable#getID()
		 */
		@Override
		public String getID()
		{
			return this.identifier;
		}

		/**
		 * Returns the identifier of the face.
		 *
		 * @see java.lang.Object#toString()
		 */
		@Override
		public String toString()
		{
			return this.identifier;
		}
	}

	/** */
	private static final long serialVersionUID = 1L;

	/** The ground truth tool */
	private final VideoGroundTruth vgt;

	/** The face tracker we'll use in the video */
	private final CLMFaceTracker tracker = new CLMFaceTracker();

	/** The shot detector used to determine scenes within the video */
	private final HistogramVideoShotDetector shotDetector;

	/** The frame in which the tool will be displayed */
	private JFrame frame = null;

	/** The scene counter - used in creating unique identifiers */
	private int scene = 0;

	/**
	 * The faces we're currently tracking, sorted left-to-right. Never null;
	 * empty until the first detection has taken place.
	 */
	private List<SortableTrackedFace> sortedFaces =
			new ArrayList<SortableTrackedFace>();

	/**
	 * Whether the tracker needs to do a redetect before the next track. Set by
	 * the "Force Redetection" button and cleared by processFrame().
	 * NOTE(review): volatile because the button listener and the video
	 * listener presumably run on different threads - confirm.
	 */
	private volatile boolean needsRedetect = false;

	/** The panel showing the tracked faces */
	private TrackerInfo trackerInfo;

	/**
	 * Initiate the speaker diarization tool. This builds the GUI and shows
	 * both the video player frame and the tool frame.
	 *
	 * @param video
	 *            The video to diarize
	 * @param audio
	 *            The audio to play (can be null)
	 */
	public SpeakerDiarizationTool(final Video<MBFImage> video, final AudioStream audio)
	{
		this.vgt = new VideoGroundTruth(video, audio, this, this);
		this.shotDetector = new HistogramVideoShotDetector(video);

		this.init();
	}

	/**
	 * Initialises the GUI widgets, hooks the frame processing into the video
	 * player and shows the player and tool frames side by side.
	 */
	private void init()
	{
		this.setLayout(new GridBagLayout());

		final GridBagConstraints gbc = new GridBagConstraints();
		gbc.gridx = gbc.gridy = 1;
		gbc.weightx = gbc.weighty = 1;
		gbc.fill = GridBagConstraints.BOTH;

		this.trackerInfo = new TrackerInfo();
		this.add(this.trackerInfo, gbc);

		// Set up the video player.
		this.vgt.getVideoPlayer().setButtons(new String[] { "play", "pause" });
		this.vgt.getVideoPlayer().pause();
		this.vgt.getVideoPlayer().addVideoListener(new VideoDisplayListener<MBFImage>()
		{
			@Override
			public void beforeUpdate(final MBFImage frame)
			{
				SpeakerDiarizationTool.this.processFrame(frame);
			}

			@Override
			public void afterUpdate(final VideoDisplay<MBFImage> display)
			{
				// Nothing to do once the frame has been displayed
			}
		});

		// Show the video player
		final JFrame f = this.vgt.getVideoPlayer().showFrame();

		// Show the tool immediately to the right of the video player
		this.showFrame().setLocation(f.getLocation().x + f.getWidth(),
				f.getLocation().y);
	}

	/**
	 * This is the method that actually does most of the work. Each frame is
	 * passed to the shot detector; on a shot boundary or a user-requested
	 * redetection the tracker is reset, the faces are redetected, sorted and
	 * re-identified and the face list panel is updated. Otherwise the tracker
	 * simply continues tracking. The tracked model is then drawn on the frame.
	 *
	 * @param frame
	 *            The frame to process
	 */
	private void processFrame(final MBFImage frame)
	{
		// Pass the frame to our shot detector to see if the shot has changed.
		this.shotDetector.processFrame(frame);
		final boolean shotBoundary = this.shotDetector.wasLastFrameBoundary();

		// Consume any pending redetection request so that it applies to this
		// frame only rather than to every subsequent frame.
		final boolean forced = this.needsRedetect;
		if (forced)
			this.needsRedetect = false;

		if (shotBoundary || forced)
		{
			// Only a real shot boundary starts a new scene; a forced
			// redetection stays within the current scene.
			if (shotBoundary)
				this.scene++;

			System.out.println("=========== Scene " + this.scene + " ===========");

			// Now try to find faces in the image
			this.tracker.reset();
			this.tracker.track(frame);

			// Get a list of the faces being tracked
			final List<TrackedFace> faces = this.tracker.getModelTracker().trackedFaces;

			// Create a sorted list of faces (left-to-right in the image)
			final List<SortableTrackedFace> sorted =
					new ArrayList<SortableTrackedFace>(faces.size());
			for (final TrackedFace face : faces)
				sorted.add(new SortableTrackedFace(face));
			Collections.sort(sorted);

			// Update the identifiers based on the position in the scene
			for (int i = 0; i < sorted.size(); i++)
				sorted.get(i).identifier = "Scene " + this.scene + " Face " + i;

			// Publish the list only once it is fully built
			this.sortedFaces = sorted;

			System.out.println(sorted);

			this.trackerInfo.setFaceList(sorted);
		}
		else
		{
			// Continue to track the faces we already have.
			this.tracker.track(frame);
		}

		// Draw the tracked model onto the frame
		this.tracker.drawModel(frame, true, true, true, true, true);
	}

	/**
	 * Shows the tool in a frame. If a frame already exists it will be made
	 * visible.
	 *
	 * @return Returns the frame shown
	 */
	public JFrame showFrame()
	{
		if (this.frame == null)
		{
			this.frame = new JFrame();
			this.frame.add(this);
			this.frame.pack();
		}

		this.frame.setVisible(true);
		return this.frame;
	}

	/**
	 * Returns a new list containing the faces currently being tracked. The
	 * list is empty if no detection has taken place yet.
	 *
	 * @see org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.IdentifierProducer#getIdentifiers()
	 */
	@Override
	public List<Identifiable> getIdentifiers()
	{
		return new ArrayList<Identifiable>(this.sortedFaces);
	}

	/**
	 * Not implemented: always returns <code>null</code>.
	 *
	 * @see org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.StateProvider#getCurrentState(org.openimaj.data.identity.Identifiable)
	 */
	@Override
	public List<String> getCurrentState(final Identifiable id)
	{
		return null;
	}

	/**
	 * Runs the tool on a video file.
	 *
	 * @param args
	 *            Optionally, the name of the video file to open as the first
	 *            argument; defaults to "heads1.mpeg"
	 */
	public static void main(final String[] args)
	{
		String name = "heads1.mpeg";
		if (args.length > 0)
			name = args[0];

		final XuggleVideo xv = new XuggleVideo(new File(name));
		final XuggleAudio xa = new XuggleAudio(new File(name));
		new SpeakerDiarizationTool(xv, xa);
	}
}