001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030/**
031 * 
032 */
033package org.openimaj.demos.sandbox.video.gt;
034
035import java.awt.Color;
036import java.awt.Dimension;
037import java.awt.GridBagConstraints;
038import java.awt.GridBagLayout;
039import java.awt.GridLayout;
040import java.awt.event.ActionEvent;
041import java.awt.event.ActionListener;
042import java.io.File;
043import java.util.ArrayList;
044import java.util.Collections;
045import java.util.HashMap;
046import java.util.Iterator;
047import java.util.List;
048import java.util.Map;
049
050import javax.swing.AbstractButton;
051import javax.swing.ImageIcon;
052import javax.swing.JButton;
053import javax.swing.JFrame;
054import javax.swing.JPanel;
055import javax.swing.JToggleButton;
056
057import org.openimaj.audio.AudioStream;
058import org.openimaj.data.identity.Identifiable;
059import org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.IdentifierProducer;
060import org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.StateProvider;
061import org.openimaj.image.ImageUtilities;
062import org.openimaj.image.MBFImage;
063import org.openimaj.image.processing.face.tracking.clm.CLMFaceTracker;
064import org.openimaj.image.processing.face.tracking.clm.MultiTracker.TrackedFace;
065import org.openimaj.video.Video;
066import org.openimaj.video.VideoDisplay;
067import org.openimaj.video.VideoDisplayListener;
068import org.openimaj.video.processing.shotdetector.HistogramVideoShotDetector;
069import org.openimaj.video.xuggle.XuggleAudio;
070import org.openimaj.video.xuggle.XuggleVideo;
071
072/**
 * This class provides a tool for ground-truthing video data based around people.
074 * The tool implements a VideoPlayer which does face tracking and face
075 * extraction. The detected faces are displayed in a window alongside a
076 * classification chooser. When a classification is chosen it is remembered
077 * along with the timecode at which the classification was chosen. This is shown
078 * in a list which can be edited.
079 * 
080 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
081 * @created 10 Aug 2012
082 * @version $Author$, $Revision$, $Date$
083 */
084public class SpeakerDiarizationTool extends JPanel implements
085                StateProvider, IdentifierProducer
086{
087        /**
088         * A class that provides a display of the information that the tracker is
089         * tracking.
090         * 
091         * @author David Dupplaw (dpd@ecs.soton.ac.uk)
092         * @created 17 Jul 2012
093         * @version $Author$, $Revision$, $Date$
094         */
095        protected class TrackerInfo extends JPanel
096        {
097                /** */
098                private static final long serialVersionUID = 1L;
099
100                /** The list of faces being tracked */
101                private final JPanel faceList = new JPanel();
102
103                /** Map */
104                private final Map<TrackedFace, AbstractButton> map =
105                                new HashMap<TrackedFace, AbstractButton>();
106
107                /** Only allow one face to be tracked */
108                private final ButtonGroup faceGroup = new ButtonGroup();
109
110                /**
111                 * Default constructor
112                 */
113                public TrackerInfo() {
114                        super.setLayout(new GridBagLayout());
115                        super.setPreferredSize(new Dimension(600, 300));
116                        super.setSize(600, 300);
117                        this.init();
118                }
119
120                /**
121                 * Initialises the widgets.
122                 */
123                private void init() {
124                        final GridBagConstraints gbc = new GridBagConstraints();
125                        gbc.gridx = gbc.gridy = 1;
126                        gbc.weightx = gbc.weighty = 1;
127                        gbc.fill = GridBagConstraints.BOTH;
128
129                        // Add the list of faces
130                        this.faceList.setLayout(new GridLayout(-1, 1));
131                        this.faceList.setBackground(Color.black);
132                        this.add(this.faceList, gbc);
133
134                        // Add a button to force redetection
135                        final JButton b = new JButton("Force Redetection");
136                        b.addActionListener(new ActionListener()
137                        {
138                                @Override
139                                public void actionPerformed(final ActionEvent e)
140                                {
141                                        SpeakerDiarizationTool.this.needsRedetect = true;
142                                }
143                        });
144                        gbc.gridy++;
145                        gbc.weighty = 0;
146                        this.add(b, gbc);
147                }
148
149                /**
150                 * Set the list of faces being tracked.
151                 * 
152                 * @param faces
153                 *            The face list
154                 */
155                public void setFaceList(final List<SortableTrackedFace> faces) {
156                        final ArrayList<TrackedFace> toRemove = new ArrayList<TrackedFace>();
157                        toRemove.addAll(this.map.keySet());
158
159                        // Add new faces
160                        for (final SortableTrackedFace face : faces) {
161                                if (!this.map.keySet().contains(face)) {
162                                        // Add the face to the list as a toggle button
163                                        final JToggleButton b = new JToggleButton(face.toString(),
164                                                        new ImageIcon(ImageUtilities.createBufferedImage(
165                                                                        face.face.templateImage)));
166
167                                        // Store the map from the face to the button
168                                        this.map.put(face.face, b);
169
170                                        // Add the button to the panel
171                                        this.faceGroup.add(b);
172                                        this.faceList.add(b);
173                                        this.faceList.revalidate();
174                                }
175
176                                // Either the face is new or it's existing, so we
177                                // don't want to remove it - so we remove it from the
178                                // 'to remove' list
179                                toRemove.remove(face);
180                        }
181
182                        // Remove all the faces that have disappeared.
183                        for (final TrackedFace face : toRemove) {
184                                this.faceList.remove(this.map.get(face));
185                                this.faceGroup.remove(this.map.get(face));
186                                this.map.remove(face);
187                        }
188
189                        // If nothing's selected, select the first one.
190                        if (this.faceGroup.getSelected() == null && this.map.keySet().size() > 0)
191                                this.faceGroup.setSelected(this.map.values().iterator().next());
192                }
193
194                /**
195                 * Returns the face that is selected.
196                 * 
197                 * @return The selected face
198                 */
199                public TrackedFace getSelectedFace() {
200                        final Iterator<TrackedFace> faces = this.map.keySet().iterator();
201                        TrackedFace f = null;
202                        while (faces.hasNext())
203                                if (this.map.get(f = faces.next()) == this.faceGroup.getSelected())
204                                        return f;
205                        return null;
206                }
207        }
208
209        /**
210         * Provides a comparable interface for tracked faces such that they can be
211         * sorted in a left-to-right order over the frame.
212         * 
213         * @author David Dupplaw (dpd@ecs.soton.ac.uk)
214         * @created 28 Aug 2012
215         * @version $Author$, $Revision$, $Date$
216         */
217        private class SortableTrackedFace
218                        implements Comparable<SortableTrackedFace>, Identifiable
219        {
220                /** The tracked face */
221                public TrackedFace face = null;
222
223                /** The identifier of the face */
224                public String identifier = "";
225
226                /**
227                 * Construct a sortable face
228                 * 
229                 * @param f
230                 *            The face to wrap
231                 */
232                public SortableTrackedFace(final TrackedFace f)
233                {
234                        this.face = f;
235                }
236
237                /**
238                 * {@inheritDoc}
239                 * 
240                 * @see java.lang.Comparable#compareTo(java.lang.Object)
241                 */
242                @Override
243                public int compareTo(final SortableTrackedFace o)
244                {
245                        // (4,0) in the global matrix is the x-translation.
246                        return o.face.clm._pglobl.get(4, 0) < this.face.clm._pglobl.get(4, 0) ? 1 : 0;
247                }
248
249                /**
250                 * {@inheritDoc}
251                 * 
252                 * @see org.openimaj.data.identity.Identifiable#getID()
253                 */
254                @Override
255                public String getID()
256                {
257                        return this.identifier;
258                }
259
260                /**
261                 * {@inheritDoc}
262                 * 
263                 * @see java.lang.Object#toString()
264                 */
265                @Override
266                public String toString()
267                {
268                        return this.identifier;
269                }
270        }
271
272        /** */
273        private static final long serialVersionUID = 1L;
274
275        /** The ground truth tool */
276        private VideoGroundTruth vgt = null;
277
278        /** The face tracker we'll use in the video */
279        private final CLMFaceTracker tracker = new CLMFaceTracker();
280
281        /** The shot detector used to determine scenes within the video */
282        private HistogramVideoShotDetector shotDetector = null;
283
284        /** The frame in which the tool will be displayed */
285        private JFrame frame = null;
286
287        /** The scene counter - used in creating unique identifiers */
288        private int scene = 0;
289
290        /** The faces we're currently tracking */
291        private ArrayList<SortableTrackedFace> sortedFaces = null;
292
293        /** Whether the tracker needs to do a redetect before the next track */
294        private boolean needsRedetect = false;
295
296        private TrackerInfo trackerInfo;
297
298        /**
299         * Initiate the speaker diarization tool.
300         * 
301         * @param video
302         *            The video to diarize
303         * @param audio
304         *            The audio to play (can be null)
305         */
306        public SpeakerDiarizationTool(final Video<MBFImage> video, final AudioStream audio)
307        {
308                this.vgt = new VideoGroundTruth(video, audio, this, this);
309                this.shotDetector = new HistogramVideoShotDetector(video);
310
311                this.init();
312        }
313
314        /**
315         * Initialises the GUI widgets
316         */
317        private void init()
318        {
319                this.setLayout(new GridBagLayout());
320
321                final GridBagConstraints gbc = new GridBagConstraints();
322                gbc.gridx = gbc.gridy = 1;
323                gbc.weightx = gbc.weighty = 1;
324                gbc.fill = GridBagConstraints.BOTH;
325
326                this.trackerInfo = new TrackerInfo();
327                this.add(this.trackerInfo, gbc);
328
329                // Set up the video player.
330                this.vgt.getVideoPlayer().setButtons(new String[] { "play", "pause" });
331                this.vgt.getVideoPlayer().pause();
332                this.vgt.getVideoPlayer().addVideoListener(new VideoDisplayListener<MBFImage>()
333                {
334                        @Override
335                        public void beforeUpdate(final MBFImage frame)
336                        {
337                                SpeakerDiarizationTool.this.processFrame(frame);
338                        }
339
340                        @Override
341                        public void afterUpdate(final VideoDisplay<MBFImage> display)
342                        {
343                        }
344                });
345
346                // Show the video player
347                final JFrame f = this.vgt.getVideoPlayer().showFrame();
348
349                // Show the tool
350                this.showFrame().setLocation(f.getLocation().x + f.getWidth(),
351                                f.getLocation().y);
352        }
353
354        /**
355         * This is the method that actually does most of the work.
356         * 
357         * @param frame
358         *            The frame to process
359         */
360        private void processFrame(final MBFImage frame)
361        {
362                // Pass the frame to our shot detector to see if the shot has changed.
363                this.shotDetector.processFrame(frame);
364
365                // If we're in to a new scene, we update the scene counter
366                if (this.shotDetector.wasLastFrameBoundary() || this.needsRedetect)
367                {
368                        if (!this.needsRedetect)
369                                this.scene++;
370
371                        System.out.println("=========== Scene " + this.scene + " ===========");
372
373                        // Now try to find faces in the image
374                        this.tracker.reset();
375                        this.tracker.track(frame);
376
377                        // Get a list of the faces being tracked
378                        final List<TrackedFace> faces = this.tracker.getModelTracker().trackedFaces;
379
380                        // Created a sorted list of faces (left-to-right in the image)
381                        this.sortedFaces = new ArrayList<SortableTrackedFace>();
382                        for (int i = 0; i < faces.size(); i++)
383                                this.sortedFaces.add(new SortableTrackedFace(faces.get(i)));
384                        Collections.sort(this.sortedFaces);
385
386                        // Update the identifiers based on the position in the scene
387                        for (int i = 0; i < this.sortedFaces.size(); i++)
388                                this.sortedFaces.get(i).identifier = "Scene " + this.scene + " Face " + i;
389
390                        System.out.println(this.sortedFaces);
391
392                        this.trackerInfo.setFaceList(this.sortedFaces);
393                }
394                else
395                        // Continue to track the faces we already have.
396                        this.tracker.track(frame);
397
398                // Draw the tracked model onto the frame
399                this.tracker.drawModel(frame, true, true, true, true, true);
400        }
401
402        /**
403         * Shows the tool in a frame. If a frame already exists it will be made
404         * visible.
405         * 
406         * @return Returns the frame shown
407         */
408        public JFrame showFrame()
409        {
410                if (this.frame == null)
411                {
412                        this.frame = new JFrame();
413                        this.frame.add(this);
414                        this.frame.pack();
415                }
416
417                this.frame.setVisible(true);
418                return this.frame;
419        }
420
421        /**
422         * {@inheritDoc}
423         * 
424         * @see org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.IdentifierProducer#getIdentifiers()
425         */
426        @Override
427        public List<Identifiable> getIdentifiers()
428        {
429                final List<Identifiable> l = new ArrayList<Identifiable>();
430                l.addAll(this.sortedFaces);
431                return l;
432        }
433
434        /**
435         * {@inheritDoc}
436         * 
437         * @see org.openimaj.demos.sandbox.video.gt.VideoGroundTruth.StateProvider#getCurrentState(org.openimaj.data.identity.Identifiable)
438         */
439        @Override
440        public List<String> getCurrentState(final Identifiable id)
441        {
442                return null;
443        }
444
445        /**
446         * @param args
447         */
448        public static void main(final String[] args)
449        {
450                String name = "heads1.mpeg";
451                if (args.length > 0)
452                        name = args[0];
453
454                final XuggleVideo xv = new XuggleVideo(new File(name));
455                final XuggleAudio xa = new XuggleAudio(new File(name));
456                new SpeakerDiarizationTool(xv, xa);
457        }
458}