/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.demos.sandbox.video;

import java.awt.Dimension;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;

import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.face.tracking.clm.CLMFaceTracker;
import org.openimaj.image.processing.face.tracking.clm.MultiTracker;
import org.openimaj.image.processing.face.tracking.clm.MultiTracker.TrackedFace;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.math.util.RunningStat;

/**
 *	Uses a tracker ({@link CLMFaceTracker}) to track the faces in the frame
 *	and uses the size of each tracked face, relative to the frame, to
 *	determine the shot type.
 *	<p>
 *	The shot types detected are:
 *	<ul>
 *	<li><b>Close Up:</b> The face fills the frame - that is, it takes up
 *		12% or more of the frame.</li>
 *	<li><b>Medium Close Up:</b> The person is filmed head-and-shoulders - that
 *		is, the face takes up between 3% and 12% of the frame.</li>
 *	<li><b>Mid-Shot:</b> The person is filmed half-body or less - that is,
 *		the face takes up between 0% and 3% of the frame.</li>
 *	<li><b>Two-Shot:</b> There are two people in the frame.</li>
 *	<li><b>Group-Shot:</b> There are more than two people in the frame.</li>
 *	</ul>
 *	<p>
 *	If the tracker loses track of any of the faces, that face is removed
 *	from the set that is used to produce the annotations.
 *	<p>
 *	A cumulative moving average is used to store the size of each face
 *	during the tracking of the faces.
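 *	As a sketch of the arithmetic (assuming {@link RunningStat} maintains
 *	its mean incrementally), after the n-th sample x_n the average becomes
 *	mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n.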
 *	<p>
 *	The {@link CLMFaceTracker} is also able to provide pose information about
 *	the subject. From this we are able to determine some extra annotations
 *	about the subject - such as whether they appear to be talking to an off-screen
 *	person (interviewee or interviewer), or talking to the camera (presenter
 *	or anchor).
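 *	<p>
 *	A minimal usage sketch (the file name is hypothetical, and it is
 *	assumed that the annotations accumulated by the {@link VideoAnnotator}
 *	superclass are read back once the video has been processed):
 *	<pre>{@code
 *	final XuggleVideo video = new XuggleVideo( new File( "interview.mp4" ) );
 *	final FaceShotTypeAnnotator annotator = new FaceShotTypeAnnotator();
 *	annotator.process( video );   // calls processFrame() on every frame
 *	}</pre>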
 *
 *	@author David Dupplaw (dpd@ecs.soton.ac.uk)
 *	@created 22 Jan 2013
 *	@version $Author$, $Revision$, $Date$
 */
public class FaceShotTypeAnnotator extends VideoAnnotator<MBFImage, String>
{
	/** The ontology URI */
	private static final String ONTO = "http://onto.dupplaw.me.uk/video#";

	/** The face tracker we're going to use to track faces */
	private CLMFaceTracker faceTracker = null;

	/** The percentage of the full frame that each face covers */
	private HashMap<TrackedFace,RunningStat> faceSizes = null;

	/** The average pose of each face */
	private HashMap<TrackedFace,RunningStat[]> facePoses = null;

	/** The size of the frame, stored for any visual outputs */
	private Dimension frameSize = null;

	/** The last frame processed - for visualisations */
	private MBFImage lastFrame = null;

	/**
	 *	Constructor
	 */
	public FaceShotTypeAnnotator()
	{
		this.faceTracker = new CLMFaceTracker();
		this.faceSizes = new HashMap<MultiTracker.TrackedFace, RunningStat>();
		this.facePoses = new HashMap<MultiTracker.TrackedFace, RunningStat[]>();
	}

	/**
	 *	{@inheritDoc}
	 *	@see org.openimaj.video.processor.VideoProcessor#processFrame(org.openimaj.image.Image)
	 */
	@Override
	public MBFImage processFrame( final MBFImage frame )
	{
		// Store the size of the frame.
		this.frameSize = new Dimension( frame.getWidth(), frame.getHeight() );

		// Track the faces in the image.
		this.faceTracker.track( frame );

		// Get the tracked faces.
		// The assumption is that the tracker will return the same TrackedFace
		// object for the same face in the image as the tracking continues.
		// FIXME: If the tracker is set to auto-redetect, it returns different objects for the same faces
		final List<TrackedFace> trackedFaces = this.faceTracker.getTrackedFaces();

		// Calculate the area of the frame (named frameArea to avoid
		// shadowing the frameSize field above)
		final double frameArea = frame.getWidth() * frame.getHeight();

		// Remove any faces which no longer exist
		final HashSet<TrackedFace> missingFaces =
				new HashSet<TrackedFace>( this.faceSizes.keySet() );
		for( final TrackedFace face : trackedFaces )
			missingFaces.remove( face );
		for( final TrackedFace face : missingFaces )
		{
			this.faceSizes.remove( face );
			this.facePoses.remove( face );
		}

		// Loop over the tracked faces and update the map
		for( final TrackedFace face : trackedFaces )
		{
			// ---------- SIZE -----------
			// Calculate the size of the face as a fraction of the frame area
			final double faceSizePc = (face.lastMatchBounds.width *
					face.lastMatchBounds.height) / frameArea;
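			// (This fraction lies in [0,1]; addShotTypeAnnotations() later
			// compares it against the 0.03 and 0.12 shot-type thresholds.)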

			// If it's a new face, initialise the running average with the
			// current size of the face.
			if( this.faceSizes.get( face ) == null )
				this.faceSizes.put( face, new RunningStat(faceSizePc) );
			// If the face exists, we update the size
			else
			{
				// Get the current running face size average
				final RunningStat ca = this.faceSizes.get( face );

				// Update the face average
				ca.push( faceSizePc );
			}

			// ---------- POSE -----------
			// Get the pose information for this face
			final int nParams = face.clm._pglobl.getRowDimension();
			final double[] poseInfo = new double[nParams];
			for( int i = 0; i < nParams; i++ )
				poseInfo[i] = face.clm._pglobl.get(i,0);
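			// (The assumption is that _pglobl follows the usual CLM layout:
			// index 0 is the scale, indices 1-3 the x/y/z rotations - see
			// getAnnotationURIsForFace() - and the remainder the translation.)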

			// If it's a new face, create a stats array for it in the map
			RunningStat[] stats = this.facePoses.get(face);
			if( stats == null )
				this.facePoses.put( face, stats = new RunningStat[nParams] );

			// Update the average pose information
			for( int i = 0; i < nParams; i++ )
			{
				if( stats[i] == null ) stats[i] = new RunningStat();
				stats[i].push( poseInfo[i] );
			}
		}

		// Store the last frame for visualisation purposes.
		this.lastFrame = frame;

		// Return the original frame untouched.
		return frame;
	}

	/**
	 *	{@inheritDoc}
	 *	@see org.openimaj.demos.sandbox.video.VideoAnnotator#updateAnnotations()
	 */
	@Override
	protected void updateAnnotations()
	{
		this.addShotTypeAnnotations();
	}

	/**
	 *	Determines the shot type annotations to add, based
	 *	on the face stats that have been captured.
	 */
	private void addShotTypeAnnotations()
	{
		// Check if we found any faces
		if( this.faceSizes.isEmpty() )
			return;

		// If we found more than one face, then it's some sort of
		// group shot.
		if( this.faceSizes.size() > 1 )
		{
			// Two faces is a two-shot; more than two is a group shot.
			if( this.faceSizes.size() == 2 )
				this.annotations.add( FaceShotTypeAnnotator.ONTO+"TwoShot" );
			else
				this.annotations.add( FaceShotTypeAnnotator.ONTO+"GroupShot" );
		}
		// There is one face in the video:
		else
		{
			// If there's only one face, we'll retrieve it.
			final TrackedFace onlyFace = this.faceSizes.keySet().iterator().next();

			// ==============================================================
			// Determine the shot type
			// ==============================================================
			// Retrieve its average size:
			final double size = this.faceSizes.get( onlyFace ).mean();

			// Mid-Shot: the face takes up 3% or less of the frame
			if( size <= 0.03 )
				this.annotations.add( FaceShotTypeAnnotator.ONTO+"MidShot" );
			// Medium Close Up: between 3% and 12%
			else if( size <= 0.12 )
				this.annotations.add( FaceShotTypeAnnotator.ONTO+"MediumCloseUp" );
			// Close Up: 12% or more
			else
				this.annotations.add( FaceShotTypeAnnotator.ONTO+"CloseUp" );

			final List<String> faceAnns = this.getAnnotationURIsForFace( onlyFace,
					this.facePoses.get( onlyFace ) );
			this.annotations.addAll( faceAnns );
		}

		// This displays an image containing the average poses of the faces found
		this.displayAveragePoseImage( this.lastFrame );
	}

	/**
	 *	Given a face, returns a list of URIs (as Strings) that provide
	 *	annotations for that face, based on its pose and shape.
	 *	<p>
	 *	Of the pose parameters:
	 *	<ul>
	 *	<li>i=1 is the x-rotation (head nod),</li>
	 *	<li>i=2 is the y-rotation (head shake), and</li>
	 *	<li>i=3 is the z-rotation (head tilt).</li>
	 *	</ul>
	 *	From these we are able to make some assumptions about the face.
	 *
	 *	@param face The face to annotate
	 *	@param runningStats The average running stats of the given face
	 *	@return A list of String URIs
	 */
	private List<String> getAnnotationURIsForFace( final TrackedFace face,
			final RunningStat[] runningStats )
	{
		// This is the list of URIs we'll return
		final List<String> uris = new ArrayList<String>();

		// Check the average pose information for the face.
		// If the head is mostly looking at the camera - that is, all three
		// rotations stay within 0.5 radians (roughly 30 degrees) of zero...
		if( Math.abs( runningStats[1].mean() ) < 0.5 &&
			Math.abs( runningStats[2].mean() ) < 0.5 &&
			Math.abs( runningStats[3].mean() ) < 0.5 )
		{
			// ...then they're probably talking to the camera. Determining whether
			// their role is presenter, or not, is more difficult!
			uris.add( FaceShotTypeAnnotator.ONTO+"TalkingHead" );
		}

		return uris;
	}

	/**
	 *	Shows (and returns) an image containing the current average poses of all the
	 *	faces in the video shot so far.
	 *	@param frame (optional) Frame to place in the background of the visualisation
	 *	@return an MBFImage
	 */
	public MBFImage displayAveragePoseImage( final MBFImage frame )
	{
		// We'll generate a video frame the size of the frame that was
		// last processed (the assumption is that video frame size doesn't change)
		final MBFImage image = new MBFImage(
					(int)this.frameSize.getWidth(),
					(int)this.frameSize.getHeight(), 3 );

		// If we have a frame to put in the background, dim it a bit...
		if( frame != null )
			image.addInplace( frame.multiply( 0.5f ) );

		// Create a tracked face that we'll morph into the various average
		// parameters to draw to the image
		final TrackedFace avgFace = new TrackedFace(
			new Rectangle(50, -50, 500, 500), this.faceTracker.getInitialVars() );
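		// (The rectangle here is just an initial placeholder; the pose of
		// this model face is overwritten below for each tracked face.)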

		// We need the arrays of triangles and connections to draw the faces
		final int[][] connections = this.faceTracker.connections;
		final int[][] triangles = this.faceTracker.triangles;

		// Loop through each of the faces setting the pose and drawing to an image
		for( final TrackedFace face : this.facePoses.keySet() )
		{
			// Get the average pose for the tracked face
			final RunningStat[] poseInfo = this.facePoses.get( face );

			// Set the model face to this pose
			for( int i = 0; i < poseInfo.length; i++ )
				avgFace.clm._pglobl.set( i, 0, poseInfo[i].mean() );

			// Recalculate the face shape
			avgFace.clm._pdm.calcShape2D( avgFace.shape,
					avgFace.clm._plocal, avgFace.clm._pglobl );

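			// (The boolean flags are assumed to select which parts of the
			// model to render - e.g. mesh triangles, connections, points,
			// search area and bounds - per CLMFaceTracker.drawFaceModel.)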
			// Draw the model to the image.
			CLMFaceTracker.drawFaceModel( image, avgFace, true, true,
					true, true, true, triangles, connections, 1, RGBColour.WHITE,
					RGBColour.WHITE, RGBColour.YELLOW, RGBColour.RED );
		}

		DisplayUtilities.display( image );

		return image;
	}

	/**
	 *	{@inheritDoc}
	 *	@see org.openimaj.demos.sandbox.video.VideoAnnotator#resetAnnotator()
	 */
	@Override
	protected void resetAnnotator()
	{
		this.faceTracker.reset();
	}
}