/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.demos.sandbox.video;

import java.awt.Dimension;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;

import org.openimaj.image.DisplayUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.face.tracking.clm.CLMFaceTracker;
import org.openimaj.image.processing.face.tracking.clm.MultiTracker;
import org.openimaj.image.processing.face.tracking.clm.MultiTracker.TrackedFace;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.math.util.RunningStat;

/**
 * Uses a tracker ({@link CLMFaceTracker})
 * to track the face in the frame and determine whether the face stays within
 * certain size boundaries to determine shot type.
 * <p>
 * The shot types detected are:
 * <ul>
 * <li><b>Close Up:</b> The face fills the frame - that is, it takes up
 * 12% or more of the frame.</li>
 * <li><b>Medium Close Up:</b> The person is filmed head-and-shoulders - that
 * is, the face takes up between 3% and 12% of the frame.</li>
 * <li><b>Mid-Shot:</b> The person is filmed half-body or less - that is,
 * the face takes up between 0% and 3% of the frame.</li>
 * <li><b>Two-Shot:</b> If there are two people in the frame.</li>
 * <li><b>Group-Shot:</b> If there are more than two people in the frame.</li>
 * </ul>
 * <p>
 * If the tracker loses track of any of the faces, then the face is removed
 * from the set that is used to produce the annotations.
 * <p>
 * The cumulative moving average is used to store the size of each face during
 * the tracking of the faces.
 * <p>
 * The {@link CLMFaceTracker} is also able to provide pose information about
 * the subject.
From this we are able to determine some extra annotations 075 * about the subject - such as whether they appear to be talking to an off-screen 076 * person (interviewee or interviewer), or talking to the camera (presenter 077 * or anchor). 078 * 079 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 080 * @created 22 Jan 2013 081 * @version $Author$, $Revision$, $Date$ 082 */ 083public class FaceShotTypeAnnotator extends VideoAnnotator<MBFImage, String> 084{ 085 /** The ontology URI */ 086 private static final String ONTO = "http://onto.dupplaw.me.uk/video#"; 087 088 /** The face tracker we're going to use to track faces */ 089 private CLMFaceTracker faceTracker = null; 090 091 /** The percentage that each face covers the full frame */ 092 private HashMap<TrackedFace,RunningStat> faceSizes = null; 093 094 /** The average pose of each face */ 095 private HashMap<TrackedFace,RunningStat[]> facePoses = null; 096 097 /** The size of the frame stored for any visual outputs */ 098 private Dimension frameSize = null; 099 100 /** The last frame processed - for visualisations */ 101 private MBFImage lastFrame = null; 102 103 /** 104 * Constructor 105 */ 106 public FaceShotTypeAnnotator() 107 { 108 this.faceTracker = new CLMFaceTracker(); 109 this.faceSizes = new HashMap<MultiTracker.TrackedFace, RunningStat>(); 110 this.facePoses = new HashMap<MultiTracker.TrackedFace, RunningStat[]>(); 111 } 112 113 /** 114 * {@inheritDoc} 115 * @see org.openimaj.video.processor.VideoProcessor#processFrame(org.openimaj.image.Image) 116 */ 117 @Override 118 public MBFImage processFrame( final MBFImage frame ) 119 { 120 // Store the size of the frame. 121 this.frameSize = new Dimension( frame.getWidth(), frame.getHeight() ); 122 123 // Track the face in the image. 124 this.faceTracker.track( frame ); 125 126 // Get the tracked faces 127 // The assumption is that the tracker will return the same TrackedFace 128 // object for the same face in the image as the tracking continues. 
129 // FIXME: If the tracker is set to auto-redetect, it returns different objects for the same faces 130 final List<TrackedFace> trackedFaces = this.faceTracker.getTrackedFaces(); 131 132 // Calculate the size of the frame 133 final double frameSize = frame.getWidth() * frame.getHeight(); 134 135 // Remove any faces which no longer exist 136 final HashSet<TrackedFace> missingFaces = 137 new HashSet<TrackedFace>( this.faceSizes.keySet() ); 138 for( final TrackedFace face : trackedFaces ) 139 missingFaces.remove( face ); 140 for( final TrackedFace face : missingFaces ) 141 { 142 this.faceSizes.remove( face ); 143 this.facePoses.remove( face ); 144 } 145 146 // Loop over the tracked faces and update the map 147 for( final TrackedFace face : trackedFaces ) 148 { 149 // ---------- SIZE ----------- 150 // Calculate the size of the face in percent of the frame size 151 final double faceSizePc = (face.lastMatchBounds.width * 152 face.lastMatchBounds.height) / frameSize; 153 154 // If it's a new face, we make the cumulative average to be the 155 // current size of the face. 
156 if( this.faceSizes.get( face ) == null ) 157 this.faceSizes.put( face, new RunningStat(faceSizePc) ); 158 // If the face exists, we update the size 159 else 160 { 161 // Get the current running face average 162 final RunningStat ca = this.faceSizes.get( face ); 163 164 // Update the face average 165 ca.push( faceSizePc ); 166 } 167 168 // ---------- POSE ----------- 169 // Get the pose information for this face 170 final int nParams = face.clm._pglobl.getRowDimension(); 171 final double[] poseInfo = new double[nParams]; 172 for( int i = 0; i < nParams; i++ ) 173 poseInfo[i] = face.clm._pglobl.get(i,0); 174 175 // If it's a new face, store the current pose into the map 176 RunningStat[] stats = this.facePoses.get(face); 177 if( stats == null ) 178 this.facePoses.put( face, stats = new RunningStat[nParams] ); 179 180 // Update the average pose information 181 for( int i = 0; i < nParams; i++ ) 182 { 183 if( stats[i] == null ) stats[i] = new RunningStat(); 184 stats[i].push( poseInfo[i] ); 185 } 186 } 187 188 // Store the last frame for visualisation purposes. 189 this.lastFrame = frame; 190 191 // Returns the original frame untouched. 192 return frame; 193 } 194 195 /** 196 * {@inheritDoc} 197 * @see org.openimaj.demos.sandbox.video.VideoAnnotator#updateAnnotations() 198 */ 199 @Override 200 protected void updateAnnotations() 201 { 202 this.addShotTypeAnnotations(); 203 } 204 205 /** 206 * Determines the shot type annotations to add based 207 * on the face stats that has been captured. 208 */ 209 private void addShotTypeAnnotations() 210 { 211 // Check if we found any faces 212 if( this.faceSizes.keySet().size() == 0 ) 213 return; 214 215 // If we found more than one face, then it's some sort of 216 // group shot. 217 if( this.faceSizes.keySet().size() > 1 ) 218 { 219 // Group shot. 
220 if( this.faceSizes.keySet().size() == 2 ) 221 this.annotations.add( FaceShotTypeAnnotator.ONTO+"TwoShot" ); 222 else this.annotations.add( FaceShotTypeAnnotator.ONTO+"GroupShot" ); 223 } 224 // There is one face in the video: 225 else 226 { 227 // If there's only one face, we'll retrieve it. 228 final TrackedFace onlyFace = this.faceSizes.keySet().iterator().next(); 229 230 // ============================================================== 231 // Determine the shot type 232 // ============================================================== 233 // Retrieve it's average size: 234 final double size = this.faceSizes.get( onlyFace ).mean(); 235 236 // Mid-Shot 237 if( size <= 0.03 ) 238 this.annotations.add( FaceShotTypeAnnotator.ONTO+"MidShot" ); 239 else 240 // Medium Close Up 241 if( size <= 0.12 ) 242 this.annotations.add( FaceShotTypeAnnotator.ONTO+"MediumCloseUp" ); 243 // Close up shot 244 else 245 this.annotations.add( FaceShotTypeAnnotator.ONTO+"CloseUp" ); 246 247 final List<String> faceAnns = this.getAnnotationURIsForFace( onlyFace, 248 this.facePoses.get( onlyFace ) ); 249 this.annotations.addAll( faceAnns ); 250 } 251 252 // This displays an image containing the average poses of the faces found 253 this.displayAveragePoseImage( this.lastFrame ); 254 } 255 256 /** 257 * Given a face, it will return a list of URIs (as Strings) that provide 258 * annotations for the given face, based on its pose and shape. 259 * 260 * Of the pose parameters: 261 * i=1 is the x-rotation (head nod), 262 * i=2 is the y-rotation (head shake), and 263 * i=3 is the z-rotation (headstand) 264 * 265 * From these we are able to make some assumptions about the face. 
266 * 267 * @param face The face to annotate 268 * @param runningStats The average running stats of the given face 269 * @return A list of String URIs 270 */ 271 private List<String> getAnnotationURIsForFace( final TrackedFace face, 272 final RunningStat[] runningStats ) 273 { 274 // This is the list of URIs we'll return 275 final List<String> uris = new ArrayList<String>(); 276 277 // Get the shape and pose information 278 279 // If the head is mostly looking at the camera... 280 if( Math.abs( runningStats[1].mean() ) < 0.5 && 281 Math.abs( runningStats[2].mean() ) < 0.5 && 282 Math.abs( runningStats[3].mean() ) < 0.5 ) 283 { 284 // Then they're probably talking to the camera. Determining whether 285 // their role is presenter, or not, is more difficult! 286 uris.add( FaceShotTypeAnnotator.ONTO+"TalkingHead" ); 287 } 288 289 return uris; 290 } 291 292 /** 293 * Shows (and returns) an image containing the current average poses of all the 294 * faces in the video shot so far. 295 * @param frame (optional) Frame to place in the background of the visualisation 296 * @return an MBFImage 297 */ 298 public MBFImage displayAveragePoseImage( final MBFImage frame ) 299 { 300 // We'll generate a video frame the size of the frame that was 301 // last processed (the assumption is that video frame size doesn't change) 302 final MBFImage image = new MBFImage( 303 (int)this.frameSize.getWidth(), 304 (int)this.frameSize.getHeight(), 3 ); 305 306 // If we have a frame to put in the background, dim it a bit... 
307 if( frame != null ) 308 image.addInplace( frame.multiply( 0.5f ) ); 309 310 // Create a tracked face that we'll morph into the various average 311 // parameters to draw to the image 312 final TrackedFace avgFace = new TrackedFace( 313 new Rectangle(50, -50, 500, 500), this.faceTracker.getInitialVars() ); 314 315 // We need the arrays of triangles and connections to draw the faces 316 final int[][] connections = this.faceTracker.connections; 317 final int[][] triangles = this.faceTracker.triangles; 318 319 // Loop through each of the faces setting the pose and drawing to an image 320 for( final TrackedFace face : this.facePoses.keySet() ) 321 { 322 // Get the average pose for the tracked face 323 final RunningStat[] poseInfo = this.facePoses.get( face ); 324 325 // Set the model face to this pose 326 for( int i = 0; i < poseInfo.length; i++ ) 327 avgFace.clm._pglobl.set( i, 0, poseInfo[i].mean() ); 328 329 // Recalculate the face shape 330 avgFace.clm._pdm.calcShape2D( avgFace.shape, 331 avgFace.clm._plocal, avgFace.clm._pglobl ); 332 333 // Draw the model to the image. 334 CLMFaceTracker.drawFaceModel( image, avgFace, true, true, 335 true, true, true, triangles, connections, 1, RGBColour.WHITE, 336 RGBColour.WHITE, RGBColour.YELLOW, RGBColour.RED ); 337 } 338 339 DisplayUtilities.display( image ); 340 341 return image; 342 } 343 344 /** 345 * {@inheritDoc} 346 * @see org.openimaj.demos.sandbox.video.VideoAnnotator#resetAnnotator() 347 */ 348 @Override 349 protected void resetAnnotator() 350 { 351 this.faceTracker.reset(); 352 } 353}