/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 *
 */
package org.openimaj.tools.faces.extraction;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.image.processing.face.detection.DetectedFace;
import org.openimaj.image.processing.face.detection.HaarCascadeDetector;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.util.pair.IndependentPair;
import org.openimaj.video.processing.shotdetector.HistogramVideoShotDetector;
import org.openimaj.video.processing.shotdetector.ShotBoundary;
import org.openimaj.video.processing.timefinder.ObjectTimeFinder;
import org.openimaj.video.processing.timefinder.ObjectTimeFinder.TimeFinderListener;
import org.openimaj.video.processing.tracking.BasicMBFImageObjectTracker;
import org.openimaj.video.timecode.VideoTimecode;
import org.openimaj.video.xuggle.XuggleVideo;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * A tool that provides a means of extracting faces from videos and images.
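 * <p>
 * A minimal programmatic usage sketch (for illustration only; it assumes the
 * args4j-annotated option fields of {@link FaceExtractorToolOptions} are
 * directly settable, and the paths are hypothetical):
 * <pre>
 * FaceExtractorToolOptions o = new FaceExtractorToolOptions();
 * o.videoFile  = "/path/to/video.mp4";    // video to scan for faces
 * o.outputFile = "/path/to/output-dir";   // directory for face/frame/XML output
 * new FaceExtractorTool( o );             // runs the extraction immediately
 * </pre>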
 *
 * @author David Dupplaw (dpd@ecs.soton.ac.uk)
 *
 * @created 7 Nov 2011
 */
public class FaceExtractorTool
{
    /** The options for this tool instance */
    private FaceExtractorToolOptions options = null;

    /** The video from which to extract faces */
    private XuggleVideo video = null;

    /** The output directory where we'll write the extracted faces */
    private File outputDir = null;

    /** Used in the object tracking to store the frame in which the best face was found */
    private MBFImage bestFaceFrame = null;

    /** Used in the object tracking to store the best face timecode for each face */
    private VideoTimecode bestFaceTimecode = null;

    /** Used in the object tracking to store the best face bounding box */
    private Rectangle bestFaceBoundingBox = null;

    /**
     * Default constructor
     * @param o the options
     */
    public FaceExtractorTool( FaceExtractorToolOptions o )
    {
        this.options = o;

        this.outputDir = new File( o.outputFile );

        // If we have a video file, read in the video
        if( options.videoFile != null )
        {
            // Create the video reader for reading the video
            this.video = new XuggleVideo( new File( options.videoFile ) );
        }

        // Create the output directory if it doesn't exist.
        if( !this.outputDir.exists() )
            this.outputDir.mkdirs();

        // Process the video
        this.processVideo();
    }

    /**
     * Process the video to extract faces.
     */
    private void processVideo()
    {
        if( this.options.verbose )
        {
            System.out.println( this.options.videoFile );
            System.out.println( " - Size: "+video.getWidth()+"x"+video.getHeight() );
            System.out.println( " - Frame Rate: "+video.getFPS() );
            System.out.println( "Detecting shots in video..." );
        }

        // This is the video shot detector we'll use to find the shots in
        // the incoming video. These shots will provide hard limits for the
        // face tracking.
        HistogramVideoShotDetector vsd = new HistogramVideoShotDetector( this.video );
        vsd.setThreshold( this.options.threshold );
        vsd.setFindKeyframes( true );
        vsd.setStoreAllDifferentials( false );
        vsd.process();

        // Retrieve the shots from the shot detector
        List<ShotBoundary<MBFImage>> shots = vsd.getShotBoundaries();

        if( this.options.verbose )
            System.out.println( "Found "+shots.size()+" shots." );

        // We'll use the HaarCascadeDetector (frontalface_alt2 cascade) for
        // detecting faces.
        HaarCascadeDetector fd = HaarCascadeDetector.BuiltInCascade.frontalface_alt2.load();
        fd.setMinSize( this.options.faceSize );

        // Now we'll go through the video looking for faces every x seconds.
        this.video.reset();

        // For each shot boundary...
        ShotBoundary<MBFImage> prev = shots.get(0);
        for( int i = 1; i < shots.size(); i++ )
        {
            ShotBoundary<MBFImage> thiz = shots.get(i);

            // Get the timecodes of the shot. Remember the timecode gives the
            // start of the shot, so the shot is between the previous timecode
            // and one frame before this timecode.
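            // For example (illustrative numbers only), consecutive boundaries
            // at frames 100 and 250 give a shot spanning frames 100..249.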
            long pframe = prev.getTimecode().getFrameNumber();
            long tframe = thiz.getTimecode().getFrameNumber()-1;

            if( this.options.verbose )
                System.out.println( "Shot: "+prev+" ("+pframe+") -> "+thiz+" ("+tframe+")" );

            // This will be the frame we'll store for a given face
            MBFImage faceFrame = null;

            // Now loop around looking for faces in the shot
            List<DetectedFace> faces = null;
            boolean doneSearching = false;
            while( !doneSearching )
            {
                // If we're supposed to use just the centre frame, then we'll work
                // out where that centre frame is and grab the frame.
                if( this.options.useCentre )
                {
                    long mframe = pframe + ((tframe - pframe) / 2);
                    video.setCurrentFrameIndex( mframe );
                    faceFrame = video.getCurrentFrame();
                    doneSearching = true;
                }
                // If we're searching for a face every x seconds, then we'll skip
                // forward by x seconds at a time.
                else
                {
                    // Push the video forward by x seconds' worth of frames
                    pframe += options.seconds * video.getFPS();

                    // Check if we're still within the shot
                    if( pframe >= tframe )
                    {
                        doneSearching = true;
                        pframe = tframe;
                    }

                    // Push the video forward
                    video.setCurrentFrameIndex( pframe );
                    faceFrame = video.getCurrentFrame();
                }

                if( this.options.verbose )
                    System.out.println( " - Using frame "+
                        video.getCurrentTimecode()+" ("+video.getTimeStamp()+")" );

                // Detect faces in the frame
                faces = fd.detectFaces( faceFrame.flatten() );

                if( this.options.verbose )
                    System.out.println( " + Found "+faces.size()+" faces in frame." );

                if( faces.size() > 0 )
                    doneSearching = true;

                // For each of the detected faces (if there are any) track
                // back and forth in the video to find the times at which the
                // face is best. As a consequence, we will also end up with the
                // timecodes at which the face appears in the video (at any size)
                for( DetectedFace f : faces )
                {
                    if( options.verbose )
                        System.out.println( " - Tracking face..." );

                    bestFaceTimecode = null;
                    bestFaceFrame = null;
                    bestFaceBoundingBox = null;

                    // We'll use this ObjectTimeFinder to track the faces once they're
                    // extracted from the video.
                    ObjectTimeFinder otf = new ObjectTimeFinder();
                    IndependentPair<VideoTimecode, VideoTimecode> timecodes =
                        otf.trackObject( new BasicMBFImageObjectTracker(), video,
                            video.getCurrentTimecode(), f.getBounds(),
                            new TimeFinderListener<Rectangle,MBFImage>()
                            {
                                double maxArea = 0;

                                @Override
                                public void objectTracked(
                                        List<Rectangle> objects,
                                        VideoTimecode time,
                                        MBFImage image )
                                {
                                    if( objects.size() > 0 &&
                                        objects.get(0).calculateArea() > maxArea )
                                    {
                                        maxArea = objects.get(0).calculateArea();
                                        bestFaceTimecode = time;
                                        bestFaceFrame = image.clone();
                                        bestFaceBoundingBox = objects.get(0);
                                    }
                                }
                            } );

                    if( options.verbose )
                        System.out.println( " - Face tracked between "+timecodes );

                    if( bestFaceBoundingBox != null ) try
                    {
                        saveFace( bestFaceFrame,
                            bestFaceTimecode.getFrameNumber(), timecodes,
                            bestFaceBoundingBox );
                    }
                    catch( IOException e )
                    {
                        e.printStackTrace();
                    }
                }
            }

            prev = thiz;
        }
    }

    /**
     * Writes a face image to an appropriate file in the output directory
     * named as per the input file but suffixed with the frame number.
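     * For example (illustrative names only), frame 1234 of an input called
     * {@code video.mp4} would produce {@code video.mp4#1234.face.png},
     * {@code video.mp4#1234.frame.png} and/or {@code video.mp4#1234.xml},
     * depending on which of the write options are enabled.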
     *
     * @param frame The video frame
     * @param mframe The frame number
     * @param timecodes The timecodes of the face
     * @param bounds The bounding box of the face
     * @throws IOException If the write cannot be completed
     */
    private void saveFace( MBFImage frame, long mframe,
            IndependentPair<VideoTimecode, VideoTimecode> timecodes,
            Rectangle bounds ) throws IOException
    {
        File base = new File( this.options.outputFile );

        if( options.writeFaceImage )
        {
            File img = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".face.png" );
            ImageUtilities.write( frame.extractROI( bounds ), img );
        }

        if( options.writeFrameImage )
        {
            File img = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".frame.png" );
            ImageUtilities.write( frame, img );
        }

        if( options.writeXML )
        {
            File xml = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".xml" );

            try
            {
                DocumentBuilderFactory documentBuilderFactory =
                        DocumentBuilderFactory.newInstance();
                DocumentBuilder documentBuilder =
                        documentBuilderFactory.newDocumentBuilder();
                Document document = documentBuilder.newDocument();
                Element rootElement = document.createElement( "face" );
                document.appendChild( rootElement );

                Element em = document.createElement( "boundingBox" );
                em.appendChild( document.createTextNode( bounds.toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceTimecode" );
                em.appendChild( document.createTextNode( timecodes.firstObject().toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceTimecode" );
                em.appendChild( document.createTextNode( timecodes.secondObject().toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceFrame" );
                em.appendChild( document.createTextNode( ""+timecodes.firstObject().getFrameNumber() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceFrame" );
                em.appendChild( document.createTextNode( ""+timecodes.secondObject().getFrameNumber() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceTime" );
                em.appendChild( document.createTextNode( ""+timecodes.firstObject().getTimecodeInMilliseconds() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceTime" );
                em.appendChild( document.createTextNode( ""+timecodes.secondObject().getTimecodeInMilliseconds() ) );
                rootElement.appendChild( em );

                try
                {
                    TransformerFactory transformerFactory =
                            TransformerFactory.newInstance();
                    Transformer transformer = transformerFactory.newTransformer();
                    DOMSource source = new DOMSource( document );

                    // Write the XML document out, closing the writer so that
                    // the output is flushed to disk.
                    FileWriter writer = new FileWriter( xml );
                    StreamResult result = new StreamResult( writer );
                    transformer.transform( source, result );
                    writer.close();
                }
                catch( Exception e )
                {
                    e.printStackTrace();
                }
            }
            catch( DOMException e )
            {
                e.printStackTrace();
            }
            catch( ParserConfigurationException e )
            {
                e.printStackTrace();
            }
        }
    }

    /**
     * Parses the command line arguments.
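     * If parsing fails, the args4j usage message is printed to {@code System.err}
     * and the JVM exits with status 1.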
     *
     * @param args The arguments to parse
     * @return The tool options class
     */
    private static FaceExtractorToolOptions parseArgs( String[] args )
    {
        FaceExtractorToolOptions fdto = new FaceExtractorToolOptions();
        CmdLineParser parser = new CmdLineParser( fdto );

        try
        {
            parser.parseArgument( args );
        }
        catch( CmdLineException e )
        {
            System.err.println( e.getMessage() );
            System.err.println( "java FaceExtractorTool [options...]" );
            parser.printUsage( System.err );
            System.exit(1);
        }

        return fdto;
    }

    /**
     * Default main
     * @param args The command-line arguments
     */
    public static void main( String[] args )
    {
        FaceExtractorToolOptions options = parseArgs( args );
        new FaceExtractorTool( options );
    }
}