/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.tools.faces.extraction;

import java.io.File;
import java.io.IOException;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.openimaj.image.ImageUtilities;
import org.openimaj.image.MBFImage;
import org.openimaj.image.processing.face.detection.DetectedFace;
import org.openimaj.image.processing.face.detection.HaarCascadeDetector;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.util.pair.IndependentPair;
import org.openimaj.video.processing.shotdetector.ShotBoundary;
import org.openimaj.video.processing.shotdetector.HistogramVideoShotDetector;
import org.openimaj.video.processing.timefinder.ObjectTimeFinder;
import org.openimaj.video.processing.timefinder.ObjectTimeFinder.TimeFinderListener;
import org.openimaj.video.processing.tracking.BasicMBFImageObjectTracker;
import org.openimaj.video.timecode.VideoTimecode;
import org.openimaj.video.xuggle.XuggleVideo;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 *  A tool that provides a means of extracting faces from videos and images.
 *
 *  @author David Dupplaw (dpd@ecs.soton.ac.uk)
 *  @created 7 Nov 2011
 */
public class FaceExtractorTool
{
    /** The options for this tool instance */
    private FaceExtractorToolOptions options = null;

    /** The video from which to extract faces */
    private XuggleVideo video = null;

    /** The output directory where we'll write the extracted faces */
    private File outputDir = null;

    /** Used in the object tracking to store the frame in which the best face was found */
    private MBFImage bestFaceFrame = null;

    /** Used in the object tracking to store the best face timecode for each face */
    private VideoTimecode bestFaceTimecode = null;

    /** Used in the object tracking to store the best face bounding box */
    private Rectangle bestFaceBoundingBox = null;
    /**
     *  Default constructor
     *
     *  @param o the options
     */
    public FaceExtractorTool( FaceExtractorToolOptions o )
    {
        this.options = o;

        this.outputDir = new File( o.outputFile );

        // If we have a video file, read in the video
        if( options.videoFile != null )
        {
            // Create the video reader for reading the video
            this.video = new XuggleVideo( new File( options.videoFile ) );
        }

        // Create the output directory if it doesn't exist.
        if( !this.outputDir.exists() )
            this.outputDir.mkdirs();
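        // Note: mkdirs() also creates any missing parent directories; its
        // boolean return value (whether a directory was created) is ignored here.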

        // Process the video
        this.processVideo();
    }

    /**
     *  Process the video to extract faces.
     */
    private void processVideo()
    {
        if( this.options.verbose )
        {
            System.out.println( this.options.videoFile );
            System.out.println( "    - Size: "+video.getWidth()+"x"+video.getHeight() );
            System.out.println( "    - Frame Rate: "+video.getFPS() );
            System.out.println( "Detecting shots in video..." );
        }

        // This is the video shot detector we'll use to find the shots in
        // the incoming video. These shots will provide hard limits for the
        // face tracking.
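        // The threshold below is the histogram-distance threshold taken from
        // the tool options; keyframes are kept for each detected shot, and the
        // per-frame histogram differentials are not retained.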
        HistogramVideoShotDetector vsd = new HistogramVideoShotDetector( this.video );
        vsd.setThreshold( this.options.threshold );
        vsd.setFindKeyframes( true );
        vsd.setStoreAllDifferentials( false );
        vsd.process();

        // Retrieve the shots from the shot detector
        List<ShotBoundary<MBFImage>> shots = vsd.getShotBoundaries();

        if( this.options.verbose )
            System.out.println( "Found "+shots.size()+" shots." );

        // We'll use a HaarCascadeDetector (frontal-face cascade) for detecting
        // faces. Faces smaller than the configured face size will be ignored.
        HaarCascadeDetector fd = HaarCascadeDetector.BuiltInCascade.frontalface_alt2.load();
        fd.setMinSize( this.options.faceSize );

        // Now we'll go through the video looking for faces every x seconds.
        this.video.reset();

        // For each shot boundary...
        ShotBoundary<MBFImage> prev = shots.get(0);
        for( int i = 1; i < shots.size(); i++ )
        {
            ShotBoundary<MBFImage> thiz = shots.get(i);

            // Get the timecodes of the shot. Remember the timecode gives the
            // start of the shot, so the shot is between the previous timecode
            // and one frame before this timecode.
            long pframe = prev.getTimecode().getFrameNumber();
            long tframe = thiz.getTimecode().getFrameNumber()-1;

            if( this.options.verbose )
                System.out.println( "Shot: "+prev+" ("+pframe+") -> "+thiz+" ("+tframe+")" );

            // This will be the frame we'll store for a given face
            MBFImage faceFrame = null;

            // Now loop around looking for faces in the shot
            List<DetectedFace> faces = null;
            boolean doneSearching = false;
            while( !doneSearching )
            {
                // If we're supposed to use just the centre frame, then we'll work
                // out where that centre frame is and grab the frame.
                if( this.options.useCentre )
                {
                    long mframe = pframe + ((tframe - pframe) / 2);
                    video.setCurrentFrameIndex( mframe );
                    faceFrame = video.getCurrentFrame();
                    doneSearching = true;
                }
                // If we're searching for a face every x seconds, then we'll skip
                // forward by x seconds at a time.
                else
                {
                    // Skip forward by x seconds' worth of frames (truncated to a
                    // whole number of frames)
                    pframe += options.seconds * video.getFPS();

                    // Check if we're still within the shot
                    if( pframe >= tframe )
                    {
                        doneSearching = true;
                        pframe = tframe;
                    }

                    // Push the video forward
                    video.setCurrentFrameIndex( pframe );
                    faceFrame = video.getCurrentFrame();
                }

                if( this.options.verbose )
                    System.out.println( "    - Using frame "+
                        video.getCurrentTimecode()+" ("+video.getTimeStamp()+")" );

                // Detect faces in the frame
                faces = fd.detectFaces( faceFrame.flatten() );

                if( this.options.verbose )
                    System.out.println( "        + Found "+faces.size()+" faces in frame." );

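                // If at least one face was found in this frame, stop scanning
                // further frames of this shot.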
                if( faces.size() > 0 )
                    doneSearching = true;

                // For each of the detected faces (if there are any) track
                // back and forth in the video to find the times at which the
                // face is best. As a consequence, we will also end up with the
                // timecodes at which the face appears in the video (at any size)
                for( DetectedFace f : faces )
                {
                    if( options.verbose )
                        System.out.println( "        - Tracking face..." );

                    bestFaceTimecode = null;
                    bestFaceFrame = null;
                    bestFaceBoundingBox = null;

                    // We'll use an ObjectTimeFinder to track each detected face
                    // through the video.
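                    // The listener below watches the tracker and remembers the
                    // frame, timecode and bounding box at which the tracked face
                    // appears largest; that instance is treated as the "best" face.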
                    ObjectTimeFinder otf = new ObjectTimeFinder();
                    IndependentPair<VideoTimecode, VideoTimecode> timecodes =
                        otf.trackObject( new BasicMBFImageObjectTracker(), video,
                            video.getCurrentTimecode(), f.getBounds(),
                            new TimeFinderListener<Rectangle,MBFImage>()
                            {
                                double maxArea = 0;

                                @Override
                                public void objectTracked(
                                        List<Rectangle> objects,
                                        VideoTimecode time,
                                        MBFImage image )
                                {
                                    if( objects.size() > 0 &&
                                        objects.get(0).calculateArea() > maxArea )
                                    {
                                        maxArea = objects.get(0).calculateArea();
                                        bestFaceTimecode = time;
                                        bestFaceFrame = image.clone();
                                        bestFaceBoundingBox = objects.get(0);
                                    }
                                }
                            } );

                    if( options.verbose )
                        System.out.println( "        - Face tracked between "+timecodes );

                    if( bestFaceBoundingBox != null )
                    {
                        try
                        {
                            saveFace( bestFaceFrame,
                                bestFaceTimecode.getFrameNumber(), timecodes,
                                bestFaceBoundingBox );
                        }
                        catch( IOException e )
                        {
                            e.printStackTrace();
                        }
                    }
                }
            }

            prev = thiz;
        }
    }

    /**
     *  Writes a face image to an appropriate file in the output directory
     *  named as per the input file but suffixed with the frame number.
     *
     *  @param frame The video frame
     *  @param mframe The frame number
     *  @param timecodes The timecodes of the face
     *  @param bounds The bounding box of the face
     *  @throws IOException If the write cannot be completed
     */
    private void saveFace( MBFImage frame, long mframe,
            IndependentPair<VideoTimecode, VideoTimecode> timecodes,
            Rectangle bounds ) throws IOException
    {
        File base = new File( this.options.outputFile );

        if( options.writeFaceImage )
        {
            File img = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".face.png" );
            ImageUtilities.write( frame.extractROI( bounds ), img );
        }

        if( options.writeFrameImage )
        {
            File img = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".frame.png" );
            ImageUtilities.write( frame, img );
        }

        if( options.writeXML )
        {
            File xml = new File( base, new File( this.options.videoFile ).getName()
                    + "#" + mframe + ".xml" );

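            // The document built below has (roughly) the following shape:
            //   <face>
            //     <boundingBox>...</boundingBox>
            //     <appearanceTimecode>...</appearanceTimecode>
            //     <disappearanceTimecode>...</disappearanceTimecode>
            //     <appearanceFrame>...</appearanceFrame>
            //     <disappearanceFrame>...</disappearanceFrame>
            //     <appearanceTime>...</appearanceTime>
            //     <disappearanceTime>...</disappearanceTime>
            //   </face>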
            try
            {
                DocumentBuilderFactory documentBuilderFactory =
                    DocumentBuilderFactory.newInstance();
                DocumentBuilder documentBuilder =
                    documentBuilderFactory.newDocumentBuilder();
                Document document = documentBuilder.newDocument();
                Element rootElement = document.createElement( "face" );
                document.appendChild( rootElement );

                Element em = document.createElement( "boundingBox" );
                em.appendChild( document.createTextNode( bounds.toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceTimecode" );
                em.appendChild( document.createTextNode( timecodes.firstObject().toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceTimecode" );
                em.appendChild( document.createTextNode( timecodes.secondObject().toString() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceFrame" );
                em.appendChild( document.createTextNode( ""+timecodes.firstObject().getFrameNumber() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceFrame" );
                em.appendChild( document.createTextNode( ""+timecodes.secondObject().getFrameNumber() ) );
                rootElement.appendChild( em );

                em = document.createElement( "appearanceTime" );
                em.appendChild( document.createTextNode( ""+timecodes.firstObject().getTimecodeInMilliseconds() ) );
                rootElement.appendChild( em );

                em = document.createElement( "disappearanceTime" );
                em.appendChild( document.createTextNode( ""+timecodes.secondObject().getTimecodeInMilliseconds() ) );
                rootElement.appendChild( em );

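                // Serialise the assembled DOM document to the XML file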
                try
                {
                    TransformerFactory transformerFactory =
                        TransformerFactory.newInstance();
                    Transformer transformer = transformerFactory.newTransformer();
                    DOMSource source = new DOMSource( document );
                    // Write the result directly to the file; the transformer
                    // manages the underlying stream, so nothing is left unclosed.
                    StreamResult result = new StreamResult( xml );
                    transformer.transform( source, result );
                }
                catch( Exception e )
                {
                    e.printStackTrace();
                }
            }
            catch( DOMException e )
            {
                e.printStackTrace();
            }
            catch( ParserConfigurationException e )
            {
                e.printStackTrace();
            }
        }
    }

    /**
     *  Parses the command line arguments.
     *
     *  @param args The arguments to parse
     *  @return The tool options class
     */
    private static FaceExtractorToolOptions parseArgs( String[] args )
    {
        FaceExtractorToolOptions fdto = new FaceExtractorToolOptions();
        CmdLineParser parser = new CmdLineParser( fdto );

        try
        {
            parser.parseArgument( args );
        }
        catch( CmdLineException e )
        {
            System.err.println( e.getMessage() );
            System.err.println( "java FaceExtractorTool [options...]" );
            parser.printUsage( System.err );
            System.exit( 1 );
        }

        return fdto;
    }

    /**
     *  Default main
     *
     *  @param args The command-line arguments
     */
    public static void main( String[] args )
    {
        FaceExtractorToolOptions options = parseArgs( args );
        new FaceExtractorTool( options );
    }
}