001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.processing.face.detection.keypoints;
031
032import java.io.DataInputStream;
033import java.io.IOException;
034import java.io.InputStream;
035import java.io.Serializable;
036
037import org.openimaj.citation.annotation.Reference;
038import org.openimaj.citation.annotation.ReferenceType;
039import org.openimaj.image.FImage;
040import org.openimaj.image.analysis.algorithm.EuclideanDistanceTransform;
041import org.openimaj.image.analysis.algorithm.SummedAreaTable;
042import org.openimaj.image.pixel.FValuePixel;
043import org.openimaj.image.processing.face.detection.keypoints.FacialKeypoint.FacialKeypointType;
044import org.openimaj.util.hash.HashCodeUtil;
045
046/**
047 * A class capable of finding likely facial keypoints using
048 * a masked Haar cascade for each keypoint, and then picking 
049 * the best combination of points based on a model.
050 * <p>
051 * Implementation and data is based on Mark Everingham's 
052 * <a href="http://www.robots.ox.ac.uk/~vgg/research/nface/">Oxford VGG 
053 * Baseline Face Processing Code</a>
054 * 
055 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
056 */
057@Reference(
058                type = ReferenceType.Inproceedings,
059                author = { "Mark Everingham", "Josef Sivic", "Andrew Zisserman" },
060                title = "Hello! My name is... Buffy - Automatic naming of characters in TV video",
061                year = "2006",
062                booktitle = "In BMVC"
063        )
064public class FacialKeypointExtractor {
065        protected Model model;
066        
067        /**
068         * Construct a new facial keypoint extractor.
069         */
070        public FacialKeypointExtractor() {
071                model = Model.DEFAULT_MODEL;
072        }
073        
074        /**
075         * Get the size of the face image that the keypoint extractor
076         * can work with.
077         * @return the size (both height and width)
078         */
079        public int getCanonicalImageDimension() {
080                return model.imgsize;
081        }
082        
083        /**
084         * Extract the facial keypoints from a canonical image.
085         * The image must contain a face and have the dimensions
086         * specified by {@link #getCanonicalImageDimension()}.
087         * 
088         * @param canonicalImage the canonical image containing a detected face.
089         * @return A list of facial keypoints for the image.
090         */
091        public FacialKeypoint[] extractFacialKeypoints(FImage canonicalImage) {
092                SummedAreaTable sat = new SummedAreaTable(canonicalImage);
093
094                //run the haar cascade detector for each facial keypoint type
095                FImage [] AC = new FImage[9];
096                for (int i=0; i<9; i++) {
097                        FImage map = MaskedHaarCascade.maskedHaarCascade(sat, model.winsize, model.winsize, model.part[i].HCas, model.part[i].talpha, model.part[i].M);                 
098                        AC[i] = map.multiplyInplace(-(float)model.appwt);
099                }
100                
101                //and then fit the model to find the best keypoints
102                return findParts(AC);
103        }
104        
105        protected FacialKeypoint[] findParts(FImage [] AC) {
106                float maxconf=Float.NEGATIVE_INFINITY;
107                float conf = 0;
108                FacialKeypoint [] Pbest = null;
109                
110                for (int t=0; t < model.tree.length; t++) {
111                        FImage [] B = new FImage[model.part.length];
112                        int [][][] argB = new int[model.part.length][model.imgsize][model.imgsize];
113                        
114                        FacialKeypoint [] P = new FacialKeypoint[model.part.length];
115                        for (int i=0; i<model.part.length; i++) P[i] = new FacialKeypoint(FacialKeypointType.valueOf(i)); 
116
117                        for (int ci=model.tree[t].depthorder.length-1; ci>=0; ci--) {
118                                int c = model.tree[t].depthorder[ci];
119
120                                int [] off = model.tree[t].MU[c];
121                                int [] bb = new int[4];
122                                if (off[0]>=0) {
123                                        bb[0]=0;
124                                        bb[1]=model.imgsize-off[0]-1;
125                                } else {
126                                        bb[0]=-off[0];
127                                        bb[1]=model.imgsize-1;
128                                }
129                                if (off[1]>=0) {
130                                        bb[2]=0;
131                                        bb[3]=model.imgsize-off[1]-1;
132                                } else {
133                                        bb[2]=-off[1];
134                                        bb[3]=model.imgsize-1;
135                                }
136
137                                FImage C = new FImage(model.imgsize, model.imgsize);
138                                C.fill(Float.POSITIVE_INFINITY);
139
140                                for (int rr=bb[2]; rr<bb[3]; rr++) {
141                                        for (int cc=bb[0]; cc<bb[1]; cc++) {
142                                                int tc = cc + off[0];
143                                                int tr = rr + off[1];
144                                                
145                                                C.pixels[rr][cc] = AC[c].pixels[tr][tc];
146                                                
147                                                for (int g : model.tree[t].children[c]) {
148                                                        C.pixels[rr][cc] += B[g].pixels[tr][tc];
149                                                }
150                                        }
151                                }
152                                
153                                if (model.tree[t].parent[c] != -1) {
154                                        C.divideInplace((float)model.tree[t].scale[c]);
155                                        
156                                        FImage D = new FImage(C.width, C.height);
157                                        int [][] L = new int[C.height][C.width];
158                                        EuclideanDistanceTransform.squaredEuclideanDistance(C, D, L);
159
160                                        B[c] = D.multiplyInplace((float)model.tree[t].scale[c]);
161
162                                        for (int rr=0; rr<L.length; rr++)
163                                                for (int cc=0; cc<L[0].length; cc++)
164                                                        argB[c][rr][cc] = L[rr][cc] + off[1] * model.imgsize + off[0];
165                                } else {
166                                        FValuePixel min = C.minPixel();
167                                        P[c].position.x = min.x;
168                                        P[c].position.y = min.y;
169
170                                        conf=-min.value;
171                                }
172                        }
173
174                        for (int ci=1; ci<model.tree[t].depthorder.length; ci++) {
175                                int c = model.tree[t].depthorder[ci];
176
177                                int p = model.tree[t].parent[c];
178                                int mini = argB[c][(int) P[p].position.y][(int) P[p].position.x];
179
180                                P[c].position.y = mini / model.imgsize;
181                                P[c].position.x = mini - P[c].position.y * model.imgsize;
182                        }
183
184                        conf = (float) (conf + Math.log(model.tree[t].mix));
185
186                        if (conf>maxconf) {
187                                maxconf=conf;
188                                Pbest=P;
189                        }
190                }
191                
192                return Pbest;
193        }
194        
195        /**
196         * A model of the positions of the parts of a face.
197         * 
198         * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
199         *
200         */
201        static class Model implements Serializable {
202                private static final long serialVersionUID = 1L;
203
204                public static final Model DEFAULT_MODEL = loadDefaultModel();
205                
206                Tree [] tree;
207                Part [] part;
208                int imgsize;
209                int border;
210                int winsize;
211                double appwt;
212
213                static class Tree implements Serializable {
214                        private static final long serialVersionUID = 1L;
215
216                        double [][] E;
217                        double [] var;
218                        int [][] MU;
219                        double mix;
220                        double [] scale;
221                        int [] parent;
222                        int [][] children;
223                        int [] depthorder;
224                }
225
226                static class Part implements Serializable {
227                        private static final long serialVersionUID = 1L;
228
229                        double [][] talpha;
230                        int [][] HCas;  //haar cascade
231                        int [] bb;              //bounding box
232                        boolean [][] M; //mask
233                        FacialKeypointType type;
234                }
235                
236                private static Model loadDefaultModel() {
237                        try {
238                                //TODO: could build a serialised version instead and load that
239                                return buildDefaultModel();
240                        } catch (IOException e) {
241                                throw new RuntimeException(e);
242                        }
243                }
244                
245                private static Model buildDefaultModel() throws IOException {
246                        Model model = new Model();
247                        model.appwt = 2;
248                        model.winsize = 15;
249                        model.border = 15;
250                        model.imgsize = 80;
251
252                        model.part = new Part[9];
253                        for (int i=1; i<=9; i++) {
254                                model.part[i-1] = new Part();
255                                model.part[i-1].HCas = readIntDataBin(Model.class.getResourceAsStream("haar"+i+".bin"));
256                                model.part[i-1].talpha = readDoubleDataBin(Model.class.getResourceAsStream("talpha"+i+".bin"));
257                                model.part[i-1].M = readBooleanDataBin(Model.class.getResourceAsStream("mask"+i+".bin"));
258                                model.part[i-1].type = FacialKeypointType.valueOf(i-1);
259                        }
260
261                        model.tree = new Tree[3];
262
263                        //TREE 1
264                        model.tree[0] = new Tree();
265                        model.tree[0].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {2, 5}, {5, 6}, {6, 7}, {5, 8}, {7, 9}};
266                        model.tree[0].var = new double[]  {1.16, 1.03, 1.19, 2.49, 0.88, 0.92, 1.71, 1.81};
267                        model.tree[0].MU = new int[][] {{0, 0}, {9, 0}, {10,0}, {9, 0}, {1, 12}, {5, 0}, {5, 0}, {-3, 7}, {3, 7}};
268                        model.tree[0].mix = 0.33;
269                        model.tree[0].scale = new double[] {0, 0.431203093871548, 0.484492467521240, 0.420546069849079, 0.200844044022372, 0.571191103033717, 0.546043956131382, 0.292395762807960, 0.275798316570445};
270                        model.tree[0].parent = new int[] {-1, 0, 1, 2, 1, 4, 5, 4, 6}; //sub 1 from matlab
271                        model.tree[0].children = new int[][] {{1}, {2, 4}, {3}, {}, {5, 7}, {6}, {8}, {}, {}}; //sub 1 from matlab
272                        model.tree[0].depthorder = new int[] {0, 1, 2, 4, 3, 5, 7, 6, 8}; //sub 1 from matlab
273
274                        //TREE 2
275                        model.tree[1] = new Tree();
276                        model.tree[1].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {3, 7}, {7, 6}, {6, 5}, {5, 8}, {7, 9}};
277                        model.tree[1].var = new double[]  {1.97, 1.85, 1.89, 3.33, 1.89, 2.06, 3.60, 4.00};             
278                        model.tree[1].MU = new int[][] {{0, 0}, {9, -1}, {10, -1}, {9, -1}, {-5, -2}, {-6, 3}, {2, 11}, {-2, 8}, {4, 7}};
279                        model.tree[1].mix = 0.36;
280                        model.tree[1].scale = new double[] {0, 0.254045277312314, 0.270079620048070, 0.264406629463902, 0.243204625087956, 0.263998194617104, 0.150044711363897, 0.138952250832562, 0.124994774540550};
281                        model.tree[1].parent = new int[] {-1, 0, 1, 2, 5, 6, 2, 4, 6}; //sub 1 from matlab
282                        model.tree[1].children = new int[][] {{1}, {2}, {3, 6}, {}, {7}, {4}, {5, 8}, {}, {}}; //sub 1 from matlab
283                        model.tree[1].depthorder = new int[] {0, 1, 2, 3, 6, 5, 8, 4, 7}; //sub 1 from matlab
284
285                        //TREE 3
286                        model.tree[2] = new Tree();
287                        model.tree[2].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {2, 5}, {5, 6}, {6, 7}, {7, 9}, {5, 8}};
288                        model.tree[2].var = new double[]  {2.15, 1.95, 2.25, 3.77, 1.99, 2.49, 3.88, 4.23};
289                        model.tree[2].MU = new int[][] {{0, 0}, {9, 1}, {10, 1}, {9, 1}, {-1, 11}, {5, 3}, {6, -1}, {-4, 7}, {1, 8}};
290                        model.tree[2].mix = 0.31;
291                        model.tree[2].scale = new double[] {0, 0.232309551084211, 0.256560908278279, 0.221946733311257, 0.132713563105743, 0.251186367002818, 0.200459108303263, 0.118167093811055, 0.128771977977574};
292                        model.tree[2].parent = new int[] {-1, 0, 1, 2, 1, 4, 5, 4, 6}; //sub 1 from matlab
293                        model.tree[2].children = new int[][] {{1}, {2, 4}, {3}, {}, {5, 7}, {6}, {8}, {}, {}}; //sub 1 from matlab
294                        model.tree[2].depthorder = new int[] {0, 1, 2, 4, 3, 5, 7, 6, 8}; //sub 1 from matlab
295
296                        return model;
297                }
298
299                private static double [][] readDoubleDataBin(InputStream is) throws IOException {
300                        DataInputStream input = new DataInputStream(is);
301
302                        int rows = input.readInt();
303                        int cols = input.readInt();
304
305                        double [][] data = new double[rows][cols];
306
307                        for (int r=0; r<rows; r++) {
308                                for (int c=0; c<cols; c++) {
309                                        data[r][c] = input.readDouble();
310                                }
311                        }
312
313                        input.close();
314
315                        return data;
316                }
317
318                private static int [][] readIntDataBin(InputStream is) throws IOException {
319                        DataInputStream input = new DataInputStream(is);
320
321                        int rows = input.readInt();
322                        int cols = input.readInt();
323
324                        int [][] data = new int[rows][cols];
325
326                        for (int r=0; r<rows; r++) {
327                                for (int c=0; c<cols; c++) {
328                                        data[r][c] = input.readInt();
329                                }
330                        }
331
332                        input.close();
333
334                        return data;
335                }
336
337                private static boolean [][] readBooleanDataBin(InputStream is) throws IOException {
338                        DataInputStream input = new DataInputStream(is);
339
340                        int rows = input.readInt();
341                        int cols = input.readInt();
342
343                        boolean [][] data = new boolean[rows][cols];
344
345                        for (int r=0; r<rows; r++) {
346                                for (int c=0; c<cols; c++) {
347                                        data[r][c] = input.readInt() == 1;
348                                }
349                        }
350
351                        input.close();
352
353                        return data;
354                }
355        }
356        
357        @Override
358        public int hashCode() {
359                int hashCode = HashCodeUtil.SEED;
360                //HashCodeUtil.hash(hashCode, this.model); //TODO: for now model is constant 
361                return hashCode;
362        }
363}
364