001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.processing.face.detection.keypoints; 031 032import java.io.DataInputStream; 033import java.io.IOException; 034import java.io.InputStream; 035import java.io.Serializable; 036 037import org.openimaj.citation.annotation.Reference; 038import org.openimaj.citation.annotation.ReferenceType; 039import org.openimaj.image.FImage; 040import org.openimaj.image.analysis.algorithm.EuclideanDistanceTransform; 041import org.openimaj.image.analysis.algorithm.SummedAreaTable; 042import org.openimaj.image.pixel.FValuePixel; 043import org.openimaj.image.processing.face.detection.keypoints.FacialKeypoint.FacialKeypointType; 044import org.openimaj.util.hash.HashCodeUtil; 045 046/** 047 * A class capable of finding likely facial keypoints using 048 * a masked Haar cascade for each keypoint, and then picking 049 * the best combination of points based on a model. 050 * <p> 051 * Implementation and data is based on Mark Everingham's 052 * <a href="http://www.robots.ox.ac.uk/~vgg/research/nface/">Oxford VGG 053 * Baseline Face Processing Code</a> 054 * 055 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 056 */ 057@Reference( 058 type = ReferenceType.Inproceedings, 059 author = { "Mark Everingham", "Josef Sivic", "Andrew Zisserman" }, 060 title = "Hello! My name is... Buffy - Automatic naming of characters in TV video", 061 year = "2006", 062 booktitle = "In BMVC" 063 ) 064public class FacialKeypointExtractor { 065 protected Model model; 066 067 /** 068 * Construct a new facial keypoint extractor. 069 */ 070 public FacialKeypointExtractor() { 071 model = Model.DEFAULT_MODEL; 072 } 073 074 /** 075 * Get the size of the face image that the keypoint extractor 076 * can work with. 077 * @return the size (both height and width) 078 */ 079 public int getCanonicalImageDimension() { 080 return model.imgsize; 081 } 082 083 /** 084 * Extract the facial keypoints from a canonical image. 085 * The image must contain a face and have the dimensions 086 * specified by {@link #getCanonicalImageDimension()}. 087 * 088 * @param canonicalImage the canonical image containing a detected face. 089 * @return A list of facial keypoints for the image. 090 */ 091 public FacialKeypoint[] extractFacialKeypoints(FImage canonicalImage) { 092 SummedAreaTable sat = new SummedAreaTable(canonicalImage); 093 094 //run the haar cascade detector for each facial keypoint type 095 FImage [] AC = new FImage[9]; 096 for (int i=0; i<9; i++) { 097 FImage map = MaskedHaarCascade.maskedHaarCascade(sat, model.winsize, model.winsize, model.part[i].HCas, model.part[i].talpha, model.part[i].M); 098 AC[i] = map.multiplyInplace(-(float)model.appwt); 099 } 100 101 //and then fit the model to find the best keypoints 102 return findParts(AC); 103 } 104 105 protected FacialKeypoint[] findParts(FImage [] AC) { 106 float maxconf=Float.NEGATIVE_INFINITY; 107 float conf = 0; 108 FacialKeypoint [] Pbest = null; 109 110 for (int t=0; t < model.tree.length; t++) { 111 FImage [] B = new FImage[model.part.length]; 112 int [][][] argB = new int[model.part.length][model.imgsize][model.imgsize]; 113 114 FacialKeypoint [] P = new FacialKeypoint[model.part.length]; 115 for (int i=0; i<model.part.length; i++) P[i] = new FacialKeypoint(FacialKeypointType.valueOf(i)); 116 117 for (int ci=model.tree[t].depthorder.length-1; ci>=0; ci--) { 118 int c = model.tree[t].depthorder[ci]; 119 120 int [] off = model.tree[t].MU[c]; 121 int [] bb = new int[4]; 122 if (off[0]>=0) { 123 bb[0]=0; 124 bb[1]=model.imgsize-off[0]-1; 125 } else { 126 bb[0]=-off[0]; 127 bb[1]=model.imgsize-1; 128 } 129 if (off[1]>=0) { 130 bb[2]=0; 131 bb[3]=model.imgsize-off[1]-1; 132 } else { 133 bb[2]=-off[1]; 134 bb[3]=model.imgsize-1; 135 } 136 137 FImage C = new FImage(model.imgsize, model.imgsize); 138 C.fill(Float.POSITIVE_INFINITY); 139 140 for (int rr=bb[2]; rr<bb[3]; rr++) { 141 for (int cc=bb[0]; cc<bb[1]; cc++) { 142 int tc = cc + off[0]; 143 int tr = rr + off[1]; 144 145 C.pixels[rr][cc] = AC[c].pixels[tr][tc]; 146 147 for (int g : model.tree[t].children[c]) { 148 C.pixels[rr][cc] += B[g].pixels[tr][tc]; 149 } 150 } 151 } 152 153 if (model.tree[t].parent[c] != -1) { 154 C.divideInplace((float)model.tree[t].scale[c]); 155 156 FImage D = new FImage(C.width, C.height); 157 int [][] L = new int[C.height][C.width]; 158 EuclideanDistanceTransform.squaredEuclideanDistance(C, D, L); 159 160 B[c] = D.multiplyInplace((float)model.tree[t].scale[c]); 161 162 for (int rr=0; rr<L.length; rr++) 163 for (int cc=0; cc<L[0].length; cc++) 164 argB[c][rr][cc] = L[rr][cc] + off[1] * model.imgsize + off[0]; 165 } else { 166 FValuePixel min = C.minPixel(); 167 P[c].position.x = min.x; 168 P[c].position.y = min.y; 169 170 conf=-min.value; 171 } 172 } 173 174 for (int ci=1; ci<model.tree[t].depthorder.length; ci++) { 175 int c = model.tree[t].depthorder[ci]; 176 177 int p = model.tree[t].parent[c]; 178 int mini = argB[c][(int) P[p].position.y][(int) P[p].position.x]; 179 180 P[c].position.y = mini / model.imgsize; 181 P[c].position.x = mini - P[c].position.y * model.imgsize; 182 } 183 184 conf = (float) (conf + Math.log(model.tree[t].mix)); 185 186 if (conf>maxconf) { 187 maxconf=conf; 188 Pbest=P; 189 } 190 } 191 192 return Pbest; 193 } 194 195 /** 196 * A model of the positions of the parts of a face. 197 * 198 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 199 * 200 */ 201 static class Model implements Serializable { 202 private static final long serialVersionUID = 1L; 203 204 public static final Model DEFAULT_MODEL = loadDefaultModel(); 205 206 Tree [] tree; 207 Part [] part; 208 int imgsize; 209 int border; 210 int winsize; 211 double appwt; 212 213 static class Tree implements Serializable { 214 private static final long serialVersionUID = 1L; 215 216 double [][] E; 217 double [] var; 218 int [][] MU; 219 double mix; 220 double [] scale; 221 int [] parent; 222 int [][] children; 223 int [] depthorder; 224 } 225 226 static class Part implements Serializable { 227 private static final long serialVersionUID = 1L; 228 229 double [][] talpha; 230 int [][] HCas; //haar cascade 231 int [] bb; //bounding box 232 boolean [][] M; //mask 233 FacialKeypointType type; 234 } 235 236 private static Model loadDefaultModel() { 237 try { 238 //TODO: could build a serialised version instead and load that 239 return buildDefaultModel(); 240 } catch (IOException e) { 241 throw new RuntimeException(e); 242 } 243 } 244 245 private static Model buildDefaultModel() throws IOException { 246 Model model = new Model(); 247 model.appwt = 2; 248 model.winsize = 15; 249 model.border = 15; 250 model.imgsize = 80; 251 252 model.part = new Part[9]; 253 for (int i=1; i<=9; i++) { 254 model.part[i-1] = new Part(); 255 model.part[i-1].HCas = readIntDataBin(Model.class.getResourceAsStream("haar"+i+".bin")); 256 model.part[i-1].talpha = readDoubleDataBin(Model.class.getResourceAsStream("talpha"+i+".bin")); 257 model.part[i-1].M = readBooleanDataBin(Model.class.getResourceAsStream("mask"+i+".bin")); 258 model.part[i-1].type = FacialKeypointType.valueOf(i-1); 259 } 260 261 model.tree = new Tree[3]; 262 263 //TREE 1 264 model.tree[0] = new Tree(); 265 model.tree[0].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {2, 5}, {5, 6}, {6, 7}, {5, 8}, {7, 9}}; 266 model.tree[0].var = new double[] {1.16, 1.03, 1.19, 2.49, 0.88, 0.92, 1.71, 1.81}; 267 model.tree[0].MU = new int[][] {{0, 0}, {9, 0}, {10,0}, {9, 0}, {1, 12}, {5, 0}, {5, 0}, {-3, 7}, {3, 7}}; 268 model.tree[0].mix = 0.33; 269 model.tree[0].scale = new double[] {0, 0.431203093871548, 0.484492467521240, 0.420546069849079, 0.200844044022372, 0.571191103033717, 0.546043956131382, 0.292395762807960, 0.275798316570445}; 270 model.tree[0].parent = new int[] {-1, 0, 1, 2, 1, 4, 5, 4, 6}; //sub 1 from matlab 271 model.tree[0].children = new int[][] {{1}, {2, 4}, {3}, {}, {5, 7}, {6}, {8}, {}, {}}; //sub 1 from matlab 272 model.tree[0].depthorder = new int[] {0, 1, 2, 4, 3, 5, 7, 6, 8}; //sub 1 from matlab 273 274 //TREE 2 275 model.tree[1] = new Tree(); 276 model.tree[1].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {3, 7}, {7, 6}, {6, 5}, {5, 8}, {7, 9}}; 277 model.tree[1].var = new double[] {1.97, 1.85, 1.89, 3.33, 1.89, 2.06, 3.60, 4.00}; 278 model.tree[1].MU = new int[][] {{0, 0}, {9, -1}, {10, -1}, {9, -1}, {-5, -2}, {-6, 3}, {2, 11}, {-2, 8}, {4, 7}}; 279 model.tree[1].mix = 0.36; 280 model.tree[1].scale = new double[] {0, 0.254045277312314, 0.270079620048070, 0.264406629463902, 0.243204625087956, 0.263998194617104, 0.150044711363897, 0.138952250832562, 0.124994774540550}; 281 model.tree[1].parent = new int[] {-1, 0, 1, 2, 5, 6, 2, 4, 6}; //sub 1 from matlab 282 model.tree[1].children = new int[][] {{1}, {2}, {3, 6}, {}, {7}, {4}, {5, 8}, {}, {}}; //sub 1 from matlab 283 model.tree[1].depthorder = new int[] {0, 1, 2, 3, 6, 5, 8, 4, 7}; //sub 1 from matlab 284 285 //TREE 3 286 model.tree[2] = new Tree(); 287 model.tree[2].E = new double[][] {{1, 2}, {2, 3}, {3, 4}, {2, 5}, {5, 6}, {6, 7}, {7, 9}, {5, 8}}; 288 model.tree[2].var = new double[] {2.15, 1.95, 2.25, 3.77, 1.99, 2.49, 3.88, 4.23}; 289 model.tree[2].MU = new int[][] {{0, 0}, {9, 1}, {10, 1}, {9, 1}, {-1, 11}, {5, 3}, {6, -1}, {-4, 7}, {1, 8}}; 290 model.tree[2].mix = 0.31; 291 model.tree[2].scale = new double[] {0, 0.232309551084211, 0.256560908278279, 0.221946733311257, 0.132713563105743, 0.251186367002818, 0.200459108303263, 0.118167093811055, 0.128771977977574}; 292 model.tree[2].parent = new int[] {-1, 0, 1, 2, 1, 4, 5, 4, 6}; //sub 1 from matlab 293 model.tree[2].children = new int[][] {{1}, {2, 4}, {3}, {}, {5, 7}, {6}, {8}, {}, {}}; //sub 1 from matlab 294 model.tree[2].depthorder = new int[] {0, 1, 2, 4, 3, 5, 7, 6, 8}; //sub 1 from matlab 295 296 return model; 297 } 298 299 private static double [][] readDoubleDataBin(InputStream is) throws IOException { 300 DataInputStream input = new DataInputStream(is); 301 302 int rows = input.readInt(); 303 int cols = input.readInt(); 304 305 double [][] data = new double[rows][cols]; 306 307 for (int r=0; r<rows; r++) { 308 for (int c=0; c<cols; c++) { 309 data[r][c] = input.readDouble(); 310 } 311 } 312 313 input.close(); 314 315 return data; 316 } 317 318 private static int [][] readIntDataBin(InputStream is) throws IOException { 319 DataInputStream input = new DataInputStream(is); 320 321 int rows = input.readInt(); 322 int cols = input.readInt(); 323 324 int [][] data = new int[rows][cols]; 325 326 for (int r=0; r<rows; r++) { 327 for (int c=0; c<cols; c++) { 328 data[r][c] = input.readInt(); 329 } 330 } 331 332 input.close(); 333 334 return data; 335 } 336 337 private static boolean [][] readBooleanDataBin(InputStream is) throws IOException { 338 DataInputStream input = new DataInputStream(is); 339 340 int rows = input.readInt(); 341 int cols = input.readInt(); 342 343 boolean [][] data = new boolean[rows][cols]; 344 345 for (int r=0; r<rows; r++) { 346 for (int c=0; c<cols; c++) { 347 data[r][c] = input.readInt() == 1; 348 } 349 } 350 351 input.close(); 352 353 return data; 354 } 355 } 356 357 @Override 358 public int hashCode() { 359 int hashCode = HashCodeUtil.SEED; 360 //HashCodeUtil.hash(hashCode, this.model); //TODO: for now model is constant 361 return hashCode; 362 } 363} 364