/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.processing.face.detection.keypoints;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.pyramid.SimplePyramid;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.face.detection.DetectedFace;
import org.openimaj.image.processing.face.detection.FaceDetector;
import org.openimaj.image.processing.face.detection.HaarCascadeDetector;
import org.openimaj.image.processing.transform.ProjectionProcessor;
import org.openimaj.io.IOUtils;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.math.geometry.transforms.TransformUtilities;
import org.openimaj.util.hash.HashCodeUtil;

import Jama.Matrix;
import Jama.SingularValueDecomposition;

/**
 * F(rontal)K(eypoint)E(nriched)FaceDetector uses an underlying face detector to
 * detect frontal faces in an image, and then looks for facial keypoints within
 * the detections.
 * <p>
 * Implementation and data are based on Mark Everingham's <a
 * href="http://www.robots.ox.ac.uk/~vgg/research/nface/">Oxford VGG Baseline
 * Face Processing Code</a>
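 * <p>
 * A minimal usage sketch (illustrative only; assumes an {@link FImage} has
 * already been loaded elsewhere):
 *
 * <pre>
 * FKEFaceDetector detector = new FKEFaceDetector(80);
 * List&lt;KEDetectedFace&gt; faces = detector.detectFaces(image);
 * </pre>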
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
@Reference(
        type = ReferenceType.Inproceedings,
        author = { "Mark Everingham", "Josef Sivic", "Andrew Zisserman" },
        title = "Hello! My name is... Buffy - Automatic naming of characters in TV video",
        year = "2006",
        booktitle = "In BMVC")
public class FKEFaceDetector implements FaceDetector<KEDetectedFace, FImage> {
    protected FaceDetector<? extends DetectedFace, FImage> faceDetector;
    protected FacialKeypointExtractor facialKeypointExtractor = new FacialKeypointExtractor();
    private float patchScale = 1.0f;

    /**
     * Default constructor. Uses the standard {@link HaarCascadeDetector} with a
     * minimum search size of 80 pixels.
     */
    public FKEFaceDetector() {
        this(new HaarCascadeDetector(80));
    }

    /**
     * Construct with a standard {@link HaarCascadeDetector} and the given
     * minimum search size.
     *
     * @param size
     *            minimum detection size.
     */
    public FKEFaceDetector(int size) {
        this(new HaarCascadeDetector(size));
    }

    /**
     * Construct with the standard {@link HaarCascadeDetector} with a minimum
     * search size of 80 pixels, and the given scale-factor for extracting the
     * face patch.
     *
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(float patchScale) {
        this(new HaarCascadeDetector(80), patchScale);
    }

    /**
     * Construct with a standard {@link HaarCascadeDetector} with the given
     * minimum search size, and the given scale-factor for extracting the face
     * patch.
     *
     * @param size
     *            minimum detection size.
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(int size, float patchScale) {
        this(new HaarCascadeDetector(size), patchScale);
    }

    /**
     * Construct with the given underlying (frontal) face detector.
     *
     * @param detector
     *            the face detector.
     */
    public FKEFaceDetector(FaceDetector<? extends DetectedFace, FImage> detector) {
        this.faceDetector = detector;
    }

    /**
     * Construct with the given underlying (frontal) face detector, and the
     * given scale-factor for extracting the face patch.
     *
     * @param detector
     *            the face detector.
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(FaceDetector<? extends DetectedFace, FImage> detector, float patchScale) {
        this.faceDetector = detector;
        this.patchScale = patchScale;
    }

    /**
     * Resize the image using a pyramid.
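     * <p>
     * The pyramid level is chosen from the average singular value s of the
     * upper-left 2x2 block of the transform as lev = max(floor(log(s) / log(1.5)), 0) + 1,
     * and the transform is rescaled so that it maps into the subsampled image.
     * As an illustrative example, s = 4 gives lev = 4, so the image is reduced
     * by a factor of 1.5^3 = 3.375 and the transform retains the remaining
     * scale of about 4 / 3.375 = 1.19.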
     *
     * @param image
     *            the image
     * @param transform
     *            the resize transform
     * @return the resized image
     */
    public static FImage pyramidResize(FImage image, Matrix transform) {
        // estimate the scale change
        final SingularValueDecomposition svd = transform.getMatrix(0, 1, 0, 1).svd();
        final double sv[] = svd.getSingularValues();
        final double scale = ((sv[0] + sv[1]) / 2);

        // calculate the pyramid level
        final int lev = (int) (Math.max(Math.floor(Math.log(scale) / Math.log(1.5)), 0) + 1);
        final double pyramidScale = Math.pow(1.5, (lev - 1));

        // setup the new transformed transform matrix
        final Matrix scaleMatrix = TransformUtilities.scaleMatrix(1 / pyramidScale, 1 / pyramidScale);
        final Matrix newTransform = scaleMatrix.times(transform);
        transform.setMatrix(0, 2, 0, 2, newTransform);

        return image.process(new SimplePyramid<FImage>(1.5f, lev));
    }

    /**
     * Extract a patch from the image based on the parameters.
     *
     * @param image
     *            the image
     * @param transform
     *            the transform
     * @param size
     *            the patch size
     * @param border
     *            the size of the border
     * @return the patch
     */
    public static FImage extractPatch(FImage image, Matrix transform, int size, int border) {
        final ProjectionProcessor<Float, FImage> pp = new ProjectionProcessor<Float, FImage>();

        pp.setMatrix(transform.inverse());
        image.accumulateWith(pp);

        return pp.performProjection(border, size - border, border, size - border, RGBColour.BLACK[0]);
    }
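    /*
     * Note on detectFaces below (summary, not a specification): each detection
     * is mapped to the keypoint extractor's canonical patch by a similarity
     * transform with scale (r.width / 2) / ((canonicalSize / 2) - border),
     * centred on the detection. Keypoints found in the canonical patch are
     * then mapped back into image coordinates, and the bounding box is
     * re-estimated from the recovered eye positions.
     */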
    @Override
    public List<KEDetectedFace> detectFaces(FImage image) {
        final List<? extends DetectedFace> faces = faceDetector.detectFaces(image);

        final List<KEDetectedFace> descriptors = new ArrayList<KEDetectedFace>(faces.size());
        for (final DetectedFace df : faces) {
            final int canonicalSize = facialKeypointExtractor.getCanonicalImageDimension();
            final Rectangle r = df.getBounds();

            // calculate a scaled version of the image and extract a patch of
            // canonicalSize
            final float scale = (r.width / 2) / ((canonicalSize / 2) - facialKeypointExtractor.model.border);
            float tx = (r.x + (r.width / 2)) - scale * canonicalSize / 2;
            float ty = (r.y + (r.height / 2)) - scale * canonicalSize / 2;

            final Matrix T0 = new Matrix(new double[][] { { scale, 0, tx }, { 0, scale, ty }, { 0, 0, 1 } });
            final Matrix T = (Matrix) T0.clone();

            final FImage subsampled = pyramidResize(image, T);
            final FImage smallpatch = extractPatch(subsampled, T, canonicalSize, 0);

            // extract the keypoints
            final FacialKeypoint[] kpts = facialKeypointExtractor.extractFacialKeypoints(smallpatch);

            // calculate the transform to take the canonical coordinates to the
            // roi coordinates
            tx = (r.width / 2) - scale * canonicalSize / 2;
            ty = (r.height / 2) - scale * canonicalSize / 2;
            final Matrix T1 = new Matrix(new double[][] { { scale, 0, tx }, { 0, scale, ty }, { 0, 0, 1 } });
            FacialKeypoint.updateImagePosition(kpts, T1);

            // recompute the bounding box based on the positions of the facial
            // keypoints
            final FacialKeypoint eyeLL = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_LEFT_LEFT);
            final FacialKeypoint eyeRR = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_RIGHT_RIGHT);
            final FacialKeypoint eyeLR = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_LEFT_RIGHT);
            final FacialKeypoint eyeRL = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_RIGHT_LEFT);

            // horizontal distance between the two eye centres
            final float eyeSpace = (0.5f * (eyeRR.position.x + eyeRL.position.x))
                    - (0.5f * (eyeLR.position.x + eyeLL.position.x));
            final float deltaX = (0.5f * (eyeLR.position.x + eyeLL.position.x)) - eyeSpace;
            r.x = r.x + deltaX;
            r.width = eyeSpace * 3;

            // average vertical position of the eyes
            final float eyeVavg = 0.5f * ((0.5f * (eyeRR.position.y + eyeRL.position.y))
                    + (0.5f * (eyeLR.position.y + eyeLL.position.y)));

            r.height = 1.28f * r.width;
            final float deltaY = eyeVavg - 0.4f * r.height;
            r.y = r.y + deltaY;

            // rescale the bounds about their centroid and shift the keypoints
            // into the coordinate frame of the final bounds
            float dx = r.x;
            float dy = r.y;
            r.scaleCentroid(patchScale);
            dx = dx - r.x;
            dy = dy - r.y;
            FacialKeypoint.updateImagePosition(kpts, TransformUtilities.translateMatrix(-deltaX + dx, -deltaY + dy));

            // final KEDetectedFace kedf = new KEDetectedFace(r,
            // df.getFacePatch(), kpts, df.getConfidence());
            // final Rectangle scr = r.clone();
            final KEDetectedFace kedf = new KEDetectedFace(r, image.extractROI(r), kpts, df.getConfidence());
            descriptors.add(kedf);
        }

        return descriptors;
    }

    @Override
    public int hashCode() {
        // accumulate the hash over the detector configuration
        int hashCode = HashCodeUtil.SEED;
        hashCode = HashCodeUtil.hash(hashCode, this.faceDetector);
        hashCode = HashCodeUtil.hash(hashCode, this.facialKeypointExtractor);
        hashCode = HashCodeUtil.hash(hashCode, this.patchScale);
        return hashCode;
    }

    @Override
    public void readBinary(DataInput in) throws IOException {
        faceDetector = IOUtils.newInstance(in.readUTF());
        faceDetector.readBinary(in);
        // note: the facialKeypointExtractor is not serialized
        this.patchScale = in.readFloat();
    }

    @Override
    public byte[] binaryHeader() {
        return "FKED".getBytes();
    }

    @Override
    public void writeBinary(DataOutput out) throws IOException {
        out.writeUTF(faceDetector.getClass().getName());
        faceDetector.writeBinary(out);
        // note: the facialKeypointExtractor is not serialized
        out.writeFloat(patchScale);
    }

    @Override
    public String toString() {
        return String.format("FKEFaceDetector[innerDetector=%s]", faceDetector);
    }
}