/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the name of the University of Southampton nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.processing.face.detection.keypoints;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.image.FImage;
import org.openimaj.image.analysis.pyramid.SimplePyramid;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.processing.face.detection.DetectedFace;
import org.openimaj.image.processing.face.detection.FaceDetector;
import org.openimaj.image.processing.face.detection.HaarCascadeDetector;
import org.openimaj.image.processing.transform.ProjectionProcessor;
import org.openimaj.io.IOUtils;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.math.geometry.transforms.TransformUtilities;
import org.openimaj.util.hash.HashCodeUtil;

import Jama.Matrix;
import Jama.SingularValueDecomposition;

/**
 * F(rontal)K(eypoint)E(nriched)FaceDetector uses an underlying face detector to
 * detect frontal faces in an image, and then looks for facial keypoints within
 * the detections.
 * <p>
 * Implementation and data are based on Mark Everingham's <a
 * href="http://www.robots.ox.ac.uk/~vgg/research/nface/">Oxford VGG Baseline
 * Face Processing Code</a>
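 * <p>
 * A minimal usage sketch (illustrative only; assumes an {@link FImage} has
 * already been loaded elsewhere):
 *
 * <pre>
 * FKEFaceDetector detector = new FKEFaceDetector(80);
 * List&lt;KEDetectedFace&gt; faces = detector.detectFaces(image);
 * </pre>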
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 */
@Reference(
        type = ReferenceType.Inproceedings,
        author = { "Mark Everingham", "Josef Sivic", "Andrew Zisserman" },
        title = "Hello! My name is... Buffy - Automatic naming of characters in TV video",
        year = "2006",
        booktitle = "In BMVC")
public class FKEFaceDetector implements FaceDetector<KEDetectedFace, FImage> {
    protected FaceDetector<? extends DetectedFace, FImage> faceDetector;
    protected FacialKeypointExtractor facialKeypointExtractor = new FacialKeypointExtractor();
    private float patchScale = 1.0f;

    /**
     * Default constructor. Uses the standard {@link HaarCascadeDetector} with a
     * minimum search size of 80 pixels.
     */
    public FKEFaceDetector() {
        this(new HaarCascadeDetector(80));
    }

    /**
     * Construct with a standard {@link HaarCascadeDetector} and the given
     * minimum search size.
     *
     * @param size
     *            minimum detection size.
     */
    public FKEFaceDetector(int size) {
        this(new HaarCascadeDetector(size));
    }

    /**
     * Construct with the standard {@link HaarCascadeDetector} with a minimum
     * search size of 80 pixels, and the given scale-factor for extracting the
     * face patch.
     *
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(float patchScale) {
        this(new HaarCascadeDetector(80), patchScale);
    }

    /**
     * Construct with a standard {@link HaarCascadeDetector} with the given
     * minimum search size, and the given scale-factor for extracting the face
     * patch.
     *
     * @param size
     *            minimum detection size.
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(int size, float patchScale) {
        this(new HaarCascadeDetector(size), patchScale);
    }

    /**
     * Construct with the given underlying (frontal) face detector.
     *
     * @param detector
     *            the face detector.
     */
    public FKEFaceDetector(FaceDetector<? extends DetectedFace, FImage> detector) {
        this.faceDetector = detector;
    }

    /**
     * Construct with the given underlying (frontal) face detector, and the
     * given scale-factor for extracting the face patch.
     *
     * @param detector
     *            the face detector.
     * @param patchScale
     *            the scale of the patch compared to the patch extracted by the
     *            internal detector.
     */
    public FKEFaceDetector(FaceDetector<? extends DetectedFace, FImage> detector, float patchScale) {
        this.faceDetector = detector;
        this.patchScale = patchScale;
    }

    /**
     * Resize the image using a pyramid.
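     * <p>
     * The pyramid level is chosen from the average singular value s of the
     * upper-left 2x2 block of the transform as lev = max(floor(log(s) / log(1.5)), 0) + 1,
     * and the transform is rescaled so that it maps into the subsampled image.
     * As an illustrative example, s = 4 gives lev = 4, so the image is reduced
     * by a factor of 1.5^3 = 3.375 and the transform retains the remaining
     * scale of about 4 / 3.375 = 1.19.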
     *
     * @param image
     *            the image
     * @param transform
     *            the resize transform
     * @return the resized image
     */
    public static FImage pyramidResize(FImage image, Matrix transform) {
        // estimate the scale change
        final SingularValueDecomposition svd = transform.getMatrix(0, 1, 0, 1).svd();
        final double sv[] = svd.getSingularValues();
        final double scale = ((sv[0] + sv[1]) / 2);

        // calculate the pyramid level
        final int lev = (int) (Math.max(Math.floor(Math.log(scale) / Math.log(1.5)), 0) + 1);
        final double pyramidScale = Math.pow(1.5, (lev - 1));

        // setup the new transformed transform matrix
        final Matrix scaleMatrix = TransformUtilities.scaleMatrix(1 / pyramidScale, 1 / pyramidScale);
        final Matrix newTransform = scaleMatrix.times(transform);
        transform.setMatrix(0, 2, 0, 2, newTransform);

        return image.process(new SimplePyramid<FImage>(1.5f, lev));
    }

    /**
     * Extract a patch from the image based on the parameters.
     *
     * @param image
     *            the image
     * @param transform
     *            the transform
     * @param size
     *            the patch size
     * @param border
     *            the size of the border
     * @return the patch
     */
    public static FImage extractPatch(FImage image, Matrix transform, int size, int border) {
        final ProjectionProcessor<Float, FImage> pp = new ProjectionProcessor<Float, FImage>();

        pp.setMatrix(transform.inverse());
        image.accumulateWith(pp);

        return pp.performProjection(border, size - border, border, size - border, RGBColour.BLACK[0]);
    }
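    /*
     * Note on detectFaces below (summary, not a specification): each detection
     * is mapped to the keypoint extractor's canonical patch by a similarity
     * transform with scale (r.width / 2) / ((canonicalSize / 2) - border),
     * centred on the detection. Keypoints found in the canonical patch are
     * then mapped back into image coordinates, and the bounding box is
     * re-estimated from the recovered eye positions.
     */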
    @Override
    public List<KEDetectedFace> detectFaces(FImage image) {
        final List<? extends DetectedFace> faces = faceDetector.detectFaces(image);

        final List<KEDetectedFace> descriptors = new ArrayList<KEDetectedFace>(faces.size());
        for (final DetectedFace df : faces) {
            final int canonicalSize = facialKeypointExtractor.getCanonicalImageDimension();
            final Rectangle r = df.getBounds();

            // calculate a scaled version of the image and extract a patch of
            // canonicalSize
            final float scale = (r.width / 2) / ((canonicalSize / 2) - facialKeypointExtractor.model.border);
            float tx = (r.x + (r.width / 2)) - scale * canonicalSize / 2;
            float ty = (r.y + (r.height / 2)) - scale * canonicalSize / 2;

            final Matrix T0 = new Matrix(new double[][] { { scale, 0, tx }, { 0, scale, ty }, { 0, 0, 1 } });
            final Matrix T = (Matrix) T0.clone();

            final FImage subsampled = pyramidResize(image, T);
            final FImage smallpatch = extractPatch(subsampled, T, canonicalSize, 0);

            // extract the keypoints
            final FacialKeypoint[] kpts = facialKeypointExtractor.extractFacialKeypoints(smallpatch);

            // calculate the transform to take the canonical coordinates to the
            // roi coordinates
            tx = (r.width / 2) - scale * canonicalSize / 2;
            ty = (r.height / 2) - scale * canonicalSize / 2;
            final Matrix T1 = new Matrix(new double[][] { { scale, 0, tx }, { 0, scale, ty }, { 0, 0, 1 } });
            FacialKeypoint.updateImagePosition(kpts, T1);

            // recompute the bounding box based on the positions of the facial
            // keypoints
            final FacialKeypoint eyeLL = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_LEFT_LEFT);
            final FacialKeypoint eyeRR = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_RIGHT_RIGHT);
            final FacialKeypoint eyeLR = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_LEFT_RIGHT);
            final FacialKeypoint eyeRL = FacialKeypoint.getKeypoint(kpts,
                    FacialKeypoint.FacialKeypointType.EYE_RIGHT_LEFT);

            // horizontal distance between the two eye centres
            final float eyeSpace = (0.5f * (eyeRR.position.x + eyeRL.position.x))
                    - (0.5f * (eyeLR.position.x + eyeLL.position.x));
            final float deltaX = (0.5f * (eyeLR.position.x + eyeLL.position.x)) - eyeSpace;
            r.x = r.x + deltaX;
            r.width = eyeSpace * 3;

            // average vertical position of the eyes
            final float eyeVavg = 0.5f * ((0.5f * (eyeRR.position.y + eyeRL.position.y))
                    + (0.5f * (eyeLR.position.y + eyeLL.position.y)));

            r.height = 1.28f * r.width;
            final float deltaY = eyeVavg - 0.4f * r.height;
            r.y = r.y + deltaY;

            // rescale the bounds about their centroid and shift the keypoints
            // into the coordinate frame of the final bounds
            float dx = r.x;
            float dy = r.y;
            r.scaleCentroid(patchScale);
            dx = dx - r.x;
            dy = dy - r.y;
            FacialKeypoint.updateImagePosition(kpts, TransformUtilities.translateMatrix(-deltaX + dx, -deltaY + dy));

            // final KEDetectedFace kedf = new KEDetectedFace(r,
            // df.getFacePatch(), kpts, df.getConfidence());
            // final Rectangle scr = r.clone();
            final KEDetectedFace kedf = new KEDetectedFace(r, image.extractROI(r), kpts, df.getConfidence());
            descriptors.add(kedf);
        }

        return descriptors;
    }

    @Override
    public int hashCode() {
        // accumulate the hash over the detector configuration
        int hashCode = HashCodeUtil.SEED;
        hashCode = HashCodeUtil.hash(hashCode, this.faceDetector);
        hashCode = HashCodeUtil.hash(hashCode, this.facialKeypointExtractor);
        hashCode = HashCodeUtil.hash(hashCode, this.patchScale);
        return hashCode;
    }

    @Override
    public void readBinary(DataInput in) throws IOException {
        faceDetector = IOUtils.newInstance(in.readUTF());
        faceDetector.readBinary(in);
        // note: the facialKeypointExtractor is not serialized
        this.patchScale = in.readFloat();
    }

    @Override
    public byte[] binaryHeader() {
        return "FKED".getBytes();
    }

    @Override
    public void writeBinary(DataOutput out) throws IOException {
        out.writeUTF(faceDetector.getClass().getName());
        faceDetector.writeBinary(out);
        // note: the facialKeypointExtractor is not serialized
        out.writeFloat(patchScale);
    }

    @Override
    public String toString() {
        return String.format("FKEFaceDetector[innerDetector=%s]", faceDetector);
    }
}