/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   *  Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   *  Redistributions in binary form must reproduce the above copyright notice,
 *      this list of conditions and the following disclaimer in the documentation
 *      and/or other materials provided with the distribution.
 *
 *   *  Neither the name of the University of Southampton nor the names of its
 *      contributors may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.image.feature.dense.gradient;

import org.openimaj.citation.annotation.Reference;
import org.openimaj.citation.annotation.ReferenceType;
import org.openimaj.feature.DoubleFV;
import org.openimaj.feature.FeatureVectorProvider;
import org.openimaj.image.FImage;
import org.openimaj.image.analyser.ImageAnalyser;
import org.openimaj.image.analysis.algorithm.histogram.BinnedWindowedExtractor;
import org.openimaj.image.analysis.algorithm.histogram.GradientOrientationHistogramExtractor;
import org.openimaj.image.analysis.algorithm.histogram.InterpolatedBinnedWindowedExtractor;
import org.openimaj.image.analysis.algorithm.histogram.binning.QuadtreeStrategy;
import org.openimaj.image.pixel.sampling.QuadtreeSampler;
import org.openimaj.image.processing.convolution.FImageGradients;
import org.openimaj.image.processing.convolution.FImageGradients.Mode;
import org.openimaj.image.processing.edges.CannyEdgeDetector;
import org.openimaj.image.processor.ImageProcessor;
import org.openimaj.math.geometry.shape.Rectangle;
import org.openimaj.math.statistics.distribution.Histogram;

/**
 * This class is an implementation of an extractor for the PHOG (Pyramid
 * Histograms of Orientation Gradients) feature described by Bosch et al. The
 * PHOG feature is computed by creating a quadtree of orientation histograms
 * over the entire image and appending the histograms for each cell of the
 * quadtree into a single vector, which is then l1 normalised (sums to unity).
 * <p>
 * In the original description, only orientations at edge pixels were counted;
 * that restriction is optional in this implementation. If only edge pixels are
 * used, then the feature describes the distribution of <b>shape</b> in the
 * image. Conversely, if all pixels are used, the feature essentially describes
 * the texture of the image.
 * <p>
 * As this class will typically be used to construct only a single feature from
 * an image, it is built around a {@link BinnedWindowedExtractor} (or an
 * {@link InterpolatedBinnedWindowedExtractor} if interpolation is used). This
 * will be much more efficient than a
 * {@link GradientOrientationHistogramExtractor} in the single window case. If
 * you do need to extract many PHOG-like features from different rectangles of
 * the same image, use a {@link GradientOrientationHistogramExtractor} coupled
 * with a {@link QuadtreeStrategy} to achieve the desired effect.
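 * <p>
 * A minimal usage sketch (assuming <code>image</code> is an {@link FImage}
 * that has already been loaded; the variable names here are purely
 * illustrative) might look like:
 *
 * <pre>
 * final PHOG phog = new PHOG();
 * phog.analyseImage(image);
 * final Histogram feature = phog.getFeatureVector();
 * </pre>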
 *
 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
 *
 */
@Reference(
                type = ReferenceType.Inproceedings,
                author = { "Bosch, Anna", "Zisserman, Andrew", "Munoz, Xavier" },
                title = "Representing shape with a spatial pyramid kernel",
                year = "2007",
                booktitle = "Proceedings of the 6th ACM international conference on Image and video retrieval",
                pages = { "401", "", "408" },
                url = "http://doi.acm.org/10.1145/1282280.1282340",
                publisher = "ACM",
                series = "CIVR '07",
                customData = {
                                "isbn", "978-1-59593-733-9",
                                "location", "Amsterdam, The Netherlands",
                                "numpages", "8",
                                "doi", "10.1145/1282280.1282340",
                                "acmid", "1282340",
                                "address", "New York, NY, USA",
                                "keywords", "object and video retrieval, shape features, spatial pyramid kernel"
                })
public class PHOG implements ImageAnalyser<FImage>, FeatureVectorProvider<DoubleFV> {
        private int nlevels = 3;
        private ImageProcessor<FImage> edgeDetector;
        private Mode orientationMode;

        private BinnedWindowedExtractor histExtractor;
        private Rectangle lastBounds;
        private FImage magnitudes;

        /**
         * Construct with the values used in the paper: 4 levels (corresponds to l=3
         * in the paper), 40 orientation bins (interpolated), signed gradients
         * (called "shape360" in the original paper) and Canny edge detection.
         */
        public PHOG() {
                this(4, 40, FImageGradients.Mode.Signed);
        }

        /**
         * Construct with the given values, using Canny edge detection and gradient
         * histogram interpolation.
         *
         * @param nlevels
         *            number of pyramid levels (note that this count includes l0,
         *            so you may need one more than the paper's l value)
         * @param nbins
         *            number of bins
         * @param orientationMode
         *            the orientation mode
         */
        public PHOG(int nlevels, int nbins, FImageGradients.Mode orientationMode)
        {
                this(nlevels, nbins, true, orientationMode, new CannyEdgeDetector());
        }

        /**
         * Construct with the given parameters. The <code>edgeDetector</code>
         * parameter can be <code>null</code> if you don't want to filter out
         * non-edge pixels from the histograms.
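         * <p>
         * For example, a texture-style variant that counts every pixel rather
         * than just edge pixels could be built along these lines (the values of
         * the other parameters here are purely illustrative):
         *
         * <pre>
         * final PHOG texturePhog = new PHOG(4, 40, true, FImageGradients.Mode.Signed, null);
         * </pre>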
         *
         * @param nlevels
         *            number of pyramid levels (note that this count includes l0,
         *            so you may need one more than the paper's l value)
         * @param nbins
         *            number of bins
         * @param histogramInterpolation
         *            should the gradient orientations be interpolated?
         * @param orientationMode
         *            the orientation mode
         * @param edgeDetector
         *            the edge detector to use (may be <code>null</code> for
         *            gradient features)
         */
        public PHOG(int nlevels, int nbins, boolean histogramInterpolation, FImageGradients.Mode orientationMode,
                        ImageProcessor<FImage> edgeDetector)
        {
                this.nlevels = nlevels;
                this.edgeDetector = edgeDetector;
                this.orientationMode = orientationMode;

                if (histogramInterpolation)
                        histExtractor = new InterpolatedBinnedWindowedExtractor(nbins, true);
                else
                        histExtractor = new BinnedWindowedExtractor(nbins);

                histExtractor.setMax(orientationMode.maxAngle());
                histExtractor.setMin(orientationMode.minAngle());
        }

        @Override
        public void analyseImage(FImage image) {
                lastBounds = image.getBounds();

                // compute the per-pixel gradient magnitudes and orientations
                final FImageGradients gradMag = FImageGradients.getGradientMagnitudesAndOrientations(image, orientationMode);
                this.magnitudes = gradMag.magnitudes;

                histExtractor.analyseImage(gradMag.orientations);

                // if an edge detector was given, mask the magnitudes so that only
                // edge pixels contribute to the histograms
                if (edgeDetector != null) {
                        magnitudes.multiplyInplace(image.process(edgeDetector));
                }
        }

        /**
         * Extract the PHOG feature for the specified region of the image last
         * analysed with {@link #analyseImage(FImage)}.
         *
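         * For example, after calling {@link #analyseImage(FImage)} you can pull
         * out features for several windows of the same image (the rectangle
         * coordinates below are purely illustrative):
         *
         * <pre>
         * phog.analyseImage(image);
         * final Histogram window = phog.getFeatureVector(new Rectangle(0, 0, 100, 100));
         * </pre>
         *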
         * @param rect
         *            the region
         * @return the PHOG feature
         */
        public Histogram getFeatureVector(Rectangle rect) {
                final QuadtreeSampler sampler = new QuadtreeSampler(rect, nlevels + 1);
                Histogram hist = new Histogram(0);

                // append the histogram of every quadtree cell to the overall feature
                for (final Rectangle r : sampler) {
                        final Histogram h = histExtractor.computeHistogram(r, magnitudes);
                        hist = hist.combine(h);
                }

                // normalise so the final vector sums to unity
                hist.normaliseL1();

                return hist;
        }

        /**
         * Extract the PHOG feature for the whole of the image last analysed with
         * {@link #analyseImage(FImage)}.
         *
         * @return the PHOG feature
         *
         * @see org.openimaj.feature.FeatureVectorProvider#getFeatureVector()
         */
        @Override
        public Histogram getFeatureVector() {
                return getFeatureVector(lastBounds);
        }
}