001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.dense.gradient.dsift;
031
032import org.openimaj.image.FImage;
033import org.openimaj.image.processing.convolution.FTriangleFilter;
034
035/**
036 * Implementation of an approximate dense SIFT feature extractor. Extracts
037 * approximate upright SIFT features at a single scale on a grid. Implementation
038 * is approximate because instead of using an exact Gaussian weighting, samples
039 * are weighted using a flat windowing function for speed, and then after
040 * accumulation are re-weighted by the average of the Gaussian window over the
041 * spatial support of the sampling region. The end result is that the extracted
042 * features are similar to the exact dense SIFT implementation, but computation
043 * is much faster.
044 * <p>
045 * Implementation directly based on the
046 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat
047 * extractor</a>.
048 * <p>
049 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
050 * safe to reuse the analyser. In multi-threaded environments, a separate
051 * instance must be made for each thread. Internally, this implementation
052 * allocates memory for the gradient images, and if possible re-uses these
053 * between calls. Re-use requires that the input image is the same size between
054 * calls to the analyser.
055 *
056 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
057 *
058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
059 *
060 */
061public class ApproximateDenseSIFT extends DenseSIFT {
062        /**
063         * Construct with the default configuration: standard SIFT geometry (4x4x8),
064         * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
065         * value threshold of 0.2.
066         */
067        public ApproximateDenseSIFT() {
068                super();
069        }
070
071        /**
072         * Construct with the given step size (for both x and y) and binSize. All
073         * other values are the defaults.
074         *
075         * @param step
076         *            the step size
077         * @param binSize
078         *            the spatial bin size
079         */
080        public ApproximateDenseSIFT(int step, int binSize) {
081                super(step, binSize);
082        }
083
084        /**
085         * Construct with the given configuration. The gaussian window size is set
086         * to 2, and value threshold to 0.2.
087         *
088         * @param stepX
089         *            step size in x direction
090         * @param stepY
091         *            step size in y direction
092         * @param binWidth
093         *            width of spatial bins
094         * @param binHeight
095         *            height of spatial bins
096         * @param numBinsX
097         *            number of bins in x direction for each descriptor
098         * @param numBinsY
099         *            number of bins in y direction for each descriptor
100         * @param numOriBins
101         *            number of orientation bins for each descriptor
102         */
103        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
104                        int numOriBins)
105        {
106                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins);
107        }
108
109        /**
110         * Construct with the given configuration. The value threshold is set to
111         * 0.2.
112         *
113         * @param stepX
114         *            step size in x direction
115         * @param stepY
116         *            step size in y direction
117         * @param binWidth
118         *            width of spatial bins
119         * @param binHeight
120         *            height of spatial bins
121         * @param numBinsX
122         *            number of bins in x direction for each descriptor
123         * @param numBinsY
124         *            number of bins in y direction for each descriptor
125         * @param numOriBins
126         *            number of orientation bins for each descriptor
127         * @param gaussianWindowSize
128         *            the size of the gaussian weighting window
129         */
130        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
131                        int numOriBins,
132                        float gaussianWindowSize)
133        {
134                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize);
135        }
136
137        /**
138         * Construct with the given configuration.
139         *
140         * @param stepX
141         *            step size in x direction
142         * @param stepY
143         *            step size in y direction
144         * @param binWidth
145         *            width of spatial bins
146         * @param binHeight
147         *            height of spatial bins
148         * @param numBinsX
149         *            number of bins in x direction for each descriptor
150         * @param numBinsY
151         *            number of bins in y direction for each descriptor
152         * @param numOriBins
153         *            number of orientation bins for each descriptor
154         * @param gaussianWindowSize
155         *            the size of the gaussian weighting window
156         * @param valueThreshold
157         *            the threshold for clipping features
158         */
159        public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
160                        int numOriBins,
161                        float gaussianWindowSize, float valueThreshold)
162        {
163                super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold);
164        }
165
166        private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) {
167                final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
168                /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */
169                final float sigma = binSize * (float) windowSize;
170                int x;
171
172                float acc = 0.0f;
173                for (x = -binSize + 1; x <= +binSize - 1; ++x) {
174                        final float z = (x - delta) / sigma;
175                        acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
176                }
177                return acc /= (2 * binSize - 1);
178        }
179
180        @Override
181        protected void extractFeatures() {
182                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
183                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
184
185                for (int bint = 0; bint < numOriBins; bint++) {
186                        final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight));
187                        final float[][] src = conv.pixels;
188
189                        for (int biny = 0; biny < numBinsY; biny++) {
190
191                                // This approximate version of DSIFT does not use a proper
192                                // Gaussian weighting scheme for the gradients that are
193                                // accumulated on the spatial bins. Instead each spatial bins is
194                                // accumulated based on the triangular kernel only, equivalent
195                                // to bilinear interpolation plus a flat, rather than Gaussian,
196                                // window. Eventually, however, the magnitude of the spatial
197                                // bins in the SIFT descriptor is reweighted by the average of
198                                // the Gaussian window on each bin.
199                                float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize);
200
201                                // The triangular convolution functions convolve by a triangular
202                                // kernel with unit integral; instead for SIFT the triangular
203                                // kernel should have unit height. This is compensated for by
204                                // multiplying by the bin size:
205                                wy *= binHeight;
206
207                                for (int binx = 0; binx < numBinsX; ++binx) {
208                                        float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize);
209                                        wx *= binWidth;
210                                        final float w = wx * wy;
211
212                                        final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
213                                        int descriptorIndex = 0;
214
215                                        for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
216                                                for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
217                                                        descriptors[descriptorIndex][descriptorOffset] = w
218                                                                        * src[framey + biny * binHeight][framex + binx * binWidth];
219                                                        descriptorIndex++;
220                                                }
221                                        }
222                                }
223                        }
224                }
225        }
226
227        @Override
228        public ApproximateDenseSIFT clone() {
229                return (ApproximateDenseSIFT) super.clone();
230        }
231}