001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.dense.gradient.dsift;
031
032import org.openimaj.feature.local.list.LocalFeatureList;
033import org.openimaj.feature.local.list.MemoryLocalFeatureList;
034import org.openimaj.image.FImage;
035import org.openimaj.image.processing.convolution.FImageConvolveSeparable;
036import org.openimaj.image.processing.convolution.FImageGradients;
037import org.openimaj.math.geometry.shape.Rectangle;
038import org.openimaj.util.array.ArrayUtils;
039
040/**
041 * Implementation of a dense SIFT feature extractor for {@link FImage}s.
042 * Extracts upright SIFT features at a single scale on a grid.
043 * <p>
044 * Implementation inspired by the
045 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat
046 * extractor</a>.
047 * <p>
048 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
049 * safe to reuse the analyser. In multi-threaded environments, a separate
050 * instance must be made for each thread. Internally, this implementation
051 * allocates memory for the gradient images, and if possible re-uses these
052 * between calls. Re-use requires that the input image is the same size between
053 * calls to the analyser.
054 *
055 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
056 *
057 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
058 *
059 */
060public class DenseSIFT extends AbstractDenseSIFT<FImage> {
061        static class WorkingData {
062                /**
063                 * minimum X bound for sampling the descriptors (inclusive)
064                 */
065                protected int boundMinX;
066
067                /**
068                 * maximum X bound for sampling the descriptors (inclusive)
069                 */
070                protected int boundMaxX;
071
072                /**
073                 * minimum Y bound for sampling the descriptors (inclusive)
074                 */
075                protected int boundMinY;
076
077                /**
078                 * maximum X bound for sampling the descriptors (inclusive)
079                 */
080                protected int boundMaxY;
081
082                /**
083                 * Quantised orientation/gradient magnitude data. Each element
084                 * corresponds to an angular bin of the orientations, and the individual
085                 * images correspond to the gradient magnitudes for the respective
086                 * orientations.
087                 */
088                protected FImage[] gradientMagnitudes;
089
090                /**
091                 * Setup the required space for holding gradient maps and descriptors
092                 *
093                 * @param image
094                 *            the image being analysed
095                 */
096                protected void setupWorkingSpace(FImage image, DenseSIFT dsift) {
097                        if (gradientMagnitudes == null) {
098                                gradientMagnitudes = new FImage[dsift.numOriBins];
099                        }
100
101                        if (gradientMagnitudes[0] == null || gradientMagnitudes[0].width != image.width
102                                        || gradientMagnitudes[0].height != image.height)
103                        {
104                                for (int i = 0; i < dsift.numOriBins; i++)
105                                        gradientMagnitudes[i] = new FImage(image.width, image.height);
106                        }
107
108                        final int rangeX = boundMaxX - boundMinX - (dsift.numBinsX - 1) * dsift.binWidth;
109                        final int rangeY = boundMaxY - boundMinY - (dsift.numBinsY - 1) * dsift.binHeight;
110
111                        final int numWindowsX = (rangeX >= 0) ? rangeX / dsift.stepX + 1 : 0;
112                        final int numWindowsY = (rangeY >= 0) ? rangeY / dsift.stepY + 1 : 0;
113
114                        final int numFeatures = numWindowsX * numWindowsY;
115
116                        dsift.descriptors = new float[numFeatures][dsift.numOriBins * dsift.numBinsX * dsift.numBinsY];
117                        dsift.energies = new float[numFeatures];
118                }
119        }
120
121        /**
122         * Step size of sampling window in x-direction (in pixels)
123         */
124        protected int stepX = 5;
125
126        /**
127         * Step size of sampling window in y-direction (in pixels)
128         */
129        protected int stepY = 5;
130
131        /**
132         * Width of a single bin of the sampling window (in pixels). Sampling window
133         * width is this multiplied by #numBinX.
134         */
135        protected int binWidth = 5;
136
137        /**
138         * Height of a single bin of the sampling window (in pixels). Sampling
139         * window height is this multiplied by #numBinY.
140         */
141        protected int binHeight = 5;
142
143        /**
144         * Number of spatial bins in the X direction
145         */
146        protected int numBinsX = 4;
147
148        /**
149         * Number of spatial bins in the Y direction
150         */
151        protected int numBinsY = 4;
152
153        /** The number of orientation bins */
154        protected int numOriBins = 8;
155
156        /**
157         * Size of the Gaussian window (in relative to of the size of a bin)
158         */
159        protected float gaussianWindowSize = 2f;
160
161        /**
162         * Threshold for clipping the SIFT features
163         */
164        protected float valueThreshold = 0.2f;
165
166        protected volatile WorkingData data = new WorkingData();
167
168        /**
169         * Extracted descriptors
170         */
171        protected volatile float[][] descriptors;
172
173        /**
174         * Descriptor energies
175         */
176        protected volatile float[] energies;
177
178        /**
179         * Construct with the default configuration: standard SIFT geometry (4x4x8),
180         * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
181         * value threshold of 0.2.
182         */
183        public DenseSIFT() {
184        }
185
186        /**
187         * Construct with the given step size (for both x and y) and binSize. All
188         * other values are the defaults.
189         *
190         * @param step
191         *            the step size
192         * @param binSize
193         *            the spatial bin size
194         */
195        public DenseSIFT(int step, int binSize) {
196                this.binWidth = binSize;
197                this.binHeight = binSize;
198                this.stepX = step;
199                this.stepY = step;
200        }
201
202        /**
203         * Construct with the given configuration. The gaussian window size is set
204         * to 2, and value threshold to 0.2.
205         *
206         * @param stepX
207         *            step size in x direction
208         * @param stepY
209         *            step size in y direction
210         * @param binWidth
211         *            width of spatial bins
212         * @param binHeight
213         *            height of spatial bins
214         * @param numBinsX
215         *            number of bins in x direction for each descriptor
216         * @param numBinsY
217         *            number of bins in y direction for each descriptor
218         * @param numOriBins
219         *            number of orientation bins for each descriptor
220         */
221        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins) {
222                this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, 2f);
223        }
224
225        /**
226         * Construct with the given configuration. The value threshold is set to
227         * 0.2.
228         *
229         * @param stepX
230         *            step size in x direction
231         * @param stepY
232         *            step size in y direction
233         * @param binWidth
234         *            width of spatial bins
235         * @param binHeight
236         *            height of spatial bins
237         * @param numBinsX
238         *            number of bins in x direction for each descriptor
239         * @param numBinsY
240         *            number of bins in y direction for each descriptor
241         * @param numOriBins
242         *            number of orientation bins for each descriptor
243         * @param gaussianWindowSize
244         *            the size of the gaussian weighting window
245         */
246        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
247                        float gaussianWindowSize)
248        {
249                this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, 0.2f);
250        }
251
252        /**
253         * Construct with the given configuration.
254         *
255         * @param stepX
256         *            step size in x direction
257         * @param stepY
258         *            step size in y direction
259         * @param binWidth
260         *            width of spatial bins
261         * @param binHeight
262         *            height of spatial bins
263         * @param numBinsX
264         *            number of bins in x direction for each descriptor
265         * @param numBinsY
266         *            number of bins in y direction for each descriptor
267         * @param numOriBins
268         *            number of orientation bins for each descriptor
269         * @param gaussianWindowSize
270         *            the size of the gaussian weighting window
271         * @param valueThreshold
272         *            the threshold for clipping features
273         */
274        public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
275                        float gaussianWindowSize, float valueThreshold)
276        {
277                this.binWidth = binWidth;
278                this.binHeight = binHeight;
279                this.stepX = stepX;
280                this.stepY = stepY;
281                this.numBinsX = numBinsX;
282                this.numBinsY = numBinsY;
283                this.numOriBins = numOriBins;
284                this.gaussianWindowSize = gaussianWindowSize;
285                this.valueThreshold = valueThreshold;
286        }
287
288        private float[] buildKernel(int binSize, int numBins, int binIndex, float windowSize) {
289                final int kernelSize = 2 * binSize - 1;
290                final float[] kernel = new float[kernelSize];
291                final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
292
293                final float sigma = binSize * windowSize;
294
295                for (int x = -binSize + 1, i = 0; x <= +binSize - 1; x++, i++) {
296                        final float z = (x - delta) / sigma;
297
298                        kernel[i] = (1.0F - Math.abs(x) / binSize) * ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
299                }
300
301                return kernel;
302        }
303
304        /**
305         * Extract the DSIFT features
306         */
307        protected void extractFeatures() {
308                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
309                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
310
311                for (int biny = 0; biny < numBinsY; biny++) {
312                        final float[] yker = buildKernel(binHeight, numBinsY, biny, gaussianWindowSize);
313
314                        for (int binx = 0; binx < numBinsX; binx++) {
315                                final float[] xker = buildKernel(binWidth, numBinsX, binx, gaussianWindowSize);
316
317                                for (int bint = 0; bint < numOriBins; bint++) {
318                                        final FImage conv = data.gradientMagnitudes[bint].process(new FImageConvolveSeparable(xker, yker));
319                                        final float[][] src = conv.pixels;
320
321                                        final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
322                                        int descriptorIndex = 0;
323
324                                        for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
325                                                for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
326                                                        descriptors[descriptorIndex][descriptorOffset] = src[framey + biny * binHeight][framex
327                                                                        + binx * binWidth];
328                                                        descriptorIndex++;
329                                                }
330                                        }
331                                }
332                        }
333                }
334        }
335
336        @Override
337        public void analyseImage(FImage image, Rectangle bounds) {
338                if (data == null)
339                        data = new WorkingData();
340
341                data.boundMinX = (int) bounds.x;
342                data.boundMaxX = (int) (bounds.width - 1);
343                data.boundMinY = (int) bounds.y;
344                data.boundMaxY = (int) (bounds.height - 1);
345
346                data.setupWorkingSpace(image, this);
347
348                FImageGradients.gradientMagnitudesAndQuantisedOrientations(image, data.gradientMagnitudes);
349
350                extractFeatures();
351
352                normaliseDescriptors();
353        }
354
355        private void normaliseDescriptors() {
356                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
357                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
358                final float energyNorm = frameSizeX * frameSizeY;
359
360                for (int j = 0; j < descriptors.length; j++) {
361                        final float[] arr = descriptors[j];
362
363                        energies[j] = ArrayUtils.sumValues(arr) / energyNorm;
364
365                        ArrayUtils.normalise(arr);
366
367                        boolean changed = false;
368                        for (int i = 0; i < arr.length; i++) {
369                                if (arr[i] > valueThreshold) {
370                                        arr[i] = valueThreshold;
371                                        changed = true;
372                                }
373                        }
374
375                        if (changed)
376                                ArrayUtils.normalise(arr);
377                }
378        }
379
380        @Override
381        public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints() {
382                final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
383                                * numBinsX * numBinsY, descriptors.length);
384
385                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
386                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
387
388                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
389                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
390
391                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
392                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
393                                keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
394                                                energies[i]));
395                        }
396                }
397
398                return keys;
399        }
400
401        @Override
402        public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints() {
403                final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
404                                * numBinsX * numBinsY, descriptors.length);
405
406                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
407                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
408
409                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
410                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
411
412                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
413                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
414                                keys.add(
415                                                new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i]));
416                        }
417                }
418
419                return keys;
420        }
421
422        @Override
423        public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints(float energyThreshold) {
424                final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
425                                * numBinsX * numBinsY);
426
427                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
428                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
429
430                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
431                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
432
433                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
434                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
435                                if (energies[i] >= energyThreshold)
436                                        keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
437                                                        energies[i]));
438                        }
439                }
440
441                return keys;
442        }
443
444        @Override
445        public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints(float energyThreshold) {
446                final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
447                                * numBinsX * numBinsY);
448
449                final int frameSizeX = binWidth * (numBinsX - 1) + 1;
450                final int frameSizeY = binHeight * (numBinsY - 1) + 1;
451
452                final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
453                final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
454
455                for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
456                        for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
457                                if (energies[i] >= energyThreshold)
458                                        keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
459                                                        energies[i]));
460                        }
461                }
462
463                return keys;
464        }
465
466        /**
467         * Get the computed raw dense SIFT descriptors from the previous call to
468         * {@link #analyseImage(FImage)} or {@link #analyseImage(FImage, Rectangle)}
469         * .
470         *
471         * @return the descriptors.
472         */
473        @Override
474        public float[][] getDescriptors() {
475                return descriptors;
476        }
477
478        @Override
479        public DenseSIFT clone() {
480                final DenseSIFT clone = (DenseSIFT) super.clone();
481
482                clone.descriptors = null;
483                clone.energies = null;
484                clone.data = null;
485
486                return clone;
487        }
488
489        @Override
490        public void setBinWidth(int size) {
491                this.binWidth = size;
492        }
493
494        @Override
495        public void setBinHeight(int size) {
496                this.binHeight = size;
497        }
498
499        @Override
500        public int getBinWidth() {
501                return binWidth;
502        }
503
504        @Override
505        public int getBinHeight() {
506                return binHeight;
507        }
508
509        @Override
510        public int getNumBinsX() {
511                return numBinsX;
512        }
513
514        @Override
515        public int getNumBinsY() {
516                return numBinsY;
517        }
518
519        @Override
520        public int getNumOriBins() {
521                return numOriBins;
522        }
523}