1 /** 2 * Copyright (c) 2011, The University of Southampton and the individual contributors. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without modification, 6 * are permitted provided that the following conditions are met: 7 * 8 * * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * * Neither the name of the University of Southampton nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 package org.openimaj.image.feature.dense.gradient.dsift; 31 32 import org.openimaj.image.FImage; 33 import org.openimaj.image.processing.convolution.FTriangleFilter; 34 35 /** 36 * Implementation of an approximate dense SIFT feature extractor. Extracts 37 * approximate upright SIFT features at a single scale on a grid. Implementation 38 * is approximate because instead of using an exact Gaussian weighting, samples 39 * are weighted using a flat windowing function for speed, and then after 40 * accumulation are re-weighted by the average of the Gaussian window over the 41 * spatial support of the sampling region. The end result is that the extracted 42 * features are similar to the exact dense SIFT implementation, but computation 43 * is much faster. 44 * <p> 45 * Implementation directly based on the 46 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat 47 * extractor</a>. 48 * <p> 49 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is 50 * safe to reuse the analyser. In multi-threaded environments, a separate 51 * instance must be made for each thread. Internally, this implementation 52 * allocates memory for the gradient images, and if possible re-uses these 53 * between calls. Re-use requires that the input image is the same size between 54 * calls to the analyser. 55 * 56 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage" 57 * 58 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 59 * 60 */ 61 public class ApproximateDenseSIFT extends DenseSIFT { 62 /** 63 * Construct with the default configuration: standard SIFT geometry (4x4x8), 64 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and 65 * value threshold of 0.2. 66 */ 67 public ApproximateDenseSIFT() { 68 super(); 69 } 70 71 /** 72 * Construct with the given step size (for both x and y) and binSize. All 73 * other values are the defaults. 74 * 75 * @param step 76 * the step size 77 * @param binSize 78 * the spatial bin size 79 */ 80 public ApproximateDenseSIFT(int step, int binSize) { 81 super(step, binSize); 82 } 83 84 /** 85 * Construct with the given configuration. The gaussian window size is set 86 * to 2, and value threshold to 0.2. 87 * 88 * @param stepX 89 * step size in x direction 90 * @param stepY 91 * step size in y direction 92 * @param binWidth 93 * width of spatial bins 94 * @param binHeight 95 * height of spatial bins 96 * @param numBinsX 97 * number of bins in x direction for each descriptor 98 * @param numBinsY 99 * number of bins in y direction for each descriptor 100 * @param numOriBins 101 * number of orientation bins for each descriptor 102 */ 103 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 104 int numOriBins) 105 { 106 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins); 107 } 108 109 /** 110 * Construct with the given configuration. The value threshold is set to 111 * 0.2. 112 * 113 * @param stepX 114 * step size in x direction 115 * @param stepY 116 * step size in y direction 117 * @param binWidth 118 * width of spatial bins 119 * @param binHeight 120 * height of spatial bins 121 * @param numBinsX 122 * number of bins in x direction for each descriptor 123 * @param numBinsY 124 * number of bins in y direction for each descriptor 125 * @param numOriBins 126 * number of orientation bins for each descriptor 127 * @param gaussianWindowSize 128 * the size of the gaussian weighting window 129 */ 130 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 131 int numOriBins, 132 float gaussianWindowSize) 133 { 134 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize); 135 } 136 137 /** 138 * Construct with the given configuration. 139 * 140 * @param stepX 141 * step size in x direction 142 * @param stepY 143 * step size in y direction 144 * @param binWidth 145 * width of spatial bins 146 * @param binHeight 147 * height of spatial bins 148 * @param numBinsX 149 * number of bins in x direction for each descriptor 150 * @param numBinsY 151 * number of bins in y direction for each descriptor 152 * @param numOriBins 153 * number of orientation bins for each descriptor 154 * @param gaussianWindowSize 155 * the size of the gaussian weighting window 156 * @param valueThreshold 157 * the threshold for clipping features 158 */ 159 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 160 int numOriBins, 161 float gaussianWindowSize, float valueThreshold) 162 { 163 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold); 164 } 165 166 private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) { 167 final float delta = binSize * (binIndex - 0.5F * (numBins - 1)); 168 /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */ 169 final float sigma = binSize * (float) windowSize; 170 int x; 171 172 float acc = 0.0f; 173 for (x = -binSize + 1; x <= +binSize - 1; ++x) { 174 final float z = (x - delta) / sigma; 175 acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F); 176 } 177 return acc /= (2 * binSize - 1); 178 } 179 180 @Override 181 protected void extractFeatures() { 182 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 183 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 184 185 for (int bint = 0; bint < numOriBins; bint++) { 186 final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight)); 187 final float[][] src = conv.pixels; 188 189 for (int biny = 0; biny < numBinsY; biny++) { 190 191 // This approximate version of DSIFT does not use a proper 192 // Gaussian weighting scheme for the gradients that are 193 // accumulated on the spatial bins. Instead each spatial bins is 194 // accumulated based on the triangular kernel only, equivalent 195 // to bilinear interpolation plus a flat, rather than Gaussian, 196 // window. Eventually, however, the magnitude of the spatial 197 // bins in the SIFT descriptor is reweighted by the average of 198 // the Gaussian window on each bin. 199 float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize); 200 201 // The triangular convolution functions convolve by a triangular 202 // kernel with unit integral; instead for SIFT the triangular 203 // kernel should have unit height. This is compensated for by 204 // multiplying by the bin size: 205 wy *= binHeight; 206 207 for (int binx = 0; binx < numBinsX; ++binx) { 208 float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize); 209 wx *= binWidth; 210 final float w = wx * wy; 211 212 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins); 213 int descriptorIndex = 0; 214 215 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 216 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) { 217 descriptors[descriptorIndex][descriptorOffset] = w 218 * src[framey + biny * binHeight][framex + binx * binWidth]; 219 descriptorIndex++; 220 } 221 } 222 } 223 } 224 } 225 } 226 227 @Override 228 public ApproximateDenseSIFT clone() { 229 return (ApproximateDenseSIFT) super.clone(); 230 } 231 }