001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.dense.gradient.dsift; 031 032import org.openimaj.image.FImage; 033import org.openimaj.image.processing.convolution.FTriangleFilter; 034 035/** 036 * Implementation of an approximate dense SIFT feature extractor. Extracts 037 * approximate upright SIFT features at a single scale on a grid. Implementation 038 * is approximate because instead of using an exact Gaussian weighting, samples 039 * are weighted using a flat windowing function for speed, and then after 040 * accumulation are re-weighted by the average of the Gaussian window over the 041 * spatial support of the sampling region. The end result is that the extracted 042 * features are similar to the exact dense SIFT implementation, but computation 043 * is much faster. 044 * <p> 045 * Implementation directly based on the 046 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat 047 * extractor</a>. 048 * <p> 049 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is 050 * safe to reuse the analyser. In multi-threaded environments, a separate 051 * instance must be made for each thread. Internally, this implementation 052 * allocates memory for the gradient images, and if possible re-uses these 053 * between calls. Re-use requires that the input image is the same size between 054 * calls to the analyser. 055 * 056 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage" 057 * 058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 059 * 060 */ 061public class ApproximateDenseSIFT extends DenseSIFT { 062 /** 063 * Construct with the default configuration: standard SIFT geometry (4x4x8), 064 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and 065 * value threshold of 0.2. 066 */ 067 public ApproximateDenseSIFT() { 068 super(); 069 } 070 071 /** 072 * Construct with the given step size (for both x and y) and binSize. All 073 * other values are the defaults. 074 * 075 * @param step 076 * the step size 077 * @param binSize 078 * the spatial bin size 079 */ 080 public ApproximateDenseSIFT(int step, int binSize) { 081 super(step, binSize); 082 } 083 084 /** 085 * Construct with the given configuration. The gaussian window size is set 086 * to 2, and value threshold to 0.2. 087 * 088 * @param stepX 089 * step size in x direction 090 * @param stepY 091 * step size in y direction 092 * @param binWidth 093 * width of spatial bins 094 * @param binHeight 095 * height of spatial bins 096 * @param numBinsX 097 * number of bins in x direction for each descriptor 098 * @param numBinsY 099 * number of bins in y direction for each descriptor 100 * @param numOriBins 101 * number of orientation bins for each descriptor 102 */ 103 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 104 int numOriBins) 105 { 106 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins); 107 } 108 109 /** 110 * Construct with the given configuration. The value threshold is set to 111 * 0.2. 112 * 113 * @param stepX 114 * step size in x direction 115 * @param stepY 116 * step size in y direction 117 * @param binWidth 118 * width of spatial bins 119 * @param binHeight 120 * height of spatial bins 121 * @param numBinsX 122 * number of bins in x direction for each descriptor 123 * @param numBinsY 124 * number of bins in y direction for each descriptor 125 * @param numOriBins 126 * number of orientation bins for each descriptor 127 * @param gaussianWindowSize 128 * the size of the gaussian weighting window 129 */ 130 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 131 int numOriBins, 132 float gaussianWindowSize) 133 { 134 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize); 135 } 136 137 /** 138 * Construct with the given configuration. 139 * 140 * @param stepX 141 * step size in x direction 142 * @param stepY 143 * step size in y direction 144 * @param binWidth 145 * width of spatial bins 146 * @param binHeight 147 * height of spatial bins 148 * @param numBinsX 149 * number of bins in x direction for each descriptor 150 * @param numBinsY 151 * number of bins in y direction for each descriptor 152 * @param numOriBins 153 * number of orientation bins for each descriptor 154 * @param gaussianWindowSize 155 * the size of the gaussian weighting window 156 * @param valueThreshold 157 * the threshold for clipping features 158 */ 159 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, 160 int numOriBins, 161 float gaussianWindowSize, float valueThreshold) 162 { 163 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold); 164 } 165 166 private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) { 167 final float delta = binSize * (binIndex - 0.5F * (numBins - 1)); 168 /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */ 169 final float sigma = binSize * (float) windowSize; 170 int x; 171 172 float acc = 0.0f; 173 for (x = -binSize + 1; x <= +binSize - 1; ++x) { 174 final float z = (x - delta) / sigma; 175 acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F); 176 } 177 return acc /= (2 * binSize - 1); 178 } 179 180 @Override 181 protected void extractFeatures() { 182 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 183 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 184 185 for (int bint = 0; bint < numOriBins; bint++) { 186 final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight)); 187 final float[][] src = conv.pixels; 188 189 for (int biny = 0; biny < numBinsY; biny++) { 190 191 // This approximate version of DSIFT does not use a proper 192 // Gaussian weighting scheme for the gradients that are 193 // accumulated on the spatial bins. Instead each spatial bins is 194 // accumulated based on the triangular kernel only, equivalent 195 // to bilinear interpolation plus a flat, rather than Gaussian, 196 // window. Eventually, however, the magnitude of the spatial 197 // bins in the SIFT descriptor is reweighted by the average of 198 // the Gaussian window on each bin. 199 float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize); 200 201 // The triangular convolution functions convolve by a triangular 202 // kernel with unit integral; instead for SIFT the triangular 203 // kernel should have unit height. This is compensated for by 204 // multiplying by the bin size: 205 wy *= binHeight; 206 207 for (int binx = 0; binx < numBinsX; ++binx) { 208 float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize); 209 wx *= binWidth; 210 final float w = wx * wy; 211 212 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins); 213 int descriptorIndex = 0; 214 215 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 216 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) { 217 descriptors[descriptorIndex][descriptorOffset] = w 218 * src[framey + biny * binHeight][framex + binx * binWidth]; 219 descriptorIndex++; 220 } 221 } 222 } 223 } 224 } 225 } 226 227 @Override 228 public ApproximateDenseSIFT clone() { 229 return (ApproximateDenseSIFT) super.clone(); 230 } 231}