1 /**
2 * Copyright (c) 2011, The University of Southampton and the individual contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * * Neither the name of the University of Southampton nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 package org.openimaj.image.feature.dense.gradient.dsift;
31
32 import org.openimaj.image.FImage;
33 import org.openimaj.image.processing.convolution.FTriangleFilter;
34
35 /**
36 * Implementation of an approximate dense SIFT feature extractor. Extracts
37 * approximate upright SIFT features at a single scale on a grid. Implementation
38 * is approximate because instead of using an exact Gaussian weighting, samples
39 * are weighted using a flat windowing function for speed, and then after
40 * accumulation are re-weighted by the average of the Gaussian window over the
41 * spatial support of the sampling region. The end result is that the extracted
42 * features are similar to the exact dense SIFT implementation, but computation
43 * is much faster.
44 * <p>
45 * Implementation directly based on the
46 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat
47 * extractor</a>.
48 * <p>
49 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
50 * safe to reuse the analyser. In multi-threaded environments, a separate
51 * instance must be made for each thread. Internally, this implementation
52 * allocates memory for the gradient images, and if possible re-uses these
53 * between calls. Re-use requires that the input image is the same size between
54 * calls to the analyser.
55 *
56 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
57 *
58 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
59 *
60 */
61 public class ApproximateDenseSIFT extends DenseSIFT {
62 /**
63 * Construct with the default configuration: standard SIFT geometry (4x4x8),
64 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
65 * value threshold of 0.2.
66 */
67 public ApproximateDenseSIFT() {
68 super();
69 }
70
71 /**
72 * Construct with the given step size (for both x and y) and binSize. All
73 * other values are the defaults.
74 *
75 * @param step
76 * the step size
77 * @param binSize
78 * the spatial bin size
79 */
80 public ApproximateDenseSIFT(int step, int binSize) {
81 super(step, binSize);
82 }
83
84 /**
85 * Construct with the given configuration. The gaussian window size is set
86 * to 2, and value threshold to 0.2.
87 *
88 * @param stepX
89 * step size in x direction
90 * @param stepY
91 * step size in y direction
92 * @param binWidth
93 * width of spatial bins
94 * @param binHeight
95 * height of spatial bins
96 * @param numBinsX
97 * number of bins in x direction for each descriptor
98 * @param numBinsY
99 * number of bins in y direction for each descriptor
100 * @param numOriBins
101 * number of orientation bins for each descriptor
102 */
103 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
104 int numOriBins)
105 {
106 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins);
107 }
108
109 /**
110 * Construct with the given configuration. The value threshold is set to
111 * 0.2.
112 *
113 * @param stepX
114 * step size in x direction
115 * @param stepY
116 * step size in y direction
117 * @param binWidth
118 * width of spatial bins
119 * @param binHeight
120 * height of spatial bins
121 * @param numBinsX
122 * number of bins in x direction for each descriptor
123 * @param numBinsY
124 * number of bins in y direction for each descriptor
125 * @param numOriBins
126 * number of orientation bins for each descriptor
127 * @param gaussianWindowSize
128 * the size of the gaussian weighting window
129 */
130 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
131 int numOriBins,
132 float gaussianWindowSize)
133 {
134 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize);
135 }
136
137 /**
138 * Construct with the given configuration.
139 *
140 * @param stepX
141 * step size in x direction
142 * @param stepY
143 * step size in y direction
144 * @param binWidth
145 * width of spatial bins
146 * @param binHeight
147 * height of spatial bins
148 * @param numBinsX
149 * number of bins in x direction for each descriptor
150 * @param numBinsY
151 * number of bins in y direction for each descriptor
152 * @param numOriBins
153 * number of orientation bins for each descriptor
154 * @param gaussianWindowSize
155 * the size of the gaussian weighting window
156 * @param valueThreshold
157 * the threshold for clipping features
158 */
159 public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
160 int numOriBins,
161 float gaussianWindowSize, float valueThreshold)
162 {
163 super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold);
164 }
165
166 private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) {
167 final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
168 /* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */
169 final float sigma = binSize * (float) windowSize;
170 int x;
171
172 float acc = 0.0f;
173 for (x = -binSize + 1; x <= +binSize - 1; ++x) {
174 final float z = (x - delta) / sigma;
175 acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
176 }
177 return acc /= (2 * binSize - 1);
178 }
179
180 @Override
181 protected void extractFeatures() {
182 final int frameSizeX = binWidth * (numBinsX - 1) + 1;
183 final int frameSizeY = binHeight * (numBinsY - 1) + 1;
184
185 for (int bint = 0; bint < numOriBins; bint++) {
186 final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight));
187 final float[][] src = conv.pixels;
188
189 for (int biny = 0; biny < numBinsY; biny++) {
190
191 // This approximate version of DSIFT does not use a proper
192 // Gaussian weighting scheme for the gradients that are
193 // accumulated on the spatial bins. Instead each spatial bins is
194 // accumulated based on the triangular kernel only, equivalent
195 // to bilinear interpolation plus a flat, rather than Gaussian,
196 // window. Eventually, however, the magnitude of the spatial
197 // bins in the SIFT descriptor is reweighted by the average of
198 // the Gaussian window on each bin.
199 float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize);
200
201 // The triangular convolution functions convolve by a triangular
202 // kernel with unit integral; instead for SIFT the triangular
203 // kernel should have unit height. This is compensated for by
204 // multiplying by the bin size:
205 wy *= binHeight;
206
207 for (int binx = 0; binx < numBinsX; ++binx) {
208 float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize);
209 wx *= binWidth;
210 final float w = wx * wy;
211
212 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
213 int descriptorIndex = 0;
214
215 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
216 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
217 descriptors[descriptorIndex][descriptorOffset] = w
218 * src[framey + biny * binHeight][framex + binx * binWidth];
219 descriptorIndex++;
220 }
221 }
222 }
223 }
224 }
225 }
226
227 @Override
228 public ApproximateDenseSIFT clone() {
229 return (ApproximateDenseSIFT) super.clone();
230 }
231 }