View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.image.feature.dense.gradient.dsift;
31  
32  import org.openimaj.feature.local.list.LocalFeatureList;
33  import org.openimaj.feature.local.list.MemoryLocalFeatureList;
34  import org.openimaj.image.FImage;
35  import org.openimaj.image.processing.convolution.FImageConvolveSeparable;
36  import org.openimaj.image.processing.convolution.FImageGradients;
37  import org.openimaj.math.geometry.shape.Rectangle;
38  import org.openimaj.util.array.ArrayUtils;
39  
40  /**
41   * Implementation of a dense SIFT feature extractor for {@link FImage}s.
42   * Extracts upright SIFT features at a single scale on a grid.
43   * <p>
44   * Implementation inspired by the
45   * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat
46   * extractor</a>.
47   * <p>
48   * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
49   * safe to reuse the analyser. In multi-threaded environments, a separate
50   * instance must be made for each thread. Internally, this implementation
51   * allocates memory for the gradient images, and if possible re-uses these
52   * between calls. Re-use requires that the input image is the same size between
53   * calls to the analyser.
54   *
55   * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
56   *
57   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
58   *
59   */
60  public class DenseSIFT extends AbstractDenseSIFT<FImage> {
61  	static class WorkingData {
62  		/**
63  		 * minimum X bound for sampling the descriptors (inclusive)
64  		 */
65  		protected int boundMinX;
66  
67  		/**
68  		 * maximum X bound for sampling the descriptors (inclusive)
69  		 */
70  		protected int boundMaxX;
71  
72  		/**
73  		 * minimum Y bound for sampling the descriptors (inclusive)
74  		 */
75  		protected int boundMinY;
76  
77  		/**
78  		 * maximum X bound for sampling the descriptors (inclusive)
79  		 */
80  		protected int boundMaxY;
81  
82  		/**
83  		 * Quantised orientation/gradient magnitude data. Each element
84  		 * corresponds to an angular bin of the orientations, and the individual
85  		 * images correspond to the gradient magnitudes for the respective
86  		 * orientations.
87  		 */
88  		protected FImage[] gradientMagnitudes;
89  
90  		/**
91  		 * Setup the required space for holding gradient maps and descriptors
92  		 *
93  		 * @param image
94  		 *            the image being analysed
95  		 */
96  		protected void setupWorkingSpace(FImage image, DenseSIFT dsift) {
97  			if (gradientMagnitudes == null) {
98  				gradientMagnitudes = new FImage[dsift.numOriBins];
99  			}
100 
101 			if (gradientMagnitudes[0] == null || gradientMagnitudes[0].width != image.width
102 					|| gradientMagnitudes[0].height != image.height)
103 			{
104 				for (int i = 0; i < dsift.numOriBins; i++)
105 					gradientMagnitudes[i] = new FImage(image.width, image.height);
106 			}
107 
108 			final int rangeX = boundMaxX - boundMinX - (dsift.numBinsX - 1) * dsift.binWidth;
109 			final int rangeY = boundMaxY - boundMinY - (dsift.numBinsY - 1) * dsift.binHeight;
110 
111 			final int numWindowsX = (rangeX >= 0) ? rangeX / dsift.stepX + 1 : 0;
112 			final int numWindowsY = (rangeY >= 0) ? rangeY / dsift.stepY + 1 : 0;
113 
114 			final int numFeatures = numWindowsX * numWindowsY;
115 
116 			dsift.descriptors = new float[numFeatures][dsift.numOriBins * dsift.numBinsX * dsift.numBinsY];
117 			dsift.energies = new float[numFeatures];
118 		}
119 	}
120 
121 	/**
122 	 * Step size of sampling window in x-direction (in pixels)
123 	 */
124 	protected int stepX = 5;
125 
126 	/**
127 	 * Step size of sampling window in y-direction (in pixels)
128 	 */
129 	protected int stepY = 5;
130 
131 	/**
132 	 * Width of a single bin of the sampling window (in pixels). Sampling window
133 	 * width is this multiplied by #numBinX.
134 	 */
135 	protected int binWidth = 5;
136 
137 	/**
138 	 * Height of a single bin of the sampling window (in pixels). Sampling
139 	 * window height is this multiplied by #numBinY.
140 	 */
141 	protected int binHeight = 5;
142 
143 	/**
144 	 * Number of spatial bins in the X direction
145 	 */
146 	protected int numBinsX = 4;
147 
148 	/**
149 	 * Number of spatial bins in the Y direction
150 	 */
151 	protected int numBinsY = 4;
152 
153 	/** The number of orientation bins */
154 	protected int numOriBins = 8;
155 
156 	/**
157 	 * Size of the Gaussian window (in relative to of the size of a bin)
158 	 */
159 	protected float gaussianWindowSize = 2f;
160 
161 	/**
162 	 * Threshold for clipping the SIFT features
163 	 */
164 	protected float valueThreshold = 0.2f;
165 
166 	protected volatile WorkingData data = new WorkingData();
167 
168 	/**
169 	 * Extracted descriptors
170 	 */
171 	protected volatile float[][] descriptors;
172 
173 	/**
174 	 * Descriptor energies
175 	 */
176 	protected volatile float[] energies;
177 
178 	/**
179 	 * Construct with the default configuration: standard SIFT geometry (4x4x8),
180 	 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
181 	 * value threshold of 0.2.
182 	 */
183 	public DenseSIFT() {
184 	}
185 
186 	/**
187 	 * Construct with the given step size (for both x and y) and binSize. All
188 	 * other values are the defaults.
189 	 *
190 	 * @param step
191 	 *            the step size
192 	 * @param binSize
193 	 *            the spatial bin size
194 	 */
195 	public DenseSIFT(int step, int binSize) {
196 		this.binWidth = binSize;
197 		this.binHeight = binSize;
198 		this.stepX = step;
199 		this.stepY = step;
200 	}
201 
202 	/**
203 	 * Construct with the given configuration. The gaussian window size is set
204 	 * to 2, and value threshold to 0.2.
205 	 *
206 	 * @param stepX
207 	 *            step size in x direction
208 	 * @param stepY
209 	 *            step size in y direction
210 	 * @param binWidth
211 	 *            width of spatial bins
212 	 * @param binHeight
213 	 *            height of spatial bins
214 	 * @param numBinsX
215 	 *            number of bins in x direction for each descriptor
216 	 * @param numBinsY
217 	 *            number of bins in y direction for each descriptor
218 	 * @param numOriBins
219 	 *            number of orientation bins for each descriptor
220 	 */
221 	public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins) {
222 		this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, 2f);
223 	}
224 
225 	/**
226 	 * Construct with the given configuration. The value threshold is set to
227 	 * 0.2.
228 	 *
229 	 * @param stepX
230 	 *            step size in x direction
231 	 * @param stepY
232 	 *            step size in y direction
233 	 * @param binWidth
234 	 *            width of spatial bins
235 	 * @param binHeight
236 	 *            height of spatial bins
237 	 * @param numBinsX
238 	 *            number of bins in x direction for each descriptor
239 	 * @param numBinsY
240 	 *            number of bins in y direction for each descriptor
241 	 * @param numOriBins
242 	 *            number of orientation bins for each descriptor
243 	 * @param gaussianWindowSize
244 	 *            the size of the gaussian weighting window
245 	 */
246 	public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
247 			float gaussianWindowSize)
248 	{
249 		this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, 0.2f);
250 	}
251 
252 	/**
253 	 * Construct with the given configuration.
254 	 *
255 	 * @param stepX
256 	 *            step size in x direction
257 	 * @param stepY
258 	 *            step size in y direction
259 	 * @param binWidth
260 	 *            width of spatial bins
261 	 * @param binHeight
262 	 *            height of spatial bins
263 	 * @param numBinsX
264 	 *            number of bins in x direction for each descriptor
265 	 * @param numBinsY
266 	 *            number of bins in y direction for each descriptor
267 	 * @param numOriBins
268 	 *            number of orientation bins for each descriptor
269 	 * @param gaussianWindowSize
270 	 *            the size of the gaussian weighting window
271 	 * @param valueThreshold
272 	 *            the threshold for clipping features
273 	 */
274 	public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins,
275 			float gaussianWindowSize, float valueThreshold)
276 	{
277 		this.binWidth = binWidth;
278 		this.binHeight = binHeight;
279 		this.stepX = stepX;
280 		this.stepY = stepY;
281 		this.numBinsX = numBinsX;
282 		this.numBinsY = numBinsY;
283 		this.numOriBins = numOriBins;
284 		this.gaussianWindowSize = gaussianWindowSize;
285 		this.valueThreshold = valueThreshold;
286 	}
287 
288 	private float[] buildKernel(int binSize, int numBins, int binIndex, float windowSize) {
289 		final int kernelSize = 2 * binSize - 1;
290 		final float[] kernel = new float[kernelSize];
291 		final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
292 
293 		final float sigma = binSize * windowSize;
294 
295 		for (int x = -binSize + 1, i = 0; x <= +binSize - 1; x++, i++) {
296 			final float z = (x - delta) / sigma;
297 
298 			kernel[i] = (1.0F - Math.abs(x) / binSize) * ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
299 		}
300 
301 		return kernel;
302 	}
303 
304 	/**
305 	 * Extract the DSIFT features
306 	 */
307 	protected void extractFeatures() {
308 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
309 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
310 
311 		for (int biny = 0; biny < numBinsY; biny++) {
312 			final float[] yker = buildKernel(binHeight, numBinsY, biny, gaussianWindowSize);
313 
314 			for (int binx = 0; binx < numBinsX; binx++) {
315 				final float[] xker = buildKernel(binWidth, numBinsX, binx, gaussianWindowSize);
316 
317 				for (int bint = 0; bint < numOriBins; bint++) {
318 					final FImage conv = data.gradientMagnitudes[bint].process(new FImageConvolveSeparable(xker, yker));
319 					final float[][] src = conv.pixels;
320 
321 					final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
322 					int descriptorIndex = 0;
323 
324 					for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
325 						for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
326 							descriptors[descriptorIndex][descriptorOffset] = src[framey + biny * binHeight][framex
327 									+ binx * binWidth];
328 							descriptorIndex++;
329 						}
330 					}
331 				}
332 			}
333 		}
334 	}
335 
336 	@Override
337 	public void analyseImage(FImage image, Rectangle bounds) {
338 		if (data == null)
339 			data = new WorkingData();
340 
341 		data.boundMinX = (int) bounds.x;
342 		data.boundMaxX = (int) (bounds.width - 1);
343 		data.boundMinY = (int) bounds.y;
344 		data.boundMaxY = (int) (bounds.height - 1);
345 
346 		data.setupWorkingSpace(image, this);
347 
348 		FImageGradients.gradientMagnitudesAndQuantisedOrientations(image, data.gradientMagnitudes);
349 
350 		extractFeatures();
351 
352 		normaliseDescriptors();
353 	}
354 
355 	private void normaliseDescriptors() {
356 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
357 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
358 		final float energyNorm = frameSizeX * frameSizeY;
359 
360 		for (int j = 0; j < descriptors.length; j++) {
361 			final float[] arr = descriptors[j];
362 
363 			energies[j] = ArrayUtils.sumValues(arr) / energyNorm;
364 
365 			ArrayUtils.normalise(arr);
366 
367 			boolean changed = false;
368 			for (int i = 0; i < arr.length; i++) {
369 				if (arr[i] > valueThreshold) {
370 					arr[i] = valueThreshold;
371 					changed = true;
372 				}
373 			}
374 
375 			if (changed)
376 				ArrayUtils.normalise(arr);
377 		}
378 	}
379 
380 	@Override
381 	public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints() {
382 		final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
383 				* numBinsX * numBinsY, descriptors.length);
384 
385 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
386 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
387 
388 		final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
389 		final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
390 
391 		for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
392 			for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
393 				keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
394 						energies[i]));
395 			}
396 		}
397 
398 		return keys;
399 	}
400 
401 	@Override
402 	public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints() {
403 		final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
404 				* numBinsX * numBinsY, descriptors.length);
405 
406 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
407 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
408 
409 		final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
410 		final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
411 
412 		for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
413 			for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
414 				keys.add(
415 						new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i]));
416 			}
417 		}
418 
419 		return keys;
420 	}
421 
422 	@Override
423 	public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints(float energyThreshold) {
424 		final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins
425 				* numBinsX * numBinsY);
426 
427 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
428 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
429 
430 		final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
431 		final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
432 
433 		for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
434 			for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
435 				if (energies[i] >= energyThreshold)
436 					keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
437 							energies[i]));
438 			}
439 		}
440 
441 		return keys;
442 	}
443 
444 	@Override
445 	public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints(float energyThreshold) {
446 		final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins
447 				* numBinsX * numBinsY);
448 
449 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
450 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
451 
452 		final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1);
453 		final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1);
454 
455 		for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
456 			for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) {
457 				if (energies[i] >= energyThreshold)
458 					keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i],
459 							energies[i]));
460 			}
461 		}
462 
463 		return keys;
464 	}
465 
466 	/**
467 	 * Get the computed raw dense SIFT descriptors from the previous call to
468 	 * {@link #analyseImage(FImage)} or {@link #analyseImage(FImage, Rectangle)}
469 	 * .
470 	 *
471 	 * @return the descriptors.
472 	 */
473 	@Override
474 	public float[][] getDescriptors() {
475 		return descriptors;
476 	}
477 
478 	@Override
479 	public DenseSIFT clone() {
480 		final DenseSIFT clone = (DenseSIFT) super.clone();
481 
482 		clone.descriptors = null;
483 		clone.energies = null;
484 		clone.data = null;
485 
486 		return clone;
487 	}
488 
489 	@Override
490 	public void setBinWidth(int size) {
491 		this.binWidth = size;
492 	}
493 
494 	@Override
495 	public void setBinHeight(int size) {
496 		this.binHeight = size;
497 	}
498 
499 	@Override
500 	public int getBinWidth() {
501 		return binWidth;
502 	}
503 
504 	@Override
505 	public int getBinHeight() {
506 		return binHeight;
507 	}
508 
509 	@Override
510 	public int getNumBinsX() {
511 		return numBinsX;
512 	}
513 
514 	@Override
515 	public int getNumBinsY() {
516 		return numBinsY;
517 	}
518 
519 	@Override
520 	public int getNumOriBins() {
521 		return numOriBins;
522 	}
523 }