View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.image.feature.dense.gradient.dsift;
31  
32  import org.openimaj.image.FImage;
33  import org.openimaj.image.processing.convolution.FTriangleFilter;
34  
35  /**
36   * Implementation of an approximate dense SIFT feature extractor. Extracts
37   * approximate upright SIFT features at a single scale on a grid. Implementation
38   * is approximate because instead of using an exact Gaussian weighting, samples
39   * are weighted using a flat windowing function for speed, and then after
40   * accumulation are re-weighted by the average of the Gaussian window over the
41   * spatial support of the sampling region. The end result is that the extracted
42   * features are similar to the exact dense SIFT implementation, but computation
43   * is much faster.
44   * <p>
45   * Implementation directly based on the
46   * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat
47   * extractor</a>.
48   * <p>
49   * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is
50   * safe to reuse the analyser. In multi-threaded environments, a separate
51   * instance must be made for each thread. Internally, this implementation
52   * allocates memory for the gradient images, and if possible re-uses these
53   * between calls. Re-use requires that the input image is the same size between
54   * calls to the analyser.
55   *
56   * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage"
57   *
58   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
59   *
60   */
61  public class ApproximateDenseSIFT extends DenseSIFT {
62  	/**
63  	 * Construct with the default configuration: standard SIFT geometry (4x4x8),
64  	 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and
65  	 * value threshold of 0.2.
66  	 */
67  	public ApproximateDenseSIFT() {
68  		super();
69  	}
70  
71  	/**
72  	 * Construct with the given step size (for both x and y) and binSize. All
73  	 * other values are the defaults.
74  	 *
75  	 * @param step
76  	 *            the step size
77  	 * @param binSize
78  	 *            the spatial bin size
79  	 */
80  	public ApproximateDenseSIFT(int step, int binSize) {
81  		super(step, binSize);
82  	}
83  
84  	/**
85  	 * Construct with the given configuration. The gaussian window size is set
86  	 * to 2, and value threshold to 0.2.
87  	 *
88  	 * @param stepX
89  	 *            step size in x direction
90  	 * @param stepY
91  	 *            step size in y direction
92  	 * @param binWidth
93  	 *            width of spatial bins
94  	 * @param binHeight
95  	 *            height of spatial bins
96  	 * @param numBinsX
97  	 *            number of bins in x direction for each descriptor
98  	 * @param numBinsY
99  	 *            number of bins in y direction for each descriptor
100 	 * @param numOriBins
101 	 *            number of orientation bins for each descriptor
102 	 */
103 	public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
104 			int numOriBins)
105 	{
106 		super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins);
107 	}
108 
109 	/**
110 	 * Construct with the given configuration. The value threshold is set to
111 	 * 0.2.
112 	 *
113 	 * @param stepX
114 	 *            step size in x direction
115 	 * @param stepY
116 	 *            step size in y direction
117 	 * @param binWidth
118 	 *            width of spatial bins
119 	 * @param binHeight
120 	 *            height of spatial bins
121 	 * @param numBinsX
122 	 *            number of bins in x direction for each descriptor
123 	 * @param numBinsY
124 	 *            number of bins in y direction for each descriptor
125 	 * @param numOriBins
126 	 *            number of orientation bins for each descriptor
127 	 * @param gaussianWindowSize
128 	 *            the size of the gaussian weighting window
129 	 */
130 	public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
131 			int numOriBins,
132 			float gaussianWindowSize)
133 	{
134 		super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize);
135 	}
136 
137 	/**
138 	 * Construct with the given configuration.
139 	 *
140 	 * @param stepX
141 	 *            step size in x direction
142 	 * @param stepY
143 	 *            step size in y direction
144 	 * @param binWidth
145 	 *            width of spatial bins
146 	 * @param binHeight
147 	 *            height of spatial bins
148 	 * @param numBinsX
149 	 *            number of bins in x direction for each descriptor
150 	 * @param numBinsY
151 	 *            number of bins in y direction for each descriptor
152 	 * @param numOriBins
153 	 *            number of orientation bins for each descriptor
154 	 * @param gaussianWindowSize
155 	 *            the size of the gaussian weighting window
156 	 * @param valueThreshold
157 	 *            the threshold for clipping features
158 	 */
159 	public ApproximateDenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY,
160 			int numOriBins,
161 			float gaussianWindowSize, float valueThreshold)
162 	{
163 		super(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, valueThreshold);
164 	}
165 
166 	private float computeWindowMean(int binSize, int numBins, int binIndex, double windowSize) {
167 		final float delta = binSize * (binIndex - 0.5F * (numBins - 1));
168 		/* float sigma = 0.5F * ((numBins - 1) * binSize + 1) ; */
169 		final float sigma = binSize * (float) windowSize;
170 		int x;
171 
172 		float acc = 0.0f;
173 		for (x = -binSize + 1; x <= +binSize - 1; ++x) {
174 			final float z = (x - delta) / sigma;
175 			acc += ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F);
176 		}
177 		return acc /= (2 * binSize - 1);
178 	}
179 
180 	@Override
181 	protected void extractFeatures() {
182 		final int frameSizeX = binWidth * (numBinsX - 1) + 1;
183 		final int frameSizeY = binHeight * (numBinsY - 1) + 1;
184 
185 		for (int bint = 0; bint < numOriBins; bint++) {
186 			final FImage conv = data.gradientMagnitudes[bint].process(new FTriangleFilter(binWidth, binHeight));
187 			final float[][] src = conv.pixels;
188 
189 			for (int biny = 0; biny < numBinsY; biny++) {
190 
191 				// This approximate version of DSIFT does not use a proper
192 				// Gaussian weighting scheme for the gradients that are
193 				// accumulated on the spatial bins. Instead each spatial bins is
194 				// accumulated based on the triangular kernel only, equivalent
195 				// to bilinear interpolation plus a flat, rather than Gaussian,
196 				// window. Eventually, however, the magnitude of the spatial
197 				// bins in the SIFT descriptor is reweighted by the average of
198 				// the Gaussian window on each bin.
199 				float wy = computeWindowMean(binHeight, numBinsY, biny, gaussianWindowSize);
200 
201 				// The triangular convolution functions convolve by a triangular
202 				// kernel with unit integral; instead for SIFT the triangular
203 				// kernel should have unit height. This is compensated for by
204 				// multiplying by the bin size:
205 				wy *= binHeight;
206 
207 				for (int binx = 0; binx < numBinsX; ++binx) {
208 					float wx = computeWindowMean(binWidth, numBinsX, binx, gaussianWindowSize);
209 					wx *= binWidth;
210 					final float w = wx * wy;
211 
212 					final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins);
213 					int descriptorIndex = 0;
214 
215 					for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) {
216 						for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) {
217 							descriptors[descriptorIndex][descriptorOffset] = w
218 									* src[framey + biny * binHeight][framex + binx * binWidth];
219 							descriptorIndex++;
220 						}
221 					}
222 				}
223 			}
224 		}
225 	}
226 
227 	@Override
228 	public ApproximateDenseSIFT clone() {
229 		return (ApproximateDenseSIFT) super.clone();
230 	}
231 }