001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.dense.gradient.dsift; 031 032import org.openimaj.feature.local.list.LocalFeatureList; 033import org.openimaj.feature.local.list.MemoryLocalFeatureList; 034import org.openimaj.image.FImage; 035import org.openimaj.image.processing.convolution.FImageConvolveSeparable; 036import org.openimaj.image.processing.convolution.FImageGradients; 037import org.openimaj.math.geometry.shape.Rectangle; 038import org.openimaj.util.array.ArrayUtils; 039 040/** 041 * Implementation of a dense SIFT feature extractor for {@link FImage}s. 042 * Extracts upright SIFT features at a single scale on a grid. 043 * <p> 044 * Implementation inspired by the 045 * <a href="http://www.vlfeat.org/api/dsift.html#dsift-usage">VLFeat 046 * extractor</a>. 047 * <p> 048 * <b>Implementation Notes</b>. The analyser is not thread-safe, however, it is 049 * safe to reuse the analyser. In multi-threaded environments, a separate 050 * instance must be made for each thread. Internally, this implementation 051 * allocates memory for the gradient images, and if possible re-uses these 052 * between calls. Re-use requires that the input image is the same size between 053 * calls to the analyser. 054 * 055 * @see "http://www.vlfeat.org/api/dsift.html#dsift-usage" 056 * 057 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 058 * 059 */ 060public class DenseSIFT extends AbstractDenseSIFT<FImage> { 061 static class WorkingData { 062 /** 063 * minimum X bound for sampling the descriptors (inclusive) 064 */ 065 protected int boundMinX; 066 067 /** 068 * maximum X bound for sampling the descriptors (inclusive) 069 */ 070 protected int boundMaxX; 071 072 /** 073 * minimum Y bound for sampling the descriptors (inclusive) 074 */ 075 protected int boundMinY; 076 077 /** 078 * maximum X bound for sampling the descriptors (inclusive) 079 */ 080 protected int boundMaxY; 081 082 /** 083 * Quantised orientation/gradient magnitude data. Each element 084 * corresponds to an angular bin of the orientations, and the individual 085 * images correspond to the gradient magnitudes for the respective 086 * orientations. 087 */ 088 protected FImage[] gradientMagnitudes; 089 090 /** 091 * Setup the required space for holding gradient maps and descriptors 092 * 093 * @param image 094 * the image being analysed 095 */ 096 protected void setupWorkingSpace(FImage image, DenseSIFT dsift) { 097 if (gradientMagnitudes == null) { 098 gradientMagnitudes = new FImage[dsift.numOriBins]; 099 } 100 101 if (gradientMagnitudes[0] == null || gradientMagnitudes[0].width != image.width 102 || gradientMagnitudes[0].height != image.height) 103 { 104 for (int i = 0; i < dsift.numOriBins; i++) 105 gradientMagnitudes[i] = new FImage(image.width, image.height); 106 } 107 108 final int rangeX = boundMaxX - boundMinX - (dsift.numBinsX - 1) * dsift.binWidth; 109 final int rangeY = boundMaxY - boundMinY - (dsift.numBinsY - 1) * dsift.binHeight; 110 111 final int numWindowsX = (rangeX >= 0) ? rangeX / dsift.stepX + 1 : 0; 112 final int numWindowsY = (rangeY >= 0) ? rangeY / dsift.stepY + 1 : 0; 113 114 final int numFeatures = numWindowsX * numWindowsY; 115 116 dsift.descriptors = new float[numFeatures][dsift.numOriBins * dsift.numBinsX * dsift.numBinsY]; 117 dsift.energies = new float[numFeatures]; 118 } 119 } 120 121 /** 122 * Step size of sampling window in x-direction (in pixels) 123 */ 124 protected int stepX = 5; 125 126 /** 127 * Step size of sampling window in y-direction (in pixels) 128 */ 129 protected int stepY = 5; 130 131 /** 132 * Width of a single bin of the sampling window (in pixels). Sampling window 133 * width is this multiplied by #numBinX. 134 */ 135 protected int binWidth = 5; 136 137 /** 138 * Height of a single bin of the sampling window (in pixels). Sampling 139 * window height is this multiplied by #numBinY. 140 */ 141 protected int binHeight = 5; 142 143 /** 144 * Number of spatial bins in the X direction 145 */ 146 protected int numBinsX = 4; 147 148 /** 149 * Number of spatial bins in the Y direction 150 */ 151 protected int numBinsY = 4; 152 153 /** The number of orientation bins */ 154 protected int numOriBins = 8; 155 156 /** 157 * Size of the Gaussian window (in relative to of the size of a bin) 158 */ 159 protected float gaussianWindowSize = 2f; 160 161 /** 162 * Threshold for clipping the SIFT features 163 */ 164 protected float valueThreshold = 0.2f; 165 166 protected volatile WorkingData data = new WorkingData(); 167 168 /** 169 * Extracted descriptors 170 */ 171 protected volatile float[][] descriptors; 172 173 /** 174 * Descriptor energies 175 */ 176 protected volatile float[] energies; 177 178 /** 179 * Construct with the default configuration: standard SIFT geometry (4x4x8), 180 * 5px x 5px spatial bins, 5px step size, gaussian window size of 2 and 181 * value threshold of 0.2. 182 */ 183 public DenseSIFT() { 184 } 185 186 /** 187 * Construct with the given step size (for both x and y) and binSize. All 188 * other values are the defaults. 189 * 190 * @param step 191 * the step size 192 * @param binSize 193 * the spatial bin size 194 */ 195 public DenseSIFT(int step, int binSize) { 196 this.binWidth = binSize; 197 this.binHeight = binSize; 198 this.stepX = step; 199 this.stepY = step; 200 } 201 202 /** 203 * Construct with the given configuration. The gaussian window size is set 204 * to 2, and value threshold to 0.2. 205 * 206 * @param stepX 207 * step size in x direction 208 * @param stepY 209 * step size in y direction 210 * @param binWidth 211 * width of spatial bins 212 * @param binHeight 213 * height of spatial bins 214 * @param numBinsX 215 * number of bins in x direction for each descriptor 216 * @param numBinsY 217 * number of bins in y direction for each descriptor 218 * @param numOriBins 219 * number of orientation bins for each descriptor 220 */ 221 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins) { 222 this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, 2f); 223 } 224 225 /** 226 * Construct with the given configuration. The value threshold is set to 227 * 0.2. 228 * 229 * @param stepX 230 * step size in x direction 231 * @param stepY 232 * step size in y direction 233 * @param binWidth 234 * width of spatial bins 235 * @param binHeight 236 * height of spatial bins 237 * @param numBinsX 238 * number of bins in x direction for each descriptor 239 * @param numBinsY 240 * number of bins in y direction for each descriptor 241 * @param numOriBins 242 * number of orientation bins for each descriptor 243 * @param gaussianWindowSize 244 * the size of the gaussian weighting window 245 */ 246 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins, 247 float gaussianWindowSize) 248 { 249 this(stepX, stepY, binWidth, binHeight, numBinsX, numBinsY, numOriBins, gaussianWindowSize, 0.2f); 250 } 251 252 /** 253 * Construct with the given configuration. 254 * 255 * @param stepX 256 * step size in x direction 257 * @param stepY 258 * step size in y direction 259 * @param binWidth 260 * width of spatial bins 261 * @param binHeight 262 * height of spatial bins 263 * @param numBinsX 264 * number of bins in x direction for each descriptor 265 * @param numBinsY 266 * number of bins in y direction for each descriptor 267 * @param numOriBins 268 * number of orientation bins for each descriptor 269 * @param gaussianWindowSize 270 * the size of the gaussian weighting window 271 * @param valueThreshold 272 * the threshold for clipping features 273 */ 274 public DenseSIFT(int stepX, int stepY, int binWidth, int binHeight, int numBinsX, int numBinsY, int numOriBins, 275 float gaussianWindowSize, float valueThreshold) 276 { 277 this.binWidth = binWidth; 278 this.binHeight = binHeight; 279 this.stepX = stepX; 280 this.stepY = stepY; 281 this.numBinsX = numBinsX; 282 this.numBinsY = numBinsY; 283 this.numOriBins = numOriBins; 284 this.gaussianWindowSize = gaussianWindowSize; 285 this.valueThreshold = valueThreshold; 286 } 287 288 private float[] buildKernel(int binSize, int numBins, int binIndex, float windowSize) { 289 final int kernelSize = 2 * binSize - 1; 290 final float[] kernel = new float[kernelSize]; 291 final float delta = binSize * (binIndex - 0.5F * (numBins - 1)); 292 293 final float sigma = binSize * windowSize; 294 295 for (int x = -binSize + 1, i = 0; x <= +binSize - 1; x++, i++) { 296 final float z = (x - delta) / sigma; 297 298 kernel[i] = (1.0F - Math.abs(x) / binSize) * ((binIndex >= 0) ? (float) Math.exp(-0.5F * z * z) : 1.0F); 299 } 300 301 return kernel; 302 } 303 304 /** 305 * Extract the DSIFT features 306 */ 307 protected void extractFeatures() { 308 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 309 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 310 311 for (int biny = 0; biny < numBinsY; biny++) { 312 final float[] yker = buildKernel(binHeight, numBinsY, biny, gaussianWindowSize); 313 314 for (int binx = 0; binx < numBinsX; binx++) { 315 final float[] xker = buildKernel(binWidth, numBinsX, binx, gaussianWindowSize); 316 317 for (int bint = 0; bint < numOriBins; bint++) { 318 final FImage conv = data.gradientMagnitudes[bint].process(new FImageConvolveSeparable(xker, yker)); 319 final float[][] src = conv.pixels; 320 321 final int descriptorOffset = bint + binx * numOriBins + biny * (numBinsX * numOriBins); 322 int descriptorIndex = 0; 323 324 for (int framey = data.boundMinY; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 325 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX) { 326 descriptors[descriptorIndex][descriptorOffset] = src[framey + biny * binHeight][framex 327 + binx * binWidth]; 328 descriptorIndex++; 329 } 330 } 331 } 332 } 333 } 334 } 335 336 @Override 337 public void analyseImage(FImage image, Rectangle bounds) { 338 if (data == null) 339 data = new WorkingData(); 340 341 data.boundMinX = (int) bounds.x; 342 data.boundMaxX = (int) (bounds.width - 1); 343 data.boundMinY = (int) bounds.y; 344 data.boundMaxY = (int) (bounds.height - 1); 345 346 data.setupWorkingSpace(image, this); 347 348 FImageGradients.gradientMagnitudesAndQuantisedOrientations(image, data.gradientMagnitudes); 349 350 extractFeatures(); 351 352 normaliseDescriptors(); 353 } 354 355 private void normaliseDescriptors() { 356 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 357 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 358 final float energyNorm = frameSizeX * frameSizeY; 359 360 for (int j = 0; j < descriptors.length; j++) { 361 final float[] arr = descriptors[j]; 362 363 energies[j] = ArrayUtils.sumValues(arr) / energyNorm; 364 365 ArrayUtils.normalise(arr); 366 367 boolean changed = false; 368 for (int i = 0; i < arr.length; i++) { 369 if (arr[i] > valueThreshold) { 370 arr[i] = valueThreshold; 371 changed = true; 372 } 373 } 374 375 if (changed) 376 ArrayUtils.normalise(arr); 377 } 378 } 379 380 @Override 381 public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints() { 382 final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins 383 * numBinsX * numBinsY, descriptors.length); 384 385 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 386 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 387 388 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 389 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 390 391 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 392 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 393 keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], 394 energies[i])); 395 } 396 } 397 398 return keys; 399 } 400 401 @Override 402 public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints() { 403 final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins 404 * numBinsX * numBinsY, descriptors.length); 405 406 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 407 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 408 409 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 410 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 411 412 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 413 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 414 keys.add( 415 new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], energies[i])); 416 } 417 } 418 419 return keys; 420 } 421 422 @Override 423 public LocalFeatureList<FloatDSIFTKeypoint> getFloatKeypoints(float energyThreshold) { 424 final MemoryLocalFeatureList<FloatDSIFTKeypoint> keys = new MemoryLocalFeatureList<FloatDSIFTKeypoint>(numOriBins 425 * numBinsX * numBinsY); 426 427 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 428 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 429 430 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 431 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 432 433 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 434 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 435 if (energies[i] >= energyThreshold) 436 keys.add(new FloatDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], 437 energies[i])); 438 } 439 } 440 441 return keys; 442 } 443 444 @Override 445 public LocalFeatureList<ByteDSIFTKeypoint> getByteKeypoints(float energyThreshold) { 446 final MemoryLocalFeatureList<ByteDSIFTKeypoint> keys = new MemoryLocalFeatureList<ByteDSIFTKeypoint>(numOriBins 447 * numBinsX * numBinsY); 448 449 final int frameSizeX = binWidth * (numBinsX - 1) + 1; 450 final int frameSizeY = binHeight * (numBinsY - 1) + 1; 451 452 final float deltaCenterX = 0.5F * binWidth * (numBinsX - 1); 453 final float deltaCenterY = 0.5F * binHeight * (numBinsY - 1); 454 455 for (int framey = data.boundMinY, i = 0; framey <= data.boundMaxY - frameSizeY + 1; framey += stepY) { 456 for (int framex = data.boundMinX; framex <= data.boundMaxX - frameSizeX + 1; framex += stepX, i++) { 457 if (energies[i] >= energyThreshold) 458 keys.add(new ByteDSIFTKeypoint(framex + deltaCenterX, framey + deltaCenterY, descriptors[i], 459 energies[i])); 460 } 461 } 462 463 return keys; 464 } 465 466 /** 467 * Get the computed raw dense SIFT descriptors from the previous call to 468 * {@link #analyseImage(FImage)} or {@link #analyseImage(FImage, Rectangle)} 469 * . 470 * 471 * @return the descriptors. 472 */ 473 @Override 474 public float[][] getDescriptors() { 475 return descriptors; 476 } 477 478 @Override 479 public DenseSIFT clone() { 480 final DenseSIFT clone = (DenseSIFT) super.clone(); 481 482 clone.descriptors = null; 483 clone.energies = null; 484 clone.data = null; 485 486 return clone; 487 } 488 489 @Override 490 public void setBinWidth(int size) { 491 this.binWidth = size; 492 } 493 494 @Override 495 public void setBinHeight(int size) { 496 this.binHeight = size; 497 } 498 499 @Override 500 public int getBinWidth() { 501 return binWidth; 502 } 503 504 @Override 505 public int getBinHeight() { 506 return binHeight; 507 } 508 509 @Override 510 public int getNumBinsX() { 511 return numBinsX; 512 } 513 514 @Override 515 public int getNumBinsY() { 516 return numBinsY; 517 } 518 519 @Override 520 public int getNumOriBins() { 521 return numOriBins; 522 } 523}