001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.global; 031 032import org.openimaj.citation.annotation.Reference; 033import org.openimaj.citation.annotation.ReferenceType; 034import org.openimaj.feature.FloatFV; 035import org.openimaj.image.FImage; 036import org.openimaj.image.Image; 037import org.openimaj.image.MBFImage; 038import org.openimaj.image.analyser.ImageAnalyser; 039import org.openimaj.image.colour.ColourMap; 040import org.openimaj.image.colour.ColourSpace; 041import org.openimaj.image.colour.RGBColour; 042import org.openimaj.image.processing.algorithm.FourierTransform; 043import org.openimaj.image.processing.convolution.FourierConvolve; 044import org.openimaj.image.processing.convolution.GaborFilters; 045import org.openimaj.image.processing.resize.ResizeProcessor; 046import org.openimaj.image.processor.SinglebandImageProcessor; 047 048import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D; 049 050/** 051 * Implementation of the "Gist" spatial envelope feature. Based on (and tested 052 * against) the original Matlab implementation from <a 053 * href="http://people.csail.mit.edu/torralba/code/spatialenvelope/" 054 * >http://people.csail.mit.edu/torralba/code/spatialenvelope/</a>, and designed 055 * to produce comparable features. 056 * <p> 057 * The Gist or Spatial Envelope is a very low dimensional representation of the 058 * scene. Gist encodes a set of perceptual dimensions (naturalness, openness, 059 * roughness, expansion, ruggedness) that represents the dominant spatial 060 * structure of a scene. These perceptual dimensions are reliably estimated 061 * using coarsely localized spectral information from the image. 062 * <p> 063 * <b>Implementation notes:</b> This class supports both fixed and variable size 064 * Gist. In the fixed size mode, the image is resized and centre-cropped to a 065 * pre-defined size (allowing the Gabor filters to be pre-computed and reused). 066 * In the variable size mode the Gabor filters will be re-computed as necessary 067 * if the image size changes. For computing features to compare images, fixed 068 * size mode makes more sense. 069 * <p> 070 * <b>Example usage for image comparison:</b><br> 071 * </br> <code> 072 * <pre> 073 * FImage i1, i2 = ... 074 * Gist<FImage> fsg = new Gist<FImage>(256, 256); 075 * 076 * fsg.analyseImage(i1); 077 * FloatFV f1 = fsg.getResponse(); 078 * 079 * fsg.analyseImage(i2); 080 * FloatFV f2 = fsg.getResponse(); 081 * 082 * double d = FloatFVComparison.SUM_SQUARE.compare(f1, f2); 083 * </pre> 084 * </code> 085 * <p> 086 * The ability to generate a visualisation of the current descriptor in the same 087 * way as provided by the original Matlab code is given by the 088 * {@link #visualiseDescriptor(int)} method. 089 * 090 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 091 * 092 * @param <IMAGE> 093 * The type of image. {@link MBFImage} and {@link FImage} are 094 * supported; other types might not work. 095 */ 096@Reference( 097 type = ReferenceType.Article, 098 author = { "Oliva, Aude", "Torralba, Antonio" }, 099 title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope", 100 year = "2001", 101 journal = "Int. J. Comput. Vision", 102 pages = { "145", "", "175" }, 103 url = "http://dx.doi.org/10.1023/A:1011139631724", 104 month = "May", 105 number = "3", 106 publisher = "Kluwer Academic Publishers", 107 volume = "42", 108 customData = { 109 "issn", "0920-5691", 110 "numpages", "31", 111 "doi", "10.1023/A:1011139631724", 112 "acmid", "598462", 113 "address", "Hingham, MA, USA", 114 "keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout" 115 }) 116public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>> 117implements 118ImageAnalyser<IMAGE> 119{ 120 /** 121 * The default number of filter orientations per scale (from HF to LF) 122 */ 123 public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 }; 124 125 /** 126 * The default number spatial blocks 127 */ 128 public static final int DEFAULT_NUMBER_OF_BLOCKS = 4; 129 130 /** 131 * The default number of cycles per image for the pre-filter Gaussian 132 */ 133 public static final int DEFAULT_PREFILTER_FC = 4; 134 135 /** 136 * The default amount of padding to apply before convolving with the Gabor 137 * filters 138 */ 139 public static final int DEFAULT_BOUNDARY_EXTENSION = 32; 140 141 /** 142 * The number of blocks in each direction 143 */ 144 public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS; 145 146 /** 147 * The number of cycles per image for the pre-filter Gaussian 148 */ 149 public int prefilterFC = DEFAULT_PREFILTER_FC; 150 151 /** 152 * The amount of padding to add before convolving with the Gabor functions 153 */ 154 public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION; 155 156 /** 157 * Default image size (both height and width) for fixed size Gist 158 */ 159 public static final int DEFAULT_SIZE = 128; 160 161 protected FImage[] gaborFilters; 162 protected FloatFV response; 163 protected int[] orientationsPerScale; 164 protected boolean fixedSize; 165 protected int imageWidth; 166 protected int imageHeight; 167 168 /** 169 * Construct a fixed size Gist extractor using the default values. 170 */ 171 public Gist() { 172 this(true); 173 } 174 175 /** 176 * Construct a variable or fixed size Gist extractor using the default 177 * values. 178 * 179 * @param fixedSize 180 * if true the images should be resized and cropped 181 */ 182 public Gist(boolean fixedSize) { 183 this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize); 184 } 185 186 /** 187 * Construct a fixed-size Gist extractor, with the given image size and the 188 * default number of orientations per scale for the Gabor jet. 189 * 190 * @param width 191 * the image width 192 * @param height 193 * the image height 194 */ 195 public Gist(int width, int height) { 196 this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE); 197 } 198 199 /** 200 * Construct a fixed-size Gist extractor, with the given image size and 201 * number of orientations per scale for the Gabor jet. 202 * 203 * @param width 204 * the image width 205 * @param height 206 * the image height 207 * @param orientationsPerScale 208 * the number of Gabor orientations per scale (from HF to LF) 209 */ 210 public Gist(int width, int height, int[] orientationsPerScale) { 211 this(width, height, orientationsPerScale, true); 212 } 213 214 /** 215 * Construct a variable or fixed size Gist extractor with the given number 216 * of orientations per scale for the Gabor jet. 217 * 218 * @param orientationsPerScale 219 * the number of Gabor orientations per scale (from HF to LF) 220 * @param fixedSize 221 * if true the images should be resized and cropped 222 */ 223 public Gist(int[] orientationsPerScale, boolean fixedSize) { 224 this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize); 225 } 226 227 /** 228 * Construct a variable or fixed size Gist extractor with the given number 229 * of orientations per scale for the Gabor jet. The given height and width 230 * are ignored in variable size mode. 231 * 232 * @param width 233 * the image width 234 * @param height 235 * the image height 236 * @param orientationsPerScale 237 * the number of Gabor orientations per scale (from HF to LF) 238 * @param fixedSize 239 * if true the images should be resized and cropped 240 */ 241 public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) { 242 this.fixedSize = fixedSize; 243 this.orientationsPerScale = orientationsPerScale; 244 this.imageWidth = width; 245 this.imageHeight = height; 246 247 if (fixedSize) 248 this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension, height + 2 249 * this.boundaryExtension, orientationsPerScale); 250 } 251 252 @Override 253 public void analyseImage(IMAGE image) { 254 if (fixedSize) { 255 final double sc = Math.max((double) imageWidth / (double) image.getWidth(), (double) imageHeight 256 / (double) image.getHeight()); 257 258 final IMAGE resized = image.process(new ResizeProcessor((float) sc)); 259 final IMAGE roi = resized.extractCenter(imageWidth, imageHeight); 260 extractGist(roi); 261 } else { 262 if (gaborFilters == null || gaborFilters[0].width != image.getWidth() 263 || gaborFilters[0].height != image.getHeight()) 264 { 265 gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension, 266 image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale); 267 } 268 269 extractGist(image.clone()); // clone to stop side effects from 270 // normalisation further down 271 } 272 } 273 274 protected Gist(int[] orientationsPerScale) { 275 this.orientationsPerScale = orientationsPerScale; 276 } 277 278 protected void extractGist(IMAGE image) { 279 MBFImage mbfimage; 280 if (image instanceof FImage) { 281 mbfimage = new MBFImage((FImage) image); 282 } else if (image instanceof MBFImage) { 283 mbfimage = (MBFImage) image; 284 } else { 285 throw new UnsupportedOperationException("Image type " + image.getClass() 286 + " is not currently supported. Please file a bug report."); 287 } 288 289 final MBFImage o = prefilter(mbfimage.normalise()); 290 this.response = gistGabor(o); 291 } 292 293 private MBFImage prefilter(MBFImage img) { 294 final int w = 5; 295 final double s1 = this.prefilterFC / Math.sqrt(Math.log(2)); 296 297 final int sw = img.getWidth() + 2 * w; 298 final int sh = img.getHeight() + 2 * w; 299 int n = Math.max(sw, sh); 300 n = n + n % 2; 301 img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh); 302 303 final FImage filter = new FImage(2 * n, n); 304 for (int j = 0; j < n; j++) { 305 final int fy = j - n / 2; 306 307 for (int i = 0; i < n * 2; i += 2) { 308 final int fx = (i / 2) - n / 2; 309 310 filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1)); 311 } 312 } 313 314 final MBFImage output = new MBFImage(); 315 for (int b = 0; b < img.numBands(); b++) { 316 final FImage band = img.getBand(b); 317 for (int y = 0; y < band.height; y++) { 318 for (int x = 0; x < band.width; x++) { 319 band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255); 320 } 321 } 322 323 output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true))); 324 } 325 final FImage mean = output.flatten(); 326 final FImage meansq = mean.multiply(mean); 327 final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true); 328 329 for (int b = 0; b < img.numBands(); b++) { 330 final FImage band = output.getBand(b); 331 for (int y = 0; y < localstd.height; y++) 332 for (int x = 0; x < localstd.width; x++) 333 band.pixels[y][x] = (float) (band.pixels[y][x] / (0.2 + Math 334 .sqrt(Math.abs(localstd.pixels[y][x])))); 335 } 336 337 return output.extractROI(w, w, sw - w - w, sh - w - w); 338 } 339 340 private FloatFV gistGabor(MBFImage img) { 341 final int blocksPerFilter = computeNumberOfSamplingBlocks(); 342 final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter; 343 final int nFilters = this.gaborFilters.length; 344 345 // pad the image 346 img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension); 347 348 final int cols = img.getCols(); 349 final int rows = img.getRows(); 350 final FloatFFT_2D fft = new FloatFFT_2D(rows, cols); 351 352 final float[][] workingSpace = new float[rows][cols * 2]; 353 final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands()); 354 355 for (int b = 0; b < img.numBands(); b++) { 356 final FImage band = img.bands.get(b); 357 358 final float[][] preparedImage = 359 FourierTransform.prepareData(band.pixels, rows, cols, true); 360 fft.complexForward(preparedImage); 361 362 for (int i = 0; i < nFilters; i++) { 363 // convolve with the filter 364 FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols); 365 366 // remove padding 367 ig = ig.extractROI(boundaryExtension, boundaryExtension, band.width - 2 * boundaryExtension, band.height 368 - 2 369 * boundaryExtension); 370 371 sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter); 372 } 373 } 374 375 return fv; 376 } 377 378 /** 379 * Compute the number of sampling blocks that are used for every filter. The 380 * default implementation returns {@link #numberOfBlocks}* 381 * {@link #numberOfBlocks}, but can be overridden in combination with 382 * {@link #sampleResponses(FImage, float[], int)} in a subclass to support 383 * different spatial sampling strategies. 384 * 385 * @return the number of sampling blocks per filter. 386 */ 387 protected int computeNumberOfSamplingBlocks() { 388 return numberOfBlocks * numberOfBlocks; 389 } 390 391 /** 392 * Sample the average response from each of the blocks in the image and 393 * insert into the vector. This method could be overridden to support 394 * different spatial aggregation strategies (in which case 395 * {@link #computeNumberOfSamplingBlocks()} should also be overridden). 396 * 397 * @param image 398 * the image to sample 399 * @param v 400 * the vector to write into 401 * @param offset 402 * the offset from which to sample 403 */ 404 protected void sampleResponses(FImage image, float[] v, int offset) { 405 final int gridWidth = image.width / this.numberOfBlocks; 406 final int gridHeight = image.height / this.numberOfBlocks; 407 408 for (int iy = 0; iy < this.numberOfBlocks; iy++) { 409 final int starty = gridHeight * iy; 410 final int stopy = Math.min(starty + gridHeight, image.height); 411 412 for (int ix = 0; ix < this.numberOfBlocks; ix++) { 413 final int startx = gridWidth * ix; 414 final int stopx = Math.min(startx + gridWidth, image.width); 415 416 float avg = 0; 417 for (int y = starty; y < stopy; y++) { 418 for (int x = startx; x < stopx; x++) { 419 avg += image.pixels[y][x]; 420 } 421 } 422 avg /= ((stopx - startx) * (stopy - starty)); 423 424 // note y and x transposed to conform to the matlab 425 // implementation 426 v[offset + iy + ix * this.numberOfBlocks] = avg; 427 } 428 } 429 } 430 431 /* 432 * Perform convolution in the frequency domain an reconstruct the resultant 433 * image as the magnitudes of the complex components from the ifft. 434 */ 435 private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft, 436 int rows, int cols) 437 { 438 final float[][] preparedKernel = filterfft.pixels; 439 440 for (int y = 0; y < rows; y++) { 441 for (int x = 0; x < cols; x++) { 442 final float reImage = preparedImage[y][x * 2]; 443 final float imImage = preparedImage[y][1 + x * 2]; 444 445 final float reKernel = preparedKernel[y][x * 2]; 446 final float imKernel = preparedKernel[y][1 + x * 2]; 447 448 final float re = reImage * reKernel - imImage * imKernel; 449 final float im = reImage * imKernel + imImage * reKernel; 450 451 workingSpace[y][x * 2] = re; 452 workingSpace[y][1 + x * 2] = im; 453 } 454 } 455 456 fft.complexInverse(workingSpace, true); 457 458 final FImage out = new FImage(cols, rows); 459 for (int r = 0; r < rows; r++) { 460 for (int c = 0; c < cols; c++) { 461 out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2] 462 + workingSpace[r][1 + c * 2] 463 * workingSpace[r][1 + c * 2]); 464 } 465 } 466 return out; 467 } 468 469 /** 470 * Compute the descriptor visualisation in the same form as the original 471 * matlab code. The resultant image illustrates the dominant filter 472 * responses for each spatial bin. The filter scales are drawn with 473 * differing hues, and brightness is proportional to response value. 474 * 475 * @param height 476 * the desired height of the produced image 477 * @return the visualisation TODO: colour descriptors 478 */ 479 public MBFImage visualiseDescriptor(int height) { 480 final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length); 481 482 final FImage[] G = new FImage[this.gaborFilters.length]; 483 for (int i = 0; i < gaborFilters.length; i++) { 484 G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height); 485 FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false); 486 G[i] = ResizeProcessor.halfSize(G[i]); 487 G[i].addInplace(G[i].clone().flipY().flipX()); 488 } 489 490 final int blocksPerFilter = computeNumberOfSamplingBlocks(); 491 final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter; 492 final int nBands = this.response.values.length / nFeaturesPerBand; 493 494 final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB); 495 output.fill(RGBColour.WHITE); 496 497 for (int b = 0; b < nBands; b++) { 498 float max = 0; 499 final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks]; 500 for (int y = 0, k = 0; y < numberOfBlocks; y++) { 501 for (int x = 0; x < numberOfBlocks; x++, k++) { 502 blockImages[k] = new MBFImage(G[0].width, G[0].height, 3); 503 504 for (int s = 0, j = 0; s < orientationsPerScale.length; s++) { 505 for (int i = 0; i < orientationsPerScale[s]; i++, j++) { 506 final MBFImage col = new MBFImage(G[0].width, G[0].height, 3); 507 col.fill(C[s]).multiplyInplace( 508 this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]); 509 510 blockImages[k].addInplace(G[j].toRGB().multiply(col)); 511 } 512 } 513 514 for (int i = 0; i < blockImages[k].numBands(); i++) 515 max = Math.max(max, blockImages[k].bands.get(i).max()); 516 } 517 } 518 519 final int ts = (height / 4); 520 final ResizeProcessor rp = new ResizeProcessor(ts, ts, true); 521 for (int y = 0, k = 0; y < numberOfBlocks; y++) { 522 for (int x = 0; x < numberOfBlocks; x++, k++) { 523 blockImages[k].divideInplace(max); 524 final MBFImage tmp = blockImages[k].process(rp); 525 tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE); 526 tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE); 527 tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE); 528 tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE); 529 output.drawImage(tmp, x * ts + b * height, y * ts); 530 } 531 } 532 } 533 534 return output; 535 } 536 537 /** 538 * Get the response vector from the previous call to 539 * {@link #analyseImage(Image)}. 540 * 541 * @return the response 542 */ 543 public FloatFV getResponse() { 544 return response; 545 } 546}