View Javadoc

1   /**
2    * Copyright (c) 2011, The University of Southampton and the individual contributors.
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without modification,
6    * are permitted provided that the following conditions are met:
7    *
8    *   * 	Redistributions of source code must retain the above copyright notice,
9    * 	this list of conditions and the following disclaimer.
10   *
11   *   *	Redistributions in binary form must reproduce the above copyright notice,
12   * 	this list of conditions and the following disclaimer in the documentation
13   * 	and/or other materials provided with the distribution.
14   *
15   *   *	Neither the name of the University of Southampton nor the names of its
16   * 	contributors may be used to endorse or promote products derived from this
17   * 	software without specific prior written permission.
18   *
19   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22   * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26   * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29   */
30  package org.openimaj.image.feature.global;
31  
32  import org.openimaj.citation.annotation.Reference;
33  import org.openimaj.citation.annotation.ReferenceType;
34  import org.openimaj.feature.FloatFV;
35  import org.openimaj.image.FImage;
36  import org.openimaj.image.Image;
37  import org.openimaj.image.MBFImage;
38  import org.openimaj.image.analyser.ImageAnalyser;
39  import org.openimaj.image.colour.ColourMap;
40  import org.openimaj.image.colour.ColourSpace;
41  import org.openimaj.image.colour.RGBColour;
42  import org.openimaj.image.processing.algorithm.FourierTransform;
43  import org.openimaj.image.processing.convolution.FourierConvolve;
44  import org.openimaj.image.processing.convolution.GaborFilters;
45  import org.openimaj.image.processing.resize.ResizeProcessor;
46  import org.openimaj.image.processor.SinglebandImageProcessor;
47  
48  import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;
49  
50  /**
51   * Implementation of the "Gist" spatial envelope feature. Based on (and tested
52   * against) the original Matlab implementation from <a
53   * href="http://people.csail.mit.edu/torralba/code/spatialenvelope/"
54   * >http://people.csail.mit.edu/torralba/code/spatialenvelope/</a>, and designed
55   * to produce comparable features.
56   * <p>
57   * The Gist or Spatial Envelope is a very low dimensional representation of the
58   * scene. Gist encodes a set of perceptual dimensions (naturalness, openness,
59   * roughness, expansion, ruggedness) that represents the dominant spatial
60   * structure of a scene. These perceptual dimensions are reliably estimated
61   * using coarsely localized spectral information from the image.
62   * <p>
63   * <b>Implementation notes:</b> This class supports both fixed and variable size
64   * Gist. In the fixed size mode, the image is resized and centre-cropped to a
65   * pre-defined size (allowing the Gabor filters to be pre-computed and reused).
66   * In the variable size mode the Gabor filters will be re-computed as necessary
67   * if the image size changes. For computing features to compare images, fixed
68   * size mode makes more sense.
69   * <p>
70   * <b>Example usage for image comparison:</b><br>
71   * </br> <code>
72   * <pre>
73   * FImage i1, i2 = ...
74   * Gist<FImage> fsg = new Gist<FImage>(256, 256);
75   *
76   * fsg.analyseImage(i1);
77   * FloatFV f1 = fsg.getResponse();
78   *
79   * fsg.analyseImage(i2);
80   * FloatFV f2 = fsg.getResponse();
81   *
82   * double d = FloatFVComparison.SUM_SQUARE.compare(f1, f2);
83   * </pre>
84   * </code>
85   * <p>
86   * The ability to generate a visualisation of the current descriptor in the same
87   * way as provided by the original Matlab code is given by the
88   * {@link #visualiseDescriptor(int)} method.
89   *
90   * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
91   *
92   * @param <IMAGE>
93   *            The type of image. {@link MBFImage} and {@link FImage} are
94   *            supported; other types might not work.
95   */
96  @Reference(
97  		type = ReferenceType.Article,
98  		author = { "Oliva, Aude", "Torralba, Antonio" },
99  		title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope",
100 		year = "2001",
101 		journal = "Int. J. Comput. Vision",
102 		pages = { "145", "", "175" },
103 		url = "http://dx.doi.org/10.1023/A:1011139631724",
104 		month = "May",
105 		number = "3",
106 		publisher = "Kluwer Academic Publishers",
107 		volume = "42",
108 		customData = {
109 				"issn", "0920-5691",
110 				"numpages", "31",
111 				"doi", "10.1023/A:1011139631724",
112 				"acmid", "598462",
113 				"address", "Hingham, MA, USA",
114 				"keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout"
115 		})
116 public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
117 implements
118 ImageAnalyser<IMAGE>
119 {
120 	/**
121 	 * The default number of filter orientations per scale (from HF to LF)
122 	 */
123 	public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 };
124 
125 	/**
126 	 * The default number spatial blocks
127 	 */
128 	public static final int DEFAULT_NUMBER_OF_BLOCKS = 4;
129 
130 	/**
131 	 * The default number of cycles per image for the pre-filter Gaussian
132 	 */
133 	public static final int DEFAULT_PREFILTER_FC = 4;
134 
135 	/**
136 	 * The default amount of padding to apply before convolving with the Gabor
137 	 * filters
138 	 */
139 	public static final int DEFAULT_BOUNDARY_EXTENSION = 32;
140 
141 	/**
142 	 * The number of blocks in each direction
143 	 */
144 	public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS;
145 
146 	/**
147 	 * The number of cycles per image for the pre-filter Gaussian
148 	 */
149 	public int prefilterFC = DEFAULT_PREFILTER_FC;
150 
151 	/**
152 	 * The amount of padding to add before convolving with the Gabor functions
153 	 */
154 	public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION;
155 
156 	/**
157 	 * Default image size (both height and width) for fixed size Gist
158 	 */
159 	public static final int DEFAULT_SIZE = 128;
160 
161 	protected FImage[] gaborFilters;
162 	protected FloatFV response;
163 	protected int[] orientationsPerScale;
164 	protected boolean fixedSize;
165 	protected int imageWidth;
166 	protected int imageHeight;
167 
168 	/**
169 	 * Construct a fixed size Gist extractor using the default values.
170 	 */
171 	public Gist() {
172 		this(true);
173 	}
174 
175 	/**
176 	 * Construct a variable or fixed size Gist extractor using the default
177 	 * values.
178 	 *
179 	 * @param fixedSize
180 	 *            if true the images should be resized and cropped
181 	 */
182 	public Gist(boolean fixedSize) {
183 		this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize);
184 	}
185 
186 	/**
187 	 * Construct a fixed-size Gist extractor, with the given image size and the
188 	 * default number of orientations per scale for the Gabor jet.
189 	 *
190 	 * @param width
191 	 *            the image width
192 	 * @param height
193 	 *            the image height
194 	 */
195 	public Gist(int width, int height) {
196 		this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE);
197 	}
198 
199 	/**
200 	 * Construct a fixed-size Gist extractor, with the given image size and
201 	 * number of orientations per scale for the Gabor jet.
202 	 *
203 	 * @param width
204 	 *            the image width
205 	 * @param height
206 	 *            the image height
207 	 * @param orientationsPerScale
208 	 *            the number of Gabor orientations per scale (from HF to LF)
209 	 */
210 	public Gist(int width, int height, int[] orientationsPerScale) {
211 		this(width, height, orientationsPerScale, true);
212 	}
213 
214 	/**
215 	 * Construct a variable or fixed size Gist extractor with the given number
216 	 * of orientations per scale for the Gabor jet.
217 	 *
218 	 * @param orientationsPerScale
219 	 *            the number of Gabor orientations per scale (from HF to LF)
220 	 * @param fixedSize
221 	 *            if true the images should be resized and cropped
222 	 */
223 	public Gist(int[] orientationsPerScale, boolean fixedSize) {
224 		this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize);
225 	}
226 
227 	/**
228 	 * Construct a variable or fixed size Gist extractor with the given number
229 	 * of orientations per scale for the Gabor jet. The given height and width
230 	 * are ignored in variable size mode.
231 	 *
232 	 * @param width
233 	 *            the image width
234 	 * @param height
235 	 *            the image height
236 	 * @param orientationsPerScale
237 	 *            the number of Gabor orientations per scale (from HF to LF)
238 	 * @param fixedSize
239 	 *            if true the images should be resized and cropped
240 	 */
241 	public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) {
242 		this.fixedSize = fixedSize;
243 		this.orientationsPerScale = orientationsPerScale;
244 		this.imageWidth = width;
245 		this.imageHeight = height;
246 
247 		if (fixedSize)
248 			this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension, height + 2
249 					* this.boundaryExtension, orientationsPerScale);
250 	}
251 
252 	@Override
253 	public void analyseImage(IMAGE image) {
254 		if (fixedSize) {
255 			final double sc = Math.max((double) imageWidth / (double) image.getWidth(), (double) imageHeight
256 					/ (double) image.getHeight());
257 
258 			final IMAGE resized = image.process(new ResizeProcessor((float) sc));
259 			final IMAGE roi = resized.extractCenter(imageWidth, imageHeight);
260 			extractGist(roi);
261 		} else {
262 			if (gaborFilters == null || gaborFilters[0].width != image.getWidth()
263 					|| gaborFilters[0].height != image.getHeight())
264 			{
265 				gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension,
266 						image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale);
267 			}
268 
269 			extractGist(image.clone()); // clone to stop side effects from
270 			// normalisation further down
271 		}
272 	}
273 
274 	protected Gist(int[] orientationsPerScale) {
275 		this.orientationsPerScale = orientationsPerScale;
276 	}
277 
278 	protected void extractGist(IMAGE image) {
279 		MBFImage mbfimage;
280 		if (image instanceof FImage) {
281 			mbfimage = new MBFImage((FImage) image);
282 		} else if (image instanceof MBFImage) {
283 			mbfimage = (MBFImage) image;
284 		} else {
285 			throw new UnsupportedOperationException("Image type " + image.getClass()
286 					+ " is not currently supported. Please file a bug report.");
287 		}
288 
289 		final MBFImage o = prefilter(mbfimage.normalise());
290 		this.response = gistGabor(o);
291 	}
292 
293 	private MBFImage prefilter(MBFImage img) {
294 		final int w = 5;
295 		final double s1 = this.prefilterFC / Math.sqrt(Math.log(2));
296 
297 		final int sw = img.getWidth() + 2 * w;
298 		final int sh = img.getHeight() + 2 * w;
299 		int n = Math.max(sw, sh);
300 		n = n + n % 2;
301 		img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh);
302 
303 		final FImage filter = new FImage(2 * n, n);
304 		for (int j = 0; j < n; j++) {
305 			final int fy = j - n / 2;
306 
307 			for (int i = 0; i < n * 2; i += 2) {
308 				final int fx = (i / 2) - n / 2;
309 
310 				filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1));
311 			}
312 		}
313 
314 		final MBFImage output = new MBFImage();
315 		for (int b = 0; b < img.numBands(); b++) {
316 			final FImage band = img.getBand(b);
317 			for (int y = 0; y < band.height; y++) {
318 				for (int x = 0; x < band.width; x++) {
319 					band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255);
320 				}
321 			}
322 
323 			output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true)));
324 		}
325 		final FImage mean = output.flatten();
326 		final FImage meansq = mean.multiply(mean);
327 		final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true);
328 
329 		for (int b = 0; b < img.numBands(); b++) {
330 			final FImage band = output.getBand(b);
331 			for (int y = 0; y < localstd.height; y++)
332 				for (int x = 0; x < localstd.width; x++)
333 					band.pixels[y][x] = (float) (band.pixels[y][x] / (0.2 + Math
334 							.sqrt(Math.abs(localstd.pixels[y][x]))));
335 		}
336 
337 		return output.extractROI(w, w, sw - w - w, sh - w - w);
338 	}
339 
340 	private FloatFV gistGabor(MBFImage img) {
341 		final int blocksPerFilter = computeNumberOfSamplingBlocks();
342 		final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
343 		final int nFilters = this.gaborFilters.length;
344 
345 		// pad the image
346 		img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension);
347 
348 		final int cols = img.getCols();
349 		final int rows = img.getRows();
350 		final FloatFFT_2D fft = new FloatFFT_2D(rows, cols);
351 
352 		final float[][] workingSpace = new float[rows][cols * 2];
353 		final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands());
354 
355 		for (int b = 0; b < img.numBands(); b++) {
356 			final FImage band = img.bands.get(b);
357 
358 			final float[][] preparedImage =
359 					FourierTransform.prepareData(band.pixels, rows, cols, true);
360 			fft.complexForward(preparedImage);
361 
362 			for (int i = 0; i < nFilters; i++) {
363 				// convolve with the filter
364 				FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols);
365 
366 				// remove padding
367 				ig = ig.extractROI(boundaryExtension, boundaryExtension, band.width - 2 * boundaryExtension, band.height
368 						- 2
369 						* boundaryExtension);
370 
371 				sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter);
372 			}
373 		}
374 
375 		return fv;
376 	}
377 
378 	/**
379 	 * Compute the number of sampling blocks that are used for every filter. The
380 	 * default implementation returns {@link #numberOfBlocks}*
381 	 * {@link #numberOfBlocks}, but can be overridden in combination with
382 	 * {@link #sampleResponses(FImage, float[], int)} in a subclass to support
383 	 * different spatial sampling strategies.
384 	 *
385 	 * @return the number of sampling blocks per filter.
386 	 */
387 	protected int computeNumberOfSamplingBlocks() {
388 		return numberOfBlocks * numberOfBlocks;
389 	}
390 
391 	/**
392 	 * Sample the average response from each of the blocks in the image and
393 	 * insert into the vector. This method could be overridden to support
394 	 * different spatial aggregation strategies (in which case
395 	 * {@link #computeNumberOfSamplingBlocks()} should also be overridden).
396 	 *
397 	 * @param image
398 	 *            the image to sample
399 	 * @param v
400 	 *            the vector to write into
401 	 * @param offset
402 	 *            the offset from which to sample
403 	 */
404 	protected void sampleResponses(FImage image, float[] v, int offset) {
405 		final int gridWidth = image.width / this.numberOfBlocks;
406 		final int gridHeight = image.height / this.numberOfBlocks;
407 
408 		for (int iy = 0; iy < this.numberOfBlocks; iy++) {
409 			final int starty = gridHeight * iy;
410 			final int stopy = Math.min(starty + gridHeight, image.height);
411 
412 			for (int ix = 0; ix < this.numberOfBlocks; ix++) {
413 				final int startx = gridWidth * ix;
414 				final int stopx = Math.min(startx + gridWidth, image.width);
415 
416 				float avg = 0;
417 				for (int y = starty; y < stopy; y++) {
418 					for (int x = startx; x < stopx; x++) {
419 						avg += image.pixels[y][x];
420 					}
421 				}
422 				avg /= ((stopx - startx) * (stopy - starty));
423 
424 				// note y and x transposed to conform to the matlab
425 				// implementation
426 				v[offset + iy + ix * this.numberOfBlocks] = avg;
427 			}
428 		}
429 	}
430 
431 	/*
432 	 * Perform convolution in the frequency domain an reconstruct the resultant
433 	 * image as the magnitudes of the complex components from the ifft.
434 	 */
435 	private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft,
436 			int rows, int cols)
437 	{
438 		final float[][] preparedKernel = filterfft.pixels;
439 
440 		for (int y = 0; y < rows; y++) {
441 			for (int x = 0; x < cols; x++) {
442 				final float reImage = preparedImage[y][x * 2];
443 				final float imImage = preparedImage[y][1 + x * 2];
444 
445 				final float reKernel = preparedKernel[y][x * 2];
446 				final float imKernel = preparedKernel[y][1 + x * 2];
447 
448 				final float re = reImage * reKernel - imImage * imKernel;
449 				final float im = reImage * imKernel + imImage * reKernel;
450 
451 				workingSpace[y][x * 2] = re;
452 				workingSpace[y][1 + x * 2] = im;
453 			}
454 		}
455 
456 		fft.complexInverse(workingSpace, true);
457 
458 		final FImage out = new FImage(cols, rows);
459 		for (int r = 0; r < rows; r++) {
460 			for (int c = 0; c < cols; c++) {
461 				out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2]
462 						+ workingSpace[r][1 + c * 2]
463 								* workingSpace[r][1 + c * 2]);
464 			}
465 		}
466 		return out;
467 	}
468 
469 	/**
470 	 * Compute the descriptor visualisation in the same form as the original
471 	 * matlab code. The resultant image illustrates the dominant filter
472 	 * responses for each spatial bin. The filter scales are drawn with
473 	 * differing hues, and brightness is proportional to response value.
474 	 *
475 	 * @param height
476 	 *            the desired height of the produced image
477 	 * @return the visualisation TODO: colour descriptors
478 	 */
479 	public MBFImage visualiseDescriptor(int height) {
480 		final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length);
481 
482 		final FImage[] G = new FImage[this.gaborFilters.length];
483 		for (int i = 0; i < gaborFilters.length; i++) {
484 			G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height);
485 			FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false);
486 			G[i] = ResizeProcessor.halfSize(G[i]);
487 			G[i].addInplace(G[i].clone().flipY().flipX());
488 		}
489 
490 		final int blocksPerFilter = computeNumberOfSamplingBlocks();
491 		final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
492 		final int nBands = this.response.values.length / nFeaturesPerBand;
493 
494 		final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB);
495 		output.fill(RGBColour.WHITE);
496 
497 		for (int b = 0; b < nBands; b++) {
498 			float max = 0;
499 			final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks];
500 			for (int y = 0, k = 0; y < numberOfBlocks; y++) {
501 				for (int x = 0; x < numberOfBlocks; x++, k++) {
502 					blockImages[k] = new MBFImage(G[0].width, G[0].height, 3);
503 
504 					for (int s = 0, j = 0; s < orientationsPerScale.length; s++) {
505 						for (int i = 0; i < orientationsPerScale[s]; i++, j++) {
506 							final MBFImage col = new MBFImage(G[0].width, G[0].height, 3);
507 							col.fill(C[s]).multiplyInplace(
508 									this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]);
509 
510 							blockImages[k].addInplace(G[j].toRGB().multiply(col));
511 						}
512 					}
513 
514 					for (int i = 0; i < blockImages[k].numBands(); i++)
515 						max = Math.max(max, blockImages[k].bands.get(i).max());
516 				}
517 			}
518 
519 			final int ts = (height / 4);
520 			final ResizeProcessor rp = new ResizeProcessor(ts, ts, true);
521 			for (int y = 0, k = 0; y < numberOfBlocks; y++) {
522 				for (int x = 0; x < numberOfBlocks; x++, k++) {
523 					blockImages[k].divideInplace(max);
524 					final MBFImage tmp = blockImages[k].process(rp);
525 					tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE);
526 					tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE);
527 					tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE);
528 					tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE);
529 					output.drawImage(tmp, x * ts + b * height, y * ts);
530 				}
531 			}
532 		}
533 
534 		return output;
535 	}
536 
537 	/**
538 	 * Get the response vector from the previous call to
539 	 * {@link #analyseImage(Image)}.
540 	 *
541 	 * @return the response
542 	 */
543 	public FloatFV getResponse() {
544 		return response;
545 	}
546 }