1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.openimaj.image.feature.global;
31
32 import org.openimaj.citation.annotation.Reference;
33 import org.openimaj.citation.annotation.ReferenceType;
34 import org.openimaj.feature.FloatFV;
35 import org.openimaj.image.FImage;
36 import org.openimaj.image.Image;
37 import org.openimaj.image.MBFImage;
38 import org.openimaj.image.analyser.ImageAnalyser;
39 import org.openimaj.image.colour.ColourMap;
40 import org.openimaj.image.colour.ColourSpace;
41 import org.openimaj.image.colour.RGBColour;
42 import org.openimaj.image.processing.algorithm.FourierTransform;
43 import org.openimaj.image.processing.convolution.FourierConvolve;
44 import org.openimaj.image.processing.convolution.GaborFilters;
45 import org.openimaj.image.processing.resize.ResizeProcessor;
46 import org.openimaj.image.processor.SinglebandImageProcessor;
47
48 import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96 @Reference(
97 type = ReferenceType.Article,
98 author = { "Oliva, Aude", "Torralba, Antonio" },
99 title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope",
100 year = "2001",
101 journal = "Int. J. Comput. Vision",
102 pages = { "145", "", "175" },
103 url = "http://dx.doi.org/10.1023/A:1011139631724",
104 month = "May",
105 number = "3",
106 publisher = "Kluwer Academic Publishers",
107 volume = "42",
108 customData = {
109 "issn", "0920-5691",
110 "numpages", "31",
111 "doi", "10.1023/A:1011139631724",
112 "acmid", "598462",
113 "address", "Hingham, MA, USA",
114 "keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout"
115 })
116 public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
117 implements
118 ImageAnalyser<IMAGE>
119 {
120
121
122
123 public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 };
124
125
126
127
128 public static final int DEFAULT_NUMBER_OF_BLOCKS = 4;
129
130
131
132
133 public static final int DEFAULT_PREFILTER_FC = 4;
134
135
136
137
138
139 public static final int DEFAULT_BOUNDARY_EXTENSION = 32;
140
141
142
143
144 public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS;
145
146
147
148
149 public int prefilterFC = DEFAULT_PREFILTER_FC;
150
151
152
153
154 public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION;
155
156
157
158
159 public static final int DEFAULT_SIZE = 128;
160
161 protected FImage[] gaborFilters;
162 protected FloatFV response;
163 protected int[] orientationsPerScale;
164 protected boolean fixedSize;
165 protected int imageWidth;
166 protected int imageHeight;
167
168
169
170
171 public Gist() {
172 this(true);
173 }
174
175
176
177
178
179
180
181
182 public Gist(boolean fixedSize) {
183 this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize);
184 }
185
186
187
188
189
190
191
192
193
194
195 public Gist(int width, int height) {
196 this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE);
197 }
198
199
200
201
202
203
204
205
206
207
208
209
210 public Gist(int width, int height, int[] orientationsPerScale) {
211 this(width, height, orientationsPerScale, true);
212 }
213
214
215
216
217
218
219
220
221
222
223 public Gist(int[] orientationsPerScale, boolean fixedSize) {
224 this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize);
225 }
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241 public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) {
242 this.fixedSize = fixedSize;
243 this.orientationsPerScale = orientationsPerScale;
244 this.imageWidth = width;
245 this.imageHeight = height;
246
247 if (fixedSize)
248 this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension, height + 2
249 * this.boundaryExtension, orientationsPerScale);
250 }
251
252 @Override
253 public void analyseImage(IMAGE image) {
254 if (fixedSize) {
255 final double sc = Math.max((double) imageWidth / (double) image.getWidth(), (double) imageHeight
256 / (double) image.getHeight());
257
258 final IMAGE resized = image.process(new ResizeProcessor((float) sc));
259 final IMAGE roi = resized.extractCenter(imageWidth, imageHeight);
260 extractGist(roi);
261 } else {
262 if (gaborFilters == null || gaborFilters[0].width != image.getWidth()
263 || gaborFilters[0].height != image.getHeight())
264 {
265 gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension,
266 image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale);
267 }
268
269 extractGist(image.clone());
270
271 }
272 }
273
274 protected Gist(int[] orientationsPerScale) {
275 this.orientationsPerScale = orientationsPerScale;
276 }
277
278 protected void extractGist(IMAGE image) {
279 MBFImage mbfimage;
280 if (image instanceof FImage) {
281 mbfimage = new MBFImage((FImage) image);
282 } else if (image instanceof MBFImage) {
283 mbfimage = (MBFImage) image;
284 } else {
285 throw new UnsupportedOperationException("Image type " + image.getClass()
286 + " is not currently supported. Please file a bug report.");
287 }
288
289 final MBFImage o = prefilter(mbfimage.normalise());
290 this.response = gistGabor(o);
291 }
292
293 private MBFImage prefilter(MBFImage img) {
294 final int w = 5;
295 final double s1 = this.prefilterFC / Math.sqrt(Math.log(2));
296
297 final int sw = img.getWidth() + 2 * w;
298 final int sh = img.getHeight() + 2 * w;
299 int n = Math.max(sw, sh);
300 n = n + n % 2;
301 img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh);
302
303 final FImage filter = new FImage(2 * n, n);
304 for (int j = 0; j < n; j++) {
305 final int fy = j - n / 2;
306
307 for (int i = 0; i < n * 2; i += 2) {
308 final int fx = (i / 2) - n / 2;
309
310 filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1));
311 }
312 }
313
314 final MBFImage output = new MBFImage();
315 for (int b = 0; b < img.numBands(); b++) {
316 final FImage band = img.getBand(b);
317 for (int y = 0; y < band.height; y++) {
318 for (int x = 0; x < band.width; x++) {
319 band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255);
320 }
321 }
322
323 output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true)));
324 }
325 final FImage mean = output.flatten();
326 final FImage meansq = mean.multiply(mean);
327 final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true);
328
329 for (int b = 0; b < img.numBands(); b++) {
330 final FImage band = output.getBand(b);
331 for (int y = 0; y < localstd.height; y++)
332 for (int x = 0; x < localstd.width; x++)
333 band.pixels[y][x] = (float) (band.pixels[y][x] / (0.2 + Math
334 .sqrt(Math.abs(localstd.pixels[y][x]))));
335 }
336
337 return output.extractROI(w, w, sw - w - w, sh - w - w);
338 }
339
340 private FloatFV gistGabor(MBFImage img) {
341 final int blocksPerFilter = computeNumberOfSamplingBlocks();
342 final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
343 final int nFilters = this.gaborFilters.length;
344
345
346 img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension);
347
348 final int cols = img.getCols();
349 final int rows = img.getRows();
350 final FloatFFT_2D fft = new FloatFFT_2D(rows, cols);
351
352 final float[][] workingSpace = new float[rows][cols * 2];
353 final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands());
354
355 for (int b = 0; b < img.numBands(); b++) {
356 final FImage band = img.bands.get(b);
357
358 final float[][] preparedImage =
359 FourierTransform.prepareData(band.pixels, rows, cols, true);
360 fft.complexForward(preparedImage);
361
362 for (int i = 0; i < nFilters; i++) {
363
364 FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols);
365
366
367 ig = ig.extractROI(boundaryExtension, boundaryExtension, band.width - 2 * boundaryExtension, band.height
368 - 2
369 * boundaryExtension);
370
371 sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter);
372 }
373 }
374
375 return fv;
376 }
377
378
379
380
381
382
383
384
385
386
387 protected int computeNumberOfSamplingBlocks() {
388 return numberOfBlocks * numberOfBlocks;
389 }
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404 protected void sampleResponses(FImage image, float[] v, int offset) {
405 final int gridWidth = image.width / this.numberOfBlocks;
406 final int gridHeight = image.height / this.numberOfBlocks;
407
408 for (int iy = 0; iy < this.numberOfBlocks; iy++) {
409 final int starty = gridHeight * iy;
410 final int stopy = Math.min(starty + gridHeight, image.height);
411
412 for (int ix = 0; ix < this.numberOfBlocks; ix++) {
413 final int startx = gridWidth * ix;
414 final int stopx = Math.min(startx + gridWidth, image.width);
415
416 float avg = 0;
417 for (int y = starty; y < stopy; y++) {
418 for (int x = startx; x < stopx; x++) {
419 avg += image.pixels[y][x];
420 }
421 }
422 avg /= ((stopx - startx) * (stopy - starty));
423
424
425
426 v[offset + iy + ix * this.numberOfBlocks] = avg;
427 }
428 }
429 }
430
431
432
433
434
435 private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft,
436 int rows, int cols)
437 {
438 final float[][] preparedKernel = filterfft.pixels;
439
440 for (int y = 0; y < rows; y++) {
441 for (int x = 0; x < cols; x++) {
442 final float reImage = preparedImage[y][x * 2];
443 final float imImage = preparedImage[y][1 + x * 2];
444
445 final float reKernel = preparedKernel[y][x * 2];
446 final float imKernel = preparedKernel[y][1 + x * 2];
447
448 final float re = reImage * reKernel - imImage * imKernel;
449 final float im = reImage * imKernel + imImage * reKernel;
450
451 workingSpace[y][x * 2] = re;
452 workingSpace[y][1 + x * 2] = im;
453 }
454 }
455
456 fft.complexInverse(workingSpace, true);
457
458 final FImage out = new FImage(cols, rows);
459 for (int r = 0; r < rows; r++) {
460 for (int c = 0; c < cols; c++) {
461 out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2]
462 + workingSpace[r][1 + c * 2]
463 * workingSpace[r][1 + c * 2]);
464 }
465 }
466 return out;
467 }
468
469
470
471
472
473
474
475
476
477
478
479 public MBFImage visualiseDescriptor(int height) {
480 final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length);
481
482 final FImage[] G = new FImage[this.gaborFilters.length];
483 for (int i = 0; i < gaborFilters.length; i++) {
484 G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height);
485 FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false);
486 G[i] = ResizeProcessor.halfSize(G[i]);
487 G[i].addInplace(G[i].clone().flipY().flipX());
488 }
489
490 final int blocksPerFilter = computeNumberOfSamplingBlocks();
491 final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
492 final int nBands = this.response.values.length / nFeaturesPerBand;
493
494 final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB);
495 output.fill(RGBColour.WHITE);
496
497 for (int b = 0; b < nBands; b++) {
498 float max = 0;
499 final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks];
500 for (int y = 0, k = 0; y < numberOfBlocks; y++) {
501 for (int x = 0; x < numberOfBlocks; x++, k++) {
502 blockImages[k] = new MBFImage(G[0].width, G[0].height, 3);
503
504 for (int s = 0, j = 0; s < orientationsPerScale.length; s++) {
505 for (int i = 0; i < orientationsPerScale[s]; i++, j++) {
506 final MBFImage col = new MBFImage(G[0].width, G[0].height, 3);
507 col.fill(C[s]).multiplyInplace(
508 this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]);
509
510 blockImages[k].addInplace(G[j].toRGB().multiply(col));
511 }
512 }
513
514 for (int i = 0; i < blockImages[k].numBands(); i++)
515 max = Math.max(max, blockImages[k].bands.get(i).max());
516 }
517 }
518
519 final int ts = (height / 4);
520 final ResizeProcessor rp = new ResizeProcessor(ts, ts, true);
521 for (int y = 0, k = 0; y < numberOfBlocks; y++) {
522 for (int x = 0; x < numberOfBlocks; x++, k++) {
523 blockImages[k].divideInplace(max);
524 final MBFImage tmp = blockImages[k].process(rp);
525 tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE);
526 tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE);
527 tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE);
528 tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE);
529 output.drawImage(tmp, x * ts + b * height, y * ts);
530 }
531 }
532 }
533
534 return output;
535 }
536
537
538
539
540
541
542
543 public FloatFV getResponse() {
544 return response;
545 }
546 }