001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.global;
031
032import org.openimaj.citation.annotation.Reference;
033import org.openimaj.citation.annotation.ReferenceType;
034import org.openimaj.feature.FloatFV;
035import org.openimaj.image.FImage;
036import org.openimaj.image.Image;
037import org.openimaj.image.MBFImage;
038import org.openimaj.image.analyser.ImageAnalyser;
039import org.openimaj.image.colour.ColourMap;
040import org.openimaj.image.colour.ColourSpace;
041import org.openimaj.image.colour.RGBColour;
042import org.openimaj.image.processing.algorithm.FourierTransform;
043import org.openimaj.image.processing.convolution.FourierConvolve;
044import org.openimaj.image.processing.convolution.GaborFilters;
045import org.openimaj.image.processing.resize.ResizeProcessor;
046import org.openimaj.image.processor.SinglebandImageProcessor;
047
048import edu.emory.mathcs.jtransforms.fft.FloatFFT_2D;
049
050/**
051 * Implementation of the "Gist" spatial envelope feature. Based on (and tested
052 * against) the original Matlab implementation from <a
053 * href="http://people.csail.mit.edu/torralba/code/spatialenvelope/"
054 * >http://people.csail.mit.edu/torralba/code/spatialenvelope/</a>, and designed
055 * to produce comparable features.
056 * <p>
057 * The Gist or Spatial Envelope is a very low dimensional representation of the
058 * scene. Gist encodes a set of perceptual dimensions (naturalness, openness,
059 * roughness, expansion, ruggedness) that represents the dominant spatial
060 * structure of a scene. These perceptual dimensions are reliably estimated
061 * using coarsely localized spectral information from the image.
062 * <p>
063 * <b>Implementation notes:</b> This class supports both fixed and variable size
064 * Gist. In the fixed size mode, the image is resized and centre-cropped to a
065 * pre-defined size (allowing the Gabor filters to be pre-computed and reused).
066 * In the variable size mode the Gabor filters will be re-computed as necessary
067 * if the image size changes. For computing features to compare images, fixed
068 * size mode makes more sense.
069 * <p>
070 * <b>Example usage for image comparison:</b><br>
071 * </br> <code>
072 * <pre>
073 * FImage i1, i2 = ...
074 * Gist<FImage> fsg = new Gist<FImage>(256, 256);
075 *
076 * fsg.analyseImage(i1);
077 * FloatFV f1 = fsg.getResponse();
078 *
079 * fsg.analyseImage(i2);
080 * FloatFV f2 = fsg.getResponse();
081 *
082 * double d = FloatFVComparison.SUM_SQUARE.compare(f1, f2);
083 * </pre>
084 * </code>
085 * <p>
086 * The ability to generate a visualisation of the current descriptor in the same
087 * way as provided by the original Matlab code is given by the
088 * {@link #visualiseDescriptor(int)} method.
089 *
090 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
091 *
092 * @param <IMAGE>
093 *            The type of image. {@link MBFImage} and {@link FImage} are
094 *            supported; other types might not work.
095 */
096@Reference(
097                type = ReferenceType.Article,
098                author = { "Oliva, Aude", "Torralba, Antonio" },
099                title = "Modeling the Shape of the Scene: A Holistic Representation of the Spatial Envelope",
100                year = "2001",
101                journal = "Int. J. Comput. Vision",
102                pages = { "145", "", "175" },
103                url = "http://dx.doi.org/10.1023/A:1011139631724",
104                month = "May",
105                number = "3",
106                publisher = "Kluwer Academic Publishers",
107                volume = "42",
108                customData = {
109                                "issn", "0920-5691",
110                                "numpages", "31",
111                                "doi", "10.1023/A:1011139631724",
112                                "acmid", "598462",
113                                "address", "Hingham, MA, USA",
114                                "keywords", "energy spectrum, natural images, principal components, scene recognition, spatial layout"
115                })
116public class Gist<IMAGE extends Image<?, IMAGE> & SinglebandImageProcessor.Processable<Float, FImage, IMAGE>>
117implements
118ImageAnalyser<IMAGE>
119{
120        /**
121         * The default number of filter orientations per scale (from HF to LF)
122         */
123        public static final int[] DEFAULT_ORIENTATIONS_PER_SCALE = { 8, 8, 8, 8 };
124
125        /**
126         * The default number spatial blocks
127         */
128        public static final int DEFAULT_NUMBER_OF_BLOCKS = 4;
129
130        /**
131         * The default number of cycles per image for the pre-filter Gaussian
132         */
133        public static final int DEFAULT_PREFILTER_FC = 4;
134
135        /**
136         * The default amount of padding to apply before convolving with the Gabor
137         * filters
138         */
139        public static final int DEFAULT_BOUNDARY_EXTENSION = 32;
140
141        /**
142         * The number of blocks in each direction
143         */
144        public int numberOfBlocks = DEFAULT_NUMBER_OF_BLOCKS;
145
146        /**
147         * The number of cycles per image for the pre-filter Gaussian
148         */
149        public int prefilterFC = DEFAULT_PREFILTER_FC;
150
151        /**
152         * The amount of padding to add before convolving with the Gabor functions
153         */
154        public int boundaryExtension = DEFAULT_BOUNDARY_EXTENSION;
155
156        /**
157         * Default image size (both height and width) for fixed size Gist
158         */
159        public static final int DEFAULT_SIZE = 128;
160
161        protected FImage[] gaborFilters;
162        protected FloatFV response;
163        protected int[] orientationsPerScale;
164        protected boolean fixedSize;
165        protected int imageWidth;
166        protected int imageHeight;
167
168        /**
169         * Construct a fixed size Gist extractor using the default values.
170         */
171        public Gist() {
172                this(true);
173        }
174
175        /**
176         * Construct a variable or fixed size Gist extractor using the default
177         * values.
178         *
179         * @param fixedSize
180         *            if true the images should be resized and cropped
181         */
182        public Gist(boolean fixedSize) {
183                this(DEFAULT_SIZE, DEFAULT_SIZE, DEFAULT_ORIENTATIONS_PER_SCALE, fixedSize);
184        }
185
186        /**
187         * Construct a fixed-size Gist extractor, with the given image size and the
188         * default number of orientations per scale for the Gabor jet.
189         *
190         * @param width
191         *            the image width
192         * @param height
193         *            the image height
194         */
195        public Gist(int width, int height) {
196                this(width, height, DEFAULT_ORIENTATIONS_PER_SCALE);
197        }
198
199        /**
200         * Construct a fixed-size Gist extractor, with the given image size and
201         * number of orientations per scale for the Gabor jet.
202         *
203         * @param width
204         *            the image width
205         * @param height
206         *            the image height
207         * @param orientationsPerScale
208         *            the number of Gabor orientations per scale (from HF to LF)
209         */
210        public Gist(int width, int height, int[] orientationsPerScale) {
211                this(width, height, orientationsPerScale, true);
212        }
213
214        /**
215         * Construct a variable or fixed size Gist extractor with the given number
216         * of orientations per scale for the Gabor jet.
217         *
218         * @param orientationsPerScale
219         *            the number of Gabor orientations per scale (from HF to LF)
220         * @param fixedSize
221         *            if true the images should be resized and cropped
222         */
223        public Gist(int[] orientationsPerScale, boolean fixedSize) {
224                this(DEFAULT_SIZE, DEFAULT_SIZE, orientationsPerScale, fixedSize);
225        }
226
227        /**
228         * Construct a variable or fixed size Gist extractor with the given number
229         * of orientations per scale for the Gabor jet. The given height and width
230         * are ignored in variable size mode.
231         *
232         * @param width
233         *            the image width
234         * @param height
235         *            the image height
236         * @param orientationsPerScale
237         *            the number of Gabor orientations per scale (from HF to LF)
238         * @param fixedSize
239         *            if true the images should be resized and cropped
240         */
241        public Gist(int width, int height, int[] orientationsPerScale, boolean fixedSize) {
242                this.fixedSize = fixedSize;
243                this.orientationsPerScale = orientationsPerScale;
244                this.imageWidth = width;
245                this.imageHeight = height;
246
247                if (fixedSize)
248                        this.gaborFilters = GaborFilters.createGaborJets(width + 2 * this.boundaryExtension, height + 2
249                                        * this.boundaryExtension, orientationsPerScale);
250        }
251
252        @Override
253        public void analyseImage(IMAGE image) {
254                if (fixedSize) {
255                        final double sc = Math.max((double) imageWidth / (double) image.getWidth(), (double) imageHeight
256                                        / (double) image.getHeight());
257
258                        final IMAGE resized = image.process(new ResizeProcessor((float) sc));
259                        final IMAGE roi = resized.extractCenter(imageWidth, imageHeight);
260                        extractGist(roi);
261                } else {
262                        if (gaborFilters == null || gaborFilters[0].width != image.getWidth()
263                                        || gaborFilters[0].height != image.getHeight())
264                        {
265                                gaborFilters = GaborFilters.createGaborJets(image.getWidth() + 2 * this.boundaryExtension,
266                                                image.getHeight() + 2 * this.boundaryExtension, orientationsPerScale);
267                        }
268
269                        extractGist(image.clone()); // clone to stop side effects from
270                        // normalisation further down
271                }
272        }
273
274        protected Gist(int[] orientationsPerScale) {
275                this.orientationsPerScale = orientationsPerScale;
276        }
277
278        protected void extractGist(IMAGE image) {
279                MBFImage mbfimage;
280                if (image instanceof FImage) {
281                        mbfimage = new MBFImage((FImage) image);
282                } else if (image instanceof MBFImage) {
283                        mbfimage = (MBFImage) image;
284                } else {
285                        throw new UnsupportedOperationException("Image type " + image.getClass()
286                                        + " is not currently supported. Please file a bug report.");
287                }
288
289                final MBFImage o = prefilter(mbfimage.normalise());
290                this.response = gistGabor(o);
291        }
292
293        private MBFImage prefilter(MBFImage img) {
294                final int w = 5;
295                final double s1 = this.prefilterFC / Math.sqrt(Math.log(2));
296
297                final int sw = img.getWidth() + 2 * w;
298                final int sh = img.getHeight() + 2 * w;
299                int n = Math.max(sw, sh);
300                n = n + n % 2;
301                img = img.paddingSymmetric(w, w, w + n - sw, w + n - sh);
302
303                final FImage filter = new FImage(2 * n, n);
304                for (int j = 0; j < n; j++) {
305                        final int fy = j - n / 2;
306
307                        for (int i = 0; i < n * 2; i += 2) {
308                                final int fx = (i / 2) - n / 2;
309
310                                filter.pixels[j][i] = (float) Math.exp(-(fx * fx + fy * fy) / (s1 * s1));
311                        }
312                }
313
314                final MBFImage output = new MBFImage();
315                for (int b = 0; b < img.numBands(); b++) {
316                        final FImage band = img.getBand(b);
317                        for (int y = 0; y < band.height; y++) {
318                                for (int x = 0; x < band.width; x++) {
319                                        band.pixels[y][x] = (float) Math.log(1 + band.pixels[y][x] * 255);
320                                }
321                        }
322
323                        output.bands.add(band.subtractInplace(FourierConvolve.convolvePrepared(band, filter, true)));
324                }
325                final FImage mean = output.flatten();
326                final FImage meansq = mean.multiply(mean);
327                final FImage localstd = FourierConvolve.convolvePrepared(meansq, filter, true);
328
329                for (int b = 0; b < img.numBands(); b++) {
330                        final FImage band = output.getBand(b);
331                        for (int y = 0; y < localstd.height; y++)
332                                for (int x = 0; x < localstd.width; x++)
333                                        band.pixels[y][x] = (float) (band.pixels[y][x] / (0.2 + Math
334                                                        .sqrt(Math.abs(localstd.pixels[y][x]))));
335                }
336
337                return output.extractROI(w, w, sw - w - w, sh - w - w);
338        }
339
340        private FloatFV gistGabor(MBFImage img) {
341                final int blocksPerFilter = computeNumberOfSamplingBlocks();
342                final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
343                final int nFilters = this.gaborFilters.length;
344
345                // pad the image
346                img = img.paddingSymmetric(boundaryExtension, boundaryExtension, boundaryExtension, boundaryExtension);
347
348                final int cols = img.getCols();
349                final int rows = img.getRows();
350                final FloatFFT_2D fft = new FloatFFT_2D(rows, cols);
351
352                final float[][] workingSpace = new float[rows][cols * 2];
353                final FloatFV fv = new FloatFV(nFeaturesPerBand * img.numBands());
354
355                for (int b = 0; b < img.numBands(); b++) {
356                        final FImage band = img.bands.get(b);
357
358                        final float[][] preparedImage =
359                                        FourierTransform.prepareData(band.pixels, rows, cols, true);
360                        fft.complexForward(preparedImage);
361
362                        for (int i = 0; i < nFilters; i++) {
363                                // convolve with the filter
364                                FImage ig = performConv(fft, preparedImage, workingSpace, this.gaborFilters[i], rows, cols);
365
366                                // remove padding
367                                ig = ig.extractROI(boundaryExtension, boundaryExtension, band.width - 2 * boundaryExtension, band.height
368                                                - 2
369                                                * boundaryExtension);
370
371                                sampleResponses(ig, fv.values, b * nFeaturesPerBand + i * blocksPerFilter);
372                        }
373                }
374
375                return fv;
376        }
377
378        /**
379         * Compute the number of sampling blocks that are used for every filter. The
380         * default implementation returns {@link #numberOfBlocks}*
381         * {@link #numberOfBlocks}, but can be overridden in combination with
382         * {@link #sampleResponses(FImage, float[], int)} in a subclass to support
383         * different spatial sampling strategies.
384         *
385         * @return the number of sampling blocks per filter.
386         */
387        protected int computeNumberOfSamplingBlocks() {
388                return numberOfBlocks * numberOfBlocks;
389        }
390
391        /**
392         * Sample the average response from each of the blocks in the image and
393         * insert into the vector. This method could be overridden to support
394         * different spatial aggregation strategies (in which case
395         * {@link #computeNumberOfSamplingBlocks()} should also be overridden).
396         *
397         * @param image
398         *            the image to sample
399         * @param v
400         *            the vector to write into
401         * @param offset
402         *            the offset from which to sample
403         */
404        protected void sampleResponses(FImage image, float[] v, int offset) {
405                final int gridWidth = image.width / this.numberOfBlocks;
406                final int gridHeight = image.height / this.numberOfBlocks;
407
408                for (int iy = 0; iy < this.numberOfBlocks; iy++) {
409                        final int starty = gridHeight * iy;
410                        final int stopy = Math.min(starty + gridHeight, image.height);
411
412                        for (int ix = 0; ix < this.numberOfBlocks; ix++) {
413                                final int startx = gridWidth * ix;
414                                final int stopx = Math.min(startx + gridWidth, image.width);
415
416                                float avg = 0;
417                                for (int y = starty; y < stopy; y++) {
418                                        for (int x = startx; x < stopx; x++) {
419                                                avg += image.pixels[y][x];
420                                        }
421                                }
422                                avg /= ((stopx - startx) * (stopy - starty));
423
424                                // note y and x transposed to conform to the matlab
425                                // implementation
426                                v[offset + iy + ix * this.numberOfBlocks] = avg;
427                        }
428                }
429        }
430
431        /*
432         * Perform convolution in the frequency domain an reconstruct the resultant
433         * image as the magnitudes of the complex components from the ifft.
434         */
435        private FImage performConv(FloatFFT_2D fft, float[][] preparedImage, float[][] workingSpace, FImage filterfft,
436                        int rows, int cols)
437        {
438                final float[][] preparedKernel = filterfft.pixels;
439
440                for (int y = 0; y < rows; y++) {
441                        for (int x = 0; x < cols; x++) {
442                                final float reImage = preparedImage[y][x * 2];
443                                final float imImage = preparedImage[y][1 + x * 2];
444
445                                final float reKernel = preparedKernel[y][x * 2];
446                                final float imKernel = preparedKernel[y][1 + x * 2];
447
448                                final float re = reImage * reKernel - imImage * imKernel;
449                                final float im = reImage * imKernel + imImage * reKernel;
450
451                                workingSpace[y][x * 2] = re;
452                                workingSpace[y][1 + x * 2] = im;
453                        }
454                }
455
456                fft.complexInverse(workingSpace, true);
457
458                final FImage out = new FImage(cols, rows);
459                for (int r = 0; r < rows; r++) {
460                        for (int c = 0; c < cols; c++) {
461                                out.pixels[r][c] = (float) Math.sqrt(workingSpace[r][c * 2] * workingSpace[r][c * 2]
462                                                + workingSpace[r][1 + c * 2]
463                                                                * workingSpace[r][1 + c * 2]);
464                        }
465                }
466                return out;
467        }
468
469        /**
470         * Compute the descriptor visualisation in the same form as the original
471         * matlab code. The resultant image illustrates the dominant filter
472         * responses for each spatial bin. The filter scales are drawn with
473         * differing hues, and brightness is proportional to response value.
474         *
475         * @param height
476         *            the desired height of the produced image
477         * @return the visualisation TODO: colour descriptors
478         */
479        public MBFImage visualiseDescriptor(int height) {
480                final Float[][] C = ColourMap.HSV.generateColours(orientationsPerScale.length);
481
482                final FImage[] G = new FImage[this.gaborFilters.length];
483                for (int i = 0; i < gaborFilters.length; i++) {
484                        G[i] = new FImage(this.gaborFilters[i].width / 2, this.gaborFilters[i].height);
485                        FourierTransform.unprepareData(gaborFilters[i].pixels, G[i], false);
486                        G[i] = ResizeProcessor.halfSize(G[i]);
487                        G[i].addInplace(G[i].clone().flipY().flipX());
488                }
489
490                final int blocksPerFilter = computeNumberOfSamplingBlocks();
491                final int nFeaturesPerBand = gaborFilters.length * blocksPerFilter;
492                final int nBands = this.response.values.length / nFeaturesPerBand;
493
494                final MBFImage output = new MBFImage(nBands * height, height, ColourSpace.RGB);
495                output.fill(RGBColour.WHITE);
496
497                for (int b = 0; b < nBands; b++) {
498                        float max = 0;
499                        final MBFImage[] blockImages = new MBFImage[numberOfBlocks * numberOfBlocks];
500                        for (int y = 0, k = 0; y < numberOfBlocks; y++) {
501                                for (int x = 0; x < numberOfBlocks; x++, k++) {
502                                        blockImages[k] = new MBFImage(G[0].width, G[0].height, 3);
503
504                                        for (int s = 0, j = 0; s < orientationsPerScale.length; s++) {
505                                                for (int i = 0; i < orientationsPerScale[s]; i++, j++) {
506                                                        final MBFImage col = new MBFImage(G[0].width, G[0].height, 3);
507                                                        col.fill(C[s]).multiplyInplace(
508                                                                        this.response.values[y + x * numberOfBlocks + j * numberOfBlocks * numberOfBlocks]);
509
510                                                        blockImages[k].addInplace(G[j].toRGB().multiply(col));
511                                                }
512                                        }
513
514                                        for (int i = 0; i < blockImages[k].numBands(); i++)
515                                                max = Math.max(max, blockImages[k].bands.get(i).max());
516                                }
517                        }
518
519                        final int ts = (height / 4);
520                        final ResizeProcessor rp = new ResizeProcessor(ts, ts, true);
521                        for (int y = 0, k = 0; y < numberOfBlocks; y++) {
522                                for (int x = 0; x < numberOfBlocks; x++, k++) {
523                                        blockImages[k].divideInplace(max);
524                                        final MBFImage tmp = blockImages[k].process(rp);
525                                        tmp.drawLine(0, 0, 0, ts - 1, 1, RGBColour.WHITE);
526                                        tmp.drawLine(0, 0, ts - 1, 0, 1, RGBColour.WHITE);
527                                        tmp.drawLine(ts - 1, 0, ts - 1, ts - 1, 1, RGBColour.WHITE);
528                                        tmp.drawLine(0, ts - 1, ts - 1, ts - 1, 1, RGBColour.WHITE);
529                                        output.drawImage(tmp, x * ts + b * height, y * ts);
530                                }
531                        }
532                }
533
534                return output;
535        }
536
537        /**
538         * Get the response vector from the previous call to
539         * {@link #analyseImage(Image)}.
540         *
541         * @return the response
542         */
543        public FloatFV getResponse() {
544                return response;
545        }
546}