001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.local.aggregate;
031
032import java.util.List;
033
034import org.openimaj.citation.annotation.Reference;
035import org.openimaj.citation.annotation.ReferenceType;
036import org.openimaj.citation.annotation.References;
037import org.openimaj.feature.ArrayFeatureVector;
038import org.openimaj.feature.FloatFV;
039import org.openimaj.feature.local.LocalFeature;
040import org.openimaj.math.statistics.distribution.MixtureOfGaussians;
041import org.openimaj.math.statistics.distribution.MultivariateGaussian;
042import org.openimaj.ml.gmm.GaussianMixtureModelEM;
043import org.openimaj.ml.gmm.GaussianMixtureModelEM.CovarianceType;
044
045/**
046 * Implementation of the Fisher Vector (FV) encoding scheme. FV provides a way
047 * of encoding a set of vectors (e.g. local features) as a single vector that
048 * encapsulates the first and second order residuals of the vectors from a
049 * gaussian mixture model.
050 * <p>
051 * The dimensionality of the output vector is 2*K*D where K is the number of
052 * Gaussians in the mixture, and D is the descriptor dimensionality. Note that
053 * only the diagonal values of the gaussian covariance matrices are used, and
054 * thus you probably want to learn a {@link CovarianceType#Diagonal} or
055 * {@link CovarianceType#Spherical} type gaussian with the
056 * {@link GaussianMixtureModelEM} class.
057 *
058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
059 *
060 * @param <T>
061 *            Primitive array type of the {@link ArrayFeatureVector}s used by
062 *            the {@link LocalFeature}s that will be processed.
063 */
064@References(
065                references = {
066                                @Reference(
067                                                type = ReferenceType.Inproceedings,
068                                                author = { "Perronnin, F.", "Dance, C." },
069                                                title = "Fisher Kernels on Visual Vocabularies for Image Categorization",
070                                                year = "2007",
071                                                booktitle = "Computer Vision and Pattern Recognition, 2007. CVPR '07. IEEE Conference on",
072                                                pages = { "1", "8" },
073                                                customData = {
074                                                                "keywords",
075                                                                "Gaussian processes;gradient methods;image classification;Fisher kernels;Gaussian mixture model;generative probability model;gradient vector;image categorization;pattern classification;visual vocabularies;Character generation;Feeds;Image databases;Kernel;Pattern classification;Power generation;Signal generators;Spatial databases;Visual databases;Vocabulary",
076                                                                "doi", "10.1109/CVPR.2007.383266",
077                                                                "ISSN", "1063-6919"
078                                                }
079                                                ),
080                                                @Reference(
081                                                                type = ReferenceType.Inproceedings,
082                                                                author = { "Perronnin, Florent", "S\'{a}nchez, Jorge", "Mensink, Thomas" },
083                                                                title = "Improving the Fisher Kernel for Large-scale Image Classification",
084                                                                year = "2010",
085                                                                booktitle = "Proceedings of the 11th European Conference on Computer Vision: Part IV",
086                                                                pages = { "143", "", "156" },
087                                                                url = "http://dl.acm.org/citation.cfm?id=1888089.1888101",
088                                                                publisher = "Springer-Verlag",
089                                                                series = "ECCV'10",
090                                                                customData = {
091                                                                                "isbn", "3-642-15560-X, 978-3-642-15560-4",
092                                                                                "location", "Heraklion, Crete, Greece",
093                                                                                "numpages", "14",
094                                                                                "acmid", "1888101",
095                                                                                "address", "Berlin, Heidelberg"
096                                                                }
097                                                                )
098                })
099public class FisherVector<T> implements VectorAggregator<ArrayFeatureVector<T>, FloatFV> {
100        private MixtureOfGaussians gmm;
101        private boolean hellinger;
102        private boolean l2normalise;
103
104        /**
105         * Construct with the given mixture of Gaussians and optional improvement
106         * steps. The covariance matrices of the gaussians are all assumed to be
107         * diagonal, and will be treated as such; any non-zero off-diagonal values
108         * will be completely ignored.
109         *
110         * @param gmm
111         *            the mixture of gaussians
112         * @param hellinger
113         *            if true then use Hellinger's kernel rather than the linear one
114         *            by signed square rooting the values in the final vector
115         * @param l2normalise
116         *            if true then apply l2 normalisation to the final vector. This
117         *            occurs after the Hellinger step if it is used.
118         */
119        public FisherVector(MixtureOfGaussians gmm, boolean hellinger, boolean l2normalise) {
120                this.gmm = gmm;
121                this.hellinger = hellinger;
122                this.l2normalise = l2normalise;
123        }
124
125        /**
126         * Construct the standard Fisher Vector encoder with the given mixture of
127         * Gaussians. The covariance matrices of the gaussians are all assumed to be
128         * diagonal, and will be treated as such; any non-zero off-diagonal values
129         * will be completely ignored.
130         *
131         * @param gmm
132         *            the mixture of gaussians
133         */
134        public FisherVector(MixtureOfGaussians gmm) {
135                this(gmm, false);
136        }
137
138        /**
139         * Construct the Fisher Vector encoder with the given mixture of Gaussians
140         * and the optional improvement steps (in the sense of the VLFeat
141         * documentation). The covariance matrices of the gaussians are all assumed
142         * to be diagonal, and will be treated as such; any non-zero off-diagonal
143         * values will be completely ignored. For the improved version, the final
144         * vector is projected into Hellinger's kernel and then l2 normalised.
145         *
146         * @param gmm
147         *            the mixture of gaussians
148         * @param improved
149         *            if true then Hellinger's kernel is used, and the vector is l2
150         *            normalised.
151         */
152        public FisherVector(MixtureOfGaussians gmm, boolean improved) {
153                this(gmm, improved, improved);
154        }
155
156        @Override
157        public FloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) {
158                if (features == null || features.size() <= 0)
159                        return null;
160
161                final int K = this.gmm.gaussians.length;
162                final int D = features.get(0).getFeatureVector().length();
163
164                final float[] vector = new float[2 * K * D];
165
166                // cache all the features in an array
167                final double[][] X = new double[features.size()][];
168                for (int i = 0; i < X.length; i++) {
169                        final LocalFeature<?, ? extends ArrayFeatureVector<T>> f = features.get(i);
170                        X[i] = f.getFeatureVector().asDoubleVector();
171                }
172
173                return computeFisherVector(features.size(), K, D, vector, X);
174        }
175
176        @Override
177        public FloatFV aggregateVectors(List<? extends ArrayFeatureVector<T>> features) {
178                if (features == null || features.size() <= 0)
179                        return null;
180
181                final int K = this.gmm.gaussians.length;
182                final int D = features.get(0).length();
183
184                final float[] vector = new float[2 * K * D];
185
186                // cache all the features in an array
187                final double[][] X = new double[features.size()][];
188                for (int i = 0; i < X.length; i++) {
189                        final ArrayFeatureVector<T> f = features.get(i);
190                        X[i] = f.asDoubleVector();
191                }
192
193                return computeFisherVector(features.size(), K, D, vector, X);
194        }
195
196        private FloatFV computeFisherVector(int nFeatures, final int K,
197                        final int D, final float[] vector, final double[][] X)
198        {
199                // compute posterior probabilities of all features at once (more
200                // efficient than
201                // doing it for each one at a time)
202                final double[][] posteriors = gmm.scoreSamples(X).secondObject();
203
204                for (int p = 0; p < X.length; p++) {
205                        final double[] xp = X[p];
206
207                        for (int k = 0; k < K; k++) {
208                                final double apk = posteriors[p][k];
209
210                                if (apk < 1e-6)
211                                        continue; // speed-up: ignore really small terms...
212
213                                final MultivariateGaussian gauss = gmm.gaussians[k];
214                                final double[] mean = gauss.getMean().getArray()[0];
215
216                                for (int j = 0; j < D; j++) {
217                                        final double var = gauss.getCovariance(j, j);
218                                        final double diff = (xp[j] - mean[j]) / Math.sqrt(var);
219
220                                        vector[k * 2 * D + j] += apk * diff;
221                                        vector[k * 2 * D + j + D] += apk * ((diff * diff) - 1);
222                                }
223                        }
224                }
225
226                for (int k = 0; k < K; k++) {
227                        final double wt1 = 1.0 / (nFeatures * Math.sqrt(gmm.weights[k]));
228                        final double wt2 = 1.0 / (nFeatures * Math.sqrt(2 * gmm.weights[k]));
229
230                        for (int j = 0; j < D; j++) {
231                                vector[k * 2 * D + j] *= wt1;
232                                vector[k * 2 * D + j + D] *= wt2;
233                        }
234                }
235
236                final FloatFV out = new FloatFV(vector);
237
238                if (hellinger) {
239                        for (int i = 0; i < out.values.length; i++) {
240                                out.values[i] = (float) (out.values[i] > 0 ? Math.sqrt(out.values[i]) :
241                                        -1 * Math.sqrt(-1 * out.values[i]));
242                        }
243                }
244
245                if (l2normalise) {
246                        // l2 norm
247                        double sumsq = 0;
248                        for (int i = 0; i < out.values.length; i++) {
249                                sumsq += (out.values[i] * out.values[i]);
250                        }
251                        final float norm = (float) (1.0 / Math.sqrt(sumsq));
252                        for (int i = 0; i < out.values.length; i++) {
253                                out.values[i] *= norm;
254                        }
255                }
256                return out;
257        }
258}