001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.local.aggregate; 031 032import java.util.List; 033 034import org.openimaj.citation.annotation.Reference; 035import org.openimaj.citation.annotation.ReferenceType; 036import org.openimaj.citation.annotation.References; 037import org.openimaj.feature.ArrayFeatureVector; 038import org.openimaj.feature.FloatFV; 039import org.openimaj.feature.local.LocalFeature; 040import org.openimaj.math.statistics.distribution.MixtureOfGaussians; 041import org.openimaj.math.statistics.distribution.MultivariateGaussian; 042import org.openimaj.ml.gmm.GaussianMixtureModelEM; 043import org.openimaj.ml.gmm.GaussianMixtureModelEM.CovarianceType; 044 045/** 046 * Implementation of the Fisher Vector (FV) encoding scheme. FV provides a way 047 * of encoding a set of vectors (e.g. local features) as a single vector that 048 * encapsulates the first and second order residuals of the vectors from a 049 * gaussian mixture model. 050 * <p> 051 * The dimensionality of the output vector is 2*K*D where K is the number of 052 * Gaussians in the mixture, and D is the descriptor dimensionality. Note that 053 * only the diagonal values of the gaussian covariance matrices are used, and 054 * thus you probably want to learn a {@link CovarianceType#Diagonal} or 055 * {@link CovarianceType#Spherical} type gaussian with the 056 * {@link GaussianMixtureModelEM} class. 057 * 058 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 059 * 060 * @param <T> 061 * Primitive array type of the {@link ArrayFeatureVector}s used by 062 * the {@link LocalFeature}s that will be processed. 063 */ 064@References( 065 references = { 066 @Reference( 067 type = ReferenceType.Inproceedings, 068 author = { "Perronnin, F.", "Dance, C." }, 069 title = "Fisher Kernels on Visual Vocabularies for Image Categorization", 070 year = "2007", 071 booktitle = "Computer Vision and Pattern Recognition, 2007. CVPR '07. IEEE Conference on", 072 pages = { "1", "8" }, 073 customData = { 074 "keywords", 075 "Gaussian processes;gradient methods;image classification;Fisher kernels;Gaussian mixture model;generative probability model;gradient vector;image categorization;pattern classification;visual vocabularies;Character generation;Feeds;Image databases;Kernel;Pattern classification;Power generation;Signal generators;Spatial databases;Visual databases;Vocabulary", 076 "doi", "10.1109/CVPR.2007.383266", 077 "ISSN", "1063-6919" 078 } 079 ), 080 @Reference( 081 type = ReferenceType.Inproceedings, 082 author = { "Perronnin, Florent", "S\'{a}nchez, Jorge", "Mensink, Thomas" }, 083 title = "Improving the Fisher Kernel for Large-scale Image Classification", 084 year = "2010", 085 booktitle = "Proceedings of the 11th European Conference on Computer Vision: Part IV", 086 pages = { "143", "", "156" }, 087 url = "http://dl.acm.org/citation.cfm?id=1888089.1888101", 088 publisher = "Springer-Verlag", 089 series = "ECCV'10", 090 customData = { 091 "isbn", "3-642-15560-X, 978-3-642-15560-4", 092 "location", "Heraklion, Crete, Greece", 093 "numpages", "14", 094 "acmid", "1888101", 095 "address", "Berlin, Heidelberg" 096 } 097 ) 098 }) 099public class FisherVector<T> implements VectorAggregator<ArrayFeatureVector<T>, FloatFV> { 100 private MixtureOfGaussians gmm; 101 private boolean hellinger; 102 private boolean l2normalise; 103 104 /** 105 * Construct with the given mixture of Gaussians and optional improvement 106 * steps. The covariance matrices of the gaussians are all assumed to be 107 * diagonal, and will be treated as such; any non-zero off-diagonal values 108 * will be completely ignored. 109 * 110 * @param gmm 111 * the mixture of gaussians 112 * @param hellinger 113 * if true then use Hellinger's kernel rather than the linear one 114 * by signed square rooting the values in the final vector 115 * @param l2normalise 116 * if true then apply l2 normalisation to the final vector. This 117 * occurs after the Hellinger step if it is used. 118 */ 119 public FisherVector(MixtureOfGaussians gmm, boolean hellinger, boolean l2normalise) { 120 this.gmm = gmm; 121 this.hellinger = hellinger; 122 this.l2normalise = l2normalise; 123 } 124 125 /** 126 * Construct the standard Fisher Vector encoder with the given mixture of 127 * Gaussians. The covariance matrices of the gaussians are all assumed to be 128 * diagonal, and will be treated as such; any non-zero off-diagonal values 129 * will be completely ignored. 130 * 131 * @param gmm 132 * the mixture of gaussians 133 */ 134 public FisherVector(MixtureOfGaussians gmm) { 135 this(gmm, false); 136 } 137 138 /** 139 * Construct the Fisher Vector encoder with the given mixture of Gaussians 140 * and the optional improvement steps (in the sense of the VLFeat 141 * documentation). The covariance matrices of the gaussians are all assumed 142 * to be diagonal, and will be treated as such; any non-zero off-diagonal 143 * values will be completely ignored. For the improved version, the final 144 * vector is projected into Hellinger's kernel and then l2 normalised. 145 * 146 * @param gmm 147 * the mixture of gaussians 148 * @param improved 149 * if true then Hellinger's kernel is used, and the vector is l2 150 * normalised. 151 */ 152 public FisherVector(MixtureOfGaussians gmm, boolean improved) { 153 this(gmm, improved, improved); 154 } 155 156 @Override 157 public FloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) { 158 if (features == null || features.size() <= 0) 159 return null; 160 161 final int K = this.gmm.gaussians.length; 162 final int D = features.get(0).getFeatureVector().length(); 163 164 final float[] vector = new float[2 * K * D]; 165 166 // cache all the features in an array 167 final double[][] X = new double[features.size()][]; 168 for (int i = 0; i < X.length; i++) { 169 final LocalFeature<?, ? extends ArrayFeatureVector<T>> f = features.get(i); 170 X[i] = f.getFeatureVector().asDoubleVector(); 171 } 172 173 return computeFisherVector(features.size(), K, D, vector, X); 174 } 175 176 @Override 177 public FloatFV aggregateVectors(List<? extends ArrayFeatureVector<T>> features) { 178 if (features == null || features.size() <= 0) 179 return null; 180 181 final int K = this.gmm.gaussians.length; 182 final int D = features.get(0).length(); 183 184 final float[] vector = new float[2 * K * D]; 185 186 // cache all the features in an array 187 final double[][] X = new double[features.size()][]; 188 for (int i = 0; i < X.length; i++) { 189 final ArrayFeatureVector<T> f = features.get(i); 190 X[i] = f.asDoubleVector(); 191 } 192 193 return computeFisherVector(features.size(), K, D, vector, X); 194 } 195 196 private FloatFV computeFisherVector(int nFeatures, final int K, 197 final int D, final float[] vector, final double[][] X) 198 { 199 // compute posterior probabilities of all features at once (more 200 // efficient than 201 // doing it for each one at a time) 202 final double[][] posteriors = gmm.scoreSamples(X).secondObject(); 203 204 for (int p = 0; p < X.length; p++) { 205 final double[] xp = X[p]; 206 207 for (int k = 0; k < K; k++) { 208 final double apk = posteriors[p][k]; 209 210 if (apk < 1e-6) 211 continue; // speed-up: ignore really small terms... 212 213 final MultivariateGaussian gauss = gmm.gaussians[k]; 214 final double[] mean = gauss.getMean().getArray()[0]; 215 216 for (int j = 0; j < D; j++) { 217 final double var = gauss.getCovariance(j, j); 218 final double diff = (xp[j] - mean[j]) / Math.sqrt(var); 219 220 vector[k * 2 * D + j] += apk * diff; 221 vector[k * 2 * D + j + D] += apk * ((diff * diff) - 1); 222 } 223 } 224 } 225 226 for (int k = 0; k < K; k++) { 227 final double wt1 = 1.0 / (nFeatures * Math.sqrt(gmm.weights[k])); 228 final double wt2 = 1.0 / (nFeatures * Math.sqrt(2 * gmm.weights[k])); 229 230 for (int j = 0; j < D; j++) { 231 vector[k * 2 * D + j] *= wt1; 232 vector[k * 2 * D + j + D] *= wt2; 233 } 234 } 235 236 final FloatFV out = new FloatFV(vector); 237 238 if (hellinger) { 239 for (int i = 0; i < out.values.length; i++) { 240 out.values[i] = (float) (out.values[i] > 0 ? Math.sqrt(out.values[i]) : 241 -1 * Math.sqrt(-1 * out.values[i])); 242 } 243 } 244 245 if (l2normalise) { 246 // l2 norm 247 double sumsq = 0; 248 for (int i = 0; i < out.values.length; i++) { 249 sumsq += (out.values[i] * out.values[i]); 250 } 251 final float norm = (float) (1.0 / Math.sqrt(sumsq)); 252 for (int i = 0; i < out.values.length; i++) { 253 out.values[i] *= norm; 254 } 255 } 256 return out; 257 } 258}