001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.image.feature.local.aggregate;
031
032import java.lang.reflect.Array;
033import java.util.List;
034
035import org.openimaj.citation.annotation.Reference;
036import org.openimaj.citation.annotation.ReferenceType;
037import org.openimaj.feature.ArrayFeatureVector;
038import org.openimaj.feature.MultidimensionalFloatFV;
039import org.openimaj.feature.local.LocalFeature;
040import org.openimaj.image.FImage;
041import org.openimaj.ml.clustering.CentroidsProvider;
042import org.openimaj.ml.clustering.assignment.HardAssigner;
043
044/**
045 * Implementation of VLAD, the "Vector of Locally Aggregated Descriptors"
046 * algorithm. VLAD aggregates descriptors into a vector form based on locality
047 * in feature space. Like in the {@link BagOfVisualWords}, features are assigned
048 * to centroids in a codebook (usually learnt through k-means), but instead of
049 * creating a vector of codeword occurances, VLAD accumulates the residual
050 * vector between each input vector and its assigned centroid. Fundamentally,
051 * VLAD describes the distribution of the image vectors with respect to their
052 * assigned centroids. VLAD is closely related to the more complex
053 * {@link FisherVector}.
054 * <p>
055 * For a given number of centroids, K, and vector length, D, the VLAD descriptor
056 * has dimension K*D. This is obviously longer than the K-dimensional vector
057 * produced by {@link BagOfVisualWords}. However, the VLAD descriptor is can be
058 * useful with a much smaller K (i.e. of the order of 16-64 dimensions versus up
059 * to 1 million (or more) for {@link BagOfVisualWords}).
060 * 
061 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
062 * 
063 * @param <T>
064 *            Primitive array type of the {@link ArrayFeatureVector}s used by
065 *            the {@link LocalFeature}s that will be processed.
066 */
067@Reference(
068                type = ReferenceType.Inproceedings,
069                author = { "Jegou, H.", "Douze, M.", "Schmid, C.", "Perez, P." },
070                title = "Aggregating local descriptors into a compact image representation",
071                year = "2010",
072                booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on",
073                pages = { "3304 ", "3311" },
074                month = "june",
075                customData = {
076                                "doi", "10.1109/CVPR.2010.5540039",
077                                "ISSN", "1063-6919"
078                })
079public class VLAD<T> implements VectorAggregator<ArrayFeatureVector<T>, MultidimensionalFloatFV> {
080        private HardAssigner<T, ?, ?> assigner;
081        private T[] centroids;
082        private boolean normalise;
083
084        /**
085         * Construct with the given assigner and the centroids associated with the
086         * assigner.
087         * 
088         * @param assigner
089         *            the assigner
090         * @param centroids
091         *            the centroids associated with the assigner
092         * @param normalise
093         *            if true then output feature is l2 normalised
094         */
095        public VLAD(HardAssigner<T, ?, ?> assigner, T[] centroids, boolean normalise) {
096                this.assigner = assigner;
097                this.centroids = centroids;
098                this.normalise = normalise;
099        }
100
101        /**
102         * Construct with the given assigner and the centroids associated with the
103         * assigner.
104         * 
105         * @param assigner
106         *            the assigner
107         * @param centroids
108         *            the centroids associated with the assigner
109         * @param normalise
110         *            if true then output feature is l2 normalised
111         */
112        public VLAD(HardAssigner<T, ?, ?> assigner, CentroidsProvider<T> centroids, boolean normalise) {
113                this(assigner, centroids.getCentroids(), normalise);
114        }
115
116        @Override
117        public MultidimensionalFloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) {
118                if (features == null || features.size() <= 0)
119                        return null;
120
121                final int K = this.centroids.length;
122                final int D = features.get(0).getFeatureVector().length();
123
124                final float[][] vector = new float[K][D];
125
126                for (final LocalFeature<?, ? extends ArrayFeatureVector<T>> f : features) {
127                        final T x = f.getFeatureVector().values;
128                        final int i = assigner.assign(x);
129
130                        for (int j = 0; j < D; j++) {
131                                vector[i][j] += (float) (Array.getDouble(x, j) - Array.getDouble(centroids[i], j));
132                        }
133                }
134
135                return prepareOutput(vector);
136        }
137
138        @Override
139        public MultidimensionalFloatFV aggregateVectors(List<? extends ArrayFeatureVector<T>> features) {
140                if (features == null || features.size() <= 0)
141                        return null;
142
143                final int K = this.centroids.length;
144                final int D = features.get(0).length();
145
146                final float[][] vector = new float[K][D];
147
148                for (final ArrayFeatureVector<T> f : features) {
149                        final T x = f.values;
150                        final int i = assigner.assign(x);
151
152                        for (int j = 0; j < D; j++) {
153                                vector[i][j] += (float) (Array.getDouble(x, j) - Array.getDouble(centroids[i], j));
154                        }
155                }
156
157                return prepareOutput(vector);
158        }
159
160        private MultidimensionalFloatFV prepareOutput(final float[][] vector) {
161                final MultidimensionalFloatFV out = new MultidimensionalFloatFV(vector);
162
163                if (normalise) {
164                        // l2 norm
165                        double sumsq = 0;
166                        for (int i = 0; i < out.values.length; i++) {
167                                sumsq += (out.values[i] * out.values[i]);
168                        }
169                        final float norm = (float) (1.0 / Math.sqrt(sumsq));
170                        for (int i = 0; i < out.values.length; i++) {
171                                out.values[i] *= norm;
172                        }
173                }
174
175                return out;
176        }
177
178        /**
179         * Generate a visualisation of the feature. This is primarily designed for
180         * descriptors that originated from the aggregation of SIFT.
181         * 
182         * @param descr
183         *            the feature
184         * @param nterms
185         *            the number of centroids used to create the feature
186         * @param nSpatialBins
187         *            the number of spatial bins used in the feature
188         * @param nOriBins
189         *            the number of orientation bins
190         * @return an image depicting the feature
191         */
192        public static FImage drawDescriptor(float[] descr, int nterms, int nSpatialBins, int nOriBins) {
193                final FImage image = new FImage(40 * nterms, 40);
194
195                for (int z = 0; z < nterms; z++) {
196                        for (int y = 0; y < nSpatialBins; y++) {
197                                for (int x = 0; x < nSpatialBins; x++) {
198                                        for (int i = 0; i < nOriBins; i++) {
199                                                final int xx = 5 + (10 * x);
200                                                final int yy = 5 + (10 * y);
201                                                final int length = (int) (descr[(4 * 4 * 8 * z) + (4 * 4 * y) + (4 * x) + i] * 100);
202                                                final double theta = i * 2 * Math.PI / 8;
203                                                image.drawLine(xx + 40 * z, yy, theta, length, 1F);
204                                        }
205                                }
206                        }
207                }
208
209                return image;
210        }
211}