001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.image.feature.local.aggregate; 031 032import java.lang.reflect.Array; 033import java.util.List; 034 035import org.openimaj.citation.annotation.Reference; 036import org.openimaj.citation.annotation.ReferenceType; 037import org.openimaj.feature.ArrayFeatureVector; 038import org.openimaj.feature.MultidimensionalFloatFV; 039import org.openimaj.feature.local.LocalFeature; 040import org.openimaj.image.FImage; 041import org.openimaj.ml.clustering.CentroidsProvider; 042import org.openimaj.ml.clustering.assignment.HardAssigner; 043 044/** 045 * Implementation of VLAD, the "Vector of Locally Aggregated Descriptors" 046 * algorithm. VLAD aggregates descriptors into a vector form based on locality 047 * in feature space. Like in the {@link BagOfVisualWords}, features are assigned 048 * to centroids in a codebook (usually learnt through k-means), but instead of 049 * creating a vector of codeword occurances, VLAD accumulates the residual 050 * vector between each input vector and its assigned centroid. Fundamentally, 051 * VLAD describes the distribution of the image vectors with respect to their 052 * assigned centroids. VLAD is closely related to the more complex 053 * {@link FisherVector}. 054 * <p> 055 * For a given number of centroids, K, and vector length, D, the VLAD descriptor 056 * has dimension K*D. This is obviously longer than the K-dimensional vector 057 * produced by {@link BagOfVisualWords}. However, the VLAD descriptor is can be 058 * useful with a much smaller K (i.e. of the order of 16-64 dimensions versus up 059 * to 1 million (or more) for {@link BagOfVisualWords}). 060 * 061 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 062 * 063 * @param <T> 064 * Primitive array type of the {@link ArrayFeatureVector}s used by 065 * the {@link LocalFeature}s that will be processed. 066 */ 067@Reference( 068 type = ReferenceType.Inproceedings, 069 author = { "Jegou, H.", "Douze, M.", "Schmid, C.", "Perez, P." }, 070 title = "Aggregating local descriptors into a compact image representation", 071 year = "2010", 072 booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on", 073 pages = { "3304 ", "3311" }, 074 month = "june", 075 customData = { 076 "doi", "10.1109/CVPR.2010.5540039", 077 "ISSN", "1063-6919" 078 }) 079public class VLAD<T> implements VectorAggregator<ArrayFeatureVector<T>, MultidimensionalFloatFV> { 080 private HardAssigner<T, ?, ?> assigner; 081 private T[] centroids; 082 private boolean normalise; 083 084 /** 085 * Construct with the given assigner and the centroids associated with the 086 * assigner. 087 * 088 * @param assigner 089 * the assigner 090 * @param centroids 091 * the centroids associated with the assigner 092 * @param normalise 093 * if true then output feature is l2 normalised 094 */ 095 public VLAD(HardAssigner<T, ?, ?> assigner, T[] centroids, boolean normalise) { 096 this.assigner = assigner; 097 this.centroids = centroids; 098 this.normalise = normalise; 099 } 100 101 /** 102 * Construct with the given assigner and the centroids associated with the 103 * assigner. 104 * 105 * @param assigner 106 * the assigner 107 * @param centroids 108 * the centroids associated with the assigner 109 * @param normalise 110 * if true then output feature is l2 normalised 111 */ 112 public VLAD(HardAssigner<T, ?, ?> assigner, CentroidsProvider<T> centroids, boolean normalise) { 113 this(assigner, centroids.getCentroids(), normalise); 114 } 115 116 @Override 117 public MultidimensionalFloatFV aggregate(List<? extends LocalFeature<?, ? extends ArrayFeatureVector<T>>> features) { 118 if (features == null || features.size() <= 0) 119 return null; 120 121 final int K = this.centroids.length; 122 final int D = features.get(0).getFeatureVector().length(); 123 124 final float[][] vector = new float[K][D]; 125 126 for (final LocalFeature<?, ? extends ArrayFeatureVector<T>> f : features) { 127 final T x = f.getFeatureVector().values; 128 final int i = assigner.assign(x); 129 130 for (int j = 0; j < D; j++) { 131 vector[i][j] += (float) (Array.getDouble(x, j) - Array.getDouble(centroids[i], j)); 132 } 133 } 134 135 return prepareOutput(vector); 136 } 137 138 @Override 139 public MultidimensionalFloatFV aggregateVectors(List<? extends ArrayFeatureVector<T>> features) { 140 if (features == null || features.size() <= 0) 141 return null; 142 143 final int K = this.centroids.length; 144 final int D = features.get(0).length(); 145 146 final float[][] vector = new float[K][D]; 147 148 for (final ArrayFeatureVector<T> f : features) { 149 final T x = f.values; 150 final int i = assigner.assign(x); 151 152 for (int j = 0; j < D; j++) { 153 vector[i][j] += (float) (Array.getDouble(x, j) - Array.getDouble(centroids[i], j)); 154 } 155 } 156 157 return prepareOutput(vector); 158 } 159 160 private MultidimensionalFloatFV prepareOutput(final float[][] vector) { 161 final MultidimensionalFloatFV out = new MultidimensionalFloatFV(vector); 162 163 if (normalise) { 164 // l2 norm 165 double sumsq = 0; 166 for (int i = 0; i < out.values.length; i++) { 167 sumsq += (out.values[i] * out.values[i]); 168 } 169 final float norm = (float) (1.0 / Math.sqrt(sumsq)); 170 for (int i = 0; i < out.values.length; i++) { 171 out.values[i] *= norm; 172 } 173 } 174 175 return out; 176 } 177 178 /** 179 * Generate a visualisation of the feature. This is primarily designed for 180 * descriptors that originated from the aggregation of SIFT. 181 * 182 * @param descr 183 * the feature 184 * @param nterms 185 * the number of centroids used to create the feature 186 * @param nSpatialBins 187 * the number of spatial bins used in the feature 188 * @param nOriBins 189 * the number of orientation bins 190 * @return an image depicting the feature 191 */ 192 public static FImage drawDescriptor(float[] descr, int nterms, int nSpatialBins, int nOriBins) { 193 final FImage image = new FImage(40 * nterms, 40); 194 195 for (int z = 0; z < nterms; z++) { 196 for (int y = 0; y < nSpatialBins; y++) { 197 for (int x = 0; x < nSpatialBins; x++) { 198 for (int i = 0; i < nOriBins; i++) { 199 final int xx = 5 + (10 * x); 200 final int yy = 5 + (10 * y); 201 final int length = (int) (descr[(4 * 4 * 8 * z) + (4 * 4 * y) + (4 * x) + i] * 100); 202 final double theta = i * 2 * Math.PI / 8; 203 image.drawLine(xx + 40 * z, yy, theta, length, 1F); 204 } 205 } 206 } 207 } 208 209 return image; 210 } 211}