001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/core/core-feature/src/main/jtemp/org/openimaj/feature/Sparse#T#FVComparison.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034package org.openimaj.feature; 035 036import gnu.trove.set.hash.TByteHashSet; 037 038import org.openimaj.math.util.distance.HammingUtils; 039import org.openimaj.util.array.SparseByteArray; 040 041/** 042 * Comparison/distance methods for DoubleFV objects. 043 * 044 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 045 */ 046public enum SparseByteFVComparison implements FVComparator<SparseByteFV> { 047 /** 048 * Euclidean distance 049 * d(H1,H2) = Math.sqrt( sumI( (H1(I)-H2(I))^2 ) ) 050 */ 051 EUCLIDEAN(true) { 052 @Override 053 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 054 if (h1.length != h2.length) 055 throw new IllegalArgumentException("Vectors have differing lengths"); 056 057 double d = 0; 058 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 059 double diff = e.value - e.otherValue; 060 d += (diff * diff); 061 } 062 063 return Math.sqrt(d); 064 } 065 }, 066 /** 067 * Correlation 068 * 069 * d(H1,H2) = sumI( H'1(I) * H'2(I) ) / sqrt( sumI[H'1(I)2]^2 * sumI[H'2(I)^2] ) 070 * where 071 * H'k(I) = Hk(I) - (1/N) * sumJ( Hk(J) ); N=number of FeatureVector bins 072 * 073 */ 074 CORRELATION(false) { 075 @Override 076 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 077 if (h1.length != h2.length) 078 throw new IllegalArgumentException("Vectors have differing lengths"); 079 080 double N = h1.length; 081 double SH1=0, SH2=0; 082 083 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 084 SH1 += e.value; 085 SH2 += e.otherValue; 086 } 087 SH1 /= N; 088 SH2 /= N; 089 090 double d = 0; 091 double SH1S = 0; 092 double SH2S = 0; 093 094 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 095 double h1prime = e.value - SH1; 096 double h2prime = e.otherValue - SH2; 097 098 d += (h1prime * h2prime); 099 SH1S += (h1prime * h1prime); 100 SH2S += (h2prime * h2prime); 101 } 102 103 return d / Math.sqrt(SH1S * SH2S); 104 } 105 }, 106 /** 107 * Chi-squared distance 108 * d(H1,H2) = 0.5 * sumI[(H1(I)-H2(I))^2 / (H1(I)+H2(I))] 109 */ 110 CHI_SQUARE(true) { 111 @Override 112 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 113 if (h1.length != h2.length) 114 throw new IllegalArgumentException("Vectors have differing lengths"); 115 116 double d = 0; 117 118 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 119 double a = e.value - e.otherValue; 120 double b = e.value + e.otherValue; 121 122 if (Math.abs(b) > 0) d += a*a/b; 123 } 124 125 return d / 2; 126 } 127 }, 128 /** 129 * Histogram intersection; assumes all values > 0. 130 * d(H1,H2) = sumI( min(H1(I), H2(I)) ) 131 */ 132 INTERSECTION(false) { 133 @Override 134 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 135 if (h1.length != h2.length) 136 throw new IllegalArgumentException("Vectors have differing lengths"); 137 138 double d = 0; 139 140 for (SparseByteArray.DualEntry e : h1.intersectEntries(h2)) { 141 d += Math.min(e.value, e.otherValue); 142 } 143 144 return d; 145 } 146 }, 147 /** 148 * Bhattacharyya distance 149 * d(H1,H2) = sqrt( 1 - (1 / sqrt(sumI(H1(I)) * sumI(H2(I))) ) * sumI( sqrt(H1(I) * H2(I)) ) ) 150 */ 151 BHATTACHARYYA(true) { 152 @Override 153 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 154 if (h1.length != h2.length) 155 throw new IllegalArgumentException("Vectors have differing lengths"); 156 157 double SH1 = 0; 158 double SH2 = 0; 159 double d = 0; 160 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 161 SH1 += e.value; 162 SH2 += e.otherValue; 163 d += Math.sqrt(e.value * e.otherValue); 164 } 165 166 double den = SH1 * SH2; 167 if (den == 0) return 1; 168 169 d /= Math.sqrt(den); 170 171 return Math.sqrt(1.0 - d); 172 } 173 }, 174 /** 175 * Hamming Distance 176 * d(H1,H2) = sumI(H1(I) == H2(I) ? 1 : 0) 177 */ 178 HAMMING(true) { 179 @Override 180 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 181 if (h1.length != h2.length) 182 throw new IllegalArgumentException("Vectors have differing lengths"); 183 184 int d = 0; 185 186 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) 187 if (e.value != e.otherValue) d++; 188 189 return d; 190 } 191 }, 192 /** 193 * Hamming Distance for packed bit strings 194 * d(H1,H2) = sumI(H1(I) == H2(I) ? 1 : 0) 195 */ 196 PACKED_HAMMING(true) { 197 @Override 198 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 199 if (h1.length != h2.length) 200 throw new IllegalArgumentException("Vectors have differing lengths"); 201 202 int d = 0; 203 204 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 205 d += HammingUtils.packedHamming(e.value, e.otherValue); 206 } 207 208 return d; 209 } 210 }, 211 /** 212 * City-block (L1) distance 213 * d(H1,H2) = sumI( abs(H1(I)-H2(I)) ) 214 */ 215 CITY_BLOCK(true) { 216 @Override 217 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 218 if (h1.length != h2.length) 219 throw new IllegalArgumentException("Vectors have differing lengths"); 220 221 double d = 0; 222 223 for (SparseByteArray.DualEntry e : h1.intersectEntries(h2)) { 224 d += Math.abs(e.value - e.otherValue); 225 } 226 227 return d; 228 } 229 }, 230 /** 231 * Cosine similarity (sim of 1 means identical) 232 * d(H1,H2)=sumI(H1(I) * H2(I))) / (sumI(H1(I)^2) sumI(H2(I)^2)) 233 */ 234 COSINE_SIM(false) { 235 @Override 236 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 237 if (h1.length != h2.length) 238 throw new IllegalArgumentException("Vectors have differing lengths"); 239 240 double h12 = 0; 241 double h11 = 0; 242 double h22 = 0; 243 244 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 245 h12 += e.value * e.otherValue; 246 h11 += e.value * e.value; 247 h22 += e.otherValue * e.otherValue; 248 } 249 250 return h12 / (Math.sqrt(h11) * Math.sqrt(h22)); 251 } 252 }, 253 /** 254 * Cosine distance (-COSINE_SIM) 255 */ 256 COSINE_DIST(true) { 257 @Override 258 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 259 return -1 * COSINE_SIM.compare(h1, h2); 260 } 261 }, 262 /** 263 * The arccosine of the cosine similarity 264 */ 265 ARCCOS(true) { 266 @Override 267 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 268 return Math.acos( COSINE_SIM.compare(h1, h2) ); 269 } 270 }, 271 /** 272 * The symmetric Kullback-Leibler divergence. Vectors must only contain 273 * positive values; internally they will be converted to double arrays 274 * and normalised to sum to unit length. 275 */ 276 SYMMETRIC_KL_DIVERGENCE(true) { 277 @Override 278 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 279 if (h1.length != h2.length) 280 throw new IllegalArgumentException("Vectors have differing lengths"); 281 282 double sum1 = 0; 283 double sum2 = 0; 284 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 285 sum1 += e.value; 286 sum2 += e.otherValue; 287 } 288 289 double d = 0; 290 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 291 double h1n = e.value / sum1; 292 double h2n = e.otherValue / sum2; 293 294 double q1 = h1n / h2n; 295 double q2 = h2n / h1n; 296 297 if (h1n != 0) d += (h1n * Math.log(q1) / Math.log(2)); 298 if (h2n != 0) d += (h2n * Math.log(q2) / Math.log(2)); 299 } 300 301 return d / 2.0; 302 } 303 }, 304 /** 305 * Jaccard distance. Converts each vector to a set 306 * for comparison. 307 */ 308 JACCARD_DISTANCE(true) { 309 @Override 310 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 311 byte[] h1v = h1.values(); 312 byte[] h2v = h2.values(); 313 314 TByteHashSet union = new TByteHashSet(h1v); 315 union.addAll(h2v); 316 if (h1v.length != h1.length || h2v.length != h2.length) 317 union.add((byte)0); 318 319 TByteHashSet intersection = new TByteHashSet(h1v); 320 intersection.retainAll(h2v); 321 if (h1v.length != h1.length && h2v.length != h2.length) 322 union.add((byte)0); 323 324 return 1.0 - (((double)intersection.size()) / (double)union.size()); 325 } 326 }, 327 /** 328 * Inner product 329 * d(H1,H2) =sumI(H1(I) * H2(I)) 330 */ 331 INNER_PRODUCT(false) { 332 @Override 333 public double compare(final SparseByteArray h1, final SparseByteArray h2) { 334 if (h1.length != h2.length) 335 throw new IllegalArgumentException("Vectors have differing lengths"); 336 337 double d = 0; 338 for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) { 339 d += (e.value * e.otherValue); 340 } 341 342 return d; 343 } 344 } 345 ; 346 347 private boolean isDistance; 348 SparseByteFVComparison(boolean isDistance) { 349 this.isDistance = isDistance; 350 } 351 352 @Override 353 public boolean isDistance() { 354 return isDistance; 355 } 356 357 @Override 358 public double compare(SparseByteFV h1, SparseByteFV h2) { 359 return compare(h1.values, h2.values); 360 } 361 362 /** 363 * Compare two feature vectors in the form of sparse arrays, 364 * returning a score or distance. 365 * 366 * @param h1 the first feature array 367 * @param h2 the second feature array 368 * @return a score or distance 369 */ 370 public abstract double compare(SparseByteArray h1, SparseByteArray h2); 371}