001/*
002        AUTOMATICALLY GENERATED BY jTemp FROM
003        /Users/jsh2/Work/openimaj/target/checkout/core/core-feature/src/main/jtemp/org/openimaj/feature/Sparse#T#FVComparison.jtemp
004*/
005/**
006 * Copyright (c) 2011, The University of Southampton and the individual contributors.
007 * All rights reserved.
008 *
009 * Redistribution and use in source and binary forms, with or without modification,
010 * are permitted provided that the following conditions are met:
011 *
012 *   *  Redistributions of source code must retain the above copyright notice,
013 *      this list of conditions and the following disclaimer.
014 *
015 *   *  Redistributions in binary form must reproduce the above copyright notice,
016 *      this list of conditions and the following disclaimer in the documentation
017 *      and/or other materials provided with the distribution.
018 *
019 *   *  Neither the name of the University of Southampton nor the names of its
020 *      contributors may be used to endorse or promote products derived from this
021 *      software without specific prior written permission.
022 *
023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
033 */
034package org.openimaj.feature;
035
036import gnu.trove.set.hash.TByteHashSet;
037
038import org.openimaj.math.util.distance.HammingUtils;
039import org.openimaj.util.array.SparseByteArray;
040
041/**
042 * Comparison/distance methods for DoubleFV objects.
043 * 
044 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
045 */
046public enum SparseByteFVComparison implements FVComparator<SparseByteFV> {
047        /**
048         * Euclidean distance
049         * d(H1,H2) = Math.sqrt( sumI( (H1(I)-H2(I))^2 ) )
050         */
051        EUCLIDEAN(true) {
052                @Override
053                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
054                        if (h1.length != h2.length)
055                            throw new IllegalArgumentException("Vectors have differing lengths");
056
057                        double d = 0;
058                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
059                                double diff = e.value - e.otherValue;
060                                d += (diff * diff);
061                        }
062
063                        return Math.sqrt(d);
064                }
065        }, 
066        /**
067         * Correlation
068         *
069         * d(H1,H2) = sumI( H'1(I) * H'2(I) ) / sqrt( sumI[H'1(I)2]^2 * sumI[H'2(I)^2] )
070         * where
071         * H'k(I) = Hk(I) - (1/N) * sumJ( Hk(J) ); N=number of FeatureVector bins
072         *
073         */
074        CORRELATION(false) {
075                @Override
076                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
077                        if (h1.length != h2.length)
078                            throw new IllegalArgumentException("Vectors have differing lengths");
079
080                        double N = h1.length;
081                        double SH1=0, SH2=0;
082
083                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
084                                SH1 += e.value;
085                                SH2 += e.otherValue;
086                        }
087                        SH1 /= N;
088                        SH2 /= N;
089
090                        double d = 0;
091                        double SH1S = 0;
092                        double SH2S = 0;
093
094                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
095                                double h1prime = e.value - SH1;
096                                double h2prime = e.otherValue - SH2;
097
098                                d += (h1prime * h2prime);
099                                SH1S += (h1prime * h1prime);
100                                SH2S += (h2prime * h2prime);
101                        }
102
103                        return d / Math.sqrt(SH1S * SH2S);
104                }
105        },
106        /**
107         * Chi-squared distance
108         * d(H1,H2) = 0.5 * sumI[(H1(I)-H2(I))^2 / (H1(I)+H2(I))]
109         */
110        CHI_SQUARE(true) {
111                @Override
112                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
113                        if (h1.length != h2.length)
114                            throw new IllegalArgumentException("Vectors have differing lengths");
115
116                        double d = 0;
117
118                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
119                            double a = e.value - e.otherValue;
120                            double b = e.value + e.otherValue;
121                            
122                            if (Math.abs(b) > 0) d += a*a/b;
123                        }
124
125                        return d / 2;
126                }
127        },
128        /**
129         * Histogram intersection; assumes all values &gt; 0.
130         * d(H1,H2) = sumI( min(H1(I), H2(I)) )
131         */
132        INTERSECTION(false) {
133                @Override
134                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
135                        if (h1.length != h2.length)
136                            throw new IllegalArgumentException("Vectors have differing lengths");
137
138                        double d = 0;
139
140                        for (SparseByteArray.DualEntry e : h1.intersectEntries(h2)) {
141                                d += Math.min(e.value, e.otherValue);
142                        }
143
144                        return d;
145                }
146        },
147        /**
148         * Bhattacharyya distance
149         * d(H1,H2) = sqrt( 1 - (1 / sqrt(sumI(H1(I)) * sumI(H2(I))) ) * sumI( sqrt(H1(I) * H2(I)) ) )
150         */
151        BHATTACHARYYA(true) {
152                @Override
153                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
154                        if (h1.length != h2.length)
155                            throw new IllegalArgumentException("Vectors have differing lengths");
156
157                        double SH1 = 0;
158                        double SH2 = 0;
159                        double d = 0;
160                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
161                                SH1 += e.value;
162                                SH2 += e.otherValue;
163                                d += Math.sqrt(e.value * e.otherValue);
164                        }
165
166                        double den = SH1 * SH2;
167                        if (den == 0) return 1;
168
169                        d /= Math.sqrt(den);
170
171                        return Math.sqrt(1.0 - d);
172                }
173        },
174        /**
175         * Hamming Distance
176         * d(H1,H2) = sumI(H1(I) == H2(I) ? 1 : 0)
177         */
178        HAMMING(true) {
179                @Override
180                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
181                        if (h1.length != h2.length)
182                            throw new IllegalArgumentException("Vectors have differing lengths");
183
184                        int d = 0;
185
186                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2))
187                                if (e.value != e.otherValue) d++;
188
189                        return d;
190                }               
191        },
192        /**
193         * Hamming Distance for packed bit strings
194         * d(H1,H2) = sumI(H1(I) == H2(I) ? 1 : 0)
195         */
196        PACKED_HAMMING(true) {
197                @Override
198                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
199                        if (h1.length != h2.length)
200                            throw new IllegalArgumentException("Vectors have differing lengths");
201
202                        int d = 0;
203
204                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
205                                d += HammingUtils.packedHamming(e.value, e.otherValue);
206                        }
207
208                        return d;
209                }       
210        },
211        /**
212         * City-block (L1) distance
213         * d(H1,H2) = sumI( abs(H1(I)-H2(I)) )
214         */
215        CITY_BLOCK(true) {
216                @Override
217                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
218                        if (h1.length != h2.length)
219                            throw new IllegalArgumentException("Vectors have differing lengths");
220
221                        double d = 0;
222
223                        for (SparseByteArray.DualEntry e : h1.intersectEntries(h2)) {
224                                d += Math.abs(e.value - e.otherValue);
225                        }
226
227                        return d;
228                }
229        },
230        /**
231         * Cosine similarity (sim of 1 means identical)
232         * d(H1,H2)=sumI(H1(I) * H2(I))) / (sumI(H1(I)^2) sumI(H2(I)^2))
233         */
234        COSINE_SIM(false) {
235                @Override
236                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
237                        if (h1.length != h2.length)
238                            throw new IllegalArgumentException("Vectors have differing lengths");
239
240                        double h12 = 0;
241                        double h11 = 0;
242                        double h22 = 0;
243                        
244                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
245                                h12 += e.value * e.otherValue;
246                                h11 += e.value * e.value;
247                                h22 += e.otherValue * e.otherValue;
248                        }
249
250                        return h12 / (Math.sqrt(h11) * Math.sqrt(h22));
251                }
252        },
253        /**
254         * Cosine distance (-COSINE_SIM)
255         */
256        COSINE_DIST(true) {
257                @Override
258                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
259                        return -1 * COSINE_SIM.compare(h1, h2);
260                }
261        },
262        /**
263         * The arccosine of the cosine similarity
264         */
265        ARCCOS(true) {
266                @Override
267                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
268                        return Math.acos( COSINE_SIM.compare(h1, h2) );
269                }
270        },
271        /**
272         * The symmetric Kullback-Leibler divergence. Vectors must only contain
273         * positive values; internally they will be converted to double arrays 
274         * and normalised to sum to unit length.
275         */
276        SYMMETRIC_KL_DIVERGENCE(true) {
277                @Override
278                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
279                        if (h1.length != h2.length)
280                                throw new IllegalArgumentException("Vectors have differing lengths");
281                        
282                        double sum1 = 0;
283                        double sum2 = 0;
284                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
285                                sum1 += e.value;
286                                sum2 += e.otherValue;
287                        }
288                        
289                        double d = 0;
290                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
291                                double h1n = e.value / sum1;
292                                double h2n = e.otherValue / sum2;
293                                
294                                double q1 = h1n / h2n;
295                                double q2 = h2n / h1n;
296                                
297                                if (h1n != 0) d += (h1n * Math.log(q1) / Math.log(2)); 
298                                if (h2n != 0) d += (h2n * Math.log(q2) / Math.log(2));
299                        }
300
301                        return d / 2.0;
302                }
303        },
304        /**
305         * Jaccard distance. Converts each vector to a set
306         * for comparison.
307         */
308        JACCARD_DISTANCE(true) {
309                @Override
310                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
311                        byte[] h1v = h1.values();
312                        byte[] h2v = h2.values();
313                        
314                        TByteHashSet union = new TByteHashSet(h1v);
315                        union.addAll(h2v);
316                        if (h1v.length != h1.length || h2v.length != h2.length)
317                                union.add((byte)0);
318
319                        TByteHashSet intersection = new TByteHashSet(h1v);
320                        intersection.retainAll(h2v);
321                        if (h1v.length != h1.length && h2v.length != h2.length)
322                                union.add((byte)0);
323
324                        return 1.0 - (((double)intersection.size()) / (double)union.size());
325                }
326        },
327        /**
328         * Inner product
329         * d(H1,H2) =sumI(H1(I) * H2(I))
330         */
331        INNER_PRODUCT(false) {
332                @Override
333                public double compare(final SparseByteArray h1, final SparseByteArray h2) {
334                        if (h1.length != h2.length)
335                            throw new IllegalArgumentException("Vectors have differing lengths");
336
337                        double d = 0;
338                        for (SparseByteArray.DualEntry e : h1.unionEntries(h2)) {
339                                d += (e.value * e.otherValue);
340                        }
341
342                        return d;
343                }
344        }
345        ;
346
347        private boolean isDistance; 
348        SparseByteFVComparison(boolean isDistance) {
349                this.isDistance = isDistance;
350        }
351
352        @Override
353        public boolean isDistance() {
354                return isDistance;
355        }
356
357        @Override
358        public double compare(SparseByteFV h1, SparseByteFV h2) {
359                return compare(h1.values, h2.values);
360        }
361
362        /**
363         * Compare two feature vectors in the form of sparse arrays, 
364         * returning a score or distance.
365         * 
366         * @param h1 the first feature array
367         * @param h2 the second feature array
368         * @return a score or distance
369         */     
370        public abstract double compare(SparseByteArray h1, SparseByteArray h2);
371}