001/*
002        AUTOMATICALLY GENERATED BY jTemp FROM
003        /Users/jsh2/Work/openimaj/target/checkout/machine-learning/nearest-neighbour/src/main/jtemp/org/openimaj/lsh/functions/#T#HyperplaneCosineFactory.jtemp
004*/
005/**
006 * Copyright (c) 2011, The University of Southampton and the individual contributors.
007 * All rights reserved.
008 *
009 * Redistribution and use in source and binary forms, with or without modification,
010 * are permitted provided that the following conditions are met:
011 *
012 *   *  Redistributions of source code must retain the above copyright notice,
013 *      this list of conditions and the following disclaimer.
014 *
015 *   *  Redistributions in binary form must reproduce the above copyright notice,
016 *      this list of conditions and the following disclaimer in the documentation
017 *      and/or other materials provided with the distribution.
018 *
019 *   *  Neither the name of the University of Southampton nor the names of its
020 *      contributors may be used to endorse or promote products derived from this
021 *      software without specific prior written permission.
022 *
023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
033 */
034package org.openimaj.lsh.functions;
035
036import org.openimaj.citation.annotation.Reference;
037import org.openimaj.citation.annotation.ReferenceType;
038import org.openimaj.feature.IntFVComparison;
039import org.openimaj.util.array.SparseIntArray;
040import org.openimaj.util.array.SparseIntArray.Entry;
041
042import cern.jet.random.Normal;
043import cern.jet.random.engine.MersenneTwister;
044
045/**
046 * A hash function factory that produces hash functions that approximate cosine
047 * distance using hyperplanes.
048 * <p>
049 * The hash function hashes the input vector into a binary value (i.e. 0 or 1). 
050 * A random vector on the surface of a hypersphere is generated during construction.  
051 * The hash code is computed by calculating the dot product of the random vector 
052 * with the input vector and testing to see whether the value is greater than or 
053 * equal to 0 (1 is output) or less than 0 (0 is output).  
054 * 
055 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
056 */
057 @Reference(
058        type = ReferenceType.Inproceedings,
059        author = { "Charikar, Moses S." },
060        title = "Similarity estimation techniques from rounding algorithms",
061        year = "2002",
062        booktitle = "Proceedings of the thiry-fourth annual ACM symposium on Theory of computing",
063        pages = { "380", "", "388" },
064        url = "http://doi.acm.org/10.1145/509907.509965",
065        publisher = "ACM",
066        series = "STOC '02"
067)
068public class IntHyperplaneCosineFactory extends IntHashFunctionFactory {
069        private class Function extends IntHashFunction {
070                double[] r;
071
072                Function(int ndims, MersenneTwister rng) {
073                        super(rng);
074
075            final Normal normal = new Normal(0, 1, rng);
076
077                        r = new double[ndims];
078                        double sumSq = 0;
079                        
080                        for (int i=0; i<ndims; i++) {
081                                r[i] = normal.nextDouble();
082                                sumSq += (r[i] * r[i]);
083                        }
084                        
085                        double norm = 1.0 / Math.sqrt(sumSq);
086                        for (int i=0; i<ndims; i++) {
087                                r[i] *= norm;
088                        }
089                }
090
091                @Override
092                public int computeHashCode(int[] point) {
093                        double dp = 0;
094                        
095                        for (int i=0; i<ndims; i++)
096                                dp += r[i] * point[i];
097                                
098                        return dp >= 0 ? 1 : 0;
099                }
100
101                @Override
102                public int computeHashCode(SparseIntArray array) {
103                        double dp = 0;
104                        
105                        for (Entry e : array.entries())
106                                dp += r[e.index] * e.value;
107                                
108                        return dp >= 0 ? 1 : 0;
109                }
110        }
111
112        /**
113         * Construct with the given arguments.
114         * 
115         * @param ndims
116         *            The number of dimensions
117         * @param rng
118         *            A random number generator
119         */
120        public IntHyperplaneCosineFactory(int ndims, MersenneTwister rng) {
121                super(ndims, rng);
122        }
123
124        @Override
125        public Function create() {
126                return new Function(ndims, rng);
127        }
128
129        @Override
130        protected IntFVComparison fvDistanceFunction() {
131                return IntFVComparison.CITY_BLOCK;
132        }
133}