001/* 002 AUTOMATICALLY GENERATED BY jTemp FROM 003 /Users/jsh2/Work/openimaj/target/checkout/machine-learning/nearest-neighbour/src/main/jtemp/org/openimaj/lsh/functions/#T#HammingFactory.jtemp 004*/ 005/** 006 * Copyright (c) 2011, The University of Southampton and the individual contributors. 007 * All rights reserved. 008 * 009 * Redistribution and use in source and binary forms, with or without modification, 010 * are permitted provided that the following conditions are met: 011 * 012 * * Redistributions of source code must retain the above copyright notice, 013 * this list of conditions and the following disclaimer. 014 * 015 * * Redistributions in binary form must reproduce the above copyright notice, 016 * this list of conditions and the following disclaimer in the documentation 017 * and/or other materials provided with the distribution. 018 * 019 * * Neither the name of the University of Southampton nor the names of its 020 * contributors may be used to endorse or promote products derived from this 021 * software without specific prior written permission. 022 * 023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 024 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 025 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 026 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 027 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 028 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 029 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 030 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 032 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 033 */ 034package org.openimaj.lsh.functions; 035 036import org.openimaj.citation.annotation.Reference; 037import org.openimaj.citation.annotation.ReferenceType; 038import org.openimaj.feature.ByteFVComparison; 039import org.openimaj.util.array.SparseByteArray; 040 041import cern.jet.random.Uniform; 042import cern.jet.random.engine.MersenneTwister; 043 044/** 045 * A hash function factory for producing hash functions that approximate the 046 * Hamming distance. 047 * 048 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 049 * 050 */ 051 @Reference( 052 type = ReferenceType.Inproceedings, 053 author = { "Indyk, Piotr", "Motwani, Rajeev" }, 054 title = "Approximate nearest neighbors: towards removing the curse of dimensionality", 055 year = "1998", 056 booktitle = "Proceedings of the thirtieth annual ACM symposium on Theory of computing", 057 pages = { "604", "", "613" }, 058 url = "http://doi.acm.org/10.1145/276698.276876", 059 publisher = "ACM", 060 series = "STOC '98" 061) 062public class ByteHammingFactory extends ByteHashFunctionFactory { 063 private class Function extends ByteHashFunction { 064 private int ham; 065 066 Function(ByteHammingFactory options, int ndims, MersenneTwister rng) { 067 super(rng); 068 069 Uniform uniform = new Uniform(rng); 070 071 if (options.bitsPerDim == 0) 072 ham = (int) uniform.nextIntFromTo(0, ndims - 1); 073 else 074 ham = (int) uniform.nextIntFromTo(0, (ndims * options.bitsPerDim) - 1); 075 } 076 077 @Override 078 public int computeHashCode(byte[] point) { 079 // which hash function 080 if (bitsPerDim == 0) { 081 return point[ham] == 0 ? 0 : 1; 082 } else { 083 // compact binary data 084 final int m = ham % bitsPerDim; 085 final int d = ham / bitsPerDim; 086 return (int) (HammingHelper.convert(point[d]) >>> m & 1L); 087 } 088 } 089 090 @Override 091 public int computeHashCode(SparseByteArray array) { 092 // which hash function 093 if (bitsPerDim == 0) { 094 return array.get(ham) == 0 ? 0 : 1; 095 } else { 096 // compact binary data 097 final int m = ham % bitsPerDim; 098 final int d = ham / bitsPerDim; 099 return (int) (HammingHelper.convert(array.get(d)) >>> m & 1L); 100 } 101 } 102 } 103 104 int bitsPerDim; 105 106 /** 107 * Construct a new factory using the given parameters. 108 * 109 * @param ndims 110 * The number of dimensions (i.e. length of the vector being 111 * hashed) 112 * @param rng 113 * A random number generator 114 * @param bitsPerDim 115 * The number of bits per dimension. If the data is packed, then 116 * this will be greater than zero, and internally a single bit 117 * will be sampled for computing the hash. If zero, then it is 118 * assumed that every element of the vector being hashed is 119 * either a zero or one. 120 */ 121 public ByteHammingFactory(int ndims, MersenneTwister rng, int bitsPerDim) { 122 super(ndims, rng); 123 124 this.bitsPerDim = bitsPerDim; 125 } 126 127 @Override 128 public Function create() { 129 return new Function(this, ndims, rng); 130 } 131 132 @Override 133 public ByteFVComparison fvDistanceFunction() { 134 if (bitsPerDim == 0) 135 return ByteFVComparison.HAMMING; 136 else 137 return ByteFVComparison.PACKED_HAMMING; 138 } 139}