001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.lsh.sketch; 031 032import java.util.ArrayList; 033import java.util.BitSet; 034import java.util.List; 035 036import org.openimaj.util.hash.HashFunction; 037import org.openimaj.util.hash.HashFunctionFactory; 038import org.openimaj.util.sketch.Sketcher; 039 040/** 041 * A {@link Sketcher} that produces bit-string sketches encoded as a 042 * {@link BitSet}. Only the least-significant bit of each hash function will be 043 * appended to the final sketch. The length of the output array will be computed 044 * such that the bit from each hash function is contained. 045 * 046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 047 * 048 * @param <OBJECT> 049 * Type of object being sketched 050 */ 051public class BitSetLSHSketcher<OBJECT> implements Sketcher<OBJECT, BitSet> { 052 List<HashFunction<OBJECT>> hashFunctions; 053 054 /** 055 * Construct with the given functions. 056 * 057 * @param functions 058 * the underlying hash functions. 059 */ 060 public BitSetLSHSketcher(List<HashFunction<OBJECT>> functions) { 061 this.hashFunctions = functions; 062 } 063 064 /** 065 * Construct with the given functions. 066 * 067 * @param first 068 * the first function 069 * @param remainder 070 * the remainder of the functions 071 */ 072 @SafeVarargs 073 public BitSetLSHSketcher(HashFunction<OBJECT> first, HashFunction<OBJECT>... remainder) { 074 this.hashFunctions = new ArrayList<HashFunction<OBJECT>>(); 075 this.hashFunctions.add(first); 076 077 for (final HashFunction<OBJECT> r : remainder) 078 this.hashFunctions.add(r); 079 } 080 081 /** 082 * Construct with the factory which is used to produce the required number 083 * of functions. 084 * 085 * @param factory 086 * the factory to use to produce the underlying hash functions. 087 * @param nFuncs 088 * the number of functions to create for the composition 089 */ 090 public BitSetLSHSketcher(HashFunctionFactory<OBJECT> factory, int nFuncs) { 091 this.hashFunctions = new ArrayList<HashFunction<OBJECT>>(); 092 093 for (int i = 0; i < nFuncs; i++) 094 hashFunctions.add(factory.create()); 095 } 096 097 @Override 098 public BitSet createSketch(OBJECT input) { 099 final int nbits = bitLength(); 100 final BitSet sketch = new BitSet(nbits); 101 102 for (int k = 0; k < nbits; k++) { 103 final int hash = hashFunctions.get(k).computeHashCode(input); 104 105 sketch.set(k, ((hash & 1) == 1) ? true : false); 106 } 107 108 return sketch; 109 } 110 111 /** 112 * Get the length of the sketch in bits. 113 * 114 * @return the number of bits in the sketch 115 */ 116 public int bitLength() { 117 return hashFunctions.size(); 118 } 119}