001/**
002 * Copyright (c) 2011, The University of Southampton and the individual contributors.
003 * All rights reserved.
004 *
005 * Redistribution and use in source and binary forms, with or without modification,
006 * are permitted provided that the following conditions are met:
007 *
008 *   *  Redistributions of source code must retain the above copyright notice,
009 *      this list of conditions and the following disclaimer.
010 *
011 *   *  Redistributions in binary form must reproduce the above copyright notice,
012 *      this list of conditions and the following disclaimer in the documentation
013 *      and/or other materials provided with the distribution.
014 *
015 *   *  Neither the name of the University of Southampton nor the names of its
016 *      contributors may be used to endorse or promote products derived from this
017 *      software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package org.openimaj.lsh.sketch;
031
032import java.util.ArrayList;
033import java.util.BitSet;
034import java.util.List;
035
036import org.openimaj.util.hash.HashFunction;
037import org.openimaj.util.hash.HashFunctionFactory;
038import org.openimaj.util.sketch.Sketcher;
039
040/**
041 * A {@link Sketcher} that produces bit-string sketches encoded as a
042 * {@link BitSet}. Only the least-significant bit of each hash function will be
043 * appended to the final sketch. The length of the output array will be computed
044 * such that the bit from each hash function is contained.
045 *
046 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk)
047 *
048 * @param <OBJECT>
049 *            Type of object being sketched
050 */
051public class BitSetLSHSketcher<OBJECT> implements Sketcher<OBJECT, BitSet> {
052        List<HashFunction<OBJECT>> hashFunctions;
053
054        /**
055         * Construct with the given functions.
056         *
057         * @param functions
058         *            the underlying hash functions.
059         */
060        public BitSetLSHSketcher(List<HashFunction<OBJECT>> functions) {
061                this.hashFunctions = functions;
062        }
063
064        /**
065         * Construct with the given functions.
066         *
067         * @param first
068         *            the first function
069         * @param remainder
070         *            the remainder of the functions
071         */
072        @SafeVarargs
073        public BitSetLSHSketcher(HashFunction<OBJECT> first, HashFunction<OBJECT>... remainder) {
074                this.hashFunctions = new ArrayList<HashFunction<OBJECT>>();
075                this.hashFunctions.add(first);
076
077                for (final HashFunction<OBJECT> r : remainder)
078                        this.hashFunctions.add(r);
079        }
080
081        /**
082         * Construct with the factory which is used to produce the required number
083         * of functions.
084         *
085         * @param factory
086         *            the factory to use to produce the underlying hash functions.
087         * @param nFuncs
088         *            the number of functions to create for the composition
089         */
090        public BitSetLSHSketcher(HashFunctionFactory<OBJECT> factory, int nFuncs) {
091                this.hashFunctions = new ArrayList<HashFunction<OBJECT>>();
092
093                for (int i = 0; i < nFuncs; i++)
094                        hashFunctions.add(factory.create());
095        }
096
097        @Override
098        public BitSet createSketch(OBJECT input) {
099                final int nbits = bitLength();
100                final BitSet sketch = new BitSet(nbits);
101
102                for (int k = 0; k < nbits; k++) {
103                        final int hash = hashFunctions.get(k).computeHashCode(input);
104
105                        sketch.set(k, ((hash & 1) == 1) ? true : false);
106                }
107
108                return sketch;
109        }
110
111        /**
112         * Get the length of the sketch in bits.
113         *
114         * @return the number of bits in the sketch
115         */
116        public int bitLength() {
117                return hashFunctions.size();
118        }
119}