001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.util.hash; 031 032/** 033 * Use MurmurHash (http://murmurhash.googlepages.com/) to generate a random hash 034 * for a string. 035 * 036 * @author Sina Samangooei (ss@ecs.soton.ac.uk) 037 * 038 */ 039public class StringMurmurHashFunction implements HashFunction<String> { 040 private int seed = -1; 041 042 /** 043 * Construct a new {@link StringMurmurHashFunction} with the default seed. 044 */ 045 public StringMurmurHashFunction() { 046 } 047 048 /** 049 * Construct a new {@link StringMurmurHashFunction} with the given seed. 050 * 051 * @param seed 052 * the seed 053 */ 054 public StringMurmurHashFunction(int seed) { 055 this.seed = seed; 056 } 057 058 @Override 059 public int computeHashCode(String data) { 060 return murmurhash(data.getBytes(), seed); 061 } 062 063 /** 064 * Compute the Murmur hash (http://murmurhash.googlepages.com/) of the given 065 * bytes. 066 * 067 * @see #murmurhash(byte[], int, int) 068 * 069 * @param data 070 * the data to hash 071 * @return the murmur hash 072 */ 073 public static int murmurhash(byte[] data) { 074 return murmurhash(data, data.length, -1); 075 } 076 077 /** 078 * Compute the Murmur hash (http://murmurhash.googlepages.com/) of the given 079 * bytes. 080 * 081 * @see #murmurhash(byte[], int, int) 082 * 083 * @param data 084 * the data to hash 085 * @param seed 086 * the random seed 087 * @return the murmur hash 088 */ 089 public static int murmurhash(byte[] data, int seed) { 090 return murmurhash(data, data.length, seed); 091 } 092 093 /** 094 * Compute the Murmur hash (http://murmurhash.googlepages.com/) of the given 095 * bytes. Based on the implementation from <a 096 * href="https://github.com/clearspring/stream-lib"> 097 * https://github.com/clearspring/stream-lib</a>. 098 * 099 * @param data 100 * the data to hash 101 * @param length 102 * the length of the data to hash 103 * @param seed 104 * the random seed 105 * @return the murmur hash 106 */ 107 public static int murmurhash(byte[] data, int length, int seed) { 108 final int m = 0x5bd1e995; 109 final int r = 24; 110 111 int h = seed ^ length; 112 113 final int len_4 = length >> 2; 114 115 for (int i = 0; i < len_4; i++) { 116 final int i_4 = i << 2; 117 int k = data[i_4 + 3]; 118 k = k << 8; 119 k = k | (data[i_4 + 2] & 0xff); 120 k = k << 8; 121 k = k | (data[i_4 + 1] & 0xff); 122 k = k << 8; 123 k = k | (data[i_4 + 0] & 0xff); 124 k *= m; 125 k ^= k >>> r; 126 k *= m; 127 h *= m; 128 h ^= k; 129 } 130 131 // avoid calculating modulo 132 final int len_m = len_4 << 2; 133 final int left = length - len_m; 134 135 if (left != 0) { 136 if (left >= 3) { 137 h ^= data[length - 3] << 16; 138 } 139 if (left >= 2) { 140 h ^= data[length - 2] << 8; 141 } 142 if (left >= 1) { 143 h ^= data[length - 1]; 144 } 145 146 h *= m; 147 } 148 149 h ^= h >>> 13; 150 h *= m; 151 h ^= h >>> 15; 152 153 return h; 154 } 155}