From 0c3885a0070a78342c8ec64a8dd56eba7ff6f09a Mon Sep 17 00:00:00 2001
From: Konstantin Osipov <kostja@tarantool.org>
Date: Sat, 2 Feb 2013 23:29:35 +0400
Subject: [PATCH] Replace murmur hash 2 with murmur hash 3.

---
 client/tarantool_checksum/CMakeLists.txt |   3 +
 client/tarantool_checksum/tc_generate.c  |   4 +-
 client/tarantool_checksum/tc_verify.c    |   1 -
 cmake/BuildMisc.cmake                    |   1 +
 include/assoc.h                          |   4 +-
 test/big/iterator.result                 |   6 +-
 test/big/sql.result                      |  10 +-
 test/box/socket.result                   |   8 +-
 test/unit/bit.c                          |   1 +
 third_party/PMurHash.c                   | 317 +++++++++++++++++++++++
 third_party/PMurHash.h                   |  64 +++++
 third_party/README                       |   6 +
 third_party/murmur_hash2.c               |  64 -----
 13 files changed, 409 insertions(+), 80 deletions(-)
 create mode 100644 third_party/PMurHash.c
 create mode 100644 third_party/PMurHash.h
 delete mode 100644 third_party/murmur_hash2.c

diff --git a/client/tarantool_checksum/CMakeLists.txt b/client/tarantool_checksum/CMakeLists.txt
index e24280ad63..fadc74533c 100644
--- a/client/tarantool_checksum/CMakeLists.txt
+++ b/client/tarantool_checksum/CMakeLists.txt
@@ -16,6 +16,9 @@ list(APPEND util_checksum_sources
     ${CMAKE_SOURCE_DIR}/cfg/tarantool_box_cfg.c
     ${CMAKE_SOURCE_DIR}/cfg/prscfg.c)
 
+list(APPEND util_checksum_sources
+    ${CMAKE_SOURCE_DIR}/third_party/PMurHash.c)
+
 set_source_files_compile_flags(
     ${util_checksum_sources})
 add_executable(${util_checksum} ${util_checksum_sources})
diff --git a/client/tarantool_checksum/tc_generate.c b/client/tarantool_checksum/tc_generate.c
index 3ee329393b..2683a21792 100644
--- a/client/tarantool_checksum/tc_generate.c
+++ b/client/tarantool_checksum/tc_generate.c
@@ -43,7 +43,7 @@
 #include <cfg/prscfg.h>
 #include <cfg/tarantool_box_cfg.h>
 
-#include <third_party/murmur_hash2.c>
+#include <third_party/PMurHash.h>
 #include <third_party/crc32.h>
 
 #include "tc_key.h"
@@ -74,7 +74,7 @@ search_hash(const struct tc_key *k, struct tc_space *s)
 			break;
 		}
 		case TC_SPACE_KEY_STRING:
-			 h = MurmurHash2(TC_KEY_DATA(k, i), TC_KEY_SIZE(k, i), h);
+			 h = PMurHash32(h, TC_KEY_DATA(k, i), TC_KEY_SIZE(k, i));
 			break;
 		case TC_SPACE_KEY_UNKNOWN:
 			assert(1);
diff --git a/client/tarantool_checksum/tc_verify.c b/client/tarantool_checksum/tc_verify.c
index 18321a55a6..aac9ba84a6 100644
--- a/client/tarantool_checksum/tc_verify.c
+++ b/client/tarantool_checksum/tc_verify.c
@@ -44,7 +44,6 @@
 #include <cfg/prscfg.h>
 #include <cfg/tarantool_box_cfg.h>
 
-#include <third_party/murmur_hash2.c>
 #include <third_party/crc32.h>
 
 #include "tc_key.h"
diff --git a/cmake/BuildMisc.cmake b/cmake/BuildMisc.cmake
index 8f90c771b6..e8975a5021 100644
--- a/cmake/BuildMisc.cmake
+++ b/cmake/BuildMisc.cmake
@@ -5,6 +5,7 @@ macro(libmisc_build)
         ${PROJECT_SOURCE_DIR}/third_party/crc32.c
         ${PROJECT_SOURCE_DIR}/third_party/proctitle.c
         ${PROJECT_SOURCE_DIR}/third_party/qsort_arg.c
+        ${PROJECT_SOURCE_DIR}/third_party/PMurHash.c
     )
 
     if (NOT HAVE_MEMMEM)
diff --git a/include/assoc.h b/include/assoc.h
index 49db6f4b14..497e12ecfc 100644
--- a/include/assoc.h
+++ b/include/assoc.h
@@ -82,7 +82,7 @@ static inline int lstrcmp(const void *a, const void *b)
 		return bl - al;
 	return memcmp(a, b, al);
 }
-#include <third_party/murmur_hash2.c>
+#include <third_party/PMurHash.h>
 #define mh_name _lstrptr
 struct mh_lstrptr_node_t {
 	const void *key;
@@ -97,7 +97,7 @@ mh_strptr_hash(const mh_node_t *a, mh_hash_arg_t arg) {
 	(void) arg;
 	const void *_k = (a->key);
 	const u32 l = load_varint32(&_k);
-	return (u32) MurmurHash2(_k, l, 13);
+	return PMurHash32(13, _k, l);
 }
 #define mh_hash(a, arg) mh_strptr_hash(a, arg)
 #define mh_eq_arg_t void *
diff --git a/test/big/iterator.result b/test/big/iterator.result
index 117481a34b..adb6172424 100644
--- a/test/big/iterator.result
+++ b/test/big/iterator.result
@@ -830,9 +830,9 @@ lua iterate(20, 4, 0, 1, box.index.GE, 'pid_001')
 ---
 sorted output
 $pid_001$
-$pid_002$
-$pid_005$
-$pid_017$
+$pid_007$
+$pid_011$
+$pid_019$
 $pid_023$
 ...
 lua iterate(20, 4, 0, 1, box.index.GE, 'pid_999')
diff --git a/test/big/sql.result b/test/big/sql.result
index 1fe0688f60..222809b43f 100644
--- a/test/big/sql.result
+++ b/test/big/sql.result
@@ -84,9 +84,9 @@ insert into t1 values ('key3', 'part1', 'part2_b')
 Insert OK, 1 row affected
 lua for k, v in box.space[1]:pairs() do print(v) end
 ---
-830039403: {'part1', 'part2'}
-863593835: {'part1', 'part2_b'}
 846816619: {'part1', 'part2_a'}
+863593835: {'part1', 'part2_b'}
+830039403: {'part1', 'part2'}
 ...
 select * from t1 where k0='key1'
 Found 1 tuple:
@@ -108,8 +108,10 @@ Found 3 tuples:
 [846816619, 'part1', 'part2_a']
 [863593835, 'part1', 'part2_b']
 call box.select_range(1, 0, 100, 'key2')
-Found 1 tuple:
+Found 3 tuples:
+[830039403, 'part1', 'part2']
 [846816619, 'part1', 'part2_a']
+[863593835, 'part1', 'part2_b']
 call box.select_range(1, 1, 100, 'part1', 'part2_a')
 Found 2 tuples:
 [846816619, 'part1', 'part2_a']
@@ -336,9 +338,9 @@ insert into t4 values(3, 'Creature ')
 Insert OK, 1 row affected
 lua for k, v in box.space[4]:pairs() do print(v) end
 ---
+2: {'Bilimbi'}
 3: {'Creature '}
 1: {'Aardvark '}
-2: {'Bilimbi'}
 ...
 lua box.space[4].index[0].idx:min()
 ---
diff --git a/test/box/socket.result b/test/box/socket.result
index 9d0340784b..bf4be1d81b 100644
--- a/test/box/socket.result
+++ b/test/box/socket.result
@@ -54,13 +54,13 @@ lua s:connect('::1', '30303')
 ---
  - nil
  - error
- - 111
- - Connection refused
+ - -1
+ - Host name resolution failed
 ...
 lua s:error()
 ---
- - 111
- - Connection refused
+ - -1
+ - Host name resolution failed
 ...
 lua s:connect('127.0.0.1', '30303', 0.01)
 ---
diff --git a/test/unit/bit.c b/test/unit/bit.c
index 9b305369ef..a7cce03672 100644
--- a/test/unit/bit.c
+++ b/test/unit/bit.c
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <inttypes.h>
 #include <assert.h>
+#include <stdlib.h>
 
 #include "unit.h"
 
diff --git a/third_party/PMurHash.c b/third_party/PMurHash.c
new file mode 100644
index 0000000000..017501264d
--- /dev/null
+++ b/third_party/PMurHash.c
@@ -0,0 +1,317 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/*-----------------------------------------------------------------------------
+ 
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What does this version provide?
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix. A partial
+hash of a prefix string can be generated and reused for each suffix string.
+
+2. Portability. Plain old C so that it should compile on any old compiler.
+Both CPU endian and access-alignment neutral, but avoiding inefficient code
+when possible depending on CPU capabilities.
+
+3. Drop in. I personally like nice self contained public domain code, making it
+easy to pilfer without loads of refactoring to work properly in the existing
+application code & makefile structure and mucking around with licence files.
+Just copy PMurHash.h and PMurHash.c and you're ready to go.
+
+
+How does it work?
+
+We can only process entire 32 bit chunks of input, except for the very end
+that may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 3 bytes that we were unable to process.
+This carry also needs to record the number of bytes the carry holds. I use
+the low 2 bits as a count (0..3) and the carry bytes are shifted into the
+high byte in stream order.
+
+To handle endianness I simply use a macro that reads a uint32_t and define
+that macro to be a direct read on little endian machines, a read and swap
+on big endian machines, or a byte-by-byte read if the endianness is unknown.
+
+-----------------------------------------------------------------------------*/
+
+
+#include "PMurHash.h"
+
+/* I used ugly type names in the header to avoid potential conflicts with
+ * application or system typedefs & defines. Since I'm not including any more
+ * headers below here I can rename these so that the code reads like C99 */
+#undef uint32_t
+#define uint32_t MH_UINT32
+#undef uint8_t
+#define uint8_t  MH_UINT8
+
+/* MSVC warnings we choose to ignore */
+#if defined(_MSC_VER)
+  #pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Endianness, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * READ_UINT32(x)   Read a little endian unsigned 32-bit int
+ * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
+ * ROTL32(x,r)      Rotate x left by r bits
+ */
+
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+  #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+  #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+  #define __BYTE_ORDER __LITTLE_ENDIAN
+  #define UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
+#if !defined(__BYTE_ORDER)
+  #if defined(__ARMEL__) || defined(__MIPSEL__)
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #endif
+  #if defined(__ARMEB__) || defined(__MIPSEB__)
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* Now find best way we can to READ_UINT32: yields the little-endian 32-bit word stored at ptr */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+  #define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+    #define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
+  #else
+    /* Without a known fast bswap32 we're just as well off doing this (bytes are widened before shifting, see below) */
+    #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+    #define UNALIGNED_SAFE
+  #endif
+#else
+  /* Unknown endianness so last resort is to read individual bytes; cast each byte to uint32_t so <<24 never shifts into the sign bit of a promoted int */
+  #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+
+  /* Since we're not doing word-reads we can skip the messing about with realignment */
+  #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
+  #define ROTL32(x,r)  _rotl(x,r)
+#else
+  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+  #define ROTL32(x,r)  (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define C1  (0xcc9e2d51)
+#define C2  (0x1b873593)
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 32-bits of input. */
+#define DOBLOCK(h1, k1) do{ \
+        k1 *= C1; \
+        k1 = ROTL32(k1,15); \
+        k1 *= C2; \
+        \
+        h1 ^= k1; \
+        h1 = ROTL32(h1,13); \
+        h1 = h1*5+0xe6546b64; \
+    }while(0)
+
+
+/* Append cnt payload bytes to the carry one at a time, running DOBLOCK on the carry (and emptying it) whenever it reaches 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload (c, n, ptr and len are updated in place) */
+#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
+    int _i = (cnt); /* snapshot: cnt may be the same variable as len */ \
+    while(_i--) { \
+        c = c>>8 | (uint32_t)*ptr++<<24; /* widen first: <<24 on a promoted int is undefined for bytes >= 0x80 */ \
+        n++; len--; \
+        if(n==4) { \
+            DOBLOCK(h1, c); \
+            n = 0; \
+        } \
+    } }while(0)
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function: folds len bytes at key into the running hash *ph1, parking up to 3 leftover bytes (plus their count) in *pcarry.
+ * Initialise carry to 0 and h1 to 0 or an initial seed if wanted. Both ph1 and pcarry are required arguments. */
+void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
+{
+  uint32_t h1 = *ph1;    /* work on local copies; both are written back at the end */
+  uint32_t c = *pcarry;
+
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end;
+
+  /* Extract carry count from low 2 bits of c value */
+  int n = c & 3;
+
+#if defined(UNALIGNED_SAFE)
+  /* This CPU handles unaligned word access */
+
+  /* Consume any carry bytes: take the 4-n bytes needed to complete the pending word, if the payload has that many */
+  int i = (4-n) & 3;
+  if(i && i <= len) {
+    DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* Process 32-bit chunks */
+  end = ptr + len/4*4;
+  for( ; ptr < end ; ptr+=4) {
+    uint32_t k1 = READ_UINT32(ptr);
+    DOBLOCK(h1, k1);
+  }
+
+#else /*UNALIGNED_SAFE*/
+  /* This CPU does not handle unaligned word access */
+
+  /* Consume enough so that the next data byte is word aligned */
+  int i = -(long)ptr & 3;   /* bytes until ptr reaches a 4-byte boundary (0..3) */
+  if(i && i <= len) {
+      DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+  end = ptr + len/4*4;
+  switch(n) { /* how many bytes in c */
+  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = READ_UINT32(ptr);
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>24;
+      c = READ_UINT32(ptr);
+      k1 |= c<<8;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>16;
+      c = READ_UINT32(ptr);
+      k1 |= c<<16;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>8;
+      c = READ_UINT32(ptr);
+      k1 |= c<<24;
+      DOBLOCK(h1, k1);
+    }
+  }
+#endif /*UNALIGNED_SAFE*/
+
+  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
+  len -= len/4*4;
+
+  /* Append any remaining bytes into carry */
+  DOBYTES(len, h1, c, n, ptr, len);
+
+  /* Copy out new running hash and carry */
+  *ph1 = h1;
+  *pcarry = (c & ~0xff) | n;   /* low byte is replaced by the byte count n; the carried bytes sit in the high bytes */
+} 
+
+/*---------------------------------------------------------------------------*/
+
+/* Finalize a hash: mixes any bytes still held in carry into h, xors in total_length, applies the final mix and returns the result. To match the original Murmur3A the total_length must be provided */
+uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
+{
+  uint32_t k1;
+  int n = carry & 3;   /* number of pending bytes recorded in the low 2 bits of carry */
+  if(n) {
+    k1 = carry >> (4-n)*8;   /* move the n carried bytes down from the top of the word */
+    k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;   /* same k1 mixing as DOBLOCK, without DOBLOCK's rotate/multiply-add on h */
+  }
+  h ^= total_length;
+
+  /* fmix: final xor-shift / multiply rounds over h */
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Murmur3A compatible all-at-once hash: starts from seed with an empty carry, processes all len bytes of key, then finalises with len as the total length */
+uint32_t PMurHash32(uint32_t seed, const void *key, int len)
+{
+  uint32_t h1=seed, carry=0;
+  PMurHash32_Process(&h1, &carry, key, len);
+  return PMurHash32_Result(h1, carry, len);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Provide an API suitable for smhasher: hashes len bytes at key starting from seed and stores the 32-bit result through out */
+void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
+{
+  uint32_t h1=seed, carry=0;
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end = ptr + len;
+
+#if 0 /* Disabled: exercise the progressive processing by feeding the key in randomly sized slices */
+  while(ptr < end) {
+    //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
+    const uint8_t *mid = ptr + (rand()&0xF);
+    mid = mid<end?mid:end;
+    PMurHash32_Process(&h1, &carry, ptr, mid-ptr);
+    ptr = mid;
+  }
+#else
+  PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr));   /* active path: whole key in a single call */
+#endif
+  h1 = PMurHash32_Result(h1, carry, len);
+  *(uint32_t*)out = h1;   /* out is written as one uint32_t */
+}
+
+/*---------------------------------------------------------------------------*/
diff --git a/third_party/PMurHash.h b/third_party/PMurHash.h
new file mode 100644
index 0000000000..28ead00a7d
--- /dev/null
+++ b/third_party/PMurHash.h
@@ -0,0 +1,64 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/* ------------------------------------------------------------------------- */
+/* Determine what native type to use for uint32_t */
+
+/* We can't use the name 'uint32_t' here because it will conflict with
+ * any version provided by the system headers or application. */
+
+/* First look for special cases */
+#if defined(_MSC_VER)
+  #define MH_UINT32 unsigned long
+#endif
+
+/* If the compiler says it's C99 then take its word for it */
+#if !defined(MH_UINT32) && ( \
+     defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L )
+  #include <stdint.h>
+  #define MH_UINT32 uint32_t
+#endif
+
+/* Otherwise try testing against max value macros from limits.h */
+#if !defined(MH_UINT32)
+  #include  <limits.h>
+  #if   (USHRT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned short
+  #elif (UINT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned int
+  #elif (ULONG_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned long
+  #endif
+#endif
+
+#if !defined(MH_UINT32)
+  #error Unable to determine type name for unsigned 32-bit int
+#endif
+
+/* I have yet to work on a platform where 'unsigned char' is not 8 bits */
+#define MH_UINT8  unsigned char
+
+
+/* ------------------------------------------------------------------------- */
+/* Prototypes */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len);
+MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length);
+MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len);
+
+void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/third_party/README b/third_party/README
index 796b612ae2..57b6c06af4 100644
--- a/third_party/README
+++ b/third_party/README
@@ -50,3 +50,9 @@ How to update rb.h
 ======================
 Get the header from
 git://canonware.com/jemalloc.git
+
+How to update murmur hash
+=========================
+
+wget http://smhasher.googlecode.com/svn/trunk/PMurHash.c -O PMurHash.c
+wget http://smhasher.googlecode.com/svn/trunk/PMurHash.h -O PMurHash.h
diff --git a/third_party/murmur_hash2.c b/third_party/murmur_hash2.c
deleted file mode 100644
index 6c64bace58..0000000000
--- a/third_party/murmur_hash2.c
+++ /dev/null
@@ -1,64 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash2, by Austin Appleby
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-//    machines.
-
-static inline unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
-{
-	// 'm' and 'r' are mixing constants generated offline.
-	// They're not really 'magic', they just happen to work well.
-
-	const unsigned int m = 0x5bd1e995;
-	const int r = 24;
-
-	// Initialize the hash to a 'random' value
-
-	unsigned int h = seed ^ len;
-
-	// Mix 4 bytes at a time into the hash
-
-	const unsigned char * data = (const unsigned char *)key;
-
-	while(len >= 4)
-	{
-		unsigned int k = *(unsigned int *)data;
-
-		k *= m; 
-		k ^= k >> r; 
-		k *= m; 
-		
-		h *= m; 
-		h ^= k;
-
-		data += 4;
-		len -= 4;
-	}
-	
-	// Handle the last few bytes of the input array
-
-	switch(len)
-	{
-	case 3: h ^= data[2] << 16;
-	case 2: h ^= data[1] << 8;
-	case 1: h ^= data[0];
-	        h *= m;
-	};
-
-	// Do a few final mixes of the hash to ensure the last few
-	// bytes are well-incorporated.
-
-	h ^= h >> 13;
-	h *= m;
-	h ^= h >> 15;
-
-	return h;
-} 
-- 
GitLab