#include #include /* Can be obtained from http://www.pobox.com/~qed/pstdint.h , but can be replaced with if appropriate */ #include "pstdint.h" #ifndef CLOCKS_PER_SEC # ifdef CLK_TCK # define CLOCKS_PER_SEC (CLK_TCK) # endif #endif /* Hash performance test for various hash functions */ /* http://burtleburtle.net/bob/hash/doobs.html */ typedef uint32_t ub4; /* unsigned 4-byte quantities */ typedef uint8_t ub1; /* unsigned 1-byte quantities */ #undef get32bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get32bits(d) (*((const uint32_t *) (d))) #endif #if !defined (get32bits) #define get32bits(d) ( (uint32_t)(d)[0] \ + ((uint32_t)(d)[1]<>13); b -= c; a ^= x; b -= a; x = (a<<8); c -= a; b ^= x; c -= b; x = (b>>13); ... Unfortunately, superscalar Pentiums and Sparcs can't take advantage of that parallelism. They've also turned some of those single-cycle latency instructions into multi-cycle latency instructions. Still, this is the fastest good hash I could find. There were about 2^^68 to choose from. I only looked at a billion or so. -------------------------------------------------------------------- */ #define mix(a,b,c) \ { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \ } /* -------------------------------------------------------------------- hash() -- hash a variable-length key into a 32-bit value k : the key (the unaligned variable-length array of bytes) len : the length of the key, counting by bytes initval : can be any 4-byte value Returns a 32-bit value. Every bit of the key affects every bit of the return value. Every 1-bit and 2-bit delta achieves avalanche. About 6*len+35 instructions. The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. For example, if you need only 10 bits, do h = (h & hashmask(10)); In which case, the hash table should have hashsize(10) elements. If you are hashing n strings (ub1 **)k, do it like this: for (i=0, h=0; i= 12) { a += get32bits (k); b += get32bits (k+4); c += get32bits (k+8); mix(a,b,c); k += 12; len -= 12; } /*------------------------------------- handle the last 11 bytes */ c += length; switch(len) { /* all the case statements fall through */ case 11: c+=((ub4)k[10]<<24); case 10: c+=((ub4)k[9]<<16); case 9 : c+=((ub4)k[8]<<8); /* the first byte of c is reserved for the length */ case 8 : b+=((ub4)k[7]<<24); case 7 : b+=((ub4)k[6]<<16); case 6 : b+=((ub4)k[5]<<8); case 5 : b+=k[4]; case 4 : a+=((ub4)k[3]<<24); case 3 : a+=((ub4)k[2]<<16); case 2 : a+=((ub4)k[1]<<8); case 1 : a+=k[0]; /* case 0: nothing left to add */ } mix(a,b,c); /*-------------------------------------------- report the result */ return c; } uint32_t hashBobJenkins (const char * k, int length) { return hashBobJenkinsUpdate (k, length, 0); } /* ======================================================================== */ static uint32_t crc_16_table[16] = { 0x0000, 0xCC01, 0xD801, 0x1400, 0xF001, 0x3C00, 0x2800, 0xE401, 0xA001, 0x6C00, 0x7800, 0xB401, 0x5000, 0x9C01, 0x8801, 0x4400 }; /* * This code was found at: http://wannabe.guru.org/alg/node191.html * and still exists here: http://www.fearme.com/misc/alg/node191.html * * this source code is based on Rex and Binstock which, in turn, * acknowledges William James Hunt. * * According to the site this CRC uses the polynomial x^16+x^5+x^2+1. * Unfortunately, DOCSIS uses x^16+x^12+x^5+1. D'oh! */ static uint32_t GetCRC16Update (uint32_t start_crc, const char * data_stream, int length) { uint32_t crc = start_crc; uint32_t r; /* while there is more data to process */ while (length-- > 0) { /* compute checksum of lower four bits of *data_stream */ r = crc_16_table[crc & 0xF]; crc = (crc >> 4) & 0x0FFF; crc ^= r ^ crc_16_table[*data_stream & 0xF]; /* now compute checksum of upper four bits of *data_stream */ r = crc_16_table[crc & 0xF]; crc = (crc >> 4) & 0x0FFF; crc ^= r ^ crc_16_table[(*data_stream >> 4) & 0xF]; /* next... */ data_stream++; } return crc; } uint32_t GetCRC16 (const char * data_stream, int length) { return GetCRC16Update (0, data_stream, length); } /* ======================================================================== */ static uint32_t crc_table[256]; /* This code was found at: * http://cell.onecall.net/cell-relay/publications/software/CRC/32bitCRC.c.html */ /* */ /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ /* the high-bit first (Big-Endian) bit ordering convention */ /* */ /* Synopsis: */ /* gen_crc_table() -- generates a 256-word table containing all CRC */ /* remainders for every possible 8-bit byte. It */ /* must be executed (once) before any CRC updates. */ /* */ /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ /* Returns the updated value of the CRC accumulator after */ /* processing each byte in the addressed block of data. */ /* */ /* It is assumed that an unsigned long is at least 32 bits wide and */ /* that the predefined type char occupies one 8-bit byte of storage. */ /* */ /* The generator polynomial used for this version of the package is */ /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ /* Other degree 32 polynomials may be substituted by re-defining the */ /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ /* multiplied by an appropriate power of x. The representation used */ /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ /* word and the coefficient of x^31 is stored in the most significant */ /* bit. The CRC is to be appended to the data most significant byte */ /* first. For those protocols in which bytes are transmitted MSB */ /* first and in the same order as they are encountered in the block */ /* this convention results in the CRC remainder being transmitted with */ /* the coefficient of x^31 first and with that of x^0 last (just as */ /* would be done by a hardware shift register mechanization). */ /* */ /* The table lookup technique was adapted from the algorithm described */ /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ /* generate the table of CRC remainders for all possible bytes */ #define CRC32POLYNOMIAL 0x04c11db7L static void GenerateCRC32Table (void) { register int i, j; register uint32_t crc_accum; for ( i = 0; i < 256; i++ ) { crc_accum = ( (unsigned long) i << 24 ); for ( j = 0; j < 8; j++ ) { if ( crc_accum & 0x80000000L ) { crc_accum = ( crc_accum << 1 ) ^ CRC32POLYNOMIAL; } else { crc_accum = ( crc_accum << 1 ); } } crc_table[i] = crc_accum; } return; } /* update the CRC on the data block one byte at a time */ static uint32_t UpdateCRC32 (uint32_t crc_accum, const char *data_blk_ptr, int data_blk_size) { register int j; register uint8_t i; for (j = 0; j < data_blk_size; j++) { i = (crc_accum >> 24) ^ *data_blk_ptr++; crc_accum = (crc_accum << 8) ^ crc_table[i]; } return crc_accum; } uint32_t GetCRC32 (const char * data_stream, int length) { return UpdateCRC32 (0, data_stream, length); } /* ======================================================================== */ /* Performs two parallel CRC-32 on even and odd bytes of the input, then combines the two in a further CRC-32 calculation */ uint32_t GetCRC32PH (const char *data_blk_ptr, int data_blk_size) { int j; uint8_t i0, i1; uint32_t crc_accum0 = 0, crc_accum1 = UINT32_C (0x23456789); if (data_blk_size & 1) crc_accum0 ^= *data_blk_ptr++; for (j = 1; j < data_blk_size; j+=2) { i0 = ((crc_accum0 >> 24) ^ *data_blk_ptr++); i1 = ((crc_accum1 >> 24) ^ *data_blk_ptr++); crc_accum0 = (crc_accum0 << 8) ^ crc_table[i0]; crc_accum1 = (crc_accum1 << 8) ^ crc_table[i1]; } return crc_accum0 + crc_accum1; } /* ======================================================================== */ /* Fowler / Noll / Vo (FNV) Hash http://www.isthe.com/chongo/tech/comp/fnv/ */ uint32_t FNVHash (const char * data, int len) { int i; uint32_t hash; hash = UINT32_C(2166136261); for (i=0; i < len; i++) { hash = (UINT32_C(16777619) * hash) ^ data[i]; } return hash; } /* ======================================================================== */ /* * http://burtleburtle.net/bob/hash/doobs.html */ uint32_t oneAtATimeHash (const char * s, int len) { int32_t hash; int i; for (hash = 0, i = 0; i < len; i++) { hash += s[i]; hash += (hash << 10); hash ^= (hash >> 6); /* Non-portable due to ANSI C */ } hash += (hash << 3); hash ^= (hash >> 11); /* Non-portable due to ANSI C */ hash += (hash << 15); return (uint32_t) hash; } /* ======================================================================== */ uint32_t oneAtATimeHashPH (const char * s, int len) { int32_t hash0 = 0, hash1 = INT32_C(0x23456789); int i; if (len & 1) hash1 ^= *s++; for (i = 1; i < len; i+=2) { hash0 += *s++; hash1 += *s++; hash0 += (hash0 << 10); hash1 += (hash1 << 10); hash0 ^= (hash0 >> 6); /* Non-portable due to ANSI C */ hash1 ^= (hash1 >> 6); /* Non-portable due to ANSI C */ } hash0 += hash1; hash0 += (hash0 << 3); hash0 ^= (hash0 >> 11); /* Non-portable due to ANSI C */ hash0 += (hash0 << 15); return (uint32_t) hash0; } /* ======================================================================== */ /* By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh derivative license. See: http://www.azillionmonkeys.com/qed/weblicense.html for license details. http://www.azillionmonkeys.com/qed/hash.html */ #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) #define get16bits(d) (*((const uint16_t *) (d))) #endif #if !defined (get16bits) #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ +(uint32_t)(((const uint8_t *)(d))[0]) ) #endif uint32_t SuperFastHash (const char * data, int len) { uint32_t hash = 0, tmp; int rem; if (len <= 0 || data == NULL) return 0; rem = len & 3; len >>= 2; /* Main loop */ for (;len > 0; len--) { hash += get16bits (data); tmp = (get16bits (data+2) << 11) ^ hash; hash = (hash << 16) ^ tmp; data += 2*sizeof (uint16_t); hash += hash >> 11; } /* Handle end cases */ switch (rem) { case 3: hash += get16bits (data); hash ^= hash << 16; hash ^= data[sizeof (uint16_t)] << 18; hash += hash >> 11; break; case 2: hash += get16bits (data); hash ^= hash << 11; hash += hash >> 17; break; case 1: hash += *data; hash ^= hash << 10; hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } uint32_t SuperFastHashAsm (const char * data, int len) { uint32_t hash = 0; if (len <= 0 || data == NULL) return 0; #if defined(__WATCOMC__) || defined(_MSC_VER) __asm { xor ebx, ebx mov esi, data mov edi, len mov eax, edi mov ecx, edi and edi, 3 shr ecx, 2 jz L2 L1: movzx ebx, word ptr [esi] ; 0 add eax, ebx ; 1 movzx ebx, word ptr [esi+2] ; 0 shl ebx, 11 ; 1 xor ebx, eax ; 2 shl eax, 16 ; 2 add esi, 4 ; 0 xor eax, ebx ; 3 mov edx, eax ; 4 shr eax, 11 ; 4 add eax, edx ; 5 dec ecx ; 2 jnz L1 L2: mov ecx, edi cmp ecx, 1 jz lcase1 cmp ecx, 2 jz lcase2 cmp ecx, 3 jnz L3 mov bx, word ptr [esi] add eax, ebx xor ebx, ebx mov edx, eax shl eax, 16 xor eax, edx movsx ebx, byte ptr [esi+2] shl ebx, 18 xor eax, ebx mov edx, eax shr eax, 11 add eax, edx jmp L3 lcase2: mov bx, word ptr [esi] add eax, ebx mov edx, eax shl eax, 11 xor eax, edx mov edx, eax shr eax, 17 add eax, edx jmp L3 lcase1: movsx ebx, byte ptr [esi] add eax, ebx mov edx, eax shl eax, 10 xor eax, edx mov edx, eax shr eax, 1 add eax, edx L3: mov ebx, eax shl eax, 3 xor eax, ebx mov ebx, eax shr eax, 5 add eax, ebx mov ebx, eax shl eax, 4 xor eax, ebx mov ebx, eax shr eax, 17 add eax, ebx mov ebx, eax shl eax, 25 xor eax, ebx mov ebx, eax shr eax, 6 add eax, ebx mov hash, eax } #endif return hash; } /* ======================================================================== */ /* A hashing function that I believe was created by either Chris Torek or Dan Bernstein */ uint32_t alphaNumHash (const char * s, int len) { uint32_t h; int i; for (h = 0, i = 0; i < len; i++) { h = (h << 5) + (h * 5) + (unsigned char) s[i]; } return h; } /* ======================================================================== */ uint32_t foldBits (uint32_t k, uint32_t n) { uint32_t p = 0; if ((n + ~UINT32_C(0)) >= 31) return k; while (k) { p ^= k; k = k >> n; } return p & ((1 << n) - 1); } typedef uint32_t (* hashFn) (const char * s, int len); #define BUFF_SZ (128*2) #define NTESTS INT32_C(5000000) double test (hashFn hash) { static char buff[BUFF_SZ]; clock_t c0, c1; int32_t i; for (buff[0]=0, i=1; i < BUFF_SZ; i++) buff[i] = (char) (i + buff[i-1]); c0 = clock (); for (i=0; i < NTESTS; i++) hash (buff, BUFF_SZ); c1 = clock (); return (c1 - c0)*(1.0 / (double)CLOCKS_PER_SEC); } struct tagtest { double res; char * name; hashFn hash; } tests[] = { #if 0 { 0.0, "CRC16\t\t", GetCRC16 }, { 0.0, "oneAtATimeHashPH", oneAtATimeHashPH }, { 0.0, "CRC32PH\t\t", GetCRC32PH }, { 0.0, "SFHAsm\t\t", SuperFastHashAsm }, #endif { 0.0, "CRC32\t\t", GetCRC32 }, { 0.0, "oneAtATimeHash\t", oneAtATimeHash }, { 0.0, "alphaNumHash\t", alphaNumHash }, { 0.0, "FNVHash\t\t", FNVHash }, { 0.0, "BobJenkins\t", hashBobJenkins }, { 0.0, "SuperFastHash\t", SuperFastHash }, { 0.0, NULL, NULL } }; int main () { int i, j; GenerateCRC32Table (); for (j=0; tests[j].name != NULL; j++) { for (i=0; i < 3; i++) { double res = test (tests[j].hash); if (tests[j].res == 0.0 || tests[j].res > res) tests[j].res = res; } printf ("%s:%8.4fs\n", tests[j].name, tests[j].res); } return 0; }