Skip to content

Commit

Permalink
lib/hash: Use CRC32 for hashing.
Browse files Browse the repository at this point in the history
Use CRC32 intrinsics for hash computations when building for
X86_64 with SSE4_2.

Add a new hash_words64() and change hash_words() to be inlined when
'n_words' is a compile-time constant.

Signed-off-by: Jarno Rajahalme <[email protected]>
Acked-by: Ben Pfaff <[email protected]>
  • Loading branch information
Jarno Rajahalme committed Jul 11, 2014
1 parent afae68b commit ff8eeab
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 30 deletions.
27 changes: 12 additions & 15 deletions lib/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,6 @@ hash_bytes(const void *p_, size_t n, uint32_t basis)
return hash_finish(hash, orig_n);
}

/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
* 'p' must be properly aligned. */
uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
uint32_t hash;
size_t i;

hash = basis;
for (i = 0; i < n_words; i++) {
hash = hash_add(hash, p[i]);
}
return hash_finish(hash, n_words * 4);
}

uint32_t
hash_double(double x, uint32_t basis)
{
Expand All @@ -74,3 +59,15 @@ hash_double(double x, uint32_t basis)
memcpy(value, &x, sizeof value);
return hash_3words(value[0], value[1], basis);
}

uint32_t
hash_words__(const uint32_t p[], size_t n_words, uint32_t basis)
{
return hash_words_inline(p, n_words, basis);
}

uint32_t
hash_words64__(const uint64_t p[], size_t n_words, uint64_t basis)
{
return hash_words64_inline(p, n_words, basis);
}
208 changes: 195 additions & 13 deletions lib/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ hash_rot(uint32_t x, int k)
return (x << k) | (x >> (32 - k));
}

uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis);
uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);

static inline uint32_t hash_int(uint32_t x, uint32_t basis);
Expand Down Expand Up @@ -83,6 +82,9 @@ static inline uint32_t mhash_finish(uint32_t hash, uint32_t n_bytes)
return hash;
}

#if !(defined(__SSE4_2__) && defined(__x86_64__))
/* Mhash-based implemantation. */

static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
return mhash_add(hash, data);
Expand All @@ -93,23 +95,29 @@ static inline uint32_t hash_finish(uint32_t hash, uint32_t final)
return mhash_finish(hash, final);
}

static inline uint32_t hash_string(const char *s, uint32_t basis)
/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
* 'p' must be properly aligned.
*
* This is inlined for the compiler to have access to the 'n_words', which
* in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t p[], size_t n_words, uint32_t basis)
{
return hash_bytes(s, strlen(s), basis);
}
uint32_t hash;
size_t i;

static inline uint32_t hash_int(uint32_t x, uint32_t basis)
{
return hash_2words(x, basis);
hash = basis;
for (i = 0; i < n_words; i++) {
hash = hash_add(hash, p[i]);
}
return hash_finish(hash, n_words * 4);
}

/* An attempt at a useful 1-bit hash function. Has not been analyzed for
* quality. */
static inline uint32_t hash_boolean(bool x, uint32_t basis)
static inline uint32_t
hash_words64_inline(const uint64_t p[], size_t n_words, uint64_t basis)
{
const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */
const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */
return (x ? P0 : P1) ^ hash_rot(basis, 1);
return hash_words_inline((uint32_t *)p, n_words * 2,
(uint32_t)basis ^ basis >> 32);
}

static inline uint32_t hash_pointer(const void *p, uint32_t basis)
Expand Down Expand Up @@ -140,6 +148,180 @@ static inline uint32_t hash_uint64_basis(const uint64_t x,
{
return hash_3words((uint32_t)(x >> 32), (uint32_t)x, basis);
}

#else /* __SSE4_2__ && __x86_64__ */
#include <smmintrin.h>

static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
return _mm_crc32_u32(hash, data);
}

static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
{
/* The finishing multiplier 0x805204f3 has been experimentally
* derived to pass the testsuite hash tests. */
hash = _mm_crc32_u64(hash, final) * 0x805204f3;
return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */
}

/* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'.
* We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__.
*
* This is inlined for the compiler to have access to the 'n_words', which
* in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
{
const uint64_t *p = (const void *)p_;
uint64_t hash1 = basis;
uint64_t hash2 = 0;
uint64_t hash3 = n_words;
const uint32_t *endp = (const uint32_t *)p + n_words;
const uint64_t *limit = p + n_words / 2 - 3;

while (p <= limit) {
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u64(hash2, p[1]);
hash3 = _mm_crc32_u64(hash3, p[2]);
p += 3;
}
switch (endp - (const uint32_t *)p) {
case 1:
hash1 = _mm_crc32_u32(hash1, *(const uint32_t *)&p[0]);
break;
case 2:
hash1 = _mm_crc32_u64(hash1, p[0]);
break;
case 3:
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u32(hash2, *(const uint32_t *)&p[1]);
break;
case 4:
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u64(hash2, p[1]);
break;
case 5:
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u64(hash2, p[1]);
hash3 = _mm_crc32_u32(hash3, *(const uint32_t *)&p[2]);
break;
}
return hash_finish(hash1, hash2 << 32 | hash3);
}

/* A simpler version for 64-bit data.
* 'n_words' is the count of 64-bit words, basis is 64 bits. */
static inline uint32_t
hash_words64_inline(const uint64_t p[], size_t n_words, uint64_t basis)
{
uint64_t hash1 = (uint32_t)basis;
uint64_t hash2 = basis >> 32;
uint64_t hash3 = n_words;
const uint64_t *endp = p + n_words;
const uint64_t *limit = endp - 3;

while (p <= limit) {
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u64(hash2, p[1]);
hash3 = _mm_crc32_u64(hash3, p[2]);
p += 3;
}
switch (endp - p) {
case 1:
hash1 = _mm_crc32_u64(hash1, p[0]);
break;
case 2:
hash1 = _mm_crc32_u64(hash1, p[0]);
hash2 = _mm_crc32_u64(hash2, p[1]);
break;
}
return hash_finish(hash1, hash2 << 32 | hash3);
}

static inline uint32_t hash_uint64_basis(const uint64_t x,
const uint32_t basis)
{
/* '23' chosen to mix bits enough for the test-hash to pass. */
return hash_finish(_mm_crc32_u64(basis, x), 23);
}

static inline uint32_t hash_uint64(const uint64_t x)
{
return hash_uint64_basis(x, 0);
}

static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
return hash_uint64((uint64_t)y << 32 | x);
}

static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
return hash_uint64_basis((uint64_t) (uintptr_t) p, basis);
}
#endif

uint32_t hash_words__(const uint32_t p[], size_t n_words, uint32_t basis);
uint32_t hash_words64__(const uint64_t p[], size_t n_words, uint64_t basis);

/* Inline the larger hash functions only when 'n_words' is known to be
* compile-time constant. */
#if __GNUC__ >= 4
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
if (__builtin_constant_p(n_words)) {
return hash_words_inline(p, n_words, basis);
} else {
return hash_words__(p, n_words, basis);
}
}

static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint64_t basis)
{
if (__builtin_constant_p(n_words)) {
return hash_words64_inline(p, n_words, basis);
} else {
return hash_words64__(p, n_words, basis);
}
}

#else

static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
return hash_words__(p, n_words, basis);
}

static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint64_t basis)
{
return hash_words64__(p, n_words, basis);
}
#endif

static inline uint32_t hash_string(const char *s, uint32_t basis)
{
return hash_bytes(s, strlen(s), basis);
}

static inline uint32_t hash_int(uint32_t x, uint32_t basis)
{
return hash_2words(x, basis);
}

/* An attempt at a useful 1-bit hash function. Has not been analyzed for
* quality. */
static inline uint32_t hash_boolean(bool x, uint32_t basis)
{
const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */
const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */
return (x ? P0 : P1) ^ hash_rot(basis, 1);
}

#ifdef __cplusplus
}
#endif
Expand Down
11 changes: 9 additions & 2 deletions tests/test-hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,22 @@ check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t),

for (i = 0; i <= 96; i++) {
for (j = i + 1; j <= 96; j++) {
uint32_t in1[3], in2[3];
uint32_t out1, out2;
uint32_t in0[3], in1[3], in2[3];
uint32_t out0,out1, out2;
const int min_unique = 12;
const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1;

set_bit(in0, i);
set_bit(in1, i);
set_bit(in2, j);
out0 = hash(in0, 3, 0);
out1 = hash(in1, 3, 0);
out2 = hash(in2, 3, 0);

if (out0 != out1) {
printf("%s hash not the same for non-64 aligned data "
"%08"PRIx32" != %08"PRIx32"\n", name, out0, out1);
}
if ((out1 & unique_mask) == (out2 & unique_mask)) {
printf("%s has a partial collision:\n", name);
printf("hash(1 << %d) == %08"PRIx32"\n", i, out1);
Expand Down

0 comments on commit ff8eeab

Please sign in to comment.