Skip to content

Commit

Permalink
hash: Replace primary hash functions by murmurhash.
Browse files Browse the repository at this point in the history
murmurhash is faster than Jenkins and slightly higher quality, so switch to
it for hashing words.

The best timings I got for hashing for data lengths of the following
numbers of 32-bit words, in seconds per 1,000,000,000 hashes, were:

words     murmurhash      Jenkins hash
-----     ----------      ------------
   1           8.4              10.4
   2          10.3              10.3
   3          11.2              10.7
   4          12.6              18.0
   5          13.9              18.3
   6          15.2              18.7

In other words, murmurhash matches or outperforms Jenkins for all input
lengths other than exactly 3 32-bit words (12 bytes); the two tie at 2
words.  (It's understandable that Jenkins
would have a best case at 12 bytes, because Jenkins works in 12-byte
chunks.)  Even in the case where Jenkins is faster, it's only by 5%.  On
average within this data set, murmurhash is 15% faster, and for 4-word
input it is 30% faster.

We retain Jenkins for flow_hash_symmetric_l4() and flow_hash_fields(),
which are cases where the hash value is exposed externally.

This commit appears to improve "ovs-benchmark rate" results slightly by
a few hundred connections per second (under 1%), when used with an NVP
controller.

Signed-off-by: Ben Pfaff <[email protected]>
Acked-by: Ethan Jackson <[email protected]>
  • Loading branch information
blp committed Jan 22, 2013
1 parent cb8ca81 commit c49d1dd
Show file tree
Hide file tree
Showing 7 changed files with 259 additions and 169 deletions.
2 changes: 2 additions & 0 deletions lib/automake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ lib_libopenvswitch_a_SOURCES = \
lib/hmap.h \
lib/hmapx.c \
lib/hmapx.h \
lib/jhash.c \
lib/jhash.h \
lib/json.c \
lib/json.h \
lib/jsonrpc.c \
Expand Down
5 changes: 3 additions & 2 deletions lib/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "csum.h"
#include "dynamic-string.h"
#include "hash.h"
#include "jhash.h"
#include "match.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
Expand Down Expand Up @@ -722,7 +723,7 @@ flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
fields.tp_port = flow->tp_src ^ flow->tp_dst;
}
}
return hash_bytes(&fields, sizeof fields, basis);
return jhash_bytes(&fields, sizeof fields, basis);
}

/* Hashes the portions of 'flow' designated by 'fields'. */
Expand All @@ -733,7 +734,7 @@ flow_hash_fields(const struct flow *flow, enum nx_hash_fields fields,
switch (fields) {

case NX_HASH_FIELDS_ETH_SRC:
return hash_bytes(flow->dl_src, sizeof flow->dl_src, basis);
return jhash_bytes(flow->dl_src, sizeof flow->dl_src, basis);

case NX_HASH_FIELDS_SYMMETRIC_L4:
return flow_hash_symmetric_l4(flow, basis);
Expand Down
91 changes: 24 additions & 67 deletions lib/hash.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
* Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,95 +18,42 @@
#include <string.h>
#include "unaligned.h"

/* Computes a Jenkins lookup3-style hash over the 'n' 32-bit words starting at
 * 'p', seeded with 'basis'.  'p' must be properly aligned.
 *
 * Full groups of three words are folded in with hash_mix(); the final one to
 * three words are folded in with hash_final().  When 'n' is 0 no mixing is
 * done and the initial state is returned. */
uint32_t
hash_words(const uint32_t *p, size_t n, uint32_t basis)
{
    uint32_t a, b, c;

    a = b = c = 0xdeadbeef + (((uint32_t) n) << 2) + basis;

    /* Consume whole triples, always leaving 1 to 3 words (or 0 if the input
     * was empty) for the finalization step below. */
    for (; n > 3; n -= 3, p += 3) {
        a += p[0];
        b += p[1];
        c += p[2];
        hash_mix(&a, &b, &c);
    }

    if (n >= 1) {
        if (n == 3) {
            c += p[2];
        }
        if (n >= 2) {
            b += p[1];
        }
        a += p[0];
        hash_final(&a, &b, &c);
    }

    return c;
}

/* Returns the hash of 'a', 'b', and 'c'. */
uint32_t
hash_3words(uint32_t a, uint32_t b, uint32_t c)
{
a += 0xdeadbeef;
b += 0xdeadbeef;
c += 0xdeadbeef;
hash_final(&a, &b, &c);
return c;
}

/* Returns the hash of 'a' and 'b'. */
uint32_t
hash_2words(uint32_t a, uint32_t b)
{
return hash_3words(a, b, 0);
return mhash_finish(mhash_add(mhash_add(mhash_add(a, 0), b), c), 12);
}

/* Returns the hash of the 'n' bytes at 'p', starting from 'basis'. */
uint32_t
hash_bytes(const void *p_, size_t n, uint32_t basis)
{
const uint8_t *p = p_;
uint32_t a, b, c;

a = b = c = 0xdeadbeef + n + basis;
size_t orig_n = n;
uint32_t hash;

while (n >= 12) {
a += get_unaligned_u32((uint32_t *) p);
b += get_unaligned_u32((uint32_t *) (p + 4));
c += get_unaligned_u32((uint32_t *) (p + 8));
hash_mix(&a, &b, &c);
n -= 12;
p += 12;
hash = basis;
while (n >= 4) {
hash = mhash_add(hash, get_unaligned_u32((const uint32_t *) p));
n -= 4;
p += 4;
}

if (n) {
uint32_t tmp[3];
uint32_t tmp = 0;

tmp[0] = tmp[1] = tmp[2] = 0;
memcpy(tmp, p, n);
a += tmp[0];
b += tmp[1];
c += tmp[2];
hash_final(&a, &b, &c);
memcpy(&tmp, p, n);
hash = mhash_add__(hash, tmp);
}

return c;
return mhash_finish(hash, orig_n);
}

/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'.
* 'p' must be properly aligned. */
uint32_t
mhash_words(const uint32_t p[], size_t n_words, uint32_t basis)
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
uint32_t hash;
size_t i;
Expand All @@ -117,3 +64,13 @@ mhash_words(const uint32_t p[], size_t n_words, uint32_t basis)
}
return mhash_finish(hash, n_words * 4);
}

/* Returns a hash of the object representation of 'x', combined with 'basis'.
 * The bytes of 'x' are copied into two 32-bit words, which are then hashed
 * together with 'basis' by hash_3words(). */
uint32_t
hash_double(double x, uint32_t basis)
{
    uint32_t halves[2];
    BUILD_ASSERT_DECL(sizeof x == sizeof halves);

    /* Copy the bytes rather than casting pointers, to avoid aliasing
     * problems. */
    memcpy(halves, &x, sizeof halves);

    return hash_3words(halves[0], halves[1], basis);
}
134 changes: 46 additions & 88 deletions lib/hash.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
* Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,65 +26,69 @@
extern "C" {
#endif

/* This is the public domain lookup3 hash by Bob Jenkins from
* http://burtleburtle.net/bob/c/lookup3.c, modified for style. */

/* Returns 'x' rotated left by 'k' bits.
 *
 * Only the low 5 bits of 'k' are used, so every value of 'k' is well defined:
 * a rotation by 0 (or any multiple of 32) returns 'x' unchanged.  Both shift
 * counts are masked to 0..31 because shifting a 32-bit value by 32 or more,
 * or by a negative amount, is undefined behavior in C. */
static inline uint32_t
hash_rot(uint32_t x, int k)
{
    unsigned int left = (unsigned int) k & 31;
    unsigned int right = (32 - left) & 31;

    return (x << left) | (x >> right);
}

static inline void
hash_mix(uint32_t *a, uint32_t *b, uint32_t *c)
uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis);
uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);

static inline uint32_t hash_int(uint32_t x, uint32_t basis);
static inline uint32_t hash_2words(uint32_t, uint32_t);
uint32_t hash_3words(uint32_t, uint32_t, uint32_t);

static inline uint32_t hash_boolean(bool x, uint32_t basis);
uint32_t hash_double(double, uint32_t basis);

static inline uint32_t hash_pointer(const void *, uint32_t basis);
static inline uint32_t hash_string(const char *, uint32_t basis);

/* Murmurhash by Austin Appleby,
* from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
*
* The upstream license there says:
*
* // MurmurHash3 was written by Austin Appleby, and is placed in the public
* // domain. The author hereby disclaims copyright to this source code.
*
* See hash_words() for sample usage. */

static inline uint32_t mhash_add__(uint32_t hash, uint32_t data)
{
*a -= *c; *a ^= hash_rot(*c, 4); *c += *b;
*b -= *a; *b ^= hash_rot(*a, 6); *a += *c;
*c -= *b; *c ^= hash_rot(*b, 8); *b += *a;
*a -= *c; *a ^= hash_rot(*c, 16); *c += *b;
*b -= *a; *b ^= hash_rot(*a, 19); *a += *c;
*c -= *b; *c ^= hash_rot(*b, 4); *b += *a;
data *= 0xcc9e2d51;
data = hash_rot(data, 15);
data *= 0x1b873593;
return hash ^ data;
}

static inline void
hash_final(uint32_t *a, uint32_t *b, uint32_t *c)
static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
{
*c ^= *b; *c -= hash_rot(*b, 14);
*a ^= *c; *a -= hash_rot(*c, 11);
*b ^= *a; *b -= hash_rot(*a, 25);
*c ^= *b; *c -= hash_rot(*b, 16);
*a ^= *c; *a -= hash_rot(*c, 4);
*b ^= *a; *b -= hash_rot(*a, 14);
*c ^= *b; *c -= hash_rot(*b, 24);
hash = mhash_add__(hash, data);
hash = hash_rot(hash, 13);
return hash * 5 + 0xe6546b64;
}

uint32_t hash_words(const uint32_t *, size_t n_word, uint32_t basis);
uint32_t hash_2words(uint32_t, uint32_t);
uint32_t hash_3words(uint32_t, uint32_t, uint32_t);
uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
/* Finalizes a murmurhash computation: XORs 'n_bytes' into the running 'hash'
 * state, then applies the xor-shift / multiply steps below and returns the
 * resulting hash value. */
static inline uint32_t mhash_finish(uint32_t hash, size_t n_bytes)
{
    uint32_t h = (uint32_t) (hash ^ n_bytes);

    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}

/* Returns the hash of the null-terminated string 's', starting from 'basis'.
 * The terminating null byte is not included in the hashed data. */
static inline uint32_t hash_string(const char *s, uint32_t basis)
{
    size_t length = strlen(s);

    return hash_bytes(s, length, basis);
}

/* This is Bob Jenkins' integer hash from
* http://burtleburtle.net/bob/hash/integer.html, modified for style.
*
* This hash is faster than hash_2words(), but it isn't as good when 'basis' is
* important. So use this function for speed or hash_2words() for hash
* quality. */
static inline uint32_t hash_int(uint32_t x, uint32_t basis)
{
x -= x << 6;
x ^= x >> 17;
x -= x << 9;
x ^= x << 4;
x += basis;
x -= x << 3;
x ^= x << 10;
x ^= x >> 15;
return x;
return hash_2words(x, basis);
}

/* An attempt at a useful 1-bit hash function. Has not been analyzed for
Expand All @@ -96,15 +100,6 @@ static inline uint32_t hash_boolean(bool x, uint32_t basis)
return (x ? P0 : P1) ^ hash_rot(basis, 1);
}

/* Returns a hash of the object representation of 'x', combined with 'basis'.
 * The bytes of 'x' are copied into two 32-bit words, which are then hashed
 * together with 'basis' by hash_3words(). */
static inline uint32_t hash_double(double x, uint32_t basis)
{
    uint32_t halves[2];
    BUILD_ASSERT_DECL(sizeof x == sizeof halves);

    /* Copy the bytes rather than casting pointers, to avoid aliasing
     * problems. */
    memcpy(halves, &x, sizeof halves);

    return hash_3words(halves[0], halves[1], basis);
}

static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
/* Often pointers are hashed simply by casting to integer type, but that
Expand All @@ -118,46 +113,9 @@ static inline uint32_t hash_pointer(const void *p, uint32_t basis)
return hash_int((uint32_t) (uintptr_t) p, basis);
}

/* Murmurhash by Austin Appleby,
* from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
*
* The upstream license there says:
*
* // MurmurHash3 was written by Austin Appleby, and is placed in the public
* // domain. The author hereby disclaims copyright to this source code.
*
* Murmurhash is faster and higher-quality than the Jenkins lookup3 hash. When
* we have a little more familiarity with it, it's probably a good idea to
* switch all of OVS to it.
*
* For now, we have this implementation here for use by code that needs a hash
* that is convenient for use one word at a time, since the Jenkins lookup3
* hash works three words at a time.
*
* See mhash_words() for sample usage. */

uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis);

static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
data *= 0xcc9e2d51;
data = hash_rot(data, 15);
data *= 0x1b873593;

hash ^= data;
hash = hash_rot(hash, 13);
return hash * 5 + 0xe6546b64;
}

static inline uint32_t mhash_finish(uint32_t hash, size_t n_bytes)
{
hash ^= n_bytes;
hash ^= hash >> 16;
hash *= 0x85ebca6b;
hash ^= hash >> 13;
hash *= 0xc2b2ae35;
hash ^= hash >> 16;
return hash;
return mhash_finish(mhash_add(mhash_add(x, 0), y), 4);
}

#ifdef __cplusplus
Expand Down
Loading

0 comments on commit c49d1dd

Please sign in to comment.