forked from tildeleb/hashland
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added version of farm hash from deleted repo
- Loading branch information
Showing
12 changed files
with
895 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
As this is a highly derivative work, I have placed it under the same license as the original implementation: | ||
|
||
// Copyright (c) 2014 Damian Gryski | ||
// | ||
// Permission is hereby granted, free of charge, to any person obtaining a copy | ||
// of this software and associated documentation files (the "Software"), to deal | ||
// in the Software without restriction, including without limitation the rights | ||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
// copies of the Software, and to permit persons to whom the Software is | ||
// furnished to do so, subject to the following conditions: | ||
// | ||
// The above copyright notice and this permission notice shall be included in | ||
// all copies or substantial portions of the Software. | ||
// | ||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
// THE SOFTWARE. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
go-farm: Google's FarmHash in pure Go | ||
|
||
This is a (mechanical) translation of the non-SSE4/non-AESNI hash functions from FarmHash. | ||
|
||
For more information on FarmHash, please see https://github.com/google/farmhash (originally hosted at https://code.google.com/p/farmhash) | ||
|
||
For a cgo library wrapping the C++ one, please see https://github.com/dgryski/go-farmhash |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package farm | ||
|
||
// Some primes between 2^63 and 2^64 for various uses.
const (
	k0 uint64 = 0xc3a5c85c97cb3127
	k1 uint64 = 0xb492b66fbe98f273
	k2 uint64 = 0x9ae16a3b2f90404f
)

// Magic numbers for 32-bit hashing. Copied from Murmur3.
const (
	c1 uint32 = 0xcc9e2d51
	c2 uint32 = 0x1b873593
)
|
||
// fmix is a 32-bit to 32-bit integer hash copied from Murmur3.
// It alternates xor-with-right-shift steps (16, 13, 16 bits) with two
// multiplications; all arithmetic wraps modulo 2^32.
func fmix(h uint32) uint32 {
	const (
		mul1 uint32 = 0x85ebca6b
		mul2 uint32 = 0xc2b2ae35
	)
	h = (h ^ (h >> 16)) * mul1
	h = (h ^ (h >> 13)) * mul2
	return h ^ (h >> 16)
}
|
||
func mur(a, h uint32) uint32 { | ||
// Helper from Murmur3 for combining two 32-bit values. | ||
a *= c1 | ||
a = rotate32(a, 17) | ||
a *= c2 | ||
h ^= a | ||
h = rotate32(h, 19) | ||
return h*5 + 0xe6546b64 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package farm | ||
|
||
import "testing" | ||
|
||
func BenchmarkShort(b *testing.B) { | ||
|
||
buf := make([]byte, 32) | ||
|
||
for i := 0; i < b.N; i++ { | ||
Hash32(buf) | ||
} | ||
} | ||
|
||
func BenchmarkHash64(b *testing.B) { | ||
|
||
buf := make([]byte, 2048) | ||
|
||
for i := 0; i < b.N; i++ { | ||
Hash64(buf) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package farm | ||
|
||
import "testing" | ||
|
||
// golden32 holds the expected Hash32 value for each input string.
// Generated from the C++ code.
var golden32 = []struct {
	out uint32 // expected 32-bit hash
	in  string // input; hashed as its raw bytes
}{
	{0x3c973d4d, "a"},
	{0x417330fd, "ab"},
	{0x2f635ec7, "abc"},
	{0x98b51e95, "abcd"},
	{0xa3f366ac, "abcde"},
	{0x0f813aa4, "abcdef"},
	{0x21deb6d7, "abcdefg"},
	{0xfd7ec8b9, "abcdefgh"},
	{0x6f98dc86, "abcdefghi"},
	{0xf2669361, "abcdefghij"},
	{0xe273108f, "Discard medicine more than two years old."},
	{0xf585dfc4, "He who has a shady past knows that nice guys finish last."},
	{0x363394d1, "I wouldn't marry him with a ten foot pole."},
	{0x7613810f, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
	{0x2cc30bb7, "The days of the digital watch are numbered. -Tom Stoppard"},
	{0x322984d9, "Nepal premier won't resign."},
	{0xa5812ac8, "For every action there is an equal and opposite government program."},
	{0x1090d244, "His money is twice tainted: 'taint yours and 'taint mine."},
	{0xff16c9e6, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
	{0xcc3d0ff2, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
	{0xc6246b8d, "size: a.out: bad magic"},
	{0xd225e92e, "The major problem is with sendmail. -Mark Horton"},
	{0x1b8db5d0, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
	{0x4fda5f07, "If the enemy is within range, then so are you."},
	{0x2e18e880, "It's well we cannot hear the screams/That we create in others' dreams."},
	{0xd07de88f, "You remind me of a TV show, but that's all right: I watch it anyway."},
	{0x221694e4, "C is as portable as Stonehedge!!"},
	{0xe2053c2c, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
	{0x11c493bb, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
	{0x0819a4e8, "How can you write a big system without C++? -Paul Glick"},
}
|
||
func TestHash32(t *testing.T) { | ||
|
||
for _, tt := range golden32 { | ||
if h := Hash32([]byte(tt.in)); h != tt.out { | ||
t.Errorf("Hash32(%q)=%#08x (len=%d), want %#08x", tt.in, h, len(tt.in), tt.out) | ||
} | ||
} | ||
|
||
} | ||
|
||
// golden64 holds the expected Hash64 value for each input string.
// Generated from the C++ code.
var golden64 = []struct {
	out uint64 // expected 64-bit hash
	in  string // input; hashed as its raw bytes
}{
	{0xb3454265b6df75e3, "a"},
	{0xaa8d6e5242ada51e, "ab"},
	{0x24a5b3a074e7f369, "abc"},
	{0x1a5502de4a1f8101, "abcd"},
	{0xc22f4663e54e04d4, "abcde"},
	{0xc329379e6a03c2cd, "abcdef"},
	{0x3c40c92b1ccb7355, "abcdefg"},
	{0xfee9d22990c82909, "abcdefgh"},
	{0x332c8ed4dae5ba42, "abcdefghi"},
	{0x8a3abb6a5f3fb7fb, "abcdefghij"},
	{0xe8f89ab6df9bdd25, "Discard medicine more than two years old."},
	{0x786d7e1987023ca9, "He who has a shady past knows that nice guys finish last."},
	{0xa9961670ce2a46d9, "I wouldn't marry him with a ten foot pole."},
	{0x5d14f96c18fe3d5e, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
	{0x2a578b80bb82147c, "The days of the digital watch are numbered. -Tom Stoppard"},
	{0x8eb3808d1ccfc779, "Nepal premier won't resign."},
	{0xb8d104d1135bbc60, "For every action there is an equal and opposite government program."},
	{0xec8848fd3b266c10, "His money is twice tainted: 'taint yours and 'taint mine."},
	{0xfe6aa49558b3cbe0, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
	{0xa104da6f2f575514, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
	{0x80d73b843ba57db8, "size: a.out: bad magic"},
	{0xc2f8db8624fefc0e, "The major problem is with sendmail. -Mark Horton"},
	{0xa58e3702193e4631, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
	{0xbdd69b798d6ba37a, "If the enemy is within range, then so are you."},
	{0x61751a90ec7d71bf, "It's well we cannot hear the screams/That we create in others' dreams."},
	{0x836f5ff0c2a7dfaa, "You remind me of a TV show, but that's all right: I watch it anyway."},
	{0xb944f8a16261e414, "C is as portable as Stonehedge!!"},
	{0x0096d012f9bccb3e, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
	{0x8fe4429d157f60f5, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
	{0x5a0a6efd52e84e2a, "How can you write a big system without C++? -Paul Glick"},
}
|
||
func TestHash64(t *testing.T) { | ||
for _, tt := range golden64 { | ||
if h := Hash64([]byte(tt.in)); h != tt.out { | ||
t.Errorf("Hash64(%q)=%#016x, (len=%d) want %#016x", tt.in, h, len(tt.in), tt.out) | ||
} | ||
|
||
} | ||
} | ||
|
||
func TestFingerprint128(t *testing.T) { | ||
|
||
var tests = []struct { | ||
hi, lo uint64 | ||
in string | ||
}{ | ||
{9054869399155703984, 8033370924408288235, "abcdef"}, | ||
{352412539875473798, 3547689611939963773, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, | ||
{14320160249354795919, 10805939018293574989, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall RuleAAAAAAAAAAAAAAAA"}, | ||
} | ||
|
||
for _, tt := range tests { | ||
if lo, hi := Fingerprint128([]byte(tt.in)); hi != tt.hi || lo != tt.lo { | ||
t.Errorf("Fingerprint128(%q)=(%#016x, %#016x) (len=%d) want (%#016x, %#016x)", tt.in, lo, hi, len(tt.in), tt.lo, tt.hi) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
package farm | ||
|
||
// This file provides a 32-bit hash equivalent to CityHash32 (v1.1.1) | ||
// and a 128-bit hash equivalent to CityHash128 (v1.1.1). It also provides | ||
// a seeded 32-bit hash function similar to CityHash32. | ||
|
||
func hash32Len13to24Seed(s []byte, seed uint32) uint32 { | ||
slen := len(s) | ||
a := fetch32(s, -4+(slen>>1)) | ||
b := fetch32(s, 4) | ||
c := fetch32(s, slen-8) | ||
d := fetch32(s, (slen >> 1)) | ||
e := fetch32(s, 0) | ||
f := fetch32(s, slen-4) | ||
h := d*c1 + uint32(slen) + seed | ||
a = rotate32(a, 12) + f | ||
h = mur(c, h) + a | ||
a = rotate32(a, 3) + c | ||
h = mur(e, h) + a | ||
a = rotate32(a+f, 12) + d | ||
h = mur(b^seed, h) + a | ||
return fmix(h) | ||
} | ||
|
||
func hash32Len0to4(s []byte, seed uint32) uint32 { | ||
slen := len(s) | ||
b := seed | ||
c := uint32(9) | ||
for i := 0; i < slen; i++ { | ||
v := int8(s[i]) | ||
b = uint32(b*c1) + uint32(v) | ||
c ^= b | ||
} | ||
return fmix(mur(b, mur(uint32(slen), c))) | ||
} | ||
|
||
func hash128to64(x uint128) uint64 { | ||
// Murmur-inspired hashing. | ||
const kMul uint64 = 0x9ddfea08eb382d69 | ||
a := (x.lo ^ x.hi) * kMul | ||
a ^= (a >> 47) | ||
b := (x.hi ^ a) * kMul | ||
b ^= (b >> 47) | ||
b *= kMul | ||
return b | ||
} | ||
|
||
// uint128 is a 128-bit value stored as two 64-bit halves.
// The field order (lo first, then hi) matters: positional composite
// literals of this type rely on it.
type uint128 struct {
	lo, hi uint64
}
|
||
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings | ||
// of any length representable in signed long. Based on City and Murmur. | ||
func cityMurmur(s []byte, seed uint128) uint128 { | ||
slen := len(s) | ||
a := seed.lo | ||
b := seed.hi | ||
c := uint64(0) | ||
d := uint64(0) | ||
l := slen - 16 | ||
if l <= 0 { // len <= 16 | ||
a = shiftMix(a*k1) * k1 | ||
c = b*k1 + hashLen0to16(s) | ||
if slen >= 8 { | ||
d = shiftMix(a + fetch64(s, 0)) | ||
} else { | ||
d = shiftMix(a + c) | ||
} | ||
} else { // len > 16 | ||
c = hashLen16(fetch64(s, int(slen-8))+k1, a) | ||
d = hashLen16(b+uint64(slen), c+fetch64(s, int(slen-16))) | ||
a += d | ||
for { | ||
a ^= shiftMix(fetch64(s, 0)*k1) * k1 | ||
a *= k1 | ||
b ^= a | ||
c ^= shiftMix(fetch64(s, 8)*k1) * k1 | ||
c *= k1 | ||
d ^= c | ||
s = s[16:] | ||
l -= 16 | ||
if l <= 0 { | ||
break | ||
} | ||
} | ||
} | ||
a = hashLen16(a, c) | ||
b = hashLen16(d, b) | ||
return uint128{a ^ b, hashLen16(b, a)} | ||
} | ||
|
||
func cityHash128WithSeed(s []byte, seed uint128) uint128 { | ||
slen := len(s) | ||
if slen < 128 { | ||
return cityMurmur(s, seed) | ||
} | ||
|
||
endIdx := ((slen - 1) / 128) * 128 | ||
lastBlockIdx := endIdx + ((slen - 1) & 127) - 127 | ||
last := s[lastBlockIdx:] | ||
|
||
// We expect len >= 128 to be the common case. Keep 56 bytes of state: | ||
// v, w, x, y, and z. | ||
var v1, v2 uint64 | ||
var w1, w2 uint64 | ||
x := seed.lo | ||
y := seed.hi | ||
z := uint64(slen) * k1 | ||
v1 = rotate64(y^k1, 49)*k1 + fetch64(s, 0) | ||
v2 = rotate64(v1, 42)*k1 + fetch64(s, 8) | ||
w1 = rotate64(y+z, 35)*k1 + x | ||
w2 = rotate64(x+fetch64(s, 88), 53) * k1 | ||
|
||
// This is the same inner loop as CityHash64(), manually unrolled. | ||
for { | ||
x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1 | ||
y = rotate64(y+v2+fetch64(s, 48), 42) * k1 | ||
x ^= w2 | ||
y += v1 + fetch64(s, 40) | ||
z = rotate64(z+w1, 33) * k1 | ||
v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1) | ||
w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16)) | ||
z, x = x, z | ||
s = s[64:] | ||
x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1 | ||
y = rotate64(y+v2+fetch64(s, 48), 42) * k1 | ||
x ^= w2 | ||
y += v1 + fetch64(s, 40) | ||
z = rotate64(z+w1, 33) * k1 | ||
v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1) | ||
w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16)) | ||
z, x = x, z | ||
s = s[64:] | ||
slen -= 128 | ||
if slen < 128 { | ||
break | ||
} | ||
} | ||
x += rotate64(v1+z, 49) * k0 | ||
y = y*k0 + rotate64(w2, 37) | ||
z = z*k0 + rotate64(w1, 27) | ||
w1 *= 9 | ||
v1 *= k0 | ||
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. | ||
for tail_done := 0; tail_done < slen; { | ||
tail_done += 32 | ||
y = rotate64(x+y, 42)*k0 + v2 | ||
w1 += fetch64(last, 128-tail_done+16) | ||
x = x*k0 + w1 | ||
z += w2 + fetch64(last, 128-tail_done) | ||
w2 += v1 | ||
v1, v2 = weakHashLen32WithSeeds(last[128-tail_done:], v1+z, v2) | ||
v1 *= k0 | ||
} | ||
|
||
// At this point our 56 bytes of state should contain more than | ||
// enough information for a strong 128-bit hash. We use two | ||
// different 56-byte-to-8-byte hashes to get a 16-byte final result. | ||
x = hashLen16(x, v1) | ||
y = hashLen16(y+z, w1) | ||
return uint128{hashLen16(x+v2, w2) + y, | ||
hashLen16(x+w2, y+v2)} | ||
} | ||
|
||
func cityHash128(s []byte) uint128 { | ||
slen := len(s) | ||
if slen >= 16 { | ||
return cityHash128WithSeed(s[16:], uint128{fetch64(s, 0), fetch64(s, 8) + k0}) | ||
} | ||
return cityHash128WithSeed(s, uint128{k0, k1}) | ||
} | ||
|
||
func Fingerprint128(s []byte) (lo, hi uint64) { | ||
h := cityHash128(s) | ||
return h.lo, h.hi | ||
} | ||
|
||
func Fingerprint64(s []byte) uint64 { | ||
return Hash64(s) | ||
} | ||
|
||
func Fingerprint32(s []byte) uint32 { | ||
return Hash32(s) | ||
} | ||
|
||
func Hash128(s []byte) (lo, hi uint64) { | ||
return Fingerprint128(s) | ||
} | ||
|
||
func Hash128WithSeed(s []byte, seed0, seed1 uint64) (lo, hi uint64) { | ||
h := cityHash128WithSeed(s, uint128{seed0, seed1}) | ||
return h.lo, h.hi | ||
} |
Oops, something went wrong.