Skip to content

Commit

Permalink
added version of farm hash from deleted repo
Browse files Browse the repository at this point in the history
  • Loading branch information
tildeleb committed Mar 20, 2021
1 parent be19204 commit 5ea0485
Show file tree
Hide file tree
Showing 12 changed files with 895 additions and 1 deletion.
22 changes: 22 additions & 0 deletions farm/COPYING
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
As this is a highly derivative work, I have placed it under the same license as the original implementation:

// Copyright (c) 2014 Damian Gryski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

7 changes: 7 additions & 0 deletions farm/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
go-farm: Google's FarmHash in pure Go

This is a (mechanical) translation of the non-SSE4/non-AESNI hash functions from FarmHash.

For more information on FarmHash, please see https://github.com/google/farmhash (formerly hosted at https://code.google.com/p/farmhash)

For a cgo library wrapping the C++ one, please see https://github.com/dgryski/go-farmhash
30 changes: 30 additions & 0 deletions farm/basics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package farm

// 64-bit multipliers shared by the hash routines: some primes between
// 2^63 and 2^64 for various uses.
const (
	k0 uint64 = 0xc3a5c85c97cb3127
	k1 uint64 = 0xb492b66fbe98f273
	k2 uint64 = 0x9ae16a3b2f90404f
)

// Magic numbers for 32-bit hashing. Copied from Murmur3.
const (
	c1 uint32 = 0xcc9e2d51
	c2 uint32 = 0x1b873593
)

// fmix is the 32-bit to 32-bit integer hash copied from Murmur3.
// It alternates xor-shift steps with multiplications by two fixed odd
// constants and returns the mixed value; fmix(0) is 0.
func fmix(h uint32) uint32 {
	const (
		mulFirst  uint32 = 0x85ebca6b
		mulSecond uint32 = 0xc2b2ae35
	)
	h = (h ^ (h >> 16)) * mulFirst
	h = (h ^ (h >> 13)) * mulSecond
	return h ^ (h >> 16)
}

func mur(a, h uint32) uint32 {
// Helper from Murmur3 for combining two 32-bit values.
a *= c1
a = rotate32(a, 17)
a *= c2
h ^= a
h = rotate32(h, 19)
return h*5 + 0xe6546b64
}
21 changes: 21 additions & 0 deletions farm/bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package farm

import "testing"

// BenchmarkShort times Hash32 on a 32-byte, zero-filled input.
func BenchmarkShort(b *testing.B) {
	input := make([]byte, 32)

	for remaining := b.N; remaining > 0; remaining-- {
		Hash32(input)
	}
}

// BenchmarkHash64 times Hash64 on a 2048-byte, zero-filled input.
func BenchmarkHash64(b *testing.B) {
	input := make([]byte, 2048)

	for remaining := b.N; remaining > 0; remaining-- {
		Hash64(input)
	}
}
114 changes: 114 additions & 0 deletions farm/farm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package farm

import "testing"

// golden32 is the table of reference vectors checked by TestHash32: each
// entry pairs an input string with the 32-bit value Hash32 must return for it.
// Generated from the C++ code.
var golden32 = []struct {
out uint32 // expected Hash32 result for in
in string // input, hashed as its raw bytes
}{
{0x3c973d4d, "a"},
{0x417330fd, "ab"},
{0x2f635ec7, "abc"},
{0x98b51e95, "abcd"},
{0xa3f366ac, "abcde"},
{0x0f813aa4, "abcdef"},
{0x21deb6d7, "abcdefg"},
{0xfd7ec8b9, "abcdefgh"},
{0x6f98dc86, "abcdefghi"},
{0xf2669361, "abcdefghij"},
{0xe273108f, "Discard medicine more than two years old."},
{0xf585dfc4, "He who has a shady past knows that nice guys finish last."},
{0x363394d1, "I wouldn't marry him with a ten foot pole."},
{0x7613810f, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
{0x2cc30bb7, "The days of the digital watch are numbered. -Tom Stoppard"},
{0x322984d9, "Nepal premier won't resign."},
{0xa5812ac8, "For every action there is an equal and opposite government program."},
{0x1090d244, "His money is twice tainted: 'taint yours and 'taint mine."},
{0xff16c9e6, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
{0xcc3d0ff2, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
{0xc6246b8d, "size: a.out: bad magic"},
{0xd225e92e, "The major problem is with sendmail. -Mark Horton"},
{0x1b8db5d0, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
{0x4fda5f07, "If the enemy is within range, then so are you."},
{0x2e18e880, "It's well we cannot hear the screams/That we create in others' dreams."},
{0xd07de88f, "You remind me of a TV show, but that's all right: I watch it anyway."},
{0x221694e4, "C is as portable as Stonehedge!!"},
{0xe2053c2c, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
{0x11c493bb, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
{0x0819a4e8, "How can you write a big system without C++? -Paul Glick"},
}

// TestHash32 checks Hash32 against every reference vector in golden32 and
// reports each mismatch without stopping at the first one.
func TestHash32(t *testing.T) {
	for _, tc := range golden32 {
		got := Hash32([]byte(tc.in))
		if got == tc.out {
			continue
		}
		t.Errorf("Hash32(%q)=%#08x (len=%d), want %#08x", tc.in, got, len(tc.in), tc.out)
	}
}

// golden64 is the table of reference vectors checked by TestHash64: each
// entry pairs an input string with the 64-bit value Hash64 must return for it.
// Generated from the C++ code.
var golden64 = []struct {
out uint64 // expected Hash64 result for in
in string // input, hashed as its raw bytes
}{
{0xb3454265b6df75e3, "a"},
{0xaa8d6e5242ada51e, "ab"},
{0x24a5b3a074e7f369, "abc"},
{0x1a5502de4a1f8101, "abcd"},
{0xc22f4663e54e04d4, "abcde"},
{0xc329379e6a03c2cd, "abcdef"},
{0x3c40c92b1ccb7355, "abcdefg"},
{0xfee9d22990c82909, "abcdefgh"},
{0x332c8ed4dae5ba42, "abcdefghi"},
{0x8a3abb6a5f3fb7fb, "abcdefghij"},
{0xe8f89ab6df9bdd25, "Discard medicine more than two years old."},
{0x786d7e1987023ca9, "He who has a shady past knows that nice guys finish last."},
{0xa9961670ce2a46d9, "I wouldn't marry him with a ten foot pole."},
{0x5d14f96c18fe3d5e, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
{0x2a578b80bb82147c, "The days of the digital watch are numbered. -Tom Stoppard"},
{0x8eb3808d1ccfc779, "Nepal premier won't resign."},
{0xb8d104d1135bbc60, "For every action there is an equal and opposite government program."},
{0xec8848fd3b266c10, "His money is twice tainted: 'taint yours and 'taint mine."},
{0xfe6aa49558b3cbe0, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
{0xa104da6f2f575514, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
{0x80d73b843ba57db8, "size: a.out: bad magic"},
{0xc2f8db8624fefc0e, "The major problem is with sendmail. -Mark Horton"},
{0xa58e3702193e4631, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
{0xbdd69b798d6ba37a, "If the enemy is within range, then so are you."},
{0x61751a90ec7d71bf, "It's well we cannot hear the screams/That we create in others' dreams."},
{0x836f5ff0c2a7dfaa, "You remind me of a TV show, but that's all right: I watch it anyway."},
{0xb944f8a16261e414, "C is as portable as Stonehedge!!"},
// NOTE(review): this literal has 14 hex digits, i.e. the top byte of the
// expected value is zero — confirm against the C++ output if it ever fails.
{0x96d012f9bccb3e, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
{0x8fe4429d157f60f5, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
{0x5a0a6efd52e84e2a, "How can you write a big system without C++? -Paul Glick"},
}

// TestHash64 checks Hash64 against every reference vector in golden64 and
// reports each mismatch without stopping at the first one.
func TestHash64(t *testing.T) {
	for _, tc := range golden64 {
		got := Hash64([]byte(tc.in))
		if got == tc.out {
			continue
		}
		t.Errorf("Hash64(%q)=%#016x, (len=%d) want %#016x", tc.in, got, len(tc.in), tc.out)
	}
}

// TestFingerprint128 checks both 64-bit halves returned by Fingerprint128
// against a small table of expected values. Note that the table stores the
// high half first, while Fingerprint128 returns (lo, hi).
func TestFingerprint128(t *testing.T) {
	type vector struct {
		hi, lo uint64
		in     string
	}

	cases := []vector{
		{9054869399155703984, 8033370924408288235, "abcdef"},
		{352412539875473798, 3547689611939963773, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
		{14320160249354795919, 10805939018293574989, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall RuleAAAAAAAAAAAAAAAA"},
	}

	for _, tc := range cases {
		lo, hi := Fingerprint128([]byte(tc.in))
		if lo == tc.lo && hi == tc.hi {
			continue
		}
		t.Errorf("Fingerprint128(%q)=(%#016x, %#016x) (len=%d) want (%#016x, %#016x)", tc.in, lo, hi, len(tc.in), tc.lo, tc.hi)
	}
}
194 changes: 194 additions & 0 deletions farm/farmhashcc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package farm

// This file provides a 32-bit hash equivalent to CityHash32 (v1.1.1)
// and a 128-bit hash equivalent to CityHash128 (v1.1.1). It also provides
// a seeded 32-bit hash function similar to CityHash32.

// hash32Len13to24Seed computes a seeded 32-bit hash of s. It reads six
// 32-bit words via fetch32: at offsets 0 and 4, around the midpoint, and in
// the last eight bytes. It folds them together with mur and finishes with
// fmix. Per its name it is meant for inputs of 13 to 24 bytes; the function
// itself does not check the length.
func hash32Len13to24Seed(s []byte, seed uint32) uint32 {
	n := len(s)
	half := n >> 1

	acc := fetch32(s, half-4)
	w4 := fetch32(s, 4)
	wTail8 := fetch32(s, n-8)
	wHalf := fetch32(s, half)
	w0 := fetch32(s, 0)
	wTail4 := fetch32(s, n-4)

	hash := wHalf*c1 + uint32(n) + seed

	// Three rounds: update the running accumulator, then fold one more
	// word into the hash with mur and add the accumulator.
	acc = rotate32(acc, 12) + wTail4
	hash = mur(wTail8, hash) + acc

	acc = rotate32(acc, 3) + wTail8
	hash = mur(w0, hash) + acc

	acc = rotate32(acc+wTail4, 12) + wHalf
	hash = mur(w4^seed, hash) + acc

	return fmix(hash)
}

// hash32Len0to4 computes a seeded 32-bit hash of s one byte at a time.
// Per its name it is meant for inputs of 0 to 4 bytes, although the loop
// itself handles any length.
func hash32Len0to4(s []byte, seed uint32) uint32 {
	acc, mix := seed, uint32(9)
	for _, ch := range s {
		// Each byte is treated as a signed 8-bit value, so bytes >= 0x80
		// are sign-extended before being added.
		acc = acc*c1 + uint32(int8(ch))
		mix ^= acc
	}
	return fmix(mur(acc, mur(uint32(len(s)), mix)))
}

// hash128to64 reduces a 128-bit value to 64 bits with Murmur-inspired
// mixing: two multiply/xor-shift rounds over the halves, followed by a
// final multiplication.
func hash128to64(x uint128) uint64 {
	const mul uint64 = 0x9ddfea08eb382d69

	first := (x.lo ^ x.hi) * mul
	first ^= first >> 47

	second := (x.hi ^ first) * mul
	second ^= second >> 47

	return second * mul
}

// uint128 is a 128-bit value held as two 64-bit halves. The field order
// (lo first, then hi) matters: the package builds values with positional
// composite literals such as uint128{lo, hi}.
type uint128 struct {
	lo, hi uint64
}

// cityMurmur is a subroutine for CityHash128(): cityHash128WithSeed calls it
// for inputs shorter than 128 bytes. It returns a decent 128-bit hash of s
// mixed with seed. Based on City and Murmur.
func cityMurmur(s []byte, seed uint128) uint128 {
	n := len(s)
	a, b := seed.lo, seed.hi
	var c, d uint64

	if remaining := n - 16; remaining > 0 {
		// More than 16 bytes: seed c and d from the last 16 bytes, then
		// consume the input from the front, 16 bytes per iteration.
		c = hashLen16(fetch64(s, n-8)+k1, a)
		d = hashLen16(b+uint64(n), c+fetch64(s, n-16))
		a += d
		// remaining is > 0 on entry, so the body runs at least once.
		for ; remaining > 0; remaining -= 16 {
			a ^= shiftMix(fetch64(s, 0)*k1) * k1
			a *= k1
			b ^= a
			c ^= shiftMix(fetch64(s, 8)*k1) * k1
			c *= k1
			d ^= c
			s = s[16:]
		}
	} else {
		// At most 16 bytes.
		a = shiftMix(a*k1) * k1
		c = b*k1 + hashLen0to16(s)
		if n >= 8 {
			d = shiftMix(a + fetch64(s, 0))
		} else {
			d = shiftMix(a + c)
		}
	}

	a = hashLen16(a, c)
	b = hashLen16(d, b)
	return uint128{a ^ b, hashLen16(b, a)}
}

// cityHash128WithSeed returns a 128-bit hash of s mixed with seed.
//
// Inputs shorter than 128 bytes are delegated to cityMurmur. Longer inputs
// are consumed from the front 128 bytes per loop iteration (two unrolled
// 64-byte rounds); afterwards up to four 32-byte chunks taken from the end
// of the input are mixed in, and the state is reduced to two 64-bit words
// with hashLen16.
func cityHash128WithSeed(s []byte, seed uint128) uint128 {
slen := len(s)
if slen < 128 {
return cityMurmur(s, seed)
}

// endIdx + ((slen-1)&127) equals slen-1, so lastBlockIdx is slen-128 and
// `last` is the final 128 bytes of the input. It is captured here because
// s is re-sliced as the main loop advances.
endIdx := ((slen - 1) / 128) * 128
lastBlockIdx := endIdx + ((slen - 1) & 127) - 127
last := s[lastBlockIdx:]

// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
var v1, v2 uint64
var w1, w2 uint64
x := seed.lo
y := seed.hi
z := uint64(slen) * k1
v1 = rotate64(y^k1, 49)*k1 + fetch64(s, 0)
v2 = rotate64(v1, 42)*k1 + fetch64(s, 8)
w1 = rotate64(y+z, 35)*k1 + x
w2 = rotate64(x+fetch64(s, 88), 53) * k1

// This is the same inner loop as CityHash64(), manually unrolled.
// Each iteration consumes 128 bytes (two 64-byte rounds) and runs at least
// once, since slen >= 128 here.
for {
x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1
y = rotate64(y+v2+fetch64(s, 48), 42) * k1
x ^= w2
y += v1 + fetch64(s, 40)
z = rotate64(z+w1, 33) * k1
v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1)
w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16))
z, x = x, z
s = s[64:]
// Second 64-byte round: identical to the one above.
x = rotate64(x+y+v1+fetch64(s, 8), 37) * k1
y = rotate64(y+v2+fetch64(s, 48), 42) * k1
x ^= w2
y += v1 + fetch64(s, 40)
z = rotate64(z+w1, 33) * k1
v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1)
w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s, 16))
z, x = x, z
s = s[64:]
slen -= 128
if slen < 128 {
break
}
}
// From here on slen is the number of bytes the main loop left unconsumed
// (0..127).
x += rotate64(v1+z, 49) * k0
y = y*k0 + rotate64(w2, 37)
z = z*k0 + rotate64(w1, 27)
w1 *= 9
v1 *= k0
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
// Chunks are addressed backwards from the end of `last` (offset
// 128-tail_done), so they may overlap bytes the main loop already consumed.
for tail_done := 0; tail_done < slen; {
tail_done += 32
y = rotate64(x+y, 42)*k0 + v2
w1 += fetch64(last, 128-tail_done+16)
x = x*k0 + w1
z += w2 + fetch64(last, 128-tail_done)
w2 += v1
v1, v2 = weakHashLen32WithSeeds(last[128-tail_done:], v1+z, v2)
v1 *= k0
}

// At this point our 56 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = hashLen16(x, v1)
y = hashLen16(y+z, w1)
// Positional literal: the first expression is lo, the second is hi.
return uint128{hashLen16(x+v2, w2) + y,
hashLen16(x+w2, y+v2)}
}

// cityHash128 returns the unseeded 128-bit hash of s. Inputs of at least
// 16 bytes use their first 16 bytes to build the seed (the second word is
// offset by k0) and hash the remainder; shorter inputs are hashed whole
// with the fixed seed {k0, k1}.
func cityHash128(s []byte) uint128 {
	if len(s) < 16 {
		return cityHash128WithSeed(s, uint128{k0, k1})
	}
	seed := uint128{fetch64(s, 0), fetch64(s, 8) + k0}
	return cityHash128WithSeed(s[16:], seed)
}

// Fingerprint128 returns the 128-bit hash of s computed by cityHash128,
// split into its low and high 64-bit halves (low half first).
func Fingerprint128(s []byte) (lo, hi uint64) {
	sum := cityHash128(s)
	return sum.lo, sum.hi
}

// Fingerprint64 returns the 64-bit hash of s. It is an alias for Hash64.
func Fingerprint64(s []byte) uint64 {
	sum := Hash64(s)
	return sum
}

// Fingerprint32 returns the 32-bit hash of s. It is an alias for Hash32.
func Fingerprint32(s []byte) uint32 {
	sum := Hash32(s)
	return sum
}

// Hash128 returns the 128-bit hash of s as its low and high 64-bit halves.
// It is an alias for Fingerprint128.
func Hash128(s []byte) (lo, hi uint64) {
	lo, hi = Fingerprint128(s)
	return lo, hi
}

func Hash128WithSeed(s []byte, seed0, seed1 uint64) (lo, hi uint64) {
h := cityHash128WithSeed(s, uint128{seed0, seed1})
return h.lo, h.hi
}
Loading

0 comments on commit 5ea0485

Please sign in to comment.