Skip to content

Commit

Permalink
all tests work again, added new test, TestL, all 3 byte keys
Browse files Browse the repository at this point in the history
  • Loading branch information
tildeleb committed Oct 1, 2017
1 parent 3dda215 commit 82388a7
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 11 deletions.
4 changes: 2 additions & 2 deletions hashf/hashf.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ var HashFunctions = map[string]HashFunction{
// "skein256xor", "skein256low", "skein256hi", "sha1", "keccak160l",
// "siphash64", "siphash128a", "siphash128b",
// "keccak644", "keccak648" "keccak160",
var TestHashFunctions = []string{"nullhash", "perfecthash",
var TestHashFunctions = []string{"nullhash", //"perfecthash",
"aeshash64", "gomap64", "j364", "j264", "murmur364",
"siphash64",
"siphash64pg",
Expand Down Expand Up @@ -248,7 +248,7 @@ func Hashf(k []byte, seed uint64) (h uint64) {
*/
switch Hf2 {
case "perfecthash":
//fmt.Printf("k=%v\n", k)
fmt.Printf("k=%v\n", k)
//h = uint64(k[0])<<56 | uint64(k[1])<<48 | uint64(k[2])<<40 | uint64(k[3])<<32 | uint64(k[4])<<24 | uint64(k[5])<<16 | uint64(k[6])<<8 | uint64(k[7])<<0
//h = uint64(k[7])<<56 | uint64(k[6])<<48 | uint64(k[5])<<40 | uint64(k[4])<<32 | uint64(k[3])<<24 | uint64(k[2])<<16 | uint64(k[1])<<8 | uint64(k[0])<<0
h = uint64(k[3])<<24 | uint64(k[2])<<16 | uint64(k[1])<<8 | uint64(k[0])<<0
Expand Down
43 changes: 37 additions & 6 deletions hashtable/hashtable.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ type Stats struct {
Cols int // number of collisions
Probes int // number of probes
Heads int // number of chains > 1
Dups int // number of dup keys
Dups int // number of dup hashes on the same chain
Dups2 int // number of dup hashes
Nbuckets int // number of new buckets added
Entries int
LongestChain int // longest chain of entries
Expand Down Expand Up @@ -115,6 +116,24 @@ func btoi(b []byte) int {
return int(b[3])<<24 | int(b[2])<<16 | int(b[1])<<8 | int(b[0])
}

// ph holds the hash computed by the previous Insert call; it is zero until
// Insert has stored a hash in it. In the visible code it is only consulted by
// the (currently commented-out) bit-difference debug output in Insert.
var ph uint64

// Ones maps a 4-bit value (0..15) to the number of 1 bits it contains.
var Ones = [16]int{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}

// Zeros maps a 4-bit value (0..15) to the number of 0 bits it contains.
var Zeros = [16]int{4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}

// diff returns how many of the 64 bits of d are clear (zeros) and how many
// are set (ones). The word is examined one nibble at a time using the Zeros
// and Ones lookup tables. It panics with "diff" if the two counts do not add
// up to 64, which would indicate a corrupted table.
func diff(d uint64) (zeros int, ones int) {
	for shift := uint(0); shift < 64; shift += 4 {
		nibble := (d >> shift) & 0xF
		ones += Ones[nibble]
		zeros += Zeros[nibble]
	}
	if ones+zeros != 64 {
		panic("diff")
	}
	return zeros, ones
}

func (ht *HashTable) Insert(ka []byte) {
k := make([]byte, len(ka), len(ka))
k = k[:]
Expand All @@ -141,7 +160,9 @@ func (ht *HashTable) Insert(ka []byte) {
if ht.Buckets[idx] == nil {
// no entry or chain at this location, make it
ht.Buckets[idx] = append(ht.Buckets[idx], Bucket{Key: k})
//fmt.Printf("Insert: ins idx=%d, len=%d, hash=0x%08x, key=%q\n", idx, len(ht.Buckets[idx]), h, ht.Buckets[idx][0].Key)
//fmt.Printf("Insert: ins idx=%d, len=%d, hash=%#016x, key=%q\n", idx, len(ht.Buckets[idx]), h, ht.Buckets[idx][0].Key)
//z, o := diff(h)
//fmt.Printf("%02d %02d %#064b z=%02d o=%02d", btoi(k), idx, h, z, o)
if Trace {
fmt.Printf("{%q: %d, %q: %d, %q: %q, %q: %d, %q: %d, %q: %d, %q: %v, %q: %v},\n",
"i", ht.Tcnt, "l", cnt, "op", "I", "t", 0, "b", idx, "s", 0, "k", btoi(k), "v", btoi(k))
Expand All @@ -158,7 +179,7 @@ func (ht *HashTable) Insert(ka []byte) {
}
ht.Probes++
ht.Heads++
return
break
}
if ht.oa {
//fmt.Printf("Insert: col idx=%d, len=%d, hash=0x%08x, key=%q\n", idx, len(ht.Buckets[idx]), h, ht.Buckets[idx][0].Key)
Expand Down Expand Up @@ -197,7 +218,7 @@ func (ht *HashTable) Insert(ka []byte) {
// first scan slice for dups
for j := range ht.Buckets[idx] {
bh := hashf.Hashf(ht.Buckets[idx][j].Key, ht.Seed)
//fmt.Printf("idx=%d, j=%d/%d, bh=0x%08x, h=0x%08x, key=%q\n", idx, j, len(ht.Buckets[idx]), bh, h, ht.Buckets[idx][j].Key)
//fmt.Printf("idx=%d, j=%d/%d, bh=%#016x, h=%#016x, key=%q\n", idx, j, len(ht.Buckets[idx]), bh, h, ht.Buckets[idx][j].Key)
if bh == h {
if ht.pd {
//fmt.Printf("idx=%d, j=%d/%d, bh=0x%08x, h=0x%08x, key=%q, bkey=%q\n", idx, j, len(ht.Buckets[idx]), bh, h, k, ht.Buckets[idx][j].Key)
Expand All @@ -213,6 +234,8 @@ func (ht *HashTable) Insert(ka []byte) {
//fmt.Printf("len(ht.Buckets[idx])=%d, ht.LongestChain=%d\n", len(ht.Buckets[idx]), ht.LongestChain)
ht.LongestChain = len(ht.Buckets[idx])
}
//z, o := diff(h)
//fmt.Printf("%02d %02d %#064b z=%02d o=%02d", btoi(k), idx, h, z, o)
if Trace {
fmt.Printf("{%q: %d, %q: %d, %q: %q, %q: %d, %q: %d, %q: %d, %q: %v, %q: %v},\n",
"i", ht.Tcnt, "l", cnt, "op", "I", "t", len(ht.Buckets[idx])-1, "b", idx, "s", 0, "k", btoi(k), "v", btoi(k))
Expand All @@ -223,6 +246,14 @@ func (ht *HashTable) Insert(ka []byte) {
break
}
}
if ph != 0 {
//xor := h ^ ph
//z, o := diff(xor)
//fmt.Printf(" xor=%#064b z=%02d o=%02d\n", xor, z, o)
} else {
//fmt.Printf("\n")
}
ph = h
}

// The theoretical metric from "Red Dragon Book"
Expand Down Expand Up @@ -285,8 +316,8 @@ func (s *HashTable) Print() {
}
*/
//fmt.Printf("%#v\n", s)
fmt.Printf("size=%h, inserts=%h, heads=%h, newBuckets=%h, LongestChain=%h, dups=%d, q=%0.2f, time=%0.2f%s\n",
fmt.Printf("size=%h, inserts=%h, heads=%h, newBuckets=%h, LongestChain=%h, dups=%d, dups2=%d, q=%0.2f, time=%0.2f%s\n",
hrff.Int64{int64(s.Size), ""}, hrff.Int64{int64(s.Inserts), ""}, hrff.Int64{int64(s.Heads), ""},
hrff.Int64{int64(s.Nbuckets), ""}, hrff.Int64{int64(s.LongestChain), ""}, s.Dups, q, t, units)
hrff.Int64{int64(s.Nbuckets), ""}, hrff.Int64{int64(s.LongestChain), ""}, s.Dups, s.Dups2, q, t, units)
}
}
40 changes: 37 additions & 3 deletions ht.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,38 @@ func TestJ(file string, lines int, hf2 string) (ht *HashTable) {
return
}

// TestL hashes and inserts every possible 3-byte key (256*256*256 keys, in
// ascending order with key[0] as the most slowly varying byte) and records in
// ht.Dups2 how many of those keys produced a hash value that an earlier key
// had already produced. A result of zero means all 3-byte keys have distinct
// hashes.
//
// The file, lines and hf2 parameters are unused; they are present so the
// function has the same signature as the other tests. The returned table has
// Dur set to the elapsed time of the hash-and-insert loop.
func TestL(file string, lines int, hf2 string) (ht *HashTable) {
	const nkeys = 256 * 256 * 256

	// Set of distinct hash values seen. The final size is bounded by nkeys,
	// so give the map a size hint up front instead of letting it grow
	// incrementally inside the timed loop.
	seen := make(map[uint64]struct{}, nkeys)
	key := make([]byte, 3)
	ht = NewHashTable(nkeys, seed, *extra, *pd, *oa, *prime)
	start := time.Now()
	for n := 0; n < nkeys; n++ {
		key[0] = byte(n >> 16)
		key[1] = byte(n >> 8)
		key[2] = byte(n)
		seen[Hashf(key, ht.Seed)] = struct{}{}
		ht.Insert(key)
	}
	stop := time.Now()
	ht.Dur = tdiff(start, stop)
	// Every key contributes one hash; any shortfall in distinct values is the
	// number of duplicate hashes.
	ht.Dups2 = nkeys - len(seen)
	return ht
}

func unhex(c byte) uint8 {
switch {
case '0' <= c && c <= '9':
Expand Down Expand Up @@ -639,6 +671,7 @@ var Tests = []Test{
{"TestI", &I, TestI, "integers from 0 to ni-1 (does not read file)"},
{"TestJ", &J, TestJ, "one bit keys (does not read file)"},
{"TestK", &K, TestK, "read file of keys and print hashes"},
{"TestL", &L, TestL, "all possible 3 byte keys"},
}

func runTestsWithFileAndHashes(file string, lines int, hf []string) {
Expand Down Expand Up @@ -726,17 +759,18 @@ var H = flag.Bool("H", false, "test H")
var I = flag.Bool("I", false, "test I")
var J = flag.Bool("J", false, "test J")
var K = flag.Bool("K", false, "test K")
var L = flag.Bool("L", false, "test L")
var S = flag.Bool("S", false, "test S")

var letters = []string{"abcdefgh", "efghijkl", "ijklmnop", "mnopqrst", "qrstuvwx", "uvwxyz01"} // 262144 words
var TestPointers = []**bool{&A, &B, &C, &D, &E, &F, &G, &H, &I, &J, &K}
var TestPointers = []**bool{&A, &B, &C, &D, &E, &F, &G, &H, &I, &J, &K, &L}

func allTestsOn() {
*A, *B, *C, *D, *E, *F, *G, *H, *I, *J = true, true, true, true, true, true, true, true, true, true
*A, *B, *C, *D, *E, *F, *G, *H, *I, *J, *L = true, true, true, true, true, true, true, true, true, true, true
}

func allTestsOff() {
*A, *B, *C, *D, *E, *F, *G, *H, *I, *J = false, false, false, false, false, false, false, false, false, false
*A, *B, *C, *D, *E, *F, *G, *H, *I, *J, *L = false, false, false, false, false, false, false, false, false, false, false
}

func main() {
Expand Down

0 comments on commit 82388a7

Please sign in to comment.