forked from chroma-core/chroma
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ENH] Add rendezvous hashing to go and python. Add Assignment policy …
…to go so it can be used in future commits (chroma-core#1360) ## Description of changes *Summarize the changes made by this PR.* Adds basic rendezvous hashing to the python and go codebases so it can be used ## Test plan *How are these changes tested?* Basic unit tests. Manually tested against k8s. ## Documentation Changes None required
- Loading branch information
Showing
13 changed files
with
271 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from chromadb.utils.rendezvous_hash import assign, murmur3hasher | ||
|
||
|
||
def test_rendezvous_hash() -> None:
    """assign() must return the member that the hasher scores highest."""
    candidates = ["a", "b", "c"]
    lookup_key = "key"

    def mock_hasher(member: str, key: str) -> int:
        # Score each member by its position in the list, so the last one wins.
        return candidates.index(member)

    winner = assign(lookup_key, candidates, mock_hasher)
    assert winner == "c"
|
||
|
||
def test_even_distribution() -> None:
    """Keys assigned with murmur3hasher should spread roughly evenly over members."""
    member_count = 10
    num_keys = 1000
    tolerance = 25
    nodes = [str(n) for n in range(member_count)]

    # Tally how many keys land on each node; every node starts at zero so
    # a node that receives no keys is still checked below.
    key_distribution = dict.fromkeys(nodes, 0)
    for key in (f"key_{i}" for i in range(num_keys)):
        key_distribution[assign(key, nodes, murmur3hasher)] += 1

    # Each node's share must stay within `tolerance` of the ideal share.
    expected_per_node = num_keys / len(nodes)
    for count in key_distribution.values():
        assert abs(count - expected_per_node) < tolerance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# An implementation of https://en.wikipedia.org/wiki/Rendezvous_hashing | ||
from typing import Callable, List, cast | ||
import mmh3 | ||
|
||
Hasher = Callable[[str, str], int] | ||
Member = str | ||
Members = List[str] | ||
Key = str | ||
|
||
|
||
def assign(key: Key, members: Members, hasher: Hasher) -> Member:
    """Assign a key to a member using the rendezvous hashing algorithm.

    Every member is scored with ``hasher(member, key)`` and the member with
    the highest score is returned. When several members share the highest
    score, the one that appears first in ``members`` wins.

    Args:
        key: The key to assign. Must be non-empty when there is more than
            one member.
        members: The candidate members. Must be non-empty.
        hasher: Callable that scores a ``(member, key)`` pair.

    Returns:
        The member the key is assigned to.

    Raises:
        ValueError: If ``members`` is empty, or if ``key`` is empty and there
            is more than one member.
    """
    if not members:
        raise ValueError("Cannot assign key to empty memberlist")
    if len(members) == 1:
        # A single member receives every key; the hasher is not consulted.
        return members[0]
    if key == "":
        raise ValueError("Cannot assign empty key")

    # max() makes no assumption about the score range, so a hasher that
    # returns zero or negative scores still selects a member. It returns the
    # first maximal element, matching a strict ">" scan.
    return max(members, key=lambda member: hasher(member, key))
|
||
|
||
def merge_hashes(x: int, y: int) -> int:
    """Combine two hashes by XOR-ing them and applying the murmurhash3 64-bit mix."""
    mixed = x ^ y
    mixed ^= mixed >> 33
    for multiplier in (0xFF51AFD7ED558CCD, 0xC4CEB9FE1A85EC53):
        # Python ints are arbitrary precision, so reduce each product back
        # to 64 bits to emulate fixed-width overflow.
        mixed = (mixed * multiplier) % (1 << 64)
        mixed ^= mixed >> 33
    return mixed
|
||
|
||
def murmur3hasher(member: Member, key: Key) -> int:
    """Score a (member, key) pair by hashing each with murmur3 and merging the results."""
    # mmh3.hash64 returns a pair of values; only the first, unsigned one is used.
    hashed_member, hashed_key = (
        mmh3.hash64(text, signed=False)[0] for text in (member, key)
    )
    return merge_hashes(hashed_member, hashed_key)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package utils | ||
|
||
import ( | ||
"errors" | ||
|
||
"github.com/spaolacci/murmur3" | ||
) | ||
|
||
type Hasher = func(member string, key string) uint64 | ||
type Member = string | ||
type Members = []Member | ||
type Key = string | ||
|
||
// assign assigns a key to a member using the rendezvous hashing algorithm. | ||
func Assign(key Key, members Members, hasher Hasher) (Member, error) { | ||
if len(members) == 0 { | ||
return "", errors.New("cannot assign key to empty member list") | ||
} | ||
if len(members) == 1 { | ||
return members[0], nil | ||
} | ||
if key == "" { | ||
return "", errors.New("cannot assign empty key") | ||
} | ||
|
||
maxScore := uint64(0) | ||
var maxMember Member | ||
|
||
for _, member := range members { | ||
score := hasher(string(member), string(key)) | ||
if score > maxScore { | ||
maxScore = score | ||
maxMember = member | ||
} | ||
} | ||
|
||
return maxMember, nil | ||
} | ||
|
||
// mergeHashes combines two 64-bit hashes by XOR-ing them and then applying
// the murmurhash3 64-bit mix steps.
func mergeHashes(a uint64, b uint64) uint64 {
	mixed := a ^ b
	mixed ^= mixed >> 33
	for _, multiplier := range [...]uint64{0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53} {
		mixed *= multiplier // uint64 multiplication wraps modulo 2^64
		mixed ^= mixed >> 33
	}
	return mixed
}
|
||
// NOTE: The python implementation of murmur3 may differ from the golang implementation. | ||
// For now, this is fine since go and python don't need to agree on any hashing schemes | ||
// but if we ever need to agree on a hashing scheme, we should verify that the implementations | ||
// are the same. | ||
func Murmur3Hasher(member string, key string) uint64 { | ||
hasher := murmur3.New64() | ||
hasher.Write([]byte(member)) | ||
memberHash := hasher.Sum64() | ||
hasher.Reset() | ||
hasher.Write([]byte(key)) | ||
keyHash := hasher.Sum64() | ||
return mergeHashes(memberHash, keyHash) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package utils | ||
|
||
import ( | ||
"fmt" | ||
"math" | ||
"testing" | ||
) | ||
|
||
// mockHasher is a deterministic test hasher: it scores "a", "b" and "c" as
// 0, 1 and 2 respectively and ignores the key. Any other member scores 0.
func mockHasher(member string, key string) uint64 {
	switch member {
	case "b":
		return 1
	case "c":
		return 2
	default:
		// "a" and unknown members both score 0.
		return 0
	}
}
|
||
func TestRendezvousHash(t *testing.T) { | ||
members := []string{"a", "b", "c"} | ||
key := "key" | ||
|
||
// Test that the assign function returns the expected result | ||
node, error := Assign(key, members, mockHasher) | ||
|
||
if error != nil { | ||
t.Errorf("Assign() returned an error: %v", error) | ||
} | ||
|
||
if node != "c" { | ||
t.Errorf("Assign() = %v, want %v", node, "c") | ||
} | ||
} | ||
|
||
func TestEvenDistribution(t *testing.T) { | ||
memberCount := 10 | ||
tolerance := 25 | ||
var nodes []string | ||
for i := 0; i < memberCount; i++ { | ||
nodes = append(nodes, fmt.Sprint(i+'0')) // Convert int to string | ||
} | ||
|
||
keyDistribution := make(map[string]int) | ||
numKeys := 1000 | ||
|
||
// Test if keys are evenly distributed across nodes | ||
for i := 0; i < numKeys; i++ { | ||
key := "key_" + fmt.Sprint(i) | ||
node, err := Assign(key, nodes, Murmur3Hasher) | ||
if err != nil { | ||
t.Errorf("Assign() returned an error: %v", err) | ||
} | ||
keyDistribution[node]++ | ||
} | ||
|
||
// Check if keys are somewhat evenly distributed | ||
for _, count := range keyDistribution { | ||
if math.Abs(float64(count-numKeys/memberCount)) > float64(tolerance) { | ||
t.Errorf("Key distribution is uneven: %v", keyDistribution) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters