Skip to content

Commit

Permalink
Bug 1884327 - faster perfect hash r=ahochheiden
Browse files Browse the repository at this point in the history
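While we're at it, also remove the `six` compatibility layer. Keys are no
longer passed through `ensure_bytes`, which slightly impacts some callers:
they must now supply bytes-like keys themselves (e.g. via `.encode()`).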

Differential Revision: https://phabricator.services.mozilla.com/D204010
  • Loading branch information
serge-sans-paille committed Mar 11, 2024
1 parent ced6168 commit cb634cf
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 25 deletions.
12 changes: 8 additions & 4 deletions xpcom/components/gen_static_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,14 +920,18 @@ def gen_substs(manifests):

cid_phf = PerfectHash(modules, PHF_SIZE, key=lambda module: module.cid.bytes)

contract_phf = PerfectHash(contracts, PHF_SIZE, key=lambda entry: entry.contract)
contract_phf = PerfectHash(
contracts, PHF_SIZE, key=lambda entry: entry.contract.encode()
)

js_services_phf = PerfectHash(
list(js_services.values()), PHF_SIZE, key=lambda entry: entry.js_name
list(js_services.values()), PHF_SIZE, key=lambda entry: entry.js_name.encode()
)

protocol_handlers_phf = PerfectHash(
list(protocol_handlers.values()), TINY_PHF_SIZE, key=lambda entry: entry.scheme
list(protocol_handlers.values()),
TINY_PHF_SIZE,
key=lambda entry: entry.scheme.encode(),
)

js_services_json = {}
Expand All @@ -945,7 +949,7 @@ def gen_substs(manifests):
substs["contract_count"] = len(contracts)
substs["protocol_handler_count"] = len(protocol_handlers)

substs["default_protocol_handler_idx"] = protocol_handlers_phf.get_index("default")
substs["default_protocol_handler_idx"] = protocol_handlers_phf.get_index(b"default")

gen_module_funcs(substs, module_funcs)

Expand Down
31 changes: 10 additions & 21 deletions xpcom/ds/tools/perfecthash.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@
import textwrap
from collections import namedtuple

import six
from mozbuild.util import ensure_bytes


# Iteration over bytestrings works differently in Python 2 and 3; this function
# captures the two possibilities. Returns an 'int' given the output of iterating
# through a bytestring regardless of the input.
def _ord(c):
if six.PY3:
return c
return ord(c)


class PerfectHash(object):
"""PerfectHash objects represent a computed perfect hash function, which
Expand Down Expand Up @@ -88,7 +76,7 @@ def __init__(self, entries, size, validate=True, key=lambda e: e[0]):

for bucket in buckets:
# Once we've reached an empty bucket, we're done.
if len(bucket.entries) == 0:
if not bucket.entries:
break

# Try values for the basis until we find one with no conflicts.
Expand All @@ -103,7 +91,7 @@ def __init__(self, entries, size, validate=True, key=lambda e: e[0]):
# There was a conflict, try the next basis.
basis += 1
idx = 0
del slots[:]
slots.clear()
assert basis < self.U32_MAX, "table too small"
else:
slots.append(slot)
Expand All @@ -127,15 +115,16 @@ def _hash(cls, key, basis=FNV_OFFSET_BASIS):
32-bit FNV is used for indexing into the first table, and the value
stored in that table is used as the offset basis for indexing into the
values table."""
for byte in memoryview(ensure_bytes(key)):
obyte = _ord(byte)
FNV_PRIME = cls.FNV_PRIME
U32_MAX = cls.U32_MAX
for obyte in memoryview(key):
basis ^= obyte # xor-in the byte
basis *= cls.FNV_PRIME # Multiply by the FNV prime
basis &= cls.U32_MAX # clamp to 32-bits
basis *= FNV_PRIME # Multiply by the FNV prime
basis &= U32_MAX # clamp to 32-bits
return basis

def key(self, entry):
return memoryview(ensure_bytes(self._key(entry)))
return memoryview(self._key(entry))

def get_raw_index(self, key):
"""Determine the index in self.entries without validating"""
Expand All @@ -145,7 +134,7 @@ def get_raw_index(self, key):
def get_index(self, key):
"""Given a key, determine the index in self.entries"""
idx = self.get_raw_index(key)
if memoryview(ensure_bytes(key)) != self.key(self.entries[idx]):
if memoryview(key) != self.key(self.entries[idx]):
return None
return idx

Expand Down Expand Up @@ -334,7 +323,7 @@ def gen_jslinearstr_getter(
not in the table."""

assert all(
_ord(b) <= 0x7F for e in self.phf.entries for b in self.phf.key(e)
b <= 0x7F for e in self.phf.entries for b in self.phf.key(e)
), "non-ASCII key"

if return_type is None:
Expand Down

0 comments on commit cb634cf

Please sign in to comment.