Skip to content

Commit

Permalink
Bug 1579862 - Tweaking HuffmanTableImplementationSaturated;r=arai
Browse files Browse the repository at this point in the history
A few changes to HuffmanTableImplementationSaturated:

- we can now create instances of HuffmanTableImplementationSaturated with max bit lengths up to 10;
- we reject cases in which we have more than 256 elements;
- internal indices in HuffmanTableImplementationSaturated are now represented with uint8_t instead of usize_t,
  which divides the size of this array by 4 and should improve memory locality.

According to my benchmarking, this decreases duration by ~7%.

Depends on D45644

Differential Revision: https://phabricator.services.mozilla.com/D45722

--HG--
extra : moz-landing-system : lando
David Teller committed Sep 13, 2019
1 parent 01cf344 commit 37fa21b
Showing 2 changed files with 51 additions and 49 deletions.
45 changes: 24 additions & 21 deletions js/src/frontend/BinASTTokenReaderContext.cpp
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ const uint8_t MAX_PREFIX_BIT_LENGTH = 32;
// length in the table is <= MAX_BIT_LENGTH_IN_SATURATED_TABLE,
// we use a `HuffmanTableSaturated`. Otherwise, we fall back to
// a slower but less memory-hungry solution.
const uint8_t MAX_BIT_LENGTH_IN_SATURATED_TABLE = 8;
const uint8_t MAX_BIT_LENGTH_IN_SATURATED_TABLE = 10;

// The length of the bit buffer, in bits.
const uint8_t BIT_BUFFER_SIZE = 64;
@@ -1785,7 +1785,8 @@ JS::Result<Ok> HuffmanTableImplementationGeneric<T>::init(
JSContext* cx, size_t numberOfSymbols, uint8_t maxBitLength) {
MOZ_ASSERT(this->implementation.template is<
HuffmanTableUnreachable>()); // Make sure that we're initializing.
if (maxBitLength > MAX_BIT_LENGTH_IN_SATURATED_TABLE) {
if (maxBitLength > MAX_BIT_LENGTH_IN_SATURATED_TABLE ||
numberOfSymbols > 256) {
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationMap<T>>{}, cx};
MOZ_TRY(this->implementation.template as<HuffmanTableImplementationMap<T>>()
@@ -2031,28 +2032,26 @@ JS::Result<Ok> HuffmanTableImplementationSaturated<T>::init(
}
// Enlarge `saturated`, as we're going to fill it in random order.
for (size_t i = 0; i < saturatedLength; ++i) {
// We use -1 (which is always invalid) to detect implementation errors.
saturated.infallibleAppend(
size_t(-1)); // Capacity reserved in this method.
// Capacity reserved in this method.
saturated.infallibleAppend(uint8_t(-1));
}
return Ok();
}

#ifdef DEBUG
template <typename T>
void HuffmanTableImplementationSaturated<T>::selfCheck() {
MOZ_ASSERT(
this->maxBitLength <=
MAX_CODE_BIT_LENGTH); // Double-check that we've initialized properly.
// Double-check that we've initialized properly.
MOZ_ASSERT(this->maxBitLength <= MAX_CODE_BIT_LENGTH);

bool foundMaxBitLength = false;
for (size_t i = 0; i < saturated.length(); ++i) {
// Check that all indices have been properly initialized.
// Note: this is an explicit `for(;;)` loop instead of
// a `for(:)` range loop, as knowing `i` should simplify
// debugging.
const size_t index = saturated[i];
MOZ_ASSERT(index != size_t(-1));
const uint8_t index = saturated[i];
MOZ_ASSERT(index < values.length());
if (values[index].key.bitLength == maxBitLength) {
foundMaxBitLength = true;
}
@@ -2075,7 +2074,8 @@ JS::Result<Ok> HuffmanTableImplementationSaturated<T>::addSymbol(

// First add the value to `values`.
if (!values.emplaceBack(bits, bitLength, std::move(value))) {
MOZ_CRASH(); // Memory was reserved in `init()`.
// Memory was reserved in `init()`.
MOZ_CRASH();
}

// Notation: in the following, unless otherwise specified, we consider
@@ -2091,12 +2091,13 @@ JS::Result<Ok> HuffmanTableImplementationSaturated<T>::addSymbol(
// for which this condition is true. That's all the values of segment
// `[0bC...C0...0, 0bC...C1...1]`.
const uint8_t padding = maxBitLength - bitLength;
const size_t begin = bits << padding; // `0bC...C0...0` above

// `begin` holds `0bC...C0...0` above
const size_t begin = bits << padding;

// `length holds `0bC...C1...1` - `0bC...C0...0`
const size_t length =
((padding != 0) // `0bC...C1...1` above - `0bC...C0...0` above.
? size_t(-1) >> (8 * sizeof(size_t) - padding)
: 0) +
1;
((padding != 0) ? size_t(-1) >> (8 * sizeof(size_t) - padding) : 0) + 1;
for (size_t i = begin; i < begin + length; ++i) {
saturated[i] = index;
}
@@ -2111,11 +2112,13 @@ HuffmanEntry<const T*> HuffmanTableImplementationSaturated<T>::lookup(
// In the documentation of `addSymbol`, this is
// `0bB...B`.
const uint32_t bits = key.leadingBits(maxBitLength);
const size_t index =
saturated[bits]; // Invariants: `saturated.length() == 1 << maxBitLength`
// and `bits <= 1 << maxBitLength`.
const auto& entry =
values[index]; // Invariants: `saturated[i] < values.length()`.

// Invariants: `saturated.length() == 1 << maxBitLength`
// and `bits <= 1 << maxBitLength`.
const size_t index = saturated[bits];

// Invariants: `saturated[i] < values.length()`.
const auto& entry = values[index];
return HuffmanEntry<const T*>(entry.key.bits, entry.key.bitLength,
&entry.value);
}
55 changes: 27 additions & 28 deletions js/src/frontend/BinASTTokenReaderContext.h
Original file line number Diff line number Diff line change
@@ -350,45 +350,45 @@ class HuffmanTableImplementationMap {
//
// Symbol | Binary Code | Int value of Code | Bit Length
// ------ | ------------ | ----------------- | ----------
// A | 00111 | 7 | 5
// B | 00110 | 6 | 5
// C | 0010 | 2 | 4
// D | 011 | 3 | 3
// E | 010 | 2 | 3
// F | 000 | 0 | 3
// G | 11 | 3 | 2
// H | 10 | 2 | 2
// A | 11000 | 24 | 5
// B | 11001 | 25 | 5
// C | 1101 | 13 | 4
// D | 100 | 4 | 3
// E | 101 | 5 | 3
// F | 111 | 7 | 3
// G | 00 | 0 | 2
// H | 01 | 1 | 2
//
// By definition of a Huffman Table, the Binary Codes represent
// paths in a Huffman Tree. Consequently, padding these codes
// to the end would not change the result.
//
// Symbol | Binary Code | Int value of Code | Bit Length
// ------ | ------------ | ----------------- | ----------
// A | 00111 | 7 | 5
// B | 00110 | 6 | 5
// C | 0010? | [4...5] | 4
// D | 011?? | [12...15] | 3
// E | 010?? | [8..11] | 3
// F | 000?? | [0..3] | 3
// G | 11??? | [24...31] | 2
// H | 10??? | [16...23] | 2
// A | 11000 | 24 | 5
// B | 11001 | 25 | 5
// C | 1101? | [26...27] | 4
// D | 100?? | [16...19] | 3
// E | 101?? | [20..23] | 3
// F | 111?? | [28..31] | 3
// G | 00??? | [0...7] | 2
// H | 01??? | [8...15] | 2
//
// Row "Int value of Code" now contains all possible values
// that may be expressed in 5 bits. By using these values
// as array indices, we may therefore represent the
// Huffman table as an array:
//
// Index | Symbol | Bit Length
// -------- | ---------- | -------------
// [0..3] | F | 3
// [4..5] | C | 4
// 6 | B | 5
// 7 | A | 5
// [8..11] | E | 3
// [12..15] | D | 3
// [16..23] | H | 2
// [24..31] | G | 2
// Index | Symbol | Bit Length
// --------- | ---------- | -------------
// [0...7] | G | 2
// [8...15] | H | 2
// [16...19] | D | 3
// [20...23] | E | 3
// 24 | A | 5
// 25 | B | 5
// [26...27] | C | 4
// [28...31] | F | 3
//
// By using the next 5 bits in the bit buffer, we may, in
// a single lookup, determine the symbol and the bit length.
@@ -461,12 +461,11 @@ class HuffmanTableImplementationSaturated {

// The entries in this Huffman table, prepared for lookup.
// The `size_t` argument is an index into `values`.
// FIXME: We could make this a `uint8_t` to save space.
//
// Invariant (once `init*` has been called):
// - Length is `1 << maxBitLength`.
// - for all i, `saturated[i] < values.length()`
Vector<size_t> saturated;
Vector<uint8_t> saturated;

// The maximal bitlength of a value in this table.
//

0 comments on commit 37fa21b

Please sign in to comment.