Skip to content

Commit

Permalink
Use encoding scheme with m_packedCap to support larger PackedArrays
Browse files Browse the repository at this point in the history
Summary: Though m_packedCap is declared as uint32_t, PackedArrays must ensure that
the upper byte of m_packedCap is 0 because it overlaps with the m_kind field.
To make this work, PackedArray was only supporting capacities up to 2^24-1
and for larger capacities it would escalate to MixedArray.

The Vector collection implementation is being changed to use an array as
the Vector's buffer, and it would be nice if Vector could assume it's
always dealing with a PackedArray. However, imposing a maximum capacity
of 2^24-1 on Vectors feels unreasonably low.

This diff introduces an encoding scheme for PackedArray's capacity field.
This encoding scheme provides a mapping from 3-byte "capacity codes" to
4-byte capacities. The encoding scheme allows us to avoid doing any extra
work in the common case when growing a PackedArray, and it only introduces
one additional (predictable) branch in the common case when allocating,
copying, or freeing a PackedArray.

Reviewed By: @jdelong

Differential Revision: D1337469
  • Loading branch information
paroski authored and JoelMarcey committed Jun 2, 2014
1 parent 475d253 commit f5445e1
Show file tree
Hide file tree
Showing 11 changed files with 325 additions and 95 deletions.
4 changes: 3 additions & 1 deletion hphp/runtime/base/apc-stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "hphp/runtime/base/typed-value.h"
#include "hphp/runtime/base/array-data.h"
#include "hphp/runtime/base/packed-array-defs.h"
#include "hphp/runtime/base/mixed-array-defs.h"
#include "hphp/runtime/base/apc-handle.h"
#include "hphp/runtime/base/apc-array.h"
Expand Down Expand Up @@ -114,7 +115,8 @@ size_t getMemSize(const ArrayData* arr) {
switch (arr->kind()) {
case ArrayData::ArrayKind::kPackedKind: {
auto size = sizeof(ArrayData) +
(arr->m_packedCap - arr->m_size) * sizeof(TypedValue);
(packedCodeToCap(arr->m_packedCapCode) - arr->m_size) *
sizeof(TypedValue);
auto const values = reinterpret_cast<const TypedValue*>(arr + 1);
auto const last = values + arr->m_size;
for (auto ptr = values; ptr != last; ++ptr) {
Expand Down
9 changes: 5 additions & 4 deletions hphp/runtime/base/array-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,11 @@ struct ArrayData {
UNUSED uint8_t m_unused0;
ArrayKind m_kind;
};
// Packed arrays overlay their capacity with the kind field.
// kPackedKind is zero, and aliases the top byte of
// m_packedCap, so it won't influence the capacity.
uint32_t m_packedCap;
// Packed arrays overlay their encoded capacity with the kind field.
// kPackedKind is zero, and aliases the top byte of m_packedCapCode,
// so it won't influence the encoded capacity. For details on the
// encoding see the definition of packedCapToCode().
uint32_t m_packedCapCode;
};
uint32_t m_size;
};
Expand Down
3 changes: 2 additions & 1 deletion hphp/runtime/base/empty-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ std::pair<ArrayData*,TypedValue*> EmptyArray::MakePackedInl(TypedValue tv) {
auto const ad = static_cast<ArrayData*>(
MM().objMallocLogged(sizeof(ArrayData) + cap * sizeof(TypedValue))
);
assert(cap == packedCodeToCap(cap));
ad->m_kindAndSize = uint64_t{1} << 32 | cap; // also set kind
ad->m_posAndCount = 0;

Expand All @@ -140,7 +141,7 @@ std::pair<ArrayData*,TypedValue*> EmptyArray::MakePackedInl(TypedValue tv) {
assert(ad->m_size == 1);
assert(ad->m_pos == 0);
assert(ad->m_count == 0);
assert(ad->m_packedCap == cap);
assert(ad->m_packedCapCode == cap);
assert(PackedArray::checkInvariants(ad));
return { ad, &lval };
}
Expand Down
84 changes: 62 additions & 22 deletions hphp/runtime/base/mixed-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,22 @@ ArrayData* MixedArray::MakeReserveMixed(uint32_t capacity) {

ArrayData* MixedArray::MakePacked(uint32_t size, const TypedValue* values) {
assert(size > 0);
ArrayData* ad;
if (LIKELY(size <= kPackedCapCodeThreshold)) {
auto const cap = size;
ad = static_cast<ArrayData*>(
MM().objMallocLogged(sizeof(ArrayData) + sizeof(TypedValue) * cap)
);
assert(cap == packedCodeToCap(cap));
ad->m_kindAndSize = uint64_t{size} << 32 | cap; // sets kPackedKind
assert(ad->m_kind == kPackedKind);
assert(ad->m_size == size);
assert(packedCodeToCap(ad->m_packedCapCode) == cap);
} else {
ad = MakePackedHelper(size, values);
}

auto const cap = size;
auto const ad = static_cast<ArrayData*>(
MM().objMallocLogged(sizeof(ArrayData) + sizeof(TypedValue) * cap)
);

ad->m_kindAndSize = uint64_t{size} << 32 | cap; // sets kPackedKind
ad->m_posAndCount = uint64_t{1} << 32;

// Append values by moving -- Caller assumes we update refcount.
// Values are in reverse order since they come from the stack, which
// grows down.
Expand All @@ -99,15 +106,28 @@ ArrayData* MixedArray::MakePacked(uint32_t size, const TypedValue* values) {
++ptr;
}

assert(ad->m_kind == kPackedKind);
assert(ad->m_size == size);
assert(ad->m_packedCap == cap);
assert(ad->m_pos == 0);
assert(ad->m_count == 1);
assert(PackedArray::checkInvariants(ad));
return ad;
}

NEVER_INLINE
ArrayData*
MixedArray::MakePackedHelper(uint32_t size, const TypedValue* values) {
auto const cap = roundUpPackedCap(size);
auto const ad = static_cast<ArrayData*>(
MM().objMallocLogged(sizeof(ArrayData) + sizeof(TypedValue) * cap)
);
auto const capCode = packedCapToCode(cap);
ad->m_kindAndSize = uint64_t{size} << 32 | capCode; // sets kPackedKind
assert(ad->m_kind == kPackedKind);
assert(ad->m_size == size);
assert(packedCodeToCap(ad->m_packedCapCode) == cap);
return ad;
}


MixedArray* MixedArray::MakeStruct(uint32_t size, StringData** keys,
const TypedValue* values) {
assert(size > 0);
Expand Down Expand Up @@ -324,15 +344,24 @@ ArrayData* MixedArray::MakeUncounted(ArrayData* array) {
ArrayData* MixedArray::MakeUncountedPacked(ArrayData* array) {
assert(PackedArray::checkInvariants(array));

// We don't need to copy the full capacity, since the array won't
// change once it's uncounted.
auto const cap = array->m_size;
ArrayData* ad;
auto const size = array->m_size;
if (LIKELY(size <= kPackedCapCodeThreshold)) {
// We don't need to copy the full capacity, since the array won't
// change once it's uncounted.
auto const cap = size;
ad = static_cast<ArrayData*>(
std::malloc(sizeof(ArrayData) + cap * sizeof(TypedValue))
);
assert(cap == packedCodeToCap(cap));
ad->m_kindAndSize = uint64_t{size} << 32 | cap; // zero kind
assert(ad->m_kind == ArrayData::kPackedKind);
assert(packedCodeToCap(ad->m_packedCapCode) == cap);
assert(ad->m_size == size);
} else {
ad = MakeUncountedPackedHelper(array);
}

auto const ad = static_cast<ArrayData*>(
std::malloc(sizeof(ArrayData) + cap * sizeof(TypedValue))
);
ad->m_kindAndSize = uint64_t{size} << 32 | cap; // zero kind
ad->m_posAndCount = static_cast<uint64_t>(UncountedValue) << 32 |
static_cast<uint32_t>(array->m_pos);
auto const srcData = packedData(array);
Expand All @@ -342,17 +371,28 @@ ArrayData* MixedArray::MakeUncountedPacked(ArrayData* array) {
tvCopy(*CreateVarForUncountedArray(tvAsCVarRef(ptr)).asTypedValue(),
*targetData);
}

assert(ad->m_kind == ArrayData::kPackedKind);
assert(ad->m_packedCap == cap);
assert(ad->m_size == size);
assert(ad->m_pos == array->m_pos);
assert(ad->m_count == UncountedValue);
assert(ad->isUncounted());
assert(PackedArray::checkInvariants(ad));
return ad;
}

NEVER_INLINE
ArrayData* MixedArray::MakeUncountedPackedHelper(ArrayData* array) {
auto const cap = roundUpPackedCap(array->m_size);
auto const ad = static_cast<ArrayData*>(
std::malloc(sizeof(ArrayData) + cap * sizeof(TypedValue))
);
auto const capCode = packedCapToCode(cap);
auto const size = array->m_size;
ad->m_kindAndSize = uint64_t{size} << 32 | capCode; // zero kind
assert(ad->m_kind == ArrayData::kPackedKind);
assert(packedCodeToCap(ad->m_packedCapCode) == cap);
assert(ad->m_size == size);
return ad;
}

//=============================================================================
// Destruction

Expand Down Expand Up @@ -511,7 +551,7 @@ bool MixedArray::checkInvariants() const {

// All arrays:
assert(m_tableMask > 0 && ((m_tableMask+1) & m_tableMask) == 0);
assert(m_tableMask == folly::nextPowTwo(m_cap) - 1);
assert(m_tableMask == folly::nextPowTwo<uint64_t>(m_cap) - 1);
assert(m_cap == computeMaxElms(m_tableMask));

if (isZombie()) return true;
Expand Down
6 changes: 6 additions & 0 deletions hphp/runtime/base/mixed-array.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ struct MixedArray : private ArrayData {
* The returned array is already incref'd.
*/
static ArrayData* MakeReserve(uint32_t capacity);
static ArrayData* MakeReserveSlow(uint32_t capacity);

/*
* Allocate a new, empty, request-local array in mixed mode, with
Expand All @@ -132,6 +133,7 @@ struct MixedArray : private ArrayData {
* Pre: size > 0
*/
static ArrayData* MakePacked(uint32_t size, const TypedValue* values);
static ArrayData* MakePackedHelper(uint32_t size, const TypedValue* values);

/*
* Like MakePacked, but given static strings, make a struct-like array.
Expand All @@ -150,6 +152,7 @@ struct MixedArray : private ArrayData {
*/
static ArrayData* MakeUncounted(ArrayData* array);
static ArrayData* MakeUncountedPacked(ArrayData* array);
static ArrayData* MakeUncountedPackedHelper(ArrayData* array);

// This behaves the same as iter_begin except that it assumes
// this array is not empty and its not virtual.
Expand Down Expand Up @@ -288,6 +291,9 @@ struct MixedArray : private ArrayData {
static const uint32_t SmallHashSize = 1 << MinLgTableSize;
static const uint32_t SmallMask = SmallHashSize - 1;
static const uint32_t SmallSize = SmallHashSize - SmallHashSize / LoadScale;
static const uint64_t MaxHashSize = uint64_t(1) << 32;
static const uint32_t MaxMask = MaxHashSize - 1;
static const uint32_t MaxSize = MaxMask - MaxMask / LoadScale;
static const uint32_t MaxMakeSize = 4 * SmallSize;

uint32_t iterLimit() const { return m_used; }
Expand Down
73 changes: 72 additions & 1 deletion hphp/runtime/base/packed-array-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ namespace HPHP {
//////////////////////////////////////////////////////////////////////

constexpr uint32_t kPackedSmallSize = 3; // same as mixed-array for now
constexpr uint32_t kMaxPackedCap = 1 << 24;

// Max size allowed by PackedArray's capacity encoding scheme
constexpr uint32_t kMaxPackedCap = 0xFF00FF00ul;

constexpr uint32_t kPackedCapCodeThreshold = 0x10000ul;

//////////////////////////////////////////////////////////////////////

Expand All @@ -37,6 +41,73 @@ TypedValue* packedData(const ArrayData* arr) {
);
}

/**
* Packed arrays use an encoding scheme for their capacity field so that
* capacities up to almost 2^32 can be supported using only 3 bytes:
* cap = capCode < 0x10000 ? capCode : (capCode - 0xFF00) * 0x100
*
* The encoding breaks 3-byte capacity codes (capCodes) into two ranges.
* Codes 0 - 65535 are mapped to capacities 0 - 65535 in increments of 1.
* Codes 65536 - 16777215 are mapped to capacities 65536 - 4278255360 in
* increments of 256. This scheme works out well in a couple ways:
* - No meaningful loss of granularity for the capacity field.
* - cap == capCode in the common case, which comes in handy.
* - No multiplication or division needed.
*/

uint32_t packedCapToCode(uint32_t) UNUSED;
uint32_t packedCapToCode(uint32_t cap) {
assert(cap <= kMaxPackedCap);
if (UNLIKELY(cap > kPackedCapCodeThreshold)) {
auto const capCode = (cap + 0xFF00FFul) >> 8;
assert(capCode <= 0xFFFFFFul && capCode <= cap);
return capCode;
}
return cap;
}

uint32_t packedCodeToCap(uint32_t) UNUSED;
uint32_t packedCodeToCap(uint32_t capCode) {
assert(capCode <= 0xFFFFFFul);
if (UNLIKELY(capCode > kPackedCapCodeThreshold)) {
auto const cap = (capCode - 0xFF00ul) << 8;
assert(cap <= kMaxPackedCap && capCode <= cap);
return cap;
}
return capCode;
}

uint32_t roundUpPackedCap(uint32_t) UNUSED;
uint32_t roundUpPackedCap(uint32_t cap) {
assert(cap <= kMaxPackedCap);
if (UNLIKELY(cap > kPackedCapCodeThreshold)) {
cap = (cap + 0xFFlu) & ~0xFFul;
}
// The capacity should not change if it round trips into
// encoded form and back
assert(packedCodeToCap(packedCapToCode(cap)) == cap);
return cap;
}

bool sizeLessThanPackedCapCode(uint32_t, uint32_t) UNUSED;
bool sizeLessThanPackedCapCode(uint32_t size, uint32_t packedCapCode) {
assert(packedCapCode <= 0xFFFFFFul);
// Try comparing against m_packedCapCode first so that we can
// avoid computing the capacity in the common case
if (LIKELY(size < packedCapCode)) {
assert(size < packedCodeToCap(packedCapCode));
return true;
}
if (LIKELY(packedCapCode <= kPackedCapCodeThreshold)) {
assert(!(size < packedCodeToCap(packedCapCode)));
return false;
}
auto const cap = (packedCapCode - 0xFF00ul) << 8;
assert(cap <= kMaxPackedCap && packedCapCode <= cap);
assert((size < packedCodeToCap(packedCapCode)) == (size < cap));
return size < cap;
}

}

//////////////////////////////////////////////////////////////////////
Expand Down
Loading

0 comments on commit f5445e1

Please sign in to comment.