Skip to content

Commit

Permalink
Refactor/consolidate legacy Bloom implementation details (facebook#5784)
Browse files Browse the repository at this point in the history
Summary:
Refactoring to consolidate implementation details of legacy
Bloom filters. This helps to organize and document some related,
obscure code.

Also added make/cpp var TEST_CACHE_LINE_SIZE so that it's easy to
compile and run unit tests for non-native cache line size. (Fixed a
related test failure in db_properties_test.)
Pull Request resolved: facebook#5784

Test Plan:
make check, including recently added Bloom schema unit tests
(in ./plain_table_db_test && ./bloom_test), and including with
TEST_CACHE_LINE_SIZE=128U and TEST_CACHE_LINE_SIZE=256U. Tested the
schema tests with temporary fault injection into new implementations.

Some performance testing with modified unit tests suggests a small to moderate
improvement in speed.

Differential Revision: D17381384

Pulled By: pdillinger

fbshipit-source-id: ee42586da996798910fc45ac0b6289147f16d8df
  • Loading branch information
pdillinger authored and facebook-github-bot committed Sep 16, 2019
1 parent 638d239 commit 6862624
Show file tree
Hide file tree
Showing 11 changed files with 279 additions and 201 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@ ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
endif
endif

# Optional test hook: when TEST_CACHE_LINE_SIZE is set (e.g. 128U or 256U),
# forward it as a preprocessor define to both the C and C++ compile flags so
# the code and unit tests can be built for a non-native cache line size.
ifdef TEST_CACHE_LINE_SIZE
PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
endif

# This (the first rule) must depend on "all".
default: all

Expand Down
14 changes: 7 additions & 7 deletions db/db_properties_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,11 @@ void VerifySimilar(uint64_t a, uint64_t b, double bias) {
}
}

void VerifyTableProperties(const TableProperties& base_tp,
const TableProperties& new_tp,
double filter_size_bias = 0.1,
double index_size_bias = 0.1,
double data_size_bias = 0.1,
double num_data_blocks_bias = 0.05) {
void VerifyTableProperties(
const TableProperties& base_tp, const TableProperties& new_tp,
double filter_size_bias = CACHE_LINE_SIZE >= 256 ? 0.15 : 0.1,
double index_size_bias = 0.1, double data_size_bias = 0.1,
double num_data_blocks_bias = 0.05) {
VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias);
VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias);
VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias);
Expand Down Expand Up @@ -266,7 +265,8 @@ void GetExpectedTableProperties(
// discount 1 byte as value size is not encoded in value delta encoding
(value_delta_encoding ? 1 : 0));
expected_tp->filter_size =
kTableCount * (kKeysPerTable * kBloomBitsPerKey / 8);
kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 +
/*average-ish overhead*/ CACHE_LINE_SIZE / 2);
}
} // anonymous namespace

Expand Down
27 changes: 18 additions & 9 deletions port/port_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,22 +178,31 @@ typedef pthread_once_t OnceType;
extern void InitOnce(OnceType* once, void (*initializer)());

#ifndef CACHE_LINE_SIZE
#if defined(__s390__)
#define CACHE_LINE_SIZE 256U
#elif defined(__powerpc__) || defined(__aarch64__)
#define CACHE_LINE_SIZE 128U
#else
#define CACHE_LINE_SIZE 64U
#endif
// To test behavior with non-native cache line size, e.g. for
// Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size.
// This disables ALIGN_AS to keep it from failing compilation.
#ifdef TEST_CACHE_LINE_SIZE
#define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE
#define ALIGN_AS(n) /*empty*/
#else
#if defined(__s390__)
#define CACHE_LINE_SIZE 256U
#elif defined(__powerpc__) || defined(__aarch64__)
#define CACHE_LINE_SIZE 128U
#else
#define CACHE_LINE_SIZE 64U
#endif
#define ALIGN_AS(n) alignas(n)
#endif
#endif

static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0,
"Cache line size must be a power of 2 number of bytes");

extern void *cacheline_aligned_alloc(size_t size);

extern void cacheline_aligned_free(void *memblock);

#define ALIGN_AS(n) alignas(n)

#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)

extern void Crash(const std::string& srcfile, int srcline);
Expand Down
7 changes: 3 additions & 4 deletions table/full_filter_bits_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ class Slice;

class FullFilterBitsBuilder : public FilterBitsBuilder {
public:
explicit FullFilterBitsBuilder(const size_t bits_per_key,
const size_t num_probes);
explicit FullFilterBitsBuilder(const int bits_per_key, const int num_probes);

// No Copy allowed
FullFilterBitsBuilder(const FullFilterBitsBuilder&) = delete;
Expand Down Expand Up @@ -56,8 +55,8 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {

private:
friend class FullFilterBlockTest_DuplicateEntries_Test;
size_t bits_per_key_;
size_t num_probes_;
int bits_per_key_;
int num_probes_;
std::vector<uint32_t> hash_entries_;

// Get the total number of bits, optimized for the CPU cache line
Expand Down
8 changes: 4 additions & 4 deletions table/plain/plain_table_bloom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ uint32_t GetTotalBitsForLocality(uint32_t total_bits) {
PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes)
: kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {}

void PlainTableBloomV1::SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks) {
data_ = reinterpret_cast<uint8_t*>(raw_data);
void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits,
uint32_t num_blocks) {
data_ = raw_data;
kTotalBits = total_bits;
kNumBlocks = num_blocks;
}
Expand Down Expand Up @@ -63,7 +63,7 @@ void PlainTableBloomV1::SetTotalBits(Allocator* allocator,
if (kNumBlocks > 0 && cache_line_offset > 0) {
raw += CACHE_LINE_SIZE - cache_line_offset;
}
data_ = reinterpret_cast<uint8_t*>(raw);
data_ = raw;
}

void BloomBlockBuilder::AddKeysHashes(const std::vector<uint32_t>& keys_hashes) {
Expand Down
66 changes: 19 additions & 47 deletions table/plain/plain_table_bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@
#include "rocksdb/slice.h"

#include "port/port.h"
#include "util/bloom_impl.h"
#include "util/hash.h"

#include "third-party/folly/folly/ConstexprMath.h"

#include <memory>

namespace rocksdb {
Expand Down Expand Up @@ -50,12 +53,9 @@ class PlainTableBloomV1 {

uint32_t GetNumBlocks() const { return kNumBlocks; }

Slice GetRawData() const {
return Slice(reinterpret_cast<char*>(data_), GetTotalBits() / 8);
}
Slice GetRawData() const { return Slice(data_, GetTotalBits() / 8); }

void SetRawData(unsigned char* raw_data, uint32_t total_bits,
uint32_t num_blocks = 0);
void SetRawData(char* raw_data, uint32_t total_bits, uint32_t num_blocks = 0);

uint32_t GetTotalBits() const { return kTotalBits; }

Expand All @@ -66,7 +66,10 @@ class PlainTableBloomV1 {
uint32_t kNumBlocks;
const uint32_t kNumProbes;

uint8_t* data_;
char* data_;

static constexpr int LOG2_CACHE_LINE_SIZE =
folly::constexpr_log2(CACHE_LINE_SIZE);
};

#if defined(_MSC_VER)
Expand All @@ -76,8 +79,9 @@ class PlainTableBloomV1 {
#endif
inline void PlainTableBloomV1::Prefetch(uint32_t h) {
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
PREFETCH(&(data_[b / 8]), 0, 3);
uint32_t ignored;
LegacyLocalityBloomImpl</*ExtraRotates*/ true>::PrepareHashMayMatch(
h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE);
}
}
#if defined(_MSC_VER)
Expand All @@ -86,54 +90,22 @@ inline void PlainTableBloomV1::Prefetch(uint32_t h) {

inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
return LegacyLocalityBloomImpl<true>::HashMayMatch(
h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE);
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
if ((data_[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
return LegacyNoLocalityBloomImpl::HashMayMatch(h, kTotalBits, kNumProbes,
data_);
}
return true;
}

inline void PlainTableBloomV1::AddHash(uint32_t h) {
assert(IsInitialized());
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data_[bitpos / 8] |= (1 << (bitpos % 8));
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
LegacyLocalityBloomImpl<true>::AddHash(h, kNumBlocks, kNumProbes, data_,
LOG2_CACHE_LINE_SIZE);
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_);
}
}

Expand Down
7 changes: 3 additions & 4 deletions table/plain/plain_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,9 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
}
}
// cast away const qualifier, because bloom_ won't be changed
bloom_.SetRawData(
const_cast<unsigned char*>(
reinterpret_cast<const unsigned char*>(bloom_block->data())),
static_cast<uint32_t>(bloom_block->size()) * 8, num_blocks);
bloom_.SetRawData(const_cast<char*>(bloom_block->data()),
static_cast<uint32_t>(bloom_block->size()) * 8,
num_blocks);
} else {
// Index in file but no bloom in file. Disable bloom filter in this case.
enable_bloom_ = false;
Expand Down
28 changes: 28 additions & 0 deletions third-party/folly/folly/ConstexprMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,32 @@ template <typename T, typename... Ts>
constexpr T constexpr_max(T a, T b, Ts... ts) {
return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...);
}

namespace detail {

// Recursive worker for constexpr_log2. `halvings` counts how many times the
// input has been divided by two so far; `remaining` is what is left of it.
// The recursion ends when `remaining` reaches exactly 1. Written as a single
// return expression so it remains usable as a C++11 constexpr function.
template <typename T>
constexpr T constexpr_log2_(T halvings, T remaining) {
  return remaining != T(1)
      ? constexpr_log2_(halvings + T(1), remaining / T(2))
      : halvings;
}

// Worker for constexpr_log2_ceil. `floor_log2` is the value constexpr_log2
// produced for `value`; the result is one larger whenever 2^floor_log2 is
// still strictly below `value` (i.e. `value` is not an exact power of two).
template <typename T>
constexpr T constexpr_log2_ceil_(T floor_log2, T value) {
  return (T(1) << floor_log2) < value ? floor_log2 + T(1) : floor_log2;
}

// Returns x multiplied by itself.
template <typename T>
constexpr T constexpr_square_(T x) {
  return x * x;
}

} // namespace detail

// Compile-time base-2 logarithm, rounded down: the number of times `t` can be
// halved (integer division) before it becomes 1.
// `t` must be >= 1; for any other value the halving never reaches 1 and the
// recursion does not terminate.
template <typename T>
constexpr T constexpr_log2(T t) {
  return detail::constexpr_log2_(T(0), t);
}

// Compile-time base-2 logarithm, rounded up: equals constexpr_log2(t) when
// `t` is an exact power of two, and constexpr_log2(t) + 1 otherwise.
// Shares the `t >= 1` requirement of constexpr_log2.
template <typename T>
constexpr T constexpr_log2_ceil(T t) {
  return detail::constexpr_log2_ceil_(constexpr_log2(t), t);
}

} // namespace folly
Loading

0 comments on commit 6862624

Please sign in to comment.