Skip to content

Commit

Permalink
Added bloom filter support.
Browse files Browse the repository at this point in the history
In particular, we add a new FilterPolicy class.  An instance
of this class can be supplied in Options when opening a
database.  If supplied, the instance is used to generate
summaries of keys (e.g., a bloom filter) which are placed in
sstables.  These summaries are consulted by DB::Get() so we
can avoid reading sstable blocks that are guaranteed to not
contain the key we are looking for.

This change provides one implementation of FilterPolicy
based on bloom filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
  • Loading branch information
ghemawat committed Apr 17, 2012
1 parent bc1ee4d commit 85584d4
Show file tree
Hide file tree
Showing 38 changed files with 2,078 additions and 578 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
build_config.mk
*.a
*.o
*.dylib*
*.so
*.so.*
*_test
db_bench
14 changes: 11 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode)
#-----------------------------------------------

# detect what platform we're building on
$(shell sh ./build_detect_platform)
# this file is generated by build_detect_platform to set build flags and sources
$(shell ./build_detect_platform build_config.mk)
# this file is generated by the previous line to set build flags and sources
include build_config.mk

CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
Expand All @@ -34,6 +34,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)

TESTS = \
arena_test \
bloom_test \
c_test \
cache_test \
coding_test \
Expand All @@ -43,6 +44,7 @@ TESTS = \
dbformat_test \
env_test \
filename_test \
filter_block_test \
log_test \
memenv_test \
skiplist_test \
Expand All @@ -63,7 +65,7 @@ default: all
ifneq ($(PLATFORM_SHARED_EXT),)
# Update db.h if you change these.
SHARED_MAJOR = 1
SHARED_MINOR = 3
SHARED_MINOR = 4
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
Expand Down Expand Up @@ -101,6 +103,9 @@ db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

Expand Down Expand Up @@ -128,6 +133,9 @@ env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)

Expand Down
38 changes: 21 additions & 17 deletions build_detect_platform
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/sh
#
# Detects OS we're compiling on and generates build_config.mk,
# which in turn gets read while processing Makefile.
# Detects OS we're compiling on and outputs a file specified by the first
# argument, which in turn gets read while processing Makefile.
#
# build_config.mk will set the following variables:
# The output will set the following variables:
# PLATFORM_LDFLAGS Linker flags
# PLATFORM_SHARED_EXT Extension for shared libraries
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
Expand All @@ -13,11 +13,15 @@
# -DLEVELDB_PLATFORM_POSIX if cstdatomic is present
# -DLEVELDB_PLATFORM_NOATOMIC if it is not

SCRIPT_DIR=`dirname $0`
OUTPUT=$1
if test -z "$OUTPUT"; then
echo "usage: $0 <output-filename>"
exit 1
fi

# Delete existing build_config.mk
rm -f build_config.mk
touch build_config.mk
# Delete existing output, if it exists
rm -f $OUTPUT
touch $OUTPUT

if test -z "$CXX"; then
CXX=g++
Expand Down Expand Up @@ -96,7 +100,7 @@ esac
# except for the test and benchmark files. By default, find will output a list
# of all files matching either rule, so we need to append -print to make the
# prune take effect.
DIRS="$SCRIPT_DIR/util $SCRIPT_DIR/db $SCRIPT_DIR/table"
DIRS="util db table"
set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune"
Expand All @@ -105,8 +109,8 @@ set +f # re-enable globbing

# The sources consist of the portable files, plus the platform-specific port
# file.
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> build_config.mk
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> build_config.mk
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT

if [ "$PLATFORM" = "OS_ANDROID_CROSSCOMPILE" ]; then
# Cross-compiling; do not try any compilation tests.
Expand Down Expand Up @@ -147,10 +151,10 @@ fi
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

echo "PLATFORM=$PLATFORM" >> build_config.mk
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> build_config.mk
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> build_config.mk
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> build_config.mk
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> build_config.mk
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> build_config.mk
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> build_config.mk
echo "PLATFORM=$PLATFORM" >> $OUTPUT
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
110 changes: 110 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "leveldb/comparator.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/filter_policy.h"
#include "leveldb/iterator.h"
#include "leveldb/options.h"
#include "leveldb/status.h"
Expand All @@ -21,8 +22,10 @@ using leveldb::CompressionType;
using leveldb::DB;
using leveldb::Env;
using leveldb::FileLock;
using leveldb::FilterPolicy;
using leveldb::Iterator;
using leveldb::Logger;
using leveldb::NewBloomFilterPolicy;
using leveldb::NewLRUCache;
using leveldb::Options;
using leveldb::RandomAccessFile;
Expand Down Expand Up @@ -78,6 +81,47 @@ struct leveldb_comparator_t : public Comparator {
virtual void FindShortSuccessor(std::string* key) const { }
};

struct leveldb_filterpolicy_t : public FilterPolicy {
void* state_;
void (*destructor_)(void*);
const char* (*name_)(void*);
char* (*create_)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length);
unsigned char (*key_match_)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length);

virtual ~leveldb_filterpolicy_t() {
(*destructor_)(state_);
}

virtual const char* Name() const {
return (*name_)(state_);
}

virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
std::vector<const char*> key_pointers(n);
std::vector<size_t> key_sizes(n);
for (int i = 0; i < n; i++) {
key_pointers[i] = keys[i].data();
key_sizes[i] = keys[i].size();
}
size_t len;
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
dst->append(filter, len);
free(filter);
}

virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
return (*key_match_)(state_, key.data(), key.size(),
filter.data(), filter.size());
}
};

struct leveldb_env_t {
Env* rep;
bool is_default;
Expand Down Expand Up @@ -218,6 +262,17 @@ void leveldb_approximate_sizes(
delete[] ranges;
}

void leveldb_compact_range(
leveldb_t* db,
const char* start_key, size_t start_key_len,
const char* limit_key, size_t limit_key_len) {
Slice a, b;
db->rep->CompactRange(
// Pass NULL Slice if corresponding "const char*" is NULL
(start_key ? (a = Slice(start_key, start_key_len), &a) : NULL),
(limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
}

void leveldb_destroy_db(
const leveldb_options_t* options,
const char* name,
Expand Down Expand Up @@ -340,6 +395,12 @@ void leveldb_options_set_comparator(
opt->rep.comparator = cmp;
}

void leveldb_options_set_filter_policy(
leveldb_options_t* opt,
leveldb_filterpolicy_t* policy) {
opt->rep.filter_policy = policy;
}

void leveldb_options_set_create_if_missing(
leveldb_options_t* opt, unsigned char v) {
opt->rep.create_if_missing = v;
Expand Down Expand Up @@ -407,6 +468,55 @@ void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
delete cmp;
}

leveldb_filterpolicy_t* leveldb_filterpolicy_create(
void* state,
void (*destructor)(void*),
char* (*create_filter)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length),
unsigned char (*key_may_match)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length),
const char* (*name)(void*)) {
leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
result->state_ = state;
result->destructor_ = destructor;
result->create_ = create_filter;
result->key_match_ = key_may_match;
result->name_ = name;
return result;
}

void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
delete filter;
}

leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
// Make a leveldb_filterpolicy_t, but override all of its methods so
// they delegate to a NewBloomFilterPolicy() instead of user
// supplied C functions.
struct Wrapper : public leveldb_filterpolicy_t {
const FilterPolicy* rep_;
~Wrapper() { delete rep_; }
const char* Name() const { return rep_->Name(); }
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
return rep_->CreateFilter(keys, n, dst);
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
return rep_->KeyMayMatch(key, filter);
}
static void DoNothing(void*) { }
};
Wrapper* wrapper = new Wrapper;
wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
wrapper->state_ = NULL;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}

leveldb_readoptions_t* leveldb_readoptions_create() {
return new leveldb_readoptions_t;
}
Expand Down
77 changes: 77 additions & 0 deletions db/c_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,31 @@ static const char* CmpName(void* arg) {
return "foo";
}

// Custom filter policy
static unsigned char fake_filter_result = 1;
static void FilterDestroy(void* arg) { }
static const char* FilterName(void* arg) {
return "TestFilter";
}
static char* FilterCreate(
void* arg,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length) {
*filter_length = 4;
char* result = malloc(4);
memcpy(result, "fake", 4);
return result;
}
unsigned char FilterKeyMatch(
void* arg,
const char* key, size_t length,
const char* filter, size_t filter_length) {
CheckCondition(filter_length == 4);
CheckCondition(memcmp(filter, "fake", 4) == 0);
return fake_filter_result;
}

int main(int argc, char** argv) {
leveldb_t* db;
leveldb_comparator_t* cmp;
Expand All @@ -131,6 +156,7 @@ int main(int argc, char** argv) {
leveldb_readoptions_t* roptions;
leveldb_writeoptions_t* woptions;
char* err = NULL;
int run = -1;

snprintf(dbname, sizeof(dbname), "/tmp/leveldb_c_test-%d",
((int) geteuid()));
Expand Down Expand Up @@ -180,6 +206,14 @@ int main(int argc, char** argv) {
CheckNoError(err);
CheckGet(db, roptions, "foo", "hello");

StartPhase("compactall");
leveldb_compact_range(db, NULL, 0, NULL, 0);
CheckGet(db, roptions, "foo", "hello");

StartPhase("compactrange");
leveldb_compact_range(db, "a", 1, "z", 1);
CheckGet(db, roptions, "foo", "hello");

StartPhase("writebatch");
{
leveldb_writebatch_t* wb = leveldb_writebatch_create();
Expand Down Expand Up @@ -279,6 +313,49 @@ int main(int argc, char** argv) {
CheckGet(db, roptions, "foo", NULL);
CheckGet(db, roptions, "bar", NULL);
CheckGet(db, roptions, "box", "c");
leveldb_options_set_create_if_missing(options, 1);
leveldb_options_set_error_if_exists(options, 1);
}

StartPhase("filter");
for (run = 0; run < 2; run++) {
// First run uses custom filter, second run uses bloom filter
CheckNoError(err);
leveldb_filterpolicy_t* policy;
if (run == 0) {
policy = leveldb_filterpolicy_create(
NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
} else {
policy = leveldb_filterpolicy_create_bloom(10);
}

// Create new database
leveldb_close(db);
leveldb_destroy_db(options, dbname, &err);
leveldb_options_set_filter_policy(options, policy);
db = leveldb_open(options, dbname, &err);
CheckNoError(err);
leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
CheckNoError(err);
leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
CheckNoError(err);
leveldb_compact_range(db, NULL, 0, NULL, 0);

fake_filter_result = 1;
CheckGet(db, roptions, "foo", "foovalue");
CheckGet(db, roptions, "bar", "barvalue");
if (phase == 0) {
// Must not find value when custom filter returns false
fake_filter_result = 0;
CheckGet(db, roptions, "foo", NULL);
CheckGet(db, roptions, "bar", NULL);
fake_filter_result = 1;

CheckGet(db, roptions, "foo", "foovalue");
CheckGet(db, roptions, "bar", "barvalue");
}
leveldb_options_set_filter_policy(options, NULL);
leveldb_filterpolicy_destroy(policy);
}

StartPhase("cleanup");
Expand Down
Loading

0 comments on commit 85584d4

Please sign in to comment.