Skip to content

Commit

Permalink
TablePropertiesCollectorFactory
Browse files Browse the repository at this point in the history
Summary:
This diff addresses task #4296714 and rethinks how users provide us with TablePropertiesCollectors as part of Options.

Here's the description of task #4296714:
       I'm debugging #4295529 and noticed that our count of user properties kDeletedKeys is wrong. We're sharing one single InternalKeyPropertiesCollector with all Table Builders. In LOG Files, we're outputting number of kDeletedKeys as connected with a single table, while it's actually the total count of deleted keys since creation of the DB.

       For example, this table has 3155 entries and 1391828 deleted keys.

The problem with the current approach is that we call methods on a single TablePropertiesCollector for all the tables we create. Even worse, we could do it from multiple threads at the same time, and TablePropertiesCollector has no way of knowing which table we're calling it for.

Good part: Looks like nobody inside Facebook is using Options::table_properties_collectors. This means we should be able to painlessly change the API.

In this change, I introduce TablePropertiesCollectorFactory. For every table we create, we call `CreateTablePropertiesCollector`, which creates a TablePropertiesCollector for a single table. We then use it sequentially from a single thread, which means it doesn't have to be thread-safe.

Test Plan:
Added a test in table_properties_collector_test that fails on master (build two tables, assert that kDeletedKeys count is correct for the second one).
Also, all other tests

Reviewers: sdong, dhruba, haobo, kailiu

Reviewed By: kailiu

CC: leveldb

Differential Revision: https://reviews.facebook.net/D18579
  • Loading branch information
igorcanadi committed May 13, 2014
1 parent 2082a7d commit 26f5dd9
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 118 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Rocksdb Change Log

## Unreleased (3.1.0)

### Public API changes
* Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories

## 3.0.0 (05/05/2014)

### Public API changes
Expand Down
15 changes: 9 additions & 6 deletions db/column_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,17 @@ ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
// All user defined properties collectors will be wrapped by
// UserKeyTablePropertiesCollector since for them they only have the
// knowledge of the user keys; internal keys are invisible to them.
auto& collectors = result.table_properties_collectors;
for (size_t i = 0; i < result.table_properties_collectors.size(); ++i) {
assert(collectors[i]);
collectors[i] =
std::make_shared<UserKeyTablePropertiesCollector>(collectors[i]);
auto& collector_factories = result.table_properties_collector_factories;
for (size_t i = 0; i < result.table_properties_collector_factories.size();
++i) {
assert(collector_factories[i]);
collector_factories[i] =
std::make_shared<UserKeyTablePropertiesCollectorFactory>(
collector_factories[i]);
}
// Add collector to collect internal key statistics
collectors.push_back(std::make_shared<InternalKeyPropertiesCollector>());
collector_factories.push_back(
std::make_shared<InternalKeyPropertiesCollectorFactory>());

return result;
}
Expand Down
49 changes: 36 additions & 13 deletions db/table_properties_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,30 @@ class InternalKeyPropertiesCollector : public TablePropertiesCollector {
uint64_t deleted_keys_ = 0;
};

// Factory that hands out a brand-new InternalKeyPropertiesCollector on every
// call, so that the state a collector accumulates is never shared between
// tables.
class InternalKeyPropertiesCollectorFactory
    : public TablePropertiesCollectorFactory {
 public:
  // Returns a newly allocated InternalKeyPropertiesCollector.
  // NOTE(review): the result is a raw pointer from `new`; the caller is
  // presumably expected to take ownership of it -- confirm against callers.
  virtual TablePropertiesCollector* CreateTablePropertiesCollector() override {
    return new InternalKeyPropertiesCollector();
  }

  // Name of this factory; intended for debugging output.
  virtual const char* Name() const override {
    return "InternalKeyPropertiesCollectorFactory";
  }
};

// When rocksdb creates a new table, it will encode all "user keys" into
// "internal keys", which contains meta information of a given entry.
//
// This class extracts user key from the encoded internal key when Add() is
// invoked.
class UserKeyTablePropertiesCollector : public TablePropertiesCollector {
public:
explicit UserKeyTablePropertiesCollector(
TablePropertiesCollector* collector) :
UserKeyTablePropertiesCollector(
std::shared_ptr<TablePropertiesCollector>(collector)
) {
}

explicit UserKeyTablePropertiesCollector(
std::shared_ptr<TablePropertiesCollector> collector) :
collector_(collector) {
}
// transfer of ownership
explicit UserKeyTablePropertiesCollector(TablePropertiesCollector* collector)
: collector_(collector) {}

virtual ~UserKeyTablePropertiesCollector() { }
virtual ~UserKeyTablePropertiesCollector() {}

virtual Status Add(const Slice& key, const Slice& value) override;

Expand All @@ -66,7 +70,26 @@ class UserKeyTablePropertiesCollector : public TablePropertiesCollector {
UserCollectedProperties GetReadableProperties() const override;

protected:
std::shared_ptr<TablePropertiesCollector> collector_;
std::unique_ptr<TablePropertiesCollector> collector_;
};

// Factory adapter around a user-supplied TablePropertiesCollectorFactory.
// Every collector produced by the wrapped factory is itself wrapped in a
// UserKeyTablePropertiesCollector before being returned.
class UserKeyTablePropertiesCollectorFactory
    : public TablePropertiesCollectorFactory {
 public:
  // `user_collector_factory` is dereferenced unconditionally by the methods
  // below, so it must not be null.
  explicit UserKeyTablePropertiesCollectorFactory(
      std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory)
      : user_collector_factory_(user_collector_factory) {}

  // Asks the wrapped factory for a new collector and returns it wrapped in a
  // newly allocated UserKeyTablePropertiesCollector.
  // NOTE(review): the result is a raw pointer from `new`; the caller is
  // presumably expected to take ownership of it -- confirm against callers.
  virtual TablePropertiesCollector* CreateTablePropertiesCollector() override {
    return new UserKeyTablePropertiesCollector(
        user_collector_factory_->CreateTablePropertiesCollector());
  }

  // Forwards to the wrapped factory, so the user's own name is reported.
  virtual const char* Name() const override {
    return user_collector_factory_->Name();
  }

 private:
  // The user-provided factory this adapter delegates to (shared ownership).
  std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory_;
};

} // namespace rocksdb
89 changes: 45 additions & 44 deletions db/table_properties_collector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,18 @@ class RegularKeysStartWithA: public TablePropertiesCollector {
return UserCollectedProperties{};
}


private:
uint32_t count_ = 0;
};

class RegularKeysStartWithAFactory : public TablePropertiesCollectorFactory {
public:
virtual TablePropertiesCollector* CreateTablePropertiesCollector() {
return new RegularKeysStartWithA();
}
const char* Name() const { return "RegularKeysStartWithA"; }
};

extern uint64_t kBlockBasedTableMagicNumber;
extern uint64_t kPlainTableMagicNumber;
namespace {
Expand Down Expand Up @@ -188,14 +195,14 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {
// for block based table
for (bool encode_as_internal : { true, false }) {
Options options;
auto collector = new RegularKeysStartWithA();
std::shared_ptr<TablePropertiesCollectorFactory> collector_factory(
new RegularKeysStartWithAFactory());
if (encode_as_internal) {
options.table_properties_collectors = {
std::make_shared<UserKeyTablePropertiesCollector>(collector)
};
options.table_properties_collector_factories.emplace_back(
new UserKeyTablePropertiesCollectorFactory(collector_factory));
} else {
options.table_properties_collectors.resize(1);
options.table_properties_collectors[0].reset(collector);
options.table_properties_collector_factories.resize(1);
options.table_properties_collector_factories[0] = collector_factory;
}
test::PlainInternalKeyComparator ikc(options.comparator);
TestCustomizedTablePropertiesCollector(kBlockBasedTableMagicNumber,
Expand All @@ -204,9 +211,8 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) {

// test plain table
Options options;
options.table_properties_collectors.push_back(
std::make_shared<RegularKeysStartWithA>()
);
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory());
options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0);
test::PlainInternalKeyComparator ikc(options.comparator);
TestCustomizedTablePropertiesCollector(kPlainTableMagicNumber, true, options,
Expand Down Expand Up @@ -235,9 +241,8 @@ void TestInternalKeyPropertiesCollector(

options.table_factory = table_factory;
if (sanitized) {
options.table_properties_collectors = {
std::make_shared<RegularKeysStartWithA>()
};
options.table_properties_collector_factories.emplace_back(
new RegularKeysStartWithAFactory());
// with sanitization, even regular properties collector will be able to
// handle internal keys.
auto comparator = options.comparator;
Expand All @@ -249,40 +254,36 @@ void TestInternalKeyPropertiesCollector(
options);
options.comparator = comparator;
} else {
options.table_properties_collectors = {
std::make_shared<InternalKeyPropertiesCollector>()
};
}

MakeBuilder(options, pikc, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
options.table_properties_collector_factories = {
std::make_shared<InternalKeyPropertiesCollectorFactory>()};
}

ASSERT_OK(builder->Finish());

FakeRandomeAccessFile readable(writable->contents());
TableProperties* props;
Status s = ReadTableProperties(
&readable,
writable->contents().size(),
magic_number,
Env::Default(),
nullptr,
&props
);
ASSERT_OK(s);

std::unique_ptr<TableProperties> props_guard(props);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);
for (int iter = 0; iter < 2; ++iter) {
MakeBuilder(options, pikc, &writable, &builder);
for (const auto& k : keys) {
builder->Add(k.Encode(), "val");
}

if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
ASSERT_OK(builder->Finish());

FakeRandomeAccessFile readable(writable->contents());
TableProperties* props;
Status s =
ReadTableProperties(&readable, writable->contents().size(),
magic_number, Env::Default(), nullptr, &props);
ASSERT_OK(s);

std::unique_ptr<TableProperties> props_guard(props);
auto user_collected = props->user_collected_properties;
uint64_t deleted = GetDeletedKeys(user_collected);
ASSERT_EQ(4u, deleted);

if (sanitized) {
uint32_t starts_with_A = 0;
Slice key(user_collected.at("Count"));
ASSERT_TRUE(GetVarint32(&key, &starts_with_A));
ASSERT_EQ(1u, starts_with_A);
}
}
}
} // namespace
Expand Down
10 changes: 5 additions & 5 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class MergeOperator;
class Snapshot;
class TableFactory;
class MemTableRepFactory;
class TablePropertiesCollector;
class TablePropertiesCollectorFactory;
class Slice;
class SliceTransform;
class Statistics;
Expand Down Expand Up @@ -455,11 +455,11 @@ struct ColumnFamilyOptions {

// This option allows users to collect their own statistics of interest
// about the tables.
// Default: emtpy vector -- no user-defined statistics collection will be
// Default: empty vector -- no user-defined statistics collection will be
// performed.
typedef std::vector<std::shared_ptr<TablePropertiesCollector>>
TablePropertiesCollectors;
TablePropertiesCollectors table_properties_collectors;
typedef std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
TablePropertiesCollectorFactories;
TablePropertiesCollectorFactories table_properties_collector_factories;

// Allows thread-safe inplace updates.
// If inplace_callback function is not set,
Expand Down
23 changes: 19 additions & 4 deletions include/rocksdb/table_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ extern const std::string kPropertiesBlock;

// `TablePropertiesCollector` provides the mechanism for users to collect
// their own interested properties. This class is essentially a collection
// of callback functions that will be invoked during table building.
// of callback functions that will be invoked during table building.
// It is constructed by a TablePropertiesCollectorFactory. The methods don't
// need to be thread-safe, as we will create exactly one
// TablePropertiesCollector object per table and then call it sequentially.
class TablePropertiesCollector {
public:
virtual ~TablePropertiesCollector() {}
Expand All @@ -95,12 +98,24 @@ class TablePropertiesCollector {
// `properties`.
virtual Status Finish(UserCollectedProperties* properties) = 0;

// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;

// Return the human-readable properties, where the key is property name and
// the value is the human-readable form of value.
virtual UserCollectedProperties GetReadableProperties() const = 0;

// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
};

// Constructs TablePropertiesCollector objects. Internals create a new
// TablePropertiesCollector for each new table by calling
// CreateTablePropertiesCollector().
class TablePropertiesCollectorFactory {
public:
virtual ~TablePropertiesCollectorFactory() {}
// Has to be thread-safe.
// Returns a new TablePropertiesCollector.
// NOTE(review): the result is a raw pointer; the caller is presumably
// expected to take ownership of it -- confirm against callers.
virtual TablePropertiesCollector* CreateTablePropertiesCollector() = 0;

// The name of the properties collector factory; can be used for debugging
// purposes.
virtual const char* Name() const = 0;
};

// Extra properties
Expand Down
17 changes: 14 additions & 3 deletions include/rocksdb/version.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once

// Also update Makefile if you change these
#define __ROCKSDB_MAJOR__ 3
#define __ROCKSDB_MINOR__ 0
#define __ROCKSDB_PATCH__ 0
#define ROCKSDB_MAJOR 3
#define ROCKSDB_MINOR 1
#define ROCKSDB_PATCH 0

// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these
// at some point
#define __ROCKSDB_MAJOR__ ROCKSDB_MAJOR
#define __ROCKSDB_MINOR__ ROCKSDB_MINOR
#define __ROCKSDB_PATCH__ ROCKSDB_PATCH
30 changes: 16 additions & 14 deletions table/block_based_table_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ struct BlockBasedTableBuilder::Rep {
std::string compressed_output;
std::unique_ptr<FlushBlockPolicy> flush_block_policy;

std::vector<std::unique_ptr<TablePropertiesCollector>>
table_properties_collectors;

Rep(const Options& opt, const InternalKeyComparator& icomparator,
WritableFile* f, FlushBlockPolicyFactory* flush_block_policy_factory,
CompressionType compression_type, IndexType index_block_type,
Expand All @@ -322,8 +325,13 @@ struct BlockBasedTableBuilder::Rep {
: new FilterBlockBuilder(opt, &internal_comparator)),
flush_block_policy(flush_block_policy_factory->NewFlushBlockPolicy(
options, data_block)) {
options.table_properties_collectors.push_back(
std::make_shared<BlockBasedTablePropertiesCollector>(index_block_type));
for (auto& collector_factories :
options.table_properties_collector_factories) {
table_properties_collectors.emplace_back(
collector_factories->CreateTablePropertiesCollector());
}
table_properties_collectors.emplace_back(
new BlockBasedTablePropertiesCollector(index_block_type));
}
};

Expand Down Expand Up @@ -391,12 +399,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
r->props.raw_key_size += key.size();
r->props.raw_value_size += value.size();

NotifyCollectTableCollectorsOnAdd(
key,
value,
r->options.table_properties_collectors,
r->options.info_log.get()
);
NotifyCollectTableCollectorsOnAdd(key, value, r->table_properties_collectors,
r->options.info_log.get());
}

void BlockBasedTableBuilder::Flush() {
Expand Down Expand Up @@ -590,11 +594,9 @@ Status BlockBasedTableBuilder::Finish() {
property_block_builder.AddTableProperty(r->props);

// Add user collected properties
NotifyCollectTableCollectorsOnFinish(
r->options.table_properties_collectors,
r->options.info_log.get(),
&property_block_builder
);
NotifyCollectTableCollectorsOnFinish(r->table_properties_collectors,
r->options.info_log.get(),
&property_block_builder);

BlockHandle properties_block_handle;
WriteRawBlock(
Expand Down Expand Up @@ -647,7 +649,7 @@ Status BlockBasedTableBuilder::Finish() {
// user collected properties
std::string user_collected;
user_collected.reserve(1024);
for (auto collector : r->options.table_properties_collectors) {
for (const auto& collector : r->table_properties_collectors) {
for (const auto& prop : collector->GetReadableProperties()) {
user_collected.append(prop.first);
user_collected.append("=");
Expand Down
1 change: 1 addition & 0 deletions table/block_based_table_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class BlockBasedTableBuilder : public TableBuilder {
const CompressionType type,
const BlockHandle* handle);
struct Rep;
class BlockBasedTablePropertiesCollectorFactory;
class BlockBasedTablePropertiesCollector;
Rep* rep_;

Expand Down
Loading

0 comments on commit 26f5dd9

Please sign in to comment.