Skip to content

Commit

Permalink
[RocksDB] [Performance] Allow different posix advice to be applied to…
Browse files Browse the repository at this point in the history
… the same table file

Summary:
The current posix advice implementation ties the access pattern hint to the creation of a file.
It is not possible to apply different advice for different kinds of access (random get vs. compaction read)
without keeping two open files for the same table. This patch extends the RandomAccessFile interface
to accept a new access hint at any time. In particular, we are able to set different access hints on the same
table file based on when/how the file is used.
Two options are added to set the access hint: one applied right after the file is first opened, and one
applied when the file starts being compacted.

Test Plan: make check; db_stress; db_bench

Reviewers: dhruba

Reviewed By: dhruba

CC: MarkCallaghan, leveldb

Differential Revision: https://reviews.facebook.net/D10905
  • Loading branch information
haoboxu committed May 31, 2013
1 parent 2df65c1 commit ab8d2f6
Show file tree
Hide file tree
Showing 12 changed files with 146 additions and 22 deletions.
33 changes: 30 additions & 3 deletions db/db_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,14 @@ static bool FLAGS_use_mmap_writes;
// Allow readaheads to occur for compactions
static bool FLAGS_use_readahead_compactions;

// Advise random access on table file open.
// Initialized from the library default (leveldb::Options) and overridable on
// the command line with --advise_random_on_open=0|1.
static bool FLAGS_advise_random_on_open =
leveldb::Options().advise_random_on_open;

// Access pattern advice when a file is compacted.
// Initialized from the library default (leveldb::Options) and overridable on
// the command line with --compaction_fadvice=NONE|NORMAL|SEQUENTIAL|WILLNEED
// (the value is matched case-insensitively).
static auto FLAGS_compaction_fadvice =
leveldb::Options().access_hint_on_compaction_start;

namespace leveldb {

// Helper for quickly generating random data.
Expand Down Expand Up @@ -900,6 +908,7 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
}

if (method != nullptr) {
fprintf(stdout, "DB path: [%s]\n", FLAGS_db);
RunBenchmark(num_threads, name, method);
}
}
Expand Down Expand Up @@ -1138,6 +1147,8 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
options.allow_mmap_reads = FLAGS_use_mmap_reads;
options.allow_mmap_writes = FLAGS_use_mmap_writes;
options.allow_readahead_compactions = FLAGS_use_readahead_compactions;
options.advise_random_on_open = FLAGS_advise_random_on_open;
options.access_hint_on_compaction_start = FLAGS_compaction_fadvice;
Status s;
if(FLAGS_read_only) {
s = DB::OpenForReadOnly(options, FLAGS_db, &db_);
Expand Down Expand Up @@ -1731,8 +1742,9 @@ int main(int argc, char** argv) {
int n;
long l;
char junk;
char hdfsname[2048];
char buf[2048];
char str[512];

if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
} else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
Expand Down Expand Up @@ -1848,8 +1860,8 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--get_approx=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) {
FLAGS_get_approx = n;
} else if (sscanf(argv[i], "--hdfs=%s", hdfsname) == 1) {
FLAGS_env = new leveldb::HdfsEnv(hdfsname);
} else if (sscanf(argv[i], "--hdfs=%s", buf) == 1) {
FLAGS_env = new leveldb::HdfsEnv(buf);
} else if (sscanf(argv[i], "--num_levels=%d%c",
&n, &junk) == 1) {
FLAGS_num_levels = n;
Expand Down Expand Up @@ -1931,6 +1943,21 @@ int main(int argc, char** argv) {
FLAGS_source_compaction_factor = n;
} else if (sscanf(argv[i], "--wal_ttl=%d%c", &n, &junk) == 1) {
FLAGS_WAL_ttl_seconds = static_cast<uint64_t>(n);
} else if (sscanf(argv[i], "--advise_random_on_open=%d%c", &n, &junk) == 1
&& (n == 0 || n ==1 )) {
FLAGS_advise_random_on_open = n;
} else if (sscanf(argv[i], "--compaction_fadvice=%s", buf) == 1) {
if (!strcasecmp(buf, "NONE"))
FLAGS_compaction_fadvice = leveldb::Options::NONE;
else if (!strcasecmp(buf, "NORMAL"))
FLAGS_compaction_fadvice = leveldb::Options::NORMAL;
else if (!strcasecmp(buf, "SEQUENTIAL"))
FLAGS_compaction_fadvice = leveldb::Options::SEQUENTIAL;
else if (!strcasecmp(buf, "WILLNEED"))
FLAGS_compaction_fadvice = leveldb::Options::WILLNEED;
else {
fprintf(stdout, "Unknown compaction fadvice:%s\n", buf);
}
} else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1);
Expand Down
11 changes: 10 additions & 1 deletion db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ Status TableCache::FindTable(const EnvOptions& toptions,
s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics, NO_FILE_OPENS);
if (s.ok()) {
if (options_->advise_random_on_open) {
file->Hint(RandomAccessFile::RANDOM);
}
s = Table::Open(*options_, toptions, std::move(file), file_size, &table);
}

Expand All @@ -74,7 +77,8 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr) {
Table** tableptr,
bool for_compaction) {
if (tableptr != nullptr) {
*tableptr = nullptr;
}
Expand All @@ -92,6 +96,11 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
if (tableptr != nullptr) {
*tableptr = table;
}

if (for_compaction) {
table->SetAccessHintForCompaction();
}

return result;
}

Expand Down
3 changes: 2 additions & 1 deletion db/table_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class TableCache {
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr = nullptr);
Table** tableptr = nullptr,
bool for_compaction = false);

// If a seek to internal key "k" in specified file finds an entry,
// call (*handle_result)(arg, found_key, found_value) repeatedly until
Expand Down
13 changes: 9 additions & 4 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ class Version::LevelFileNumIterator : public Iterator {
static Iterator* GetFileIterator(void* arg,
const ReadOptions& options,
const EnvOptions& soptions,
const Slice& file_value) {
const Slice& file_value,
bool for_compaction) {
TableCache* cache = reinterpret_cast<TableCache*>(arg);
if (file_value.size() != 16) {
return NewErrorIterator(
Expand All @@ -189,7 +190,9 @@ static Iterator* GetFileIterator(void* arg,
return cache->NewIterator(options,
soptions,
DecodeFixed64(file_value.data()),
DecodeFixed64(file_value.data() + 8));
DecodeFixed64(file_value.data() + 8),
nullptr /* don't need reference to table*/,
for_compaction);
}
}

Expand Down Expand Up @@ -1834,13 +1837,15 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
for (size_t i = 0; i < files.size(); i++) {
list[num++] = table_cache_->NewIterator(
options, storage_options_compactions_,
files[i]->number, files[i]->file_size);
files[i]->number, files[i]->file_size, nullptr,
true /* for compaction */);
}
} else {
// Create concatenating iterator for the files from this level
list[num++] = NewTwoLevelIterator(
new Version::LevelFileNumIterator(icmp_, &c->inputs_[which]),
&GetFileIterator, table_cache_, options, storage_options_);
&GetFileIterator, table_cache_, options, storage_options_,
true /* for compaction */);
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions include/leveldb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ class RandomAccessFile {
return 0; // Default implementation to prevent issues with backwards
// compatibility.
};


// Access patterns that a caller can pass to Hint().
enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };

// Tells the file implementation which access pattern to expect from now on.
// It may be called at any point after the file has been opened.
// The default implementation ignores the hint, so subclasses that do not
// override it are unaffected.
virtual void Hint(AccessPattern pattern) {}

};

// A file abstraction for sequential writing. The implementation
Expand Down
10 changes: 10 additions & 0 deletions include/leveldb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,16 @@ struct Options {
// new record will be written to the next block.
// Default is 10.
int block_size_deviation;

// If set true, will hint the underlying file system that the file
// access pattern is random, when a sst file is opened.
// Default: true
bool advise_random_on_open;

// Specify the file access pattern once a compaction is started.
// It will be applied to all input files of a compaction.
// Default: NORMAL
enum { NONE, NORMAL, SEQUENTIAL, WILLNEED } access_hint_on_compaction_start;
};

// Options that control read operations
Expand Down
24 changes: 22 additions & 2 deletions table/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,24 @@ Status Table::Open(const Options& options,
return s;
}

// Translates Options::access_hint_on_compaction_start into the matching
// RandomAccessFile access pattern and passes it to the table's file.
// Options::NONE leaves the file untouched; any value outside the enum trips
// an assertion and issues no hint.
void Table::SetAccessHintForCompaction() {
  const auto configured_hint = rep_->options.access_hint_on_compaction_start;

  if (configured_hint == Options::NONE) {
    return;  // Caller asked for no advice at compaction start.
  }
  if (configured_hint == Options::NORMAL) {
    rep_->file->Hint(RandomAccessFile::NORMAL);
    return;
  }
  if (configured_hint == Options::SEQUENTIAL) {
    rep_->file->Hint(RandomAccessFile::SEQUENTIAL);
    return;
  }
  if (configured_hint == Options::WILLNEED) {
    rep_->file->Hint(RandomAccessFile::WILLNEED);
    return;
  }

  // Not a member of the options enum.
  assert(false);
}

void Table::ReadMeta(const Footer& footer) {
if (rep_->options.filter_policy == nullptr) {
return; // Do not need any metadata
Expand Down Expand Up @@ -273,7 +291,8 @@ Iterator* Table::BlockReader(void* arg,
Iterator* Table::BlockReader(void* arg,
const ReadOptions& options,
const EnvOptions& soptions,
const Slice& index_value) {
const Slice& index_value,
bool for_compaction) {
return BlockReader(arg, options, index_value, nullptr);
}

Expand All @@ -285,7 +304,8 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {

Status Table::InternalGet(const ReadOptions& options, const Slice& k,
void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool)) {
bool (*saver)(void*, const Slice&, const Slice&,
bool)) {
Status s;
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
bool done = false;
Expand Down
5 changes: 4 additions & 1 deletion table/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,16 @@ class Table {
// REQUIRES: key is in this table.
bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);

// Passes the access pattern configured in
// Options::access_hint_on_compaction_start to this table's underlying file.
// Does nothing when that option is NONE.
void SetAccessHintForCompaction();

private:
struct Rep;
Rep* rep_;

explicit Table(Rep* rep) { rep_ = rep; }
static Iterator* BlockReader(void*, const ReadOptions&,
const EnvOptions& soptions, const Slice&);
const EnvOptions& soptions, const Slice&,
bool for_compaction);
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&,
bool* didIO);

Expand Down
21 changes: 14 additions & 7 deletions table/two_level_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace leveldb {
namespace {

typedef Iterator* (*BlockFunction)(void*, const ReadOptions&,
const EnvOptions& soptions, const Slice&);
const EnvOptions& soptions, const Slice&,
bool for_compaction);

class TwoLevelIterator: public Iterator {
public:
Expand All @@ -23,7 +24,8 @@ class TwoLevelIterator: public Iterator {
BlockFunction block_function,
void* arg,
const ReadOptions& options,
const EnvOptions& soptions);
const EnvOptions& soptions,
bool for_compaction);

virtual ~TwoLevelIterator();

Expand Down Expand Up @@ -74,20 +76,23 @@ class TwoLevelIterator: public Iterator {
// If data_iter_ is non-nullptr, then "data_block_handle_" holds the
// "index_value" passed to block_function_ to create the data_iter_.
std::string data_block_handle_;
bool for_compaction_;
};

TwoLevelIterator::TwoLevelIterator(
Iterator* index_iter,
BlockFunction block_function,
void* arg,
const ReadOptions& options,
const EnvOptions& soptions)
const EnvOptions& soptions,
bool for_compaction)
: block_function_(block_function),
arg_(arg),
options_(options),
soptions_(soptions),
index_iter_(index_iter),
data_iter_(nullptr) {
data_iter_(nullptr),
for_compaction_(for_compaction) {
}

TwoLevelIterator::~TwoLevelIterator() {
Expand Down Expand Up @@ -168,7 +173,8 @@ void TwoLevelIterator::InitDataBlock() {
// data_iter_ is already constructed with this iterator, so
// no need to change anything
} else {
Iterator* iter = (*block_function_)(arg_, options_, soptions_, handle);
Iterator* iter = (*block_function_)(arg_, options_, soptions_, handle,
for_compaction_);
data_block_handle_.assign(handle.data(), handle.size());
SetDataIterator(iter);
}
Expand All @@ -182,9 +188,10 @@ Iterator* NewTwoLevelIterator(
BlockFunction block_function,
void* arg,
const ReadOptions& options,
const EnvOptions& soptions) {
const EnvOptions& soptions,
bool for_compaction) {
return new TwoLevelIterator(index_iter, block_function, arg,
options, soptions);
options, soptions, for_compaction);
}

} // namespace leveldb
6 changes: 4 additions & 2 deletions table/two_level_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ extern Iterator* NewTwoLevelIterator(
void* arg,
const ReadOptions& options,
const EnvOptions& soptions,
const Slice& index_value),
const Slice& index_value,
bool for_compaction),
void* arg,
const ReadOptions& options,
const EnvOptions& soptions);
const EnvOptions& soptions,
bool for_compaction = false);

} // namespace leveldb

Expand Down
24 changes: 24 additions & 0 deletions util/env_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,30 @@ class PosixRandomAccessFile: public RandomAccessFile {
return static_cast<size_t>(rid-id);
}
#endif

// Maps the requested access pattern to the corresponding POSIX_FADV_*
// constant and issues it on fd_ with offset 0 and length 0. The result of
// posix_fadvise is ignored. A pattern outside the enum trips an assertion
// and no advice is issued.
virtual void Hint(AccessPattern pattern) {
  int advice;
  switch (pattern) {
    case NORMAL:
      advice = POSIX_FADV_NORMAL;
      break;
    case RANDOM:
      advice = POSIX_FADV_RANDOM;
      break;
    case SEQUENTIAL:
      advice = POSIX_FADV_SEQUENTIAL;
      break;
    case WILLNEED:
      advice = POSIX_FADV_WILLNEED;
      break;
    case DONTNEED:
      advice = POSIX_FADV_DONTNEED;
      break;
    default:
      assert(false);
      return;
  }
  posix_fadvise(fd_, 0, 0, advice);
}

};

// mmap() based random-access
Expand Down
12 changes: 11 additions & 1 deletion util/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,15 @@ Options::Options()
is_fd_close_on_exec(true),
skip_log_error_on_recovery(false),
stats_dump_period_sec(3600),
block_size_deviation (10) {
block_size_deviation (10),
advise_random_on_open(true),
access_hint_on_compaction_start(NORMAL) {
}

// Printable names for Options::access_hint_on_compaction_start.
// Options::Dump indexes this table directly with the enum value, so the
// entries must stay in the same order as the enum declaration
// { NONE, NORMAL, SEQUENTIAL, WILLNEED }.
static const char* const access_hints[] = {
"NONE", "NORMAL", "SEQUENTIAL", "WILLNEED"
};

void
Options::Dump(Logger* log) const
{
Expand Down Expand Up @@ -198,6 +204,10 @@ Options::Dump(Logger* log) const
stats_dump_period_sec);
Log(log," Options.block_size_deviation: %d",
block_size_deviation);
Log(log," Options.advise_random_on_open: %d",
advise_random_on_open);
Log(log," Options.access_hint_on_compaction_start: %s",
access_hints[access_hint_on_compaction_start]);
} // Options::Dump

//
Expand Down

0 comments on commit ab8d2f6

Please sign in to comment.