Extend Get/MultiGet deadline support to table open (facebook#6982)
Summary:
The current implementation of the ```read_options.deadline``` option only checks the deadline for random file reads during point lookups. This PR extends the checks to the file opens, prefetches, and preloads performed as part of table open.
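To make the scope concrete, here is a minimal sketch of a point lookup that opts into the deadline, assuming the public ```DB::Get``` API and that ```deadline``` is an absolute time consistent with ```Env::NowMicros()``` (the helper name is illustrative only):

```cpp
#include <chrono>
#include <string>

#include "rocksdb/db.h"

// Sketch only: `db` is an open rocksdb::DB*, and the deadline is an absolute
// timestamp comparable with Env::NowMicros().
rocksdb::Status GetWithDeadline(rocksdb::DB* db, const rocksdb::Slice& key,
                                std::string* value) {
  rocksdb::ReadOptions ro;
  ro.deadline = std::chrono::microseconds(db->GetEnv()->NowMicros()) +
                std::chrono::milliseconds(10);  // allow roughly 10ms
  // With this PR, the deadline is also checked by the file open, prefetch and
  // preload I/O done during table open, not just the random block reads.
  return db->Get(ro, key, value);
}
```

(Iterators still reject a non-zero deadline, as the ```db/db_impl/db_impl.cc``` hunk below shows.)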

The main changes are in ```BlockBasedTable```, the partitioned index and filter readers, and ```TableCache```, which now take a ```ReadOptions``` as an additional parameter. In ```BlockBasedTable::Open```, in order to retain the existing behavior with respect to checksum verification and block cache usage, we filter out most of the options in ```ReadOptions``` and keep only ```deadline```. However, having the ```ReadOptions``` available gives us the flexibility to honor other options such as ```verify_checksums``` and ```fill_cache``` in the future.
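The filtering can be pictured roughly as follows; this is only a sketch of the idea, not the exact code in ```BlockBasedTable::Open```:

```cpp
#include "rocksdb/options.h"

// Sketch: keep only the deadline from the caller's ReadOptions so that
// checksum verification and block cache usage during table open retain
// their existing behavior; everything else stays at its default.
rocksdb::ReadOptions SanitizeForTableOpen(
    const rocksdb::ReadOptions& read_options) {
  rocksdb::ReadOptions ro;
  ro.deadline = read_options.deadline;
  return ro;
}
```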

There are additional changes in call sites due to the function signature changes in ```NewTableReader()``` and ```FilePrefetchBuffer```.
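Third-party ```TableFactory``` implementations can adapt by following the same pattern this diff applies to ```TestPlainTableFactory``` and ```AdaptiveTableFactory```; the sketch below uses a placeholder ```MyTableFactory``` and omits the factory's other required members:

```cpp
#include <memory>

#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {

// Placeholder factory, shown only to illustrate the new override pattern.
class MyTableFactory : public TableFactory {
 public:
  const char* Name() const override { return "MyTableFactory"; }

  // Keep the legacy overload (without ReadOptions) visible to callers.
  using TableFactory::NewTableReader;

  Status NewTableReader(
      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
      std::unique_ptr<TableReader>* table,
      bool prefetch_index_and_filter_in_cache) const override {
    // A real implementation would thread `ro` (notably ro.deadline) into its
    // open-time reads, e.g. via the IOOptions passed to
    // FilePrefetchBuffer::Prefetch()/TryReadFromCache(); this stub just fails.
    (void)ro; (void)table_reader_options; (void)file; (void)file_size;
    (void)table; (void)prefetch_index_and_filter_in_cache;
    return Status::NotSupported("illustrative sketch only");
  }

  // NewTableBuilder() and the factory's other pure-virtual members are
  // omitted for brevity.
};

}  // namespace ROCKSDB_NAMESPACE
```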
Pull Request resolved: facebook#6982

Test Plan: Add new unit tests in db_basic_test
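One hypothetical shape such a test could take (the actual tests use a fault-injecting ```FileSystem``` to delay open-time reads, which is not reproduced here):

```cpp
// Sketch, not the test added by this PR: reopen so the next Get() has to open
// the table file, set a tight deadline, and accept either success (fast
// storage) or a timed-out status (when the open is artificially slowed down).
TEST_F(DBBasicTest, PointLookupDeadlineAtTableOpen) {
  ASSERT_OK(Put("key", "value"));
  ASSERT_OK(Flush());
  Reopen(CurrentOptions());  // evicts cached table readers

  ReadOptions ro;
  ro.deadline = std::chrono::microseconds(env_->NowMicros() + 100);
  std::string value;
  Status s = db_->Get(ro, "key", &value);
  ASSERT_TRUE(s.ok() || s.IsTimedOut());
}
```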

Reviewed By: riversand963

Differential Revision: D22219515

Pulled By: anand1976

fbshipit-source-id: 8a3b92f4a889808013838603aa3ca35229cd501b
Anand Ananthabhotla authored and facebook-github-bot committed Jun 29, 2020
1 parent d809ae9 commit 9a5886b
Showing 48 changed files with 604 additions and 356 deletions.
392 changes: 268 additions & 124 deletions db/db_basic_test.cc

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions db/db_impl/db_impl.cc
@@ -2685,8 +2685,6 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
return NewErrorIterator(
Status::NotSupported("Managed iterator is not supported anymore."));
}
// We will eventually support deadline for iterators too, but safeguard
// for now
if (read_options.deadline != std::chrono::microseconds::zero()) {
return NewErrorIterator(
Status::NotSupported("ReadOptions deadline is not supported"));
3 changes: 2 additions & 1 deletion db/plain_table_db_test.cc
@@ -336,8 +336,9 @@ class TestPlainTableFactory : public PlainTableFactory {
column_family_id_(column_family_id),
column_family_name_(std::move(column_family_name)) {}

using PlainTableFactory::NewTableReader;
Status NewTableReader(
const TableReaderOptions& table_reader_options,
const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table,
bool /*prefetch_index_and_filter_in_cache*/) const override {
58 changes: 34 additions & 24 deletions db/table_cache.cc
@@ -13,6 +13,7 @@
#include "db/range_tombstone_fragmenter.h"
#include "db/snapshot_impl.h"
#include "db/version_edit.h"
#include "file/file_util.h"
#include "file/filename.h"
#include "file/random_access_file_reader.h"
#include "monitoring/perf_context_imp.h"
@@ -92,7 +93,7 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
}

Status TableCache::GetTableReader(
const FileOptions& file_options,
const ReadOptions& ro, const FileOptions& file_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
std::unique_ptr<TableReader>* table_reader,
@@ -102,12 +103,19 @@ Status TableCache::GetTableReader(
std::string fname =
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
std::unique_ptr<FSRandomAccessFile> file;
Status s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
nullptr);
FileOptions fopts = file_options;
Status s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr);
}
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.IsPathNotFound()) {
fname = Rocks2LevelTableFileName(fname);
s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, nullptr);
s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
nullptr);
}
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
}

@@ -122,6 +130,7 @@ Status TableCache::GetTableReader(
record_read_stats ? ioptions_.statistics : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter, ioptions_.listeners));
s = ioptions_.table_factory->NewTableReader(
ro,
TableReaderOptions(ioptions_, prefix_extractor, file_options,
internal_comparator, skip_filters, immortal_tables_,
false /* force_direct_prefetch */, level,
@@ -141,7 +150,8 @@ void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) {
cache_->Erase(key);
}

Status TableCache::FindTable(const FileOptions& file_options,
Status TableCache::FindTable(const ReadOptions& ro,
const FileOptions& file_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, Cache::Handle** handle,
const SliceTransform* prefix_extractor,
@@ -169,7 +179,7 @@ Status TableCache::FindTable(const FileOptions& file_options,
}

std::unique_ptr<TableReader> table_reader;
s = GetTableReader(file_options, internal_comparator, fd,
s = GetTableReader(ro, file_options, internal_comparator, fd,
false /* sequential mode */, record_read_stats,
file_read_hist, &table_reader, prefix_extractor,
skip_filters, level, prefetch_index_and_filter_in_cache,
@@ -212,12 +222,12 @@ InternalIterator* TableCache::NewIterator(
auto& fd = file_meta.fd;
table_reader = fd.table_reader;
if (table_reader == nullptr) {
s = FindTable(file_options, icomparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record_read_stats */, file_read_hist,
skip_filters, level,
true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
s = FindTable(
options, file_options, icomparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
!for_compaction /* record_read_stats */, file_read_hist, skip_filters,
level, true /* prefetch_index_and_filter_in_cache */,
max_file_size_for_l0_meta_pin);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(handle);
}
@@ -288,7 +298,7 @@ Status TableCache::GetRangeTombstoneIterator(
TableReader* t = fd.table_reader;
Cache::Handle* handle = nullptr;
if (t == nullptr) {
s = FindTable(file_options_, internal_comparator, fd, &handle);
s = FindTable(options, file_options_, internal_comparator, fd, &handle);
if (s.ok()) {
t = GetTableReaderFromHandle(handle);
}
@@ -403,7 +413,7 @@ Status TableCache::Get(const ReadOptions& options,
Cache::Handle* handle = nullptr;
if (!done && s.ok()) {
if (t == nullptr) {
s = FindTable(file_options_, internal_comparator, fd, &handle,
s = FindTable(options, file_options_, internal_comparator, fd, &handle,
prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters,
@@ -506,8 +516,8 @@ Status TableCache::MultiGet(const ReadOptions& options,
if (s.ok() && !table_range.empty()) {
if (t == nullptr) {
s = FindTable(
file_options_, internal_comparator, fd, &handle, prefix_extractor,
options.read_tier == kBlockCacheTier /* no_io */,
options, file_options_, internal_comparator, fd, &handle,
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
true /* record_read_stats */, file_read_hist, skip_filters, level);
TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
if (s.ok()) {
@@ -591,8 +601,8 @@ Status TableCache::GetTableProperties(
}

Cache::Handle* table_handle = nullptr;
s = FindTable(file_options, internal_comparator, fd, &table_handle,
prefix_extractor, no_io);
s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
&table_handle, prefix_extractor, no_io);
if (!s.ok()) {
return s;
}
@@ -615,8 +625,8 @@ size_t TableCache::GetMemoryUsageByTableReader(
}

Cache::Handle* table_handle = nullptr;
s = FindTable(file_options, internal_comparator, fd, &table_handle,
prefix_extractor, true);
s = FindTable(ReadOptions(), file_options, internal_comparator, fd,
&table_handle, prefix_extractor, true);
if (!s.ok()) {
return 0;
}
@@ -640,8 +650,8 @@ uint64_t TableCache::ApproximateOffsetOf(
Cache::Handle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(file_options_, internal_comparator, fd, &table_handle,
prefix_extractor, false /* no_io */,
Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
&table_handle, prefix_extractor, false /* no_io */,
!for_compaction /* record_read_stats */);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(table_handle);
@@ -667,8 +677,8 @@ uint64_t TableCache::ApproximateSize(
Cache::Handle* table_handle = nullptr;
if (table_reader == nullptr) {
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
Status s = FindTable(file_options_, internal_comparator, fd, &table_handle,
prefix_extractor, false /* no_io */,
Status s = FindTable(ReadOptions(), file_options_, internal_comparator, fd,
&table_handle, prefix_extractor, false /* no_io */,
!for_compaction /* record_read_stats */);
if (s.ok()) {
table_reader = GetTableReaderFromHandle(table_handle);
4 changes: 2 additions & 2 deletions db/table_cache.h
@@ -129,7 +129,7 @@ class TableCache {
// Find table reader
// @param skip_filters Disables loading/accessing the filter block
// @param level == -1 means not specified
Status FindTable(const FileOptions& toptions,
Status FindTable(const ReadOptions& ro, const FileOptions& toptions,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& file_fd, Cache::Handle**,
const SliceTransform* prefix_extractor = nullptr,
@@ -195,7 +195,7 @@

private:
// Build a table reader
Status GetTableReader(const FileOptions& file_options,
Status GetTableReader(const ReadOptions& ro, const FileOptions& file_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, bool sequential_mode,
bool record_read_stats, HistogramImpl* file_read_hist,
7 changes: 4 additions & 3 deletions db/version_builder.cc
@@ -931,9 +931,10 @@ class VersionBuilder::Rep {
auto* file_meta = files_meta[file_idx].first;
int level = files_meta[file_idx].second;
statuses[file_idx] = table_cache_->FindTable(
file_options_, *(base_vstorage_->InternalComparator()),
file_meta->fd, &file_meta->table_reader_handle, prefix_extractor,
false /*no_io */, true /* record_read_stats */,
ReadOptions(), file_options_,
*(base_vstorage_->InternalComparator()), file_meta->fd,
&file_meta->table_reader_handle, prefix_extractor, false /*no_io */,
true /* record_read_stats */,
internal_stats->GetFileReadHist(level), false, level,
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin);
if (file_meta->table_reader_handle != nullptr) {
13 changes: 8 additions & 5 deletions file/file_prefetch_buffer.cc
@@ -21,7 +21,8 @@
#include "util/rate_limiter.h"

namespace ROCKSDB_NAMESPACE {
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
Status FilePrefetchBuffer::Prefetch(const IOOptions& opts,
RandomAccessFileReader* reader,
uint64_t offset, size_t n,
bool for_compaction) {
if (!enable_ || reader == nullptr) {
@@ -87,7 +88,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,

Slice result;
size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
s = reader->Read(IOOptions(), rounddown_offset + chunk_len, read_len, &result,
s = reader->Read(opts, rounddown_offset + chunk_len, read_len, &result,
buffer_.BufferStart() + chunk_len, nullptr, for_compaction);
#ifndef NDEBUG
if (!s.ok() || result.size() < read_len) {
@@ -103,7 +104,8 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
return s;
}

bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
uint64_t offset, size_t n,
Slice* result, bool for_compaction) {
if (track_min_offset_ && offset < min_offset_read_) {
min_offset_read_ = static_cast<size_t>(offset);
@@ -122,10 +124,11 @@ bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
assert(max_readahead_size_ >= readahead_size_);
Status s;
if (for_compaction) {
s = Prefetch(file_reader_, offset, std::max(n, readahead_size_),
s = Prefetch(opts, file_reader_, offset, std::max(n, readahead_size_),
for_compaction);
} else {
s = Prefetch(file_reader_, offset, n + readahead_size_, for_compaction);
s = Prefetch(opts, file_reader_, offset, n + readahead_size_,
for_compaction);
}
if (!s.ok()) {
return false;
10 changes: 6 additions & 4 deletions file/file_prefetch_buffer.h
@@ -11,9 +11,11 @@
#include <atomic>
#include <sstream>
#include <string>

#include "file/random_access_file_reader.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "util/aligned_buffer.h"

namespace ROCKSDB_NAMESPACE {
@@ -59,8 +61,8 @@ class FilePrefetchBuffer {
// offset : the file offset to start reading from.
// n : the number of bytes to read.
// for_compaction : if prefetch is done for compaction read.
Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n,
bool for_compaction = false);
Status Prefetch(const IOOptions& opts, RandomAccessFileReader* reader,
uint64_t offset, size_t n, bool for_compaction = false);

// Tries returning the data for a file read from this buffer, if that data is
// in the buffer.
@@ -72,8 +74,8 @@
// n : the number of bytes.
// result : output buffer to put the data into.
// for_compaction : if cache read is done for compaction read.
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result,
bool for_compaction = false);
bool TryReadFromCache(const IOOptions& opts, uint64_t offset, size_t n,
Slice* result, bool for_compaction = false);

// The minimum `offset` ever passed to TryReadFromCache(). This will only be
// tracked if track_min_offset = true.
3 changes: 2 additions & 1 deletion file/file_util.cc
@@ -166,10 +166,11 @@ IOStatus GenerateOneFileChecksum(FileSystem* fs, const std::string& file_path,

Slice slice;
uint64_t offset = 0;
IOOptions opts;
while (size > 0) {
size_t bytes_to_read =
static_cast<size_t>(std::min(uint64_t{readahead_size}, size));
if (!prefetch_buffer.TryReadFromCache(offset, bytes_to_read, &slice,
if (!prefetch_buffer.TryReadFromCache(opts, offset, bytes_to_read, &slice,
false)) {
return IOStatus::Corruption("file read failed");
}
2 changes: 2 additions & 0 deletions file/random_access_file_reader.h
@@ -11,10 +11,12 @@
#include <atomic>
#include <sstream>
#include <string>

#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/listener.h"
#include "rocksdb/options.h"
#include "rocksdb/rate_limiter.h"
#include "util/aligned_buffer.h"

14 changes: 13 additions & 1 deletion include/rocksdb/table.h
@@ -556,7 +556,19 @@ class TableFactory {
const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter_in_cache = true) const = 0;
bool prefetch_index_and_filter_in_cache = true) const {
ReadOptions ro;
return NewTableReader(ro, table_reader_options, std::move(file), file_size,
table_reader, prefetch_index_and_filter_in_cache);
}

// Overload of the above function that allows the caller to pass in a
// ReadOptions
virtual Status NewTableReader(
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter_in_cache) const = 0;

// Return a table builder to write to a file for this table type.
//
10 changes: 6 additions & 4 deletions table/adaptive/adaptive_table_factory.cc
@@ -42,12 +42,13 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kCuckooTableMagicNumber;

Status AdaptiveTableFactory::NewTableReader(
const TableReaderOptions& table_reader_options,
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table,
bool /*prefetch_index_and_filter_in_cache*/) const {
bool prefetch_index_and_filter_in_cache) const {
Footer footer;
auto s = ReadFooterFromFile(file.get(), nullptr /* prefetch_buffer */,
IOOptions opts;
auto s = ReadFooterFromFile(opts, file.get(), nullptr /* prefetch_buffer */,
file_size, &footer);
if (!s.ok()) {
return s;
@@ -59,7 +60,8 @@ Status AdaptiveTableFactory::NewTableReader(
} else if (footer.table_magic_number() == kBlockBasedTableMagicNumber ||
footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) {
return block_based_table_factory_->NewTableReader(
table_reader_options, std::move(file), file_size, table);
ro, table_reader_options, std::move(file), file_size, table,
prefetch_index_and_filter_in_cache);
} else if (footer.table_magic_number() == kCuckooTableMagicNumber) {
return cuckoo_table_factory_->NewTableReader(
table_reader_options, std::move(file), file_size, table);
3 changes: 2 additions & 1 deletion table/adaptive/adaptive_table_factory.h
@@ -33,8 +33,9 @@ class AdaptiveTableFactory : public TableFactory {

const char* Name() const override { return "AdaptiveTableFactory"; }

using TableFactory::NewTableReader;
Status NewTableReader(
const TableReaderOptions& table_reader_options,
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table,
bool prefetch_index_and_filter_in_cache = true) const override;
8 changes: 4 additions & 4 deletions table/block_based/binary_search_index_reader.cc
@@ -10,9 +10,9 @@

namespace ROCKSDB_NAMESPACE {
Status BinarySearchIndexReader::Create(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
bool use_cache, bool prefetch, bool pin,
BlockCacheLookupContext* lookup_context,
const BlockBasedTable* table, const ReadOptions& ro,
FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch,
bool pin, BlockCacheLookupContext* lookup_context,
std::unique_ptr<IndexReader>* index_reader) {
assert(table != nullptr);
assert(table->get_rep());
@@ -22,7 +22,7 @@ Status BinarySearchIndexReader::Create(
CachableEntry<Block> index_block;
if (prefetch || !use_cache) {
const Status s =
ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
ReadIndexBlock(table, prefetch_buffer, ro, use_cache,
/*get_context=*/nullptr, lookup_context, &index_block);
if (!s.ok()) {
return s;