Skip to content

Commit

Permalink
Add Stats for MultiGet (facebook#7366)
Browse files Browse the repository at this point in the history
Summary:
Add the following histogram stats for MultiGet to give more insight into MultiGet requests:
    1. Number of index and filter blocks read from file as part of MultiGet
    request per level.
    2. Number of data blocks read from file per level.
    3. Number of SST files loaded from file system per level.

Pull Request resolved: facebook#7366

Reviewed By: anand1976

Differential Revision: D24127040

Pulled By: akankshamahajan15

fbshipit-source-id: e63a003056b833729b277edc0639c08fb432756b
  • Loading branch information
akankshamahajan15 authored and facebook-github-bot committed Oct 7, 2020
1 parent 8891e9a commit 38d0a36
Show file tree
Hide file tree
Showing 9 changed files with 198 additions and 0 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Methods to configure, serialize, and compare -- such as TableFactory -- are exposed directly through the Configurable base class (from which these objects inherit). This change will allow for better and more thorough configuration management and retrieval in the future. The options for a Configurable object can be set via the ConfigureFromMap, ConfigureFromString, or ConfigureOption method. The serialized version of the options of an object can be retrieved via the GetOptionString, ToString, or GetOption methods. The list of options supported by an object can be obtained via the GetOptionNames method. The "raw" object (such as the BlockBasedTableOption) for an option may be retrieved via the GetOptions method. Configurable options can be compared via the AreEquivalent method. The settings within a Configurable object may be validated via the ValidateOptions method. The object may be initialized (at which point only mutable options may be updated) via the PrepareOptions method.
* Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail.
* Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface.
* Add more histogram stats for MultiGet: the number of data blocks, index and filter blocks, and SST files read from the file system per level.

## 6.13 (09/12/2020)
### Bug fixes
Expand Down
89 changes: 89 additions & 0 deletions db/db_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1871,6 +1871,95 @@ TEST_F(DBBasicTest, MultiGetBatchedValueSizeMultiLevelMerge) {
}
}

// Checks that a MultiGet over SST files spread across several levels
// populates the per-level MultiGet histograms:
//   - NUM_DATA_BLOCKS_READ_PER_LEVEL
//   - NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL
//   - NUM_SST_READ_PER_LEVEL
TEST_F(DBBasicTest, MultiGetStats) {
  Options options;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  // Tiny blocks, partitioned index/filter and no block cache; presumably
  // chosen so that the lookups below have to read blocks from the files
  // rather than being served from a cache.
  BlockBasedTableOptions table_options;
  table_options.block_size = 1;
  table_options.index_type =
      BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  table_options.partition_filters = true;
  table_options.no_block_cache = true;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  const int total_keys = 2000;
  std::vector<std::string> keys_str(total_keys);
  std::vector<Slice> keys(total_keys);
  std::vector<PinnableSlice> values(total_keys);
  std::vector<Status> s(total_keys);
  ReadOptions read_opts;

  Random rnd(309);
  // Create multiple SST files at multiple levels.
  //
  // NOTE(review): the second and third loops start at 501 and 1001, so
  // keys[500] and keys[1000] are never written and stay default-constructed
  // Slices that are still passed to MultiGet below. The data layout is kept
  // as-is because the histogram assertions may depend on it -- confirm
  // whether the gaps are intentional.
  for (int i = 0; i < 500; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(2, 1);

  for (int i = 501; i < 1000; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(2, 1);

  // The last batch is flushed but not moved to level 2.
  for (int i = 1001; i < total_keys; ++i) {
    keys_str[i] = "k" + std::to_string(i);
    keys[i] = Slice(keys_str[i]);
    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
    if (i % 100 == 0) {
      ASSERT_OK(Flush(1));
    }
  }
  ASSERT_OK(Flush(1));
  Close();

  // Reopen and reset the statistics so that only the MultiGet below
  // contributes to the histograms inspected afterwards.
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_OK(options.statistics->Reset());

  db_->MultiGet(read_opts, handles_[1], total_keys, keys.data(), values.data(),
                s.data(), false);

  ASSERT_EQ(values.size(), static_cast<size_t>(total_keys));

  HistogramData hist_data_blocks;
  HistogramData hist_index_and_filter_blocks;
  HistogramData hist_sst;

  options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
                                    &hist_data_blocks);
  options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
                                    &hist_index_and_filter_blocks);
  options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);

  // Maximum number of blocks read from a file system in a level.
  ASSERT_GT(hist_data_blocks.max, 0);
  ASSERT_GT(hist_index_and_filter_blocks.max, 0);
  // Maximum number of sst files read from file system in a level.
  ASSERT_GT(hist_sst.max, 0);

  // Minimum number of blocks read in a level.
  ASSERT_EQ(hist_data_blocks.min, 0);
  ASSERT_GT(hist_index_and_filter_blocks.min, 0);
  // Minimum number of sst files read in a level. The original re-checked
  // hist_sst.max here; the minimum is only required to be non-negative
  // because a tighter bound cannot be derived from this test alone.
  ASSERT_GE(hist_sst.min, 0);
}

// Test class for batched MultiGet with prefix extractor
// Param bool - If true, use partitioned filters
// If false, use full filter block
Expand Down
26 changes: 26 additions & 0 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1968,6 +1968,10 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
&storage_info_.file_indexer_, user_comparator(), internal_comparator());
FdWithKeyRange* f = fp.GetNextFile();
Status s;
uint64_t num_index_read = 0;
uint64_t num_filter_read = 0;
uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;

while (f != nullptr) {
MultiGetRange file_range = fp.CurrentFileRange();
Expand Down Expand Up @@ -2014,6 +2018,11 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
sample_file_read_inc(f->file_metadata);
}
batch_size++;
num_index_read += get_context.get_context_stats_.num_index_read;
num_filter_read += get_context.get_context_stats_.num_filter_read;
num_data_read += get_context.get_context_stats_.num_data_read;
num_sst_read += get_context.get_context_stats_.num_sst_read;

// report the counters before returning
if (get_context.State() != GetContext::kNotFound &&
get_context.State() != GetContext::kMerge &&
Expand Down Expand Up @@ -2069,6 +2078,23 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
continue;
}
}

// Report MultiGet stats per level.
if (fp.IsHitFileLastInLevel()) {
// Dump the stats if this is the last file of this level and reset for
// next level.
RecordInHistogram(db_statistics_,
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
num_index_read + num_filter_read);
RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
num_data_read);
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
num_filter_read = 0;
num_index_read = 0;
num_data_read = 0;
num_sst_read = 0;
}

RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size);
if (!s.ok() || file_picker_range.empty()) {
break;
Expand Down
8 changes: 8 additions & 0 deletions include/rocksdb/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,14 @@ enum Histograms : uint32_t {
FLUSH_TIME,
SST_BATCH_SIZE,

// MultiGet stats logged per level
// Num of index and filter blocks read from file system per level.
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
// Num of data blocks read from file system per level.
NUM_DATA_BLOCKS_READ_PER_LEVEL,
// Num of sst files read from file system per level.
NUM_SST_READ_PER_LEVEL,

HISTOGRAM_ENUM_MAX,
};

Expand Down
14 changes: 14 additions & 0 deletions java/rocksjni/portal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5385,6 +5385,13 @@ class HistogramTypeJni {
return 0x2D;
case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS:
return 0x2E;
case ROCKSDB_NAMESPACE::Histograms::
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL:
return 0x2F;
case ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL:
return 0x30;
case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL:
return 0x31;
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
// 0x1F for backwards compatibility on current minor version.
return 0x1F;
Expand Down Expand Up @@ -5492,6 +5499,13 @@ class HistogramTypeJni {
return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS;
case 0x2E:
return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS;
case 0x2F:
return ROCKSDB_NAMESPACE::Histograms::
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL;
case 0x30:
return ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL;
case 0x31:
return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL;
case 0x1F:
// 0x1F for backwards compatibility on current minor version.
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;
Expand Down
16 changes: 16 additions & 0 deletions java/src/main/java/org/rocksdb/HistogramType.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,22 @@ public enum HistogramType {
*/
BLOB_DB_DECOMPRESSION_MICROS((byte) 0x2E),

/**
* Num of Index and Filter blocks read from file system per level in MultiGet
* request
*/
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL((byte) 0x2F),

/**
* Num of Data blocks read from file system per level in MultiGet request.
*/
NUM_DATA_BLOCKS_READ_PER_LEVEL((byte) 0x30),

/**
* Num of SST files read from file system per level in MultiGet request.
*/
NUM_SST_READ_PER_LEVEL((byte) 0x31),

// 0x1F for backwards compatibility on current minor version.
HISTOGRAM_ENUM_MAX((byte) 0x1F);

Expand Down
4 changes: 4 additions & 0 deletions monitoring/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"},
{FLUSH_TIME, "rocksdb.db.flush.micros"},
{SST_BATCH_SIZE, "rocksdb.sst.batch.size"},
{NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
"rocksdb.num.index.and.filter.blocks.read.per.level"},
{NUM_DATA_BLOCKS_READ_PER_LEVEL, "rocksdb.num.data.blocks.read.per.level"},
{NUM_SST_READ_PER_LEVEL, "rocksdb.num.sst.read.per.level"},
};

std::shared_ptr<Statistics> CreateDBStatistics() {
Expand Down
35 changes: 35 additions & 0 deletions table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,21 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
s = block_fetcher.ReadBlockContents();
raw_block_comp_type = block_fetcher.get_compression_type();
contents = &raw_block_contents;
if (get_context) {
switch (block_type) {
case BlockType::kIndex:
++get_context->get_context_stats_.num_index_read;
break;
case BlockType::kFilter:
++get_context->get_context_stats_.num_filter_read;
break;
case BlockType::kData:
++get_context->get_context_stats_.num_data_read;
break;
default:
break;
}
}
} else {
raw_block_comp_type = contents->get_compression_type();
}
Expand Down Expand Up @@ -1889,6 +1904,22 @@ Status BlockBasedTable::RetrieveBlock(
GetMemoryAllocator(rep_->table_options), for_compaction,
rep_->blocks_definitely_zstd_compressed,
rep_->table_options.filter_policy.get());

if (get_context) {
switch (block_type) {
case BlockType::kIndex:
++(get_context->get_context_stats_.num_index_read);
break;
case BlockType::kFilter:
++(get_context->get_context_stats_.num_filter_read);
break;
case BlockType::kData:
++(get_context->get_context_stats_.num_data_read);
break;
default:
break;
}
}
}

if (!s.ok()) {
Expand Down Expand Up @@ -2553,6 +2584,10 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
}
RetrieveMultipleBlocks(read_options, &data_block_range, &block_handles,
&statuses, &results, scratch, dict);
if (sst_file_range.begin()->get_context) {
++(sst_file_range.begin()
->get_context->get_context_stats_.num_sst_read);
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions table/get_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ struct GetContextStats {
uint64_t num_cache_compression_dict_add = 0;
uint64_t num_cache_compression_dict_add_redundant = 0;
uint64_t num_cache_compression_dict_bytes_insert = 0;
// MultiGet stats.
uint64_t num_filter_read = 0;
uint64_t num_index_read = 0;
uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;
};

// A class to hold context about a point lookup, such as pointer to value
Expand Down

0 comments on commit 38d0a36

Please sign in to comment.