Skip to content

Commit

Permalink
Fb 9718 verify checksums is ignored (facebook#9767)
Browse files Browse the repository at this point in the history
Summary:
Fixes facebook#9718

The verify_checksums flag of read_options should be passed to the read options used by the BlockFetcher in a couple of cases where it is not at present. It will now happen (but did not, previously) on iteration and on [multi]get, where a fetcher is created as part of the iterate/get call.

This may result in much better performance in a few workloads where the client chooses to remove verification.

Pull Request resolved: facebook#9767

Reviewed By: mrambacher

Differential Revision: D35218986

Pulled By: jay-zhuang

fbshipit-source-id: 329d29764bb70fbc7f2673440bc46c107a813bc8
  • Loading branch information
alanpaxton authored and facebook-github-bot committed Mar 29, 2022
1 parent a5e5130 commit b6ad0d9
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 7 deletions.
2 changes: 2 additions & 0 deletions include/rocksdb/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ enum Tickers : uint32_t {
NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT,

BLOCK_CHECKSUM_COMPUTE_COUNT,

TICKER_ENUM_MAX
};

Expand Down
1 change: 1 addition & 0 deletions java/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ JAVA_TESTS = \
org.rocksdb.TtlDBTest\
org.rocksdb.StatisticsTest\
org.rocksdb.StatisticsCollectorTest\
org.rocksdb.VerifyChecksumsTest\
org.rocksdb.WalFilterTest\
org.rocksdb.WALRecoveryModeTest\
org.rocksdb.WriteBatchHandlerTest\
Expand Down
4 changes: 4 additions & 0 deletions java/rocksjni/portal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5086,6 +5086,8 @@ class TickerTypeJni {
return -0x2C;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT:
return -0x2D;
case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT:
return -0x2E;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
Expand Down Expand Up @@ -5455,6 +5457,8 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES;
case -0x2D:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT;
case -0x2E:
return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT;
case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep
Expand Down
2 changes: 2 additions & 0 deletions java/src/main/java/org/rocksdb/TickerType.java
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,8 @@ public enum TickerType {
NON_LAST_LEVEL_READ_BYTES((byte) -0x2C),
NON_LAST_LEVEL_READ_COUNT((byte) -0x2D),

BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E),

TICKER_ENUM_MAX((byte) 0x5F);

private final byte value;
Expand Down
213 changes: 213 additions & 0 deletions java/src/test/java/org/rocksdb/VerifyChecksumsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import static org.assertj.core.api.Assertions.assertThat;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

public class VerifyChecksumsTest {
@ClassRule
public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
new RocksNativeLibraryResource();

@Rule public TemporaryFolder dbFolder = new TemporaryFolder();

/**
* Class to factor out the specific DB operations within the test
*/
abstract static class Operations {
final int kv_count;
final List<String> elements = new ArrayList<>();
final List<String> sortedElements = new ArrayList<>();

Operations(final int kv_count) {
this.kv_count = kv_count;
for (int i = 0; i < kv_count; i++) elements.add(MessageFormat.format("{0,number,#}", i));
sortedElements.addAll(elements);
Collections.sort(sortedElements);
}

void fill(final RocksDB db) throws RocksDBException {
for (int i = 0; i < kv_count; i++) {
final String key = MessageFormat.format("key{0}", elements.get(i));
final String value = MessageFormat.format("value{0}", elements.get(i));
// noinspection ObjectAllocationInLoop
db.put(key.getBytes(), value.getBytes());
}
db.flush(new FlushOptions());
}

@SuppressWarnings("ObjectAllocationInLoop")
void get(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
try (final ReadOptions readOptions = new ReadOptions()) {
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);

for (int i = 0; i < kv_count / 10; i++) {
@SuppressWarnings("UnsecureRandomNumberGeneration")
final int index = Double.valueOf(Math.random() * kv_count).intValue();
final String key = MessageFormat.format("key{0}", sortedElements.get(index));
final String expectedValue = MessageFormat.format("value{0}", sortedElements.get(index));

final byte[] value = db.get(readOptions, key.getBytes());
assertThat(value).isEqualTo(expectedValue.getBytes());
}
}
}

@SuppressWarnings("ObjectAllocationInLoop")
void multiGet(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
try (final ReadOptions readOptions = new ReadOptions()) {
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);

final List<byte[]> keys = new ArrayList<>();
final List<String> expectedValues = new ArrayList<>();

for (int i = 0; i < kv_count / 10; i++) {
@SuppressWarnings("UnsecureRandomNumberGeneration")
final int index = Double.valueOf(Math.random() * kv_count).intValue();
keys.add(MessageFormat.format("key{0}", sortedElements.get(index)).getBytes());

expectedValues.add(MessageFormat.format("value{0}", sortedElements.get(index)));
}

final List<byte[]> values = db.multiGetAsList(readOptions, keys);
for (int i = 0; i < keys.size(); i++) {
assertThat(values.get(i)).isEqualTo(expectedValues.get(i).getBytes());
}
}
}

void iterate(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
final ReadOptions readOptions = new ReadOptions();
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);
int i = 0;
try (final RocksIterator rocksIterator = db.newIterator(readOptions)) {
rocksIterator.seekToFirst();
rocksIterator.status();
while (rocksIterator.isValid()) {
final byte[] key = rocksIterator.key();
final byte[] value = rocksIterator.value();
// noinspection ObjectAllocationInLoop
assertThat(key).isEqualTo(
(MessageFormat.format("key{0}", sortedElements.get(i))).getBytes());
// noinspection ObjectAllocationInLoop
assertThat(value).isEqualTo(
(MessageFormat.format("value{0}", sortedElements.get(i))).getBytes());
rocksIterator.next();
rocksIterator.status();
i++;
}
}
assertThat(i).isEqualTo(kv_count);
}

abstract void performOperations(final RocksDB db, final boolean verifyFlag)
throws RocksDBException;
}

private static final int KV_COUNT = 10000;

/**
* Run some operations and count the TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT before and after
* It should GO UP when the read options have checksum verification turned on.
* It shoulld REMAIN UNCHANGED when the read options have checksum verification turned off.
* As the read options refer only to the read operations, there are still a few checksums
* performed outside this (blocks are getting loaded for lots of reasons, not aways directly due
* to reads) but this test provides a good enough proxy for whether the flag is being noticed.
*
* @param operations the DB reading operations to perform which affect the checksum stats
*
* @throws RocksDBException
*/
private void verifyChecksums(final Operations operations) throws RocksDBException {
final String dbPath = dbFolder.getRoot().getAbsolutePath();

// noinspection SingleStatementInBlock
try (final Statistics statistics = new Statistics();
final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics)) {
try (final RocksDB db = RocksDB.open(options, dbPath)) {
// 0
System.out.println(MessageFormat.format(
"newly open {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));
operations.fill(db);
//
System.out.println(MessageFormat.format(
"flushed {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));
}

// 2
System.out.println(MessageFormat.format("closed-after-write {0}",
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));

for (final boolean verifyFlag : new boolean[] {false, true, false, true}) {
try (final RocksDB db = RocksDB.open(options, dbPath)) {
final long beforeOperationsCount =
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
System.out.println(MessageFormat.format("re-opened {0}", beforeOperationsCount));
operations.performOperations(db, verifyFlag);
final long afterOperationsCount =
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
if (verifyFlag) {
// We don't need to be exact - we are checking that the checksums happen
// exactly how many depends on block size etc etc, so may not be entirely stable
System.out.println(MessageFormat.format("verify=true {0}", afterOperationsCount));
assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount + 20);
} else {
System.out.println(MessageFormat.format("verify=false {0}", afterOperationsCount));
assertThat(afterOperationsCount).isEqualTo(beforeOperationsCount);
}
}
}
}
}

@Test
public void verifyChecksumsInIteration() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
iterate(db, verifyFlag);
}
});
}

@Test
public void verifyChecksumsGet() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
get(db, verifyFlag);
}
});
}

@Test
public void verifyChecksumsMultiGet() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
multiGet(db, verifyFlag);
}
});
}
}
2 changes: 1 addition & 1 deletion monitoring/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
};
{BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}};

const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{DB_GET, "rocksdb.db.get.micros"},
Expand Down
2 changes: 2 additions & 0 deletions table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2597,6 +2597,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
uncompression_dict_status =
rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
nullptr /* prefetch_buffer */, no_io,
read_options.verify_checksums,
sst_file_range.begin()->get_context, &lookup_context,
&uncompression_dict);
uncompression_dict_inited = true;
Expand Down Expand Up @@ -3442,6 +3443,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
CachableEntry<UncompressionDict> uncompression_dict;
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
nullptr /* prefetch_buffer */, false /* no_io */,
false, /* verify_checksums */
nullptr /* get_context */, nullptr /* lookup_context */,
&uncompression_dict);
if (!s.ok()) {
Expand Down
4 changes: 2 additions & 2 deletions table/block_based/block_based_table_reader_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
if (rep_->uncompression_dict_reader) {
const bool no_io = (ro.read_tier == kBlockCacheTier);
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
prefetch_buffer, no_io, get_context, lookup_context,
&uncompression_dict);
prefetch_buffer, no_io, ro.verify_checksums, get_context,
lookup_context, &uncompression_dict);
if (!s.ok()) {
iter->Invalidate(s);
return iter;
Expand Down
5 changes: 3 additions & 2 deletions table/block_based/uncompression_dict_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ Status UncompressionDictReader::ReadUncompressionDictionary(
}

Status UncompressionDictReader::GetOrReadUncompressionDictionary(
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context,
FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums,
GetContext* get_context, BlockCacheLookupContext* lookup_context,
CachableEntry<UncompressionDict>* uncompression_dict) const {
assert(uncompression_dict);

Expand All @@ -90,6 +90,7 @@ Status UncompressionDictReader::GetOrReadUncompressionDictionary(
if (no_io) {
read_options.read_tier = kBlockCacheTier;
}
read_options.verify_checksums = verify_checksums;

return ReadUncompressionDictionary(table_, prefetch_buffer, read_options,
cache_dictionary_blocks(), get_context,
Expand Down
4 changes: 2 additions & 2 deletions table/block_based/uncompression_dict_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class UncompressionDictReader {
std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader);

Status GetOrReadUncompressionDictionary(
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context,
FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums,
GetContext* get_context, BlockCacheLookupContext* lookup_context,
CachableEntry<UncompressionDict>* uncompression_dict) const;

size_t ApproximateMemoryUsage() const;
Expand Down
1 change: 1 addition & 0 deletions table/block_fetcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
io_status_ = status_to_io_status(VerifyBlockChecksum(
footer_.checksum_type(), slice_.data(), block_size_,
file_->file_name(), handle_.offset()));
RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
}
compression_type_ =
BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);
Expand Down

0 comments on commit b6ad0d9

Please sign in to comment.