Skip to content

Commit

Permalink
add VerifyChecksum() to db.h
Browse files Browse the repository at this point in the history
Summary:
We need a tool to check any sst file corruption in the db.
It will check all the sst files in current version and read all the blocks (data, meta, index) with checksum verification. If any verification fails, the function will return non-OK status.
Closes facebook#2498

Differential Revision: D5324269

Pulled By: lightmark

fbshipit-source-id: 6f8a272008b722402a772acfc804524c9d1a483b
  • Loading branch information
lightmark authored and facebook-github-bot committed Aug 9, 2017
1 parent 47ed3bf commit 7848f0b
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 2 deletions.
29 changes: 29 additions & 0 deletions db/convenience.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,35 @@ Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
->DeleteFilesInRange(column_family, begin, end);
}

Status VerifySstFileChecksum(const Options& options,
const EnvOptions& env_options,
const std::string& file_path) {
unique_ptr<RandomAccessFile> file;
uint64_t file_size;
InternalKeyComparator internal_comparator(options.comparator);
ImmutableCFOptions ioptions(options);

Status s = ioptions.env->NewRandomAccessFile(file_path, &file, env_options);
if (s.ok()) {
s = ioptions.env->GetFileSize(file_path, &file_size);
} else {
return s;
}
unique_ptr<TableReader> table_reader;
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(file), file_path));
s = ioptions.table_factory->NewTableReader(
TableReaderOptions(ioptions, env_options, internal_comparator,
false /* skip_filters */, -1 /* level */),
std::move(file_reader), file_size, &table_reader,
false /* prefetch_index_and_filter_in_cache */);
if (!s.ok()) {
return s;
}
s = table_reader->VerifyChecksum();
return s;
}

} // namespace rocksdb

#endif // ROCKSDB_LITE
10 changes: 10 additions & 0 deletions db/corruption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "db/log_format.h"
#include "db/version_set.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/write_batch.h"
Expand Down Expand Up @@ -179,6 +180,9 @@ class CorruptionTest : public testing::Test {
}
s = WriteStringToFile(Env::Default(), contents, fname);
ASSERT_TRUE(s.ok()) << s.ToString();
Options options;
EnvOptions env_options;
ASSERT_NOK(VerifySstFileChecksum(options, env_options, fname));
}

void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
Expand Down Expand Up @@ -312,6 +316,7 @@ TEST_F(CorruptionTest, TableFile) {

Corrupt(kTableFile, 100, 1);
Check(99, 99);
ASSERT_NOK(dbi->VerifyChecksum());
}

TEST_F(CorruptionTest, TableFileIndexData) {
Expand All @@ -330,6 +335,7 @@ TEST_F(CorruptionTest, TableFileIndexData) {
// one full file should be readable, since only one was corrupted
// the other file should be fully non-readable, since index was corrupted
Check(5000, 5000);
ASSERT_NOK(dbi->VerifyChecksum());
}

TEST_F(CorruptionTest, MissingDescriptor) {
Expand Down Expand Up @@ -389,10 +395,12 @@ TEST_F(CorruptionTest, CompactionInputError) {

Corrupt(kTableFile, 100, 1);
Check(9, 9);
ASSERT_NOK(dbi->VerifyChecksum());

// Force compactions by writing lots of values
Build(10000);
Check(10000, 10000);
ASSERT_NOK(dbi->VerifyChecksum());
}

TEST_F(CorruptionTest, CompactionInputErrorParanoid) {
Expand Down Expand Up @@ -424,6 +432,7 @@ TEST_F(CorruptionTest, CompactionInputErrorParanoid) {

CorruptTableFileAtLevel(0, 100, 1);
Check(9, 9);
ASSERT_NOK(dbi->VerifyChecksum());

// Write must eventually fail because of corrupted table
Status s;
Expand All @@ -445,6 +454,7 @@ TEST_F(CorruptionTest, UnrelatedKeys) {
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_FlushMemTable();
Corrupt(kTableFile, 100, 1);
ASSERT_NOK(dbi->VerifyChecksum());

std::string tmp1, tmp2;
ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
Expand Down
50 changes: 50 additions & 0 deletions db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/merge_operator.h"
Expand All @@ -80,6 +81,7 @@
#include "table/merging_iterator.h"
#include "table/table_builder.h"
#include "table/two_level_iterator.h"
#include "tools/sst_dump_tool_imp.h"
#include "util/auto_roll_logger.h"
#include "util/autovector.h"
#include "util/build_version.h"
Expand Down Expand Up @@ -2740,6 +2742,54 @@ Status DBImpl::IngestExternalFile(
return status;
}

Status DBImpl::VerifyChecksum() {
Status s;
Options options;
EnvOptions env_options;
std::vector<ColumnFamilyData*> cfd_list;
{
InstrumentedMutexLock l(&mutex_);
for (auto cfd : *versions_->GetColumnFamilySet()) {
if (!cfd->IsDropped() && cfd->initialized()) {
cfd->Ref();
cfd_list.push_back(cfd);
}
}
}
std::vector<SuperVersion*> sv_list;
for (auto cfd : cfd_list) {
sv_list.push_back(cfd->GetReferencedSuperVersion(&mutex_));
}
for (auto& sv : sv_list) {
VersionStorageInfo* vstorage = sv->current->storage_info();
for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) {
for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok();
j++) {
const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd;
std::string fname = TableFileName(immutable_db_options_.db_paths,
fd.GetNumber(), fd.GetPathId());
s = rocksdb::VerifySstFileChecksum(options, env_options, fname);
}
}
if (!s.ok()) {
break;
}
}
{
InstrumentedMutexLock l(&mutex_);
for (auto sv : sv_list) {
if (sv && sv->Unref()) {
sv->Cleanup();
delete sv;
}
}
for (auto cfd : cfd_list) {
cfd->Unref();
}
}
return s;
}

void DBImpl::NotifyOnExternalFileIngested(
ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) {
#ifndef ROCKSDB_LITE
Expand Down
2 changes: 2 additions & 0 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ class DBImpl : public DB {
const std::vector<std::string>& external_files,
const IngestExternalFileOptions& ingestion_options) override;

virtual Status VerifyChecksum() override;

#endif // ROCKSDB_LITE

// Similar to GetSnapshot(), but also lets the db know that this snapshot
Expand Down
4 changes: 4 additions & 0 deletions db/db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2235,6 +2235,10 @@ class ModelDB : public DB {
return Status::NotSupported("Not implemented.");
}

virtual Status VerifyChecksum() override {
return Status::NotSupported("Not implemented.");
}

using DB::GetPropertiesOfAllTables;
virtual Status GetPropertiesOfAllTables(
ColumnFamilyHandle* column_family,
Expand Down
5 changes: 5 additions & 0 deletions include/rocksdb/convenience.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
// Snapshots before the delete might not see the data in the given range.
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end);

// Verify the checksum of file
Status VerifySstFileChecksum(const Options& options,
const EnvOptions& env_options,
const std::string& file_path);
#endif // ROCKSDB_LITE

} // namespace rocksdb
2 changes: 2 additions & 0 deletions include/rocksdb/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,8 @@ class DB {
return IngestExternalFile(DefaultColumnFamily(), external_files, options);
}

virtual Status VerifyChecksum() = 0;

// AddFile() is deprecated, please use IngestExternalFile()
ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
ColumnFamilyHandle* column_family,
Expand Down
2 changes: 2 additions & 0 deletions include/rocksdb/utilities/stackable_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class StackableDB : public DB {
return db_->IngestExternalFile(column_family, external_files, options);
}

virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); }

using DB::KeyMayExist;
virtual bool KeyMayExist(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
Expand Down
55 changes: 54 additions & 1 deletion table/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,6 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep,
std::unique_ptr<InternalIterator>* iter) {
// TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
// it is an empty block.
// TODO: we never really verify check sum for meta index block
std::unique_ptr<Block> meta;
Status s = ReadBlockFromFile(
rep->file.get(), rep->footer, ReadOptions(),
Expand Down Expand Up @@ -1746,6 +1745,60 @@ Status BlockBasedTable::Prefetch(const Slice* const begin,
return Status::OK();
}

Status BlockBasedTable::VerifyChecksum() {
Status s;
// Check Meta blocks
std::unique_ptr<Block> meta;
std::unique_ptr<InternalIterator> meta_iter;
s = ReadMetaBlock(rep_, &meta, &meta_iter);
if (s.ok()) {
s = VerifyChecksumInBlocks(meta_iter.get());
if (!s.ok()) {
return s;
}
} else {
return s;
}
// Check Data blocks
BlockIter iiter_on_stack;
InternalIterator* iiter = NewIndexIterator(ReadOptions(), &iiter_on_stack);
std::unique_ptr<InternalIterator> iiter_unique_ptr;
if (iiter != &iiter_on_stack) {
iiter_unique_ptr = std::unique_ptr<InternalIterator>(iiter);
}
if (!iiter->status().ok()) {
// error opening index iterator
return iiter->status();
}
s = VerifyChecksumInBlocks(iiter);
return s;
}

Status BlockBasedTable::VerifyChecksumInBlocks(InternalIterator* index_iter) {
Status s;
for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) {
s = index_iter->status();
if (!s.ok()) {
break;
}
BlockHandle handle;
Slice input = index_iter->value();
s = handle.DecodeFrom(&input);
if (!s.ok()) {
break;
}
BlockContents contents;
s = ReadBlockContents(rep_->file.get(), rep_->footer, ReadOptions(),
handle, &contents, rep_->ioptions,
false /* decompress */, Slice() /*compression dict*/,
rep_->persistent_cache_options);
if (!s.ok()) {
break;
}
}
return s;
}

bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
std::unique_ptr<InternalIterator> iiter(NewIndexIterator(options));
Expand Down
4 changes: 4 additions & 0 deletions table/block_based_table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ class BlockBasedTable : public TableReader {
// convert SST file to a human readable form
Status DumpTable(WritableFile* out_file) override;

Status VerifyChecksum() override;

void Close() override;

~BlockBasedTable();
Expand Down Expand Up @@ -310,6 +312,8 @@ class BlockBasedTable : public TableReader {
static Status ReadMetaBlock(Rep* rep, std::unique_ptr<Block>* meta_block,
std::unique_ptr<InternalIterator>* iter);

Status VerifyChecksumInBlocks(InternalIterator* index_iter);

// Create the filter from the filter block.
FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
const bool is_a_filter_partition) const;
Expand Down
5 changes: 5 additions & 0 deletions table/table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ class TableReader {
return Status::NotSupported("DumpTable() not supported");
}

// check whether there is corruption in this db file
virtual Status VerifyChecksum() {
return Status::NotSupported("VerifyChecksum() not supported");
}

virtual void Close() {}
};

Expand Down
18 changes: 17 additions & 1 deletion tools/sst_dump_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ Status SstFileReader::NewTableReader(
std::move(file_), file_size, &table_reader_);
}

Status SstFileReader::VerifyChecksum() {
return table_reader_->VerifyChecksum();
}

Status SstFileReader::DumpTable(const std::string& out_filename) {
unique_ptr<WritableFile> out_file;
Env* env = Env::Default();
Expand Down Expand Up @@ -349,10 +353,11 @@ void print_help() {
--file=<data_dir_OR_sst_file>
Path to SST file or directory containing SST files

--command=check|scan|raw
--command=check|scan|raw|verify
check: Iterate over entries in files but dont print anything except if an error is encounterd (default command)
scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered

--output_hex
Can be combined with scan command to print the keys and values in Hex
Expand Down Expand Up @@ -580,6 +585,17 @@ int SSTDumpTool::Run(int argc, char** argv) {
}
}

if (command == "verify") {
st = reader.VerifyChecksum();
if (!st.ok()) {
fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(),
st.ToString().c_str());
} else {
fprintf(stdout, "The file is ok\n");
}
continue;
}

if (show_properties || show_summary) {
const rocksdb::TableProperties* table_properties;

Expand Down
1 change: 1 addition & 0 deletions tools/sst_dump_tool_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class SstFileReader {
uint64_t GetReadNumber() { return read_num_; }
TableProperties* GetInitTableProperties() { return table_properties_.get(); }

Status VerifyChecksum();
Status DumpTable(const std::string& out_filename);
Status getStatus() { return init_result_; }

Expand Down

0 comments on commit 7848f0b

Please sign in to comment.