diff --git a/HISTORY.md b/HISTORY.md index 677b984e514..79eb4ca52b4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,11 @@ # Rocksdb Change Log +## Unreleased +### Bug Fixes +* Fixed a major performance bug in which Bloom filters generated by pre-7.0 releases are not read by early 7.0.x releases (and vice versa) due to changes to FilterPolicy::Name() in #9590. This can severely impact read performance and read I/O on upgrade or downgrade with an existing DB, but not data correctness. + +### Public API changes +* Added the pure virtual FilterPolicy::CompatibilityName(), which is needed for fixing a major performance bug involving FilterPolicy naming in SST metadata without affecting the Customizable aspect of FilterPolicy. This change only affects those with their own custom or wrapper FilterPolicy classes. + ## 7.1.0 (03/21/2022) ### Public API changes * Add DB::OpenAndTrimHistory API. This API will open DB and trim data to the timestamp specified by trim_ts (The data with timestamp larger than specified trim bound will be removed). This API should only be used at a timestamp-enabled column families recovery. If the column family doesn't have timestamp enabled, this API won't trim any data on that column family. This API is not compatible with avoid_flush_during_recovery option. 
diff --git a/db/c.cc b/db/c.cc index 0a181eeccea..4ddf6ad8043 100644 --- a/db/c.cc +++ b/db/c.cc @@ -3752,6 +3752,9 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format( const FilterPolicy* rep_; ~Wrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } + const char* CompatibilityName() const override { + return rep_->CompatibilityName(); + } // No need to override GetFilterBitsBuilder if this one is overridden ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( const ROCKSDB_NAMESPACE::FilterBuildingContext& context) @@ -3789,6 +3792,9 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( const FilterPolicy* rep_; ~Wrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } + const char* CompatibilityName() const override { + return rep_->CompatibilityName(); + } ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( const ROCKSDB_NAMESPACE::FilterBuildingContext& context) const override { diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index eea41bc0c9e..c613505c5fa 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1638,9 +1638,15 @@ class LevelAndStyleCustomFilterPolicy : public FilterPolicy { policy_l0_other_(NewBloomFilterPolicy(bpk_l0_other)), policy_otherwise_(NewBloomFilterPolicy(bpk_otherwise)) {} + const char* Name() const override { + return "LevelAndStyleCustomFilterPolicy"; + } + // OK to use built-in policy name because we are deferring to a // built-in builder. We aren't changing the serialized format. 
- const char* Name() const override { return policy_fifo_->Name(); } + const char* CompatibilityName() const override { + return policy_fifo_->CompatibilityName(); + } FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext& context) const override { diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 873520ee059..954d15b4a19 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -90,6 +90,19 @@ class FilterPolicy : public Customizable { virtual ~FilterPolicy(); static const char* Type() { return "FilterPolicy"; } + // The name used for identifying whether a filter on disk is readable + // by this FilterPolicy. If this FilterPolicy is part of a family that + // can read each other's filters, such as built-in BloomFilterPolicy and + // RibbonFilterPolicy, the CompatibilityName is a shared family name, + // while kinds of filters in the family can have distinct Customizable + // Names. This function is pure virtual so that wrappers around built-in + // policies are prompted to defer to CompatibilityName() of the wrapped + // policy, which is important for compatibility. + // + // For custom filter policies that are not part of a read-compatible + // family (rare), implementations may return Name(). + virtual const char* CompatibilityName() const = 0; + // Creates a new FilterPolicy based on the input value string and returns the // result The value might be an ID, and ID with properties, or an old-style // policy string. 
diff --git a/options/customizable_test.cc b/options/customizable_test.cc index 690b394eae6..84f7f46569d 100644 --- a/options/customizable_test.cc +++ b/options/customizable_test.cc @@ -1487,6 +1487,7 @@ class MockFilterPolicy : public FilterPolicy { public: static const char* kClassName() { return "MockFilterPolicy"; } const char* Name() const override { return kClassName(); } + const char* CompatibilityName() const override { return Name(); } FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const override { return nullptr; diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 6c622555dca..41a49556b46 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1605,7 +1605,7 @@ void BlockBasedTableBuilder::WriteFilterBlock( ? BlockBasedTable::kPartitionedFilterBlockPrefix : BlockBasedTable::kFullFilterBlockPrefix; } - key.append(rep_->table_options.filter_policy->Name()); + key.append(rep_->table_options.filter_policy->CompatibilityName()); meta_index_builder->Add(key, filter_block_handle); } } diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 76010bc0509..6338cba286c 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ #include "table/block_based/block_prefix_index.h" #include "table/block_based/block_type.h" #include "table/block_based/filter_block.h" +#include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" #include "table/block_based/hash_index_reader.h" #include "table/block_based/partitioned_filter_block.h" @@ -897,33 +899,59 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( const BlockBasedTableOptions& table_options, const int level, size_t file_size, size_t 
max_file_size_for_l0_meta_pin, BlockCacheLookupContext* lookup_context) { - Status s; - // Find filter handle and filter type if (rep_->filter_policy) { - for (auto filter_type : - {Rep::FilterType::kFullFilter, Rep::FilterType::kPartitionedFilter, - Rep::FilterType::kBlockFilter}) { - std::string prefix; - switch (filter_type) { - case Rep::FilterType::kFullFilter: - prefix = kFullFilterBlockPrefix; - break; - case Rep::FilterType::kPartitionedFilter: - prefix = kPartitionedFilterBlockPrefix; - break; - case Rep::FilterType::kBlockFilter: - prefix = kFilterBlockPrefix; + auto name = rep_->filter_policy->CompatibilityName(); + bool builtin_compatible = + strcmp(name, BuiltinFilterPolicy::kCompatibilityName()) == 0; + + for (const auto& [filter_type, prefix] : + {std::make_pair(Rep::FilterType::kFullFilter, kFullFilterBlockPrefix), + std::make_pair(Rep::FilterType::kPartitionedFilter, + kPartitionedFilterBlockPrefix), + std::make_pair(Rep::FilterType::kBlockFilter, kFilterBlockPrefix)}) { + if (builtin_compatible) { + // This code is only here to deal with a hiccup in early 7.0.x where + // there was an unintentional name change in the SST files metadata. + // It should be OK to remove this in the future (late 2022) and just + // have the 'else' code. + // NOTE: the test:: names below are likely not needed but included + // out of caution + static const std::unordered_set kBuiltinNameAndAliases = { + BuiltinFilterPolicy::kCompatibilityName(), + test::LegacyBloomFilterPolicy::kClassName(), + test::FastLocalBloomFilterPolicy::kClassName(), + test::Standard128RibbonFilterPolicy::kClassName(), + DeprecatedBlockBasedBloomFilterPolicy::kClassName(), + BloomFilterPolicy::kClassName(), + RibbonFilterPolicy::kClassName(), + }; + + // For efficiency, do a prefix seek and see if the first match is + // good. 
+ meta_iter->Seek(prefix); + if (meta_iter->status().ok() && meta_iter->Valid()) { + Slice key = meta_iter->key(); + if (key.starts_with(prefix)) { + key.remove_prefix(prefix.size()); + if (kBuiltinNameAndAliases.find(key.ToString()) != + kBuiltinNameAndAliases.end()) { + Slice v = meta_iter->value(); + Status s = rep_->filter_handle.DecodeFrom(&v); + if (s.ok()) { + rep_->filter_type = filter_type; + break; + } + } + } + } + } else { + std::string filter_block_key = prefix + name; + if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) + .ok()) { + rep_->filter_type = filter_type; break; - default: - assert(0); - } - std::string filter_block_key = prefix; - filter_block_key.append(rep_->filter_policy->Name()); - if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) - .ok()) { - rep_->filter_type = filter_type; - break; + } } } } @@ -932,8 +960,8 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); // Find compression dictionary handle - s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName, - &rep_->compression_dict_handle); + Status s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName, + &rep_->compression_dict_handle); if (!s.ok()) { return s; } diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 6594bfb4dc2..cfbc658aa4a 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -1325,6 +1325,16 @@ bool BuiltinFilterPolicy::IsInstanceOf(const std::string& name) const { } } +static const char* kBuiltinFilterMetadataName = "rocksdb.BuiltinBloomFilter"; + +const char* BuiltinFilterPolicy::kCompatibilityName() { + return kBuiltinFilterMetadataName; +} + +const char* BuiltinFilterPolicy::CompatibilityName() const { + return kBuiltinFilterMetadataName; +} + BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key) : warned_(false), aggregate_rounding_balance_(0) { // 
Sanitize bits_per_key @@ -1372,7 +1382,7 @@ bool BloomLikeFilterPolicy::IsInstanceOf(const std::string& name) const { } const char* ReadOnlyBuiltinFilterPolicy::kClassName() { - return "rocksdb.BuiltinBloomFilter"; + return kBuiltinFilterMetadataName; } const char* DeprecatedBlockBasedBloomFilterPolicy::kClassName() { diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 4ceeed0d053..06566f87126 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -135,6 +135,9 @@ class BuiltinFilterPolicy : public FilterPolicy { FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; static const char* kClassName(); bool IsInstanceOf(const std::string& id) const override; + // All variants of BuiltinFilterPolicy can read each other's filters. + const char* CompatibilityName() const override; + static const char* kCompatibilityName(); public: // new // An internal function for the implementation of diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 76f612728db..24d870d4cd6 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -84,6 +84,7 @@ class TestFilterBitsReader : public FilterBitsReader { class TestHashFilter : public FilterPolicy { public: const char* Name() const override { return "TestHashFilter"; } + const char* CompatibilityName() const override { return Name(); } FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const override {