Skip to content

Commit

Permalink
Use user key on sst file for blob verification for Get and MultiGet (f…
Browse files Browse the repository at this point in the history
…acebook#11105)

Summary:
Use the user key on sst file for blob verification for `Get` and `MultiGet` instead of the user key passed from caller.

Add tests for `Get` and `MultiGet` operations when user defined timestamp feature is enabled in a BlobDB.

Pull Request resolved: facebook#11105

Test Plan:
make V=1 db_blob_basic_test
./db_blob_basic_test --gtest_filter="DBBlobTestWithTimestamp.*"

Reviewed By: ltamasi

Differential Revision: D42716487

Pulled By: jowlyzhang

fbshipit-source-id: 5987ecbb7e56ddf46d2467a3649369390789506a
  • Loading branch information
jowlyzhang authored and facebook-github-bot committed Jan 30, 2023
1 parent 79e57a3 commit 24ac53d
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 10 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### Bug Fixes
* Fixed a data race on `ColumnFamilyData::flush_reason` caused by concurrent flushes.
* Fixed an issue in `Get` and `MultiGet` when user-defined timestamps is enabled in combination with BlobDB.

### Feature Removal
* Remove RocksDB Lite.
Expand Down
185 changes: 185 additions & 0 deletions db/blob/db_blob_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/db_test_util.h"
#include "db/db_with_timestamp_test_util.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
#include "utilities/fault_injection_env.h"
Expand Down Expand Up @@ -1771,6 +1772,190 @@ TEST_F(DBBlobBasicTest, WarmCacheWithBlobsSecondary) {
1);
}

class DBBlobWithTimestampTest : public DBBasicTestWithTimestampBase {
protected:
DBBlobWithTimestampTest()
: DBBasicTestWithTimestampBase("db_blob_with_timestamp_test") {}
};

TEST_F(DBBlobWithTimestampTest, GetBlob) {
Options options = GetDefaultOptions();
options.create_if_missing = true;
options.enable_blob_files = true;
options.min_blob_size = 0;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);
WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
constexpr char key[] = "key";
constexpr char blob_value[] = "blob_value";

ASSERT_OK(db_->Put(write_opts, key, ts, blob_value));

ASSERT_OK(Flush());

const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
std::string value;
ASSERT_OK(db_->Get(read_opts, key, &value));
ASSERT_EQ(value, blob_value);
}

TEST_F(DBBlobWithTimestampTest, MultiGetBlobs) {
constexpr size_t min_blob_size = 6;

Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = min_blob_size;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

// Put then retrieve three key-values. The first value is below the size limit
// and is thus stored inline; the other two are stored separately as blobs.
constexpr size_t num_keys = 3;

constexpr char first_key[] = "first_key";
constexpr char first_value[] = "short";
static_assert(sizeof(first_value) - 1 < min_blob_size,
"first_value too long to be inlined");

DestroyAndReopen(options);
WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
ASSERT_OK(db_->Put(write_opts, first_key, ts, first_value));

constexpr char second_key[] = "second_key";
constexpr char second_value[] = "long_value";
static_assert(sizeof(second_value) - 1 >= min_blob_size,
"second_value too short to be stored as blob");

ASSERT_OK(db_->Put(write_opts, second_key, ts, second_value));

constexpr char third_key[] = "third_key";
constexpr char third_value[] = "other_long_value";
static_assert(sizeof(third_value) - 1 >= min_blob_size,
"third_value too short to be stored as blob");

ASSERT_OK(db_->Put(write_opts, third_key, ts, third_value));

ASSERT_OK(Flush());

ReadOptions read_options;
const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
read_options.timestamp = &read_ts_slice;
std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};

{
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;

db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys, &keys[0],
&values[0], &statuses[0]);

ASSERT_OK(statuses[0]);
ASSERT_EQ(values[0], first_value);

ASSERT_OK(statuses[1]);
ASSERT_EQ(values[1], second_value);

ASSERT_OK(statuses[2]);
ASSERT_EQ(values[2], third_value);
}
}

TEST_F(DBBlobWithTimestampTest, GetMergeBlobWithPut) {
Options options = GetDefaultOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v2"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v3"));
ASSERT_OK(Flush());

std::string value;
const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
ASSERT_OK(db_->Get(read_opts, "Key1", &value));
ASSERT_EQ(value, "v1,v2,v3");
}

TEST_F(DBBlobWithTimestampTest, MultiGetMergeBlobWithPut) {
constexpr size_t num_keys = 3;

Options options = GetDefaultOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);

ASSERT_OK(db_->Put(write_opts, "Key0", ts, "v0_0"));
ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1_0"));
ASSERT_OK(db_->Put(write_opts, "Key2", ts, "v2_0"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_1"));
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v1_1"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_2"));
ASSERT_OK(Flush());

const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;

db_->MultiGet(read_opts, db_->DefaultColumnFamily(), num_keys, &keys[0],
&values[0], &statuses[0]);

ASSERT_OK(statuses[0]);
ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");

ASSERT_OK(statuses[1]);
ASSERT_EQ(values[1], "v1_0,v1_1");

ASSERT_OK(statuses[2]);
ASSERT_EQ(values[2], "v2_0");
}

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
13 changes: 8 additions & 5 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2194,8 +2194,9 @@ void Version::MultiGetBlob(

key_context.value->Reset();
blob_reqs_in_file.emplace_back(
key_context.ukey_with_ts, blob_index.offset(), blob_index.size(),
blob_index.compression(), key_context.value, key_context.s);
key_context.get_context->ukey_to_get_blob_value(),
blob_index.offset(), blob_index.size(), blob_index.compression(),
key_context.value, key_context.s);
}
if (blob_reqs_in_file.size() > 0) {
const auto file_size = blob_file_meta->GetBlobFileSize();
Expand All @@ -2204,7 +2205,8 @@ void Version::MultiGetBlob(
}

if (blob_reqs.size() > 0) {
blob_source_->MultiGetBlob(read_options, blob_reqs, /*bytes_read=*/nullptr);
blob_source_->MultiGetBlob(read_options, blob_reqs,
/*bytes_read=*/nullptr);
}

for (auto& ctx : blob_ctxs) {
Expand Down Expand Up @@ -2345,8 +2347,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
constexpr uint64_t* bytes_read = nullptr;

*status = GetBlob(read_options, user_key, *value, prefetch_buffer,
value, bytes_read);
*status =
GetBlob(read_options, get_context.ukey_to_get_blob_value(),
*value, prefetch_buffer, value, bytes_read);
if (!status->ok()) {
if (status->IsIncomplete()) {
get_context.MarkKeyMayExist();
Expand Down
11 changes: 7 additions & 4 deletions table/get_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
if (kNotFound == state_) {
state_ = kFound;
if (do_merge_) {
if (type == kTypeBlobIndex && ucmp_->timestamp_size() != 0) {
ukey_with_ts_found_.PinSelf(parsed_key.user_key);
}
if (LIKELY(pinnable_val_ != nullptr)) {
Slice value_to_use = value;

Expand Down Expand Up @@ -339,7 +342,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
// merge_context_->operand_list
if (type == kTypeBlobIndex) {
PinnableSlice pin_val;
if (GetBlobValue(value, &pin_val) == false) {
if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) {
return false;
}
Slice blob_value(pin_val);
Expand All @@ -365,7 +368,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
assert(merge_operator_ != nullptr);
if (type == kTypeBlobIndex) {
PinnableSlice pin_val;
if (GetBlobValue(value, &pin_val) == false) {
if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) {
return false;
}
Slice blob_value(pin_val);
Expand Down Expand Up @@ -545,13 +548,13 @@ void GetContext::MergeWithEntity(Slice entity) {
}
}

bool GetContext::GetBlobValue(const Slice& blob_index,
bool GetContext::GetBlobValue(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value) {
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
constexpr uint64_t* bytes_read = nullptr;

Status status = blob_fetcher_->FetchBlob(
user_key_, blob_index, prefetch_buffer, blob_value, bytes_read);
user_key, blob_index, prefetch_buffer, blob_value, bytes_read);
if (!status.ok()) {
if (status.IsIncomplete()) {
// FIXME: this code is not covered by unit tests
Expand Down
15 changes: 14 additions & 1 deletion table/get_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,23 @@ class GetContext {

bool has_callback() const { return callback_ != nullptr; }

const Slice& ukey_to_get_blob_value() const {
if (!ukey_with_ts_found_.empty()) {
return ukey_with_ts_found_;
} else {
return user_key_;
}
}

uint64_t get_tracing_get_id() const { return tracing_get_id_; }

void push_operand(const Slice& value, Cleanable* value_pinner);

private:
void Merge(const Slice* value);
void MergeWithEntity(Slice entity);
bool GetBlobValue(const Slice& blob_index, PinnableSlice* blob_value);
bool GetBlobValue(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value);

const Comparator* ucmp_;
const MergeOperator* merge_operator_;
Expand All @@ -194,6 +203,10 @@ class GetContext {

GetState state_;
Slice user_key_;
// When a blob index is found with the user key containing timestamp,
// this copies the corresponding user key on record in the sst file
// and is later used for blob verification.
PinnableSlice ukey_with_ts_found_;
PinnableSlice* pinnable_val_;
PinnableWideColumns* columns_;
std::string* timestamp_;
Expand Down

0 comments on commit 24ac53d

Please sign in to comment.