Skip to content

Commit

Permalink
Ingest external SST files with Temperature hints (facebook#8949)
Browse files Browse the repository at this point in the history
Summary:
Add the file temperature to `IngestExternalFileArg` such that when SST files are ingested, user is able to assign the temperature to each SST file. If the temperature vector is empty or its size does not match the file name vector size, all ingested SST files will be assigned with `Temperature::unKnown`.

Pull Request resolved: facebook#8949

Test Plan: add the new test and make check

Reviewed By: siying

Differential Revision: D31127852

Pulled By: zhichao-cao

fbshipit-source-id: 141a81f0f7b473d88f4ab0cb2a21a114cbc6f83c
  • Loading branch information
zhichao-cao authored and facebook-github-bot committed Oct 8, 2021
1 parent 2f1296e commit bcd049c
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 13 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name.
* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.

## 6.25.0 (2021-09-20)
### Bug Fixes
Expand Down
6 changes: 4 additions & 2 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4558,7 +4558,8 @@ Status DBImpl::IngestExternalFiles(
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
Status es = ingestion_jobs[i].Prepare(
args[i].external_files, args[i].files_checksums,
args[i].files_checksum_func_names, start_file_number, super_version);
args[i].files_checksum_func_names, args[i].file_temperature,
start_file_number, super_version);
// capture first error only
if (!es.ok() && status.ok()) {
status = es;
Expand All @@ -4573,7 +4574,8 @@ Status DBImpl::IngestExternalFiles(
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
Status es = ingestion_jobs[0].Prepare(
args[0].external_files, args[0].files_checksums,
args[0].files_checksum_func_names, next_file_number, super_version);
args[0].files_checksum_func_names, args[0].file_temperature,
next_file_number, super_version);
if (!es.ok()) {
status = es;
}
Expand Down
5 changes: 2 additions & 3 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,10 @@ class DBTest2 : public DBTestBase {
#ifndef ROCKSDB_LITE
uint64_t GetSstSizeHelper(Temperature temperature) {
std::string prop;
bool s =
EXPECT_TRUE(
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
ToString(static_cast<uint8_t>(temperature)),
&prop);
assert(s);
&prop));
return static_cast<uint64_t>(std::atoi(prop.c_str()));
}
#endif // ROCKSDB_LITE
Expand Down
130 changes: 130 additions & 0 deletions db/external_sst_file_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,16 @@ class ExternalSSTFileBasicTest
std::string sst_files_dir_;
std::unique_ptr<FaultInjectionTestEnv> fault_injection_test_env_;
bool random_rwfile_supported_;
#ifndef ROCKSDB_LITE
uint64_t GetSstSizeHelper(Temperature temperature) {
std::string prop;
EXPECT_TRUE(
dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
ToString(static_cast<uint8_t>(temperature)),
&prop));
return static_cast<uint64_t>(std::atoi(prop.c_str()));
}
#endif // ROCKSDB_LITE
};

TEST_F(ExternalSSTFileBasicTest, Basic) {
Expand Down Expand Up @@ -478,6 +488,20 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName);
}

// check the temperature of the file being ingested
ColumnFamilyMetaData metadata;
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(1, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[6].files[0].temperature);
auto size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);

// Reopen Db with checksum enabled
Reopen(options);
// Enable verify_file_checksum option
Expand Down Expand Up @@ -598,6 +622,15 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) {
}
ASSERT_OK(s) << s.ToString();
ASSERT_OK(env_->FileExists(file6));
db_->GetColumnFamilyMetaData(&metadata);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);
}

TEST_F(ExternalSSTFileBasicTest, NoCopy) {
Expand Down Expand Up @@ -1666,6 +1699,103 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) {
ASSERT_EQ(Get("k"), "b");
}

TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) {
Options options = CurrentOptions();
const ImmutableCFOptions ioptions(options);
options.bottommost_temperature = Temperature::kWarm;
SstFileWriter sst_file_writer(EnvOptions(), options);
options.level0_file_num_compaction_trigger = 2;
Reopen(options);

auto size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);

// create file01.sst (1000 => 1099) and ingest it
std::string file1 = sst_files_dir_ + "file01.sst";
ASSERT_OK(sst_file_writer.Open(file1));
for (int k = 1000; k < 1100; k++) {
ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
}
ExternalSstFileInfo file1_info;
Status s = sst_file_writer.Finish(&file1_info);
ASSERT_OK(s);
ASSERT_EQ(file1_info.file_path, file1);
ASSERT_EQ(file1_info.num_entries, 100);
ASSERT_EQ(file1_info.smallest_key, Key(1000));
ASSERT_EQ(file1_info.largest_key, Key(1099));

std::vector<std::string> files;
std::vector<std::string> files_checksums;
std::vector<std::string> files_checksum_func_names;
Temperature file_temperature = Temperature::kWarm;

files.push_back(file1);
IngestExternalFileOptions in_opts;
in_opts.move_files = false;
in_opts.snapshot_consistency = true;
in_opts.allow_global_seqno = false;
in_opts.allow_blocking_flush = false;
in_opts.write_global_seqno = true;
in_opts.verify_file_checksum = false;
IngestExternalFileArg arg;
arg.column_family = db_->DefaultColumnFamily();
arg.external_files = files;
arg.options = in_opts;
arg.files_checksums = files_checksums;
arg.files_checksum_func_names = files_checksum_func_names;
arg.file_temperature = file_temperature;
s = db_->IngestExternalFiles({arg});
ASSERT_OK(s);

// check the temperature of the file being ingested
ColumnFamilyMetaData metadata;
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(1, metadata.file_count);
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 1);

// non-bottommost file still has unknown temperature
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Put("bar", "bar"));
ASSERT_OK(Flush());
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0);

// reopen and check the information is persisted
Reopen(options);
db_->GetColumnFamilyMetaData(&metadata);
ASSERT_EQ(2, metadata.file_count);
ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
ASSERT_EQ(Temperature::kWarm, metadata.levels[6].files[0].temperature);
size = GetSstSizeHelper(Temperature::kUnknown);
ASSERT_GT(size, 0);
size = GetSstSizeHelper(Temperature::kWarm);
ASSERT_GT(size, 0);

// check other non-exist temperatures
size = GetSstSizeHelper(Temperature::kHot);
ASSERT_EQ(size, 0);
size = GetSstSizeHelper(Temperature::kCold);
ASSERT_EQ(size, 0);
std::string prop;
ASSERT_TRUE(dbfull()->GetProperty(
DB::Properties::kLiveSstFilesSizeAtTemperature + std::to_string(22),
&prop));
ASSERT_EQ(std::atoi(prop.c_str()), 0);
}

INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),
Expand Down
21 changes: 14 additions & 7 deletions db/external_sst_file_ingestion_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ Status ExternalSstFileIngestionJob::Prepare(
const std::vector<std::string>& external_files_paths,
const std::vector<std::string>& files_checksums,
const std::vector<std::string>& files_checksum_func_names,
uint64_t next_file_number, SuperVersion* sv) {
const Temperature& file_temperature, uint64_t next_file_number,
SuperVersion* sv) {
Status status;

// Read the information of files we are ingesting
Expand Down Expand Up @@ -89,6 +90,11 @@ Status ExternalSstFileIngestionJob::Prepare(
}
}

// Hanlde the file temperature
for (size_t i = 0; i < num_files; i++) {
files_to_ingest_[i].file_temperature = file_temperature;
}

if (ingestion_options_.ingest_behind && files_overlap_) {
return Status::NotSupported("Files have overlapping ranges");
}
Expand Down Expand Up @@ -429,12 +435,13 @@ Status ExternalSstFileIngestionJob::Run() {
current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time);
}

edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
f.fd.GetFileSize(), f.smallest_internal_key,
f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
false, kInvalidBlobFileNumber, oldest_ancester_time,
current_time, f.file_checksum, f.file_checksum_func_name);
FileMetaData f_metadata(
f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(),
f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
f.assigned_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time,
current_time, f.file_checksum, f.file_checksum_func_name);
f_metadata.temperature = f.file_temperature;
edit_.AddFile(f.picked_level, f_metadata);
}
return status;
}
Expand Down
5 changes: 4 additions & 1 deletion db/external_sst_file_ingestion_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct IngestedFileInfo {
std::string file_checksum;
// The name of checksum function that generate the checksum
std::string file_checksum_func_name;
// The temperature of the file to be ingested
Temperature file_temperature = Temperature::kUnknown;
};

class ExternalSstFileIngestionJob {
Expand Down Expand Up @@ -99,7 +101,8 @@ class ExternalSstFileIngestionJob {
Status Prepare(const std::vector<std::string>& external_files_paths,
const std::vector<std::string>& files_checksums,
const std::vector<std::string>& files_checksum_func_names,
uint64_t next_file_number, SuperVersion* sv);
const Temperature& file_temperature, uint64_t next_file_number,
SuperVersion* sv);

// Check if we need to flush the memtable before running the ingestion job
// This will be true if the files we are ingesting are overlapping with any
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,15 @@ struct RangePtr {
// empty (no checksum information is provided for ingestion). Otherwise,
// their sizes should be the same as external_files. The file order should
// be the same in three vectors and guaranteed by the caller.
// Note that, we assume the temperatures of this batch of files to be
// ingested are the same.
struct IngestExternalFileArg {
ColumnFamilyHandle* column_family = nullptr;
std::vector<std::string> external_files;
IngestExternalFileOptions options;
std::vector<std::string> files_checksums;
std::vector<std::string> files_checksum_func_names;
Temperature file_temperature = Temperature::kUnknown;
};

struct GetMergeOperandsOptions {
Expand Down

0 comments on commit bcd049c

Please sign in to comment.