Skip to content

Commit

Permalink
Fix a bug when ingest plaintable sst file (facebook#11969)
Browse files Browse the repository at this point in the history
Summary:
Plaintable doesn't support SeekToLast. And GetIngestedFileInfo is using SeekToLast without checking the validity.

We are using IngestExternalFile or CreateColumnFamilyWithImport with some sst file in PlainTable format . But after running for a while, compaction error often happens. Such as
![image](https://github.com/facebook/rocksdb/assets/13954644/b4fa49fc-73fc-49ce-96c6-f198a30800b8)

I simply add some std::cerr log to find why.
![image](https://github.com/facebook/rocksdb/assets/13954644/2cf1d5ff-48cc-4125-b917-87090f764fcd)
It shows that the smallest key is always equal to largest key.
![image](https://github.com/facebook/rocksdb/assets/13954644/6d43e978-0be0-4306-aae3-f9e4ae366395)
Then I found the root cause is that PlainTable do not support SeekToLast, so the smallest key is always the same with the largest

I try to write an unit test. But it's not easy to reproduce this error.
(This PR is similar to facebook#11266. Sorry for open another PR)

Pull Request resolved: facebook#11969

Reviewed By: ajkr

Differential Revision: D50933854

Pulled By: cbi42

fbshipit-source-id: 6c6af53c1388922cbabbe64ed3be1cdc58df5431
  • Loading branch information
914022466 authored and facebook-github-bot committed Nov 2, 2023
1 parent a429105 commit 2648e0a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
29 changes: 27 additions & 2 deletions db/external_sst_file_ingestion_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -792,8 +792,33 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
}
file_to_ingest->smallest_internal_key.SetFrom(key);

iter->SeekToLast();
pik_status = ParseInternalKey(iter->key(), &key, allow_data_in_errors);
Slice largest;
if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) {
// PlainTable iterator does not support SeekToLast().
largest = iter->key();
for (; iter->Valid(); iter->Next()) {
if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) {
largest = iter->key();
}
}
if (!iter->status().ok()) {
return iter->status();
}
} else {
iter->SeekToLast();
if (!iter->Valid()) {
if (iter->status().ok()) {
// The file contains at least 1 key since iter is valid after
// SeekToFirst().
return Status::Corruption("Can not find largest key in sst file");
} else {
return iter->status();
}
}
largest = iter->key();
}

pik_status = ParseInternalKey(largest, &key, allow_data_in_errors);
if (!pik_status.ok()) {
return Status::Corruption("Corrupted key in external file. ",
pik_status.getState());
Expand Down
28 changes: 26 additions & 2 deletions db/import_column_family_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,32 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
bool bound_set = false;
if (iter->Valid()) {
file_to_import->smallest_internal_key.DecodeFrom(iter->key());
iter->SeekToLast();
file_to_import->largest_internal_key.DecodeFrom(iter->key());
Slice largest;
if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) {
// PlainTable iterator does not support SeekToLast().
largest = iter->key();
for (; iter->Valid(); iter->Next()) {
if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) {
largest = iter->key();
}
}
if (!iter->status().ok()) {
return iter->status();
}
} else {
iter->SeekToLast();
if (!iter->Valid()) {
if (iter->status().ok()) {
// The file contains at least 1 key since iter is valid after
// SeekToFirst().
return Status::Corruption("Can not find largest key in sst file");
} else {
return iter->status();
}
}
largest = iter->key();
}
file_to_import->largest_internal_key.DecodeFrom(largest);
bound_set = true;
}

Expand Down

0 comments on commit 2648e0a

Please sign in to comment.