From ce63eda6f09856d22d20ff98beab247ccbdbf415 Mon Sep 17 00:00:00 2001 From: Mike Kolupaev Date: Tue, 17 Dec 2019 20:07:21 -0800 Subject: [PATCH] Fix use-after-free and double-deleting files in BackgroundCallPurge() (#6193) Summary: The bad code was: ``` mutex.Lock(); // `mutex` protects `container` for (auto& x : container) { mutex.Unlock(); // do stuff to x mutex.Lock(); } ``` It's incorrect because both `x` and the iterator may become invalid if another thread modifies the container while this thread is not holding the mutex. Broken by https://github.com/facebook/rocksdb/pull/5796 - it replaced a `while (!container.empty())` loop with a `for (auto x : container)`. (RocksDB code does a lot of such unlocking+re-locking of mutexes, and this type of bugs comes up a lot :/ ) Pull Request resolved: https://github.com/facebook/rocksdb/pull/6193 Test Plan: Ran some logdevice integration tests that were crashing without this fix. Differential Revision: D19116874 Pulled By: al13n321 fbshipit-source-id: 9672bc4227c1b68f46f7436db2b96811adb8c703 --- db/db_impl/db_impl.cc | 13 ++++++++++--- db/db_impl/db_impl.h | 2 +- file/delete_scheduler.cc | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 5b830c363f4..1df43956935 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1321,19 +1321,26 @@ void DBImpl::BackgroundCallPurge() { delete sv; mutex_.Lock(); } - for (const auto& file : purge_files_) { - const PurgeFileInfo& purge_file = file.second; + + // Can't use iterator to go over purge_files_ because inside the loop we're + // unlocking the mutex that protects purge_files_. + while (!purge_files_.empty()) { + auto it = purge_files_.begin(); + // Need to make a copy of the PurgeFilesInfo before unlocking the mutex. + PurgeFileInfo purge_file = it->second; + const std::string& fname = purge_file.fname; const std::string& dir_to_sync = purge_file.dir_to_sync; FileType type = purge_file.type; uint64_t number = purge_file.number; int job_id = purge_file.job_id; + purge_files_.erase(it); + mutex_.Unlock(); DeleteObsoleteFileImpl(job_id, fname, dir_to_sync, type, number); mutex_.Lock(); } - purge_files_.clear(); bg_purge_scheduled_--; diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index d8c5e1c6040..bbfecb3a647 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1201,7 +1201,7 @@ class DBImpl : public DB { }; // PurgeFileInfo is a structure to hold information of files to be deleted in - // purge_queue_ + // purge_files_ struct PurgeFileInfo { std::string fname; std::string dir_to_sync; diff --git a/file/delete_scheduler.cc b/file/delete_scheduler.cc index d528d671339..23bc5fcea4f 100644 --- a/file/delete_scheduler.cc +++ b/file/delete_scheduler.cc @@ -74,7 +74,8 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path, s = MarkAsTrash(file_path, &trash_file); if (!s.ok()) { - ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash", file_path.c_str()); + ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s", + file_path.c_str(), s.ToString().c_str()); s = fs_->DeleteFile(file_path, IOOptions(), nullptr); if (s.ok()) { sst_file_manager_->OnDeleteFile(file_path);