Skip to content

Commit

Permalink
Mark files as trash by using .trash extension
Browse files Browse the repository at this point in the history
Summary:
SstFileManager moves files that need to be deleted into a trash directory.
Deprecate this behaviour and instead add a ".trash" extension to files that need to be deleted.
Closes facebook#2970

Differential Revision: D5976805

Pulled By: IslamAbdelRahman

fbshipit-source-id: 27374ece4315610b2792c30ffcd50232d4c9a343
  • Loading branch information
IslamAbdelRahman authored and facebook-github-bot committed Oct 27, 2017
1 parent 3ebb7ba commit 0599315
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 127 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
## Unreleased
### Public API Change
* `BackupableDBOptions::max_valid_backups_to_open == 0` now means no backups will be opened during BackupEngine initialization. Previously this condition disabled limiting backups opened.
* Deprecate the trash_dir param in NewSstFileManager; deleted files are now renamed to <name>.trash instead of being moved to a trash directory.

### New Features
* `DBOptions::bytes_per_sync` and `DBOptions::wal_bytes_per_sync` can now be changed dynamically, `DBOptions::wal_bytes_per_sync` will flush all memtables and switch to a new WAL file.
Expand Down
11 changes: 11 additions & 0 deletions db/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
result.avoid_flush_during_recovery = false;
}

#ifndef ROCKSDB_LITE
// When the DB is stopped, it's possible that there are some .trash files that
// were not deleted yet, when we open the DB we will find these .trash files
// and schedule them to be deleted (or delete immediately if SstFileManager
// was not used)
auto sfm = static_cast<SstFileManagerImpl*>(result.sst_file_manager.get());
for (size_t i = 0; i < result.db_paths.size(); i++) {
DeleteScheduler::CleanupDirectory(result.env, sfm, result.db_paths[i].path);
}
#endif

return result;
}

Expand Down
11 changes: 4 additions & 7 deletions db/db_sst_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,10 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
options.disable_auto_compactions = true;
options.env = env_;

std::string trash_dir = test::TmpDir(env_) + "/trash";
int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
Status s;
options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, trash_dir, 0, false, &s));
NewSstFileManager(env_, nullptr, "", 0, false, &s));
ASSERT_OK(s);
options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
Expand Down Expand Up @@ -394,11 +393,10 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
options.env = env_;

std::string trash_dir = test::TmpDir(env_) + "/trash";
int64_t rate_bytes_per_sec = 1024 * 1024; // 1 Mb / Sec
Status s;
options.sst_file_manager.reset(NewSstFileManager(
env_, nullptr, trash_dir, rate_bytes_per_sec, false, &s));
options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s));
ASSERT_OK(s);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->TEST_SetMaxTrashDBRatio(1.1);
Expand Down Expand Up @@ -460,9 +458,8 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.env = env_;
std::string trash_dir = test::TmpDir(env_) + "/trash";
options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, trash_dir, 0, false, &s));
NewSstFileManager(env_, nullptr, "", 0, false, &s));
ASSERT_OK(s);
DestroyAndReopen(options);

Expand Down
12 changes: 5 additions & 7 deletions include/rocksdb/sst_file_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "rocksdb/status.h"

Expand All @@ -19,6 +20,7 @@ class Logger;
// SstFileManager is used to track SST files in the DB and control their
// deletion rate.
// All SstFileManager public functions are thread-safe.
// SstFileManager is not extensible.
class SstFileManager {
public:
virtual ~SstFileManager() {}
Expand Down Expand Up @@ -64,17 +66,13 @@ class SstFileManager {
// @param info_log: If not nullptr, info_log will be used to log errors.
//
// == Deletion rate limiting specific arguments ==
// @param trash_dir: Path to the directory where deleted files will be moved
// to be deleted in a background thread while applying rate limiting. If this
// directory doesn't exist, it will be created. This directory should not be
// used by any other process or any other SstFileManager, Set to "" to
// disable deletion rate limiting.
// @param trash_dir: Deprecated, this argument has no effect
// @param rate_bytes_per_sec: How many bytes should be deleted per second, If
// this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
// in 1 second, we will wait for another 3 seconds before we delete other
// files, Set to 0 to disable deletion rate limiting.
// @param delete_existing_trash: If set to true, the newly created
// SstFileManager will delete files that already exist in trash_dir.
// @param delete_existing_trash: Deprecated, this argument has no effect, but
// if the user provides trash_dir we will schedule deletes for files in the dir
// @param status: If not nullptr, status will contain any errors that happened
// during creating the missing trash_dir or deleting existing files in trash.
extern SstFileManager* NewSstFileManager(
Expand Down
87 changes: 65 additions & 22 deletions util/delete_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@

namespace rocksdb {

DeleteScheduler::DeleteScheduler(Env* env, const std::string& trash_dir,
int64_t rate_bytes_per_sec, Logger* info_log,
DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
Logger* info_log,
SstFileManagerImpl* sst_file_manager)
: env_(env),
trash_dir_(trash_dir),
total_trash_size_(0),
rate_bytes_per_sec_(rate_bytes_per_sec),
pending_files_(0),
Expand Down Expand Up @@ -63,22 +62,27 @@ Status DeleteScheduler::DeleteFile(const std::string& file_path) {
}

// Move file to trash
std::string path_in_trash;
s = MoveToTrash(file_path, &path_in_trash);
std::string trash_file;
s = MarkAsTrash(file_path, &trash_file);

if (!s.ok()) {
ROCKS_LOG_ERROR(info_log_, "Failed to move %s to trash directory (%s)",
file_path.c_str(), trash_dir_.c_str());
ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash", file_path.c_str());
s = env_->DeleteFile(file_path);
if (s.ok()) {
sst_file_manager_->OnDeleteFile(file_path);
}
return s;
}

// Update the total trash size
uint64_t trash_file_size = 0;
env_->GetFileSize(trash_file, &trash_file_size);
total_trash_size_.fetch_add(trash_file_size);

// Add file to delete queue
{
InstrumentedMutexLock l(&mu_);
queue_.push(path_in_trash);
queue_.push(trash_file);
pending_files_++;
if (pending_files_ == 1) {
cv_.SignalAll();
Expand All @@ -92,44 +96,83 @@ std::map<std::string, Status> DeleteScheduler::GetBackgroundErrors() {
return bg_errors_;
}

Status DeleteScheduler::MoveToTrash(const std::string& file_path,
std::string* path_in_trash) {
const std::string DeleteScheduler::kTrashExtension = ".trash";
// Returns true when file_path ends with kTrashExtension, false otherwise.
bool DeleteScheduler::IsTrashFile(const std::string& file_path) {
  const size_t ext_len = kTrashExtension.size();
  // A path shorter than the extension cannot carry it as a suffix.
  if (file_path.size() < ext_len) {
    return false;
  }
  // Compare only the trailing ext_len characters against the extension.
  return file_path.compare(file_path.size() - ext_len, ext_len,
                           kTrashExtension) == 0;
}

// Scans `path` for files marked as trash and gets rid of them.
//
// @param env:  Env used to list the directory and, when `sfm` is nullptr, to
//              delete the trash files directly.
// @param sfm:  Optional SstFileManagerImpl. When non-null, each trash file is
//              registered with it and its deletion is scheduled through it.
// @param path: Directory whose children are inspected.
// @return The GetChildren() error if listing fails; otherwise the first
//         non-OK status produced while deleting/scheduling a trash file, or
//         OK if every trash file was handled successfully.
Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
                                         const std::string& path) {
  // List everything under `path`; nothing can be cleaned if listing fails.
  std::vector<std::string> children;
  Status result = env->GetChildren(path, &children);
  if (!result.ok()) {
    return result;
  }

  for (const std::string& child : children) {
    if (DeleteScheduler::IsTrashFile(child)) {
      const std::string trash_path = path + "/" + child;
      Status delete_status;
      if (sfm != nullptr) {
        // Hand the file to the SstFileManager, which schedules the delete.
        sfm->OnAddFile(trash_path);
        delete_status = sfm->ScheduleFileDeletion(trash_path);
      } else {
        // No SstFileManager available: remove the file right away.
        delete_status = env->DeleteFile(trash_path);
      }

      // Remember only the first failure, but keep processing the rest.
      if (result.ok() && !delete_status.ok()) {
        result = delete_status;
      }
    }
  }

  return result;
}

Status DeleteScheduler::MarkAsTrash(const std::string& file_path,
std::string* trash_file) {
// Sanity check of the path
size_t idx = file_path.rfind("/");
if (idx == std::string::npos || idx == file_path.size() - 1) {
return Status::InvalidArgument("file_path is corrupted");
}
*path_in_trash = trash_dir_ + file_path.substr(idx);
std::string unique_suffix = "";

if (*path_in_trash == file_path) {
// This file is already in trash
Status s;
if (DeleteScheduler::IsTrashFile(file_path)) {
// This is already a trash file
return s;
}

*trash_file = file_path + kTrashExtension;
// TODO(tec) : Implement Env::RenameFileIfNotExist and remove
// file_move_mu mutex.
int cnt = 0;
InstrumentedMutexLock l(&file_move_mu_);
while (true) {
s = env_->FileExists(*path_in_trash + unique_suffix);
s = env_->FileExists(*trash_file);
if (s.IsNotFound()) {
// We found a path for our file in trash
*path_in_trash += unique_suffix;
s = env_->RenameFile(file_path, *path_in_trash);
s = env_->RenameFile(file_path, *trash_file);
break;
} else if (s.ok()) {
// Name conflict, generate new random suffix
unique_suffix = env_->GenerateUniqueId();
*trash_file = file_path + std::to_string(cnt) + kTrashExtension;
} else {
// Error during FileExists call, we cannot continue
break;
}
cnt++;
}
if (s.ok()) {
uint64_t trash_file_size = 0;
sst_file_manager_->OnMoveFile(file_path, *path_in_trash, &trash_file_size);
total_trash_size_.fetch_add(trash_file_size);
sst_file_manager_->OnMoveFile(file_path, *trash_file);
}
return s;
}
Expand Down
25 changes: 15 additions & 10 deletions util/delete_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,15 @@ class Logger;
class SstFileManagerImpl;

// DeleteScheduler allows the DB to enforce a rate limit on file deletion,
// Instead of deleteing files immediately, files are moved to trash_dir
// Instead of deleting files immediately, files are marked as trash
// and deleted in a background thread that applies a sleep penalty between deletes
// if they are happening in a rate faster than rate_bytes_per_sec,
//
// Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this
// case DeleteScheduler will delete files immediately.
class DeleteScheduler {
public:
DeleteScheduler(Env* env, const std::string& trash_dir,
int64_t rate_bytes_per_sec, Logger* info_log,
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log,
SstFileManagerImpl* sst_file_manager);

~DeleteScheduler();
Expand All @@ -46,7 +45,7 @@ class DeleteScheduler {
return rate_bytes_per_sec_.store(bytes_per_sec);
}

// Move file to trash directory and schedule it's deletion
// Mark file as trash and schedule its deletion
Status DeleteFile(const std::string& fname);

// Wait for all files being deleteing in the background to finish or for
Expand All @@ -64,26 +63,32 @@ class DeleteScheduler {
max_trash_db_ratio_ = r;
}

static const std::string kTrashExtension;
static bool IsTrashFile(const std::string& file_path);

// Check if there are any .trash files in path, and schedule their deletion,
// or delete them immediately if sfm is nullptr
static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
const std::string& path);

private:
Status MoveToTrash(const std::string& file_path, std::string* path_in_trash);
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);

Status DeleteTrashFile(const std::string& path_in_trash,
uint64_t* deleted_bytes);

void BackgroundEmptyTrash();

Env* env_;
// Path to the trash directory
std::string trash_dir_;
// total size of trash directory
// total size of trash files
std::atomic<uint64_t> total_trash_size_;
// Maximum number of bytes that should be deleted per second
std::atomic<int64_t> rate_bytes_per_sec_;
// Mutex to protect queue_, pending_files_, bg_errors_, closing_
InstrumentedMutex mu_;
// Queue of files in trash that need to be deleted
// Queue of trash files that need to be deleted
std::queue<std::string> queue_;
// Number of files in trash that are waiting to be deleted
// Number of trash files that are waiting to be deleted
int32_t pending_files_;
// Errors that happened in BackgroundEmptyTrash (file_path => error)
std::map<std::string, Status> bg_errors_;
Expand Down
Loading

0 comments on commit 0599315

Please sign in to comment.