Skip to content

Commit

Permalink
Provide openable snapshots
Browse files Browse the repository at this point in the history
Summary: Store links to live files in directory on same disk

Test Plan:
Take snapshot and open it. Added a test GetSnapshotLink in
db_test.

Reviewers: sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D28713
  • Loading branch information
rven1 committed Nov 14, 2014
1 parent 9be338c commit 6c1b040
Show file tree
Hide file tree
Showing 12 changed files with 300 additions and 0 deletions.
109 changes: 109 additions & 0 deletions db/db_filesnapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "port/port.h"
#include "util/mutexlock.h"
#include "util/sync_point.h"
#include "util/file_util.h"

namespace rocksdb {

Expand Down Expand Up @@ -134,6 +135,114 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
// Thin wrapper: forwards to wal_manager_.GetSortedWalFiles() with the
// caller's output vector and returns its status unchanged.
Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) {
return wal_manager_.GetSortedWalFiles(files);
}

// Builds an openable snapshot (checkpoint) of the DB in snapshot_dir.
//
// The files reported by GetLiveFiles() are first staged in
// "<snapshot_dir>.tmp" and the staging directory is then renamed to
// snapshot_dir:
//   * table (SST) files are hard-linked; once the Env reports that linking
//     is not supported (e.g. a cross-device link) they are copied instead;
//   * the manifest is copied up to the size reported by GetLiveFiles();
//   * every other file (CURRENT) is copied in full.
// File deletions are disabled while the files are linked/copied.
//
// Returns InvalidArgument if snapshot_dir already exists, Corruption if a
// live file name cannot be parsed, and otherwise the first error reported
// while building the snapshot. On failure the staging directory and its
// contents are removed on a best-effort basis.
Status DBImpl::CreateCheckpoint(const std::string& snapshot_dir) {
  Status s;
  std::vector<std::string> live_files;
  uint64_t manifest_file_size = 0;
  uint64_t sequence_number = GetLatestSequenceNumber();
  bool same_fs = true;

  if (env_->FileExists(snapshot_dir)) {
    return Status::InvalidArgument("Directory exists");
  }

  s = DisableFileDeletions();
  if (s.ok()) {
    // this will return live_files prefixed with "/"
    s = GetLiveFiles(live_files, &manifest_file_size, true);
  }
  if (!s.ok()) {
    EnableFileDeletions(false);
    return s;
  }

  Log(db_options_.info_log,
      "Started the snapshot process -- creating snapshot in directory %s",
      snapshot_dir.c_str());

  // Stage everything in a temporary directory so that snapshot_dir only
  // appears once all files are in place.
  std::string full_private_path = snapshot_dir + ".tmp";

  // create snapshot directory
  s = env_->CreateDir(full_private_path);

  // copy/hard link live_files
  for (size_t i = 0; s.ok() && i < live_files.size(); ++i) {
    uint64_t number;
    FileType type;
    bool ok = ParseFileName(live_files[i], &number, &type);
    if (!ok) {
      s = Status::Corruption("Can't parse file name. This is very bad");
      break;
    }
    // we should only get sst, manifest and current files here
    assert(type == kTableFile || type == kDescriptorFile ||
           type == kCurrentFile);
    assert(live_files[i].size() > 0 && live_files[i][0] == '/');
    std::string src_fname = live_files[i];

    // rules:
    // * if it's kTableFile, then it's shared
    // * if it's kDescriptorFile, limit the size to manifest_file_size
    // * always copy if cross-device link
    if ((type == kTableFile) && same_fs) {
      Log(db_options_.info_log, "Hard Linking %s", src_fname.c_str());
      s = env_->LinkFile(GetName() + src_fname, full_private_path + src_fname);
      if (s.IsNotSupported()) {
        // Linking is unavailable: fall through to the copy below and keep
        // copying for all remaining table files.
        same_fs = false;
        s = Status::OK();
      }
    }
    if ((type != kTableFile) || (!same_fs)) {
      Log(db_options_.info_log, "Copying %s", src_fname.c_str());
      s = CopyFile(env_, GetName() + src_fname, full_private_path + src_fname,
                   (type == kDescriptorFile) ? manifest_file_size : 0);
    }
  }

  // we copied all the files, enable file deletions
  EnableFileDeletions(false);

  if (s.ok()) {
    // move tmp private backup to real snapshot directory
    s = env_->RenameFile(full_private_path, snapshot_dir);
  }
  if (s.ok()) {
    unique_ptr<Directory> snapshot_directory;
    env_->NewDirectory(snapshot_dir, &snapshot_directory);
    if (snapshot_directory != nullptr) {
      s = snapshot_directory->Fsync();
    }
  }

  if (!s.ok()) {
    // clean all the files we might have created
    Log(db_options_.info_log, "Snapshot failed -- %s", s.ToString().c_str());
    // we have to delete the dir and all its children
    std::vector<std::string> subchildren;
    env_->GetChildren(full_private_path, &subchildren);
    for (auto& subchild : subchildren) {
      // Child names are relative to the directory, so a separator is
      // required to form a valid path to each staged file.
      const std::string subchild_path = full_private_path + "/" + subchild;
      Status s1 = env_->DeleteFile(subchild_path);
      if (s1.ok()) {
        Log(db_options_.info_log, "Deleted %s", subchild_path.c_str());
      }
    }
    // finally delete the private dir
    Status s1 = env_->DeleteDir(full_private_path);
    Log(db_options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
        s1.ToString().c_str());
    return s;
  }

  // here we know that we succeeded and installed the new snapshot
  Log(db_options_.info_log, "Snapshot DONE. All is good");
  Log(db_options_.info_log, "Snapshot sequence number: %" PRIu64,
      sequence_number);

  return s;
}
}

#endif // ROCKSDB_LITE
5 changes: 5 additions & 0 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ class DBImpl : public DB {
ColumnFamilyHandle* column_family,
ColumnFamilyMetaData* metadata) override;

// Builds an openable snapshot of RocksDB on the same disk. Accepts an
// output directory on the same disk and creates, under that directory:
// (1) hard-linked SST files pointing to existing live SST files
// (2) copies of the manifest files and other files
virtual Status CreateCheckpoint(const std::string& snapshot_dir);
#endif // ROCKSDB_LITE

// checks if all live files exist on file system and that their file sizes
Expand Down
58 changes: 58 additions & 0 deletions db/db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,60 @@ TEST(DBTest, GetSnapshot) {
} while (ChangeOptions(kSkipHashCuckoo));
}

// Verifies that a checkpoint created by CreateCheckpoint() can be opened as a
// DB of its own: it must still return the value written before the checkpoint
// was taken ("v1"), both while the source DB is running with newer data and
// after the source DB has been destroyed. Repeated for every configuration
// produced by ChangeOptions().
TEST(DBTest, GetSnapshotLink) {
  do {
    Options options;
    const std::string snapshot_name = test::TmpDir(env_) + "/snapshot";
    DB* snapshotDB = nullptr;
    ReadOptions roptions;
    std::string result;

    // Start from a clean slate: neither the DB nor the snapshot may exist.
    options = CurrentOptions(options);
    delete db_;
    db_ = nullptr;
    ASSERT_OK(DestroyDB(dbname_, options));
    ASSERT_OK(DestroyDB(snapshot_name, options));
    // The result is deliberately not checked.
    env_->DeleteDir(snapshot_name);

    // Create a database
    options.create_if_missing = true;
    ASSERT_OK(DB::Open(options, dbname_, &db_));
    std::string key = std::string("foo");
    ASSERT_OK(Put(key, "v1"));
    // Take a snapshot
    ASSERT_OK(db_->CreateCheckpoint(snapshot_name));
    // Overwrite the key after the snapshot; the snapshot must not see "v2".
    ASSERT_OK(Put(key, "v2"));
    ASSERT_EQ("v2", Get(key));
    ASSERT_OK(Flush());
    ASSERT_EQ("v2", Get(key));
    // Open snapshot and verify contents while DB is running
    options.create_if_missing = false;
    ASSERT_OK(DB::Open(options, snapshot_name, &snapshotDB));
    ASSERT_OK(snapshotDB->Get(roptions, key, &result));
    ASSERT_EQ("v1", result);
    delete snapshotDB;
    snapshotDB = nullptr;
    delete db_;
    db_ = nullptr;

    // Destroy original DB
    ASSERT_OK(DestroyDB(dbname_, options));

    // Open snapshot and verify contents
    options.create_if_missing = false;
    dbname_ = snapshot_name;
    ASSERT_OK(DB::Open(options, dbname_, &db_));
    ASSERT_EQ("v1", Get(key));
    delete db_;
    db_ = nullptr;
    ASSERT_OK(DestroyDB(dbname_, options));

    // Restore DB name
    dbname_ = test::TmpDir(env_) + "/db_test";
  } while (ChangeOptions());
}

TEST(DBTest, GetLevel0Ordering) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
Expand Down Expand Up @@ -7468,6 +7522,10 @@ class ModelDB: public DB {
ColumnFamilyHandle* column_family,
ColumnFamilyMetaData* metadata) {}

// ModelDB does not implement checkpoints: snapshot_dir is ignored and
// NotSupported is always returned.
virtual Status CreateCheckpoint(const std::string& snapshot_dir) {
return Status::NotSupported("Not supported in Model DB");
}

private:
class ModelIter: public Iterator {
public:
Expand Down
6 changes: 6 additions & 0 deletions hdfs/env_hdfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ class HdfsEnv : public Env {

virtual Status RenameFile(const std::string& src, const std::string& target);

virtual Status LinkFile(const std::string& src, const std::string& target);

virtual Status LockFile(const std::string& fname, FileLock** lock);

virtual Status UnlockFile(FileLock* lock);
Expand Down Expand Up @@ -291,6 +293,10 @@ class HdfsEnv : public Env {

// Stub: ignores its arguments and returns the `notsup` status.
virtual Status RenameFile(const std::string& src, const std::string& target){return notsup;}

// Stub: ignores its arguments and returns the `notsup` status.
virtual Status LinkFile(const std::string& src, const std::string& target) {
return notsup;
}

// Stub: ignores its arguments (*lock is left untouched) and returns the
// `notsup` status.
virtual Status LockFile(const std::string& fname, FileLock** lock){return notsup;}

// Stub: ignores its argument and returns the `notsup` status.
virtual Status UnlockFile(FileLock* lock){return notsup;}
Expand Down
6 changes: 6 additions & 0 deletions include/rocksdb/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,12 @@ class DB {
// Convenience overload: forwards to the column-family overload using
// DefaultColumnFamily() and returns its status.
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
}

// Builds an openable snapshot of RocksDB on the same disk. Accepts an
// output directory on the same disk and creates, under that directory:
// (1) hard-linked SST files pointing to existing live SST files
// (2) copies of the manifest files and other files
virtual Status CreateCheckpoint(const std::string& snapshot_dir) = 0;
#endif // ROCKSDB_LITE

private:
Expand Down
9 changes: 9 additions & 0 deletions include/rocksdb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class Env {
virtual Status RenameFile(const std::string& src,
const std::string& target) = 0;

// Hard Link file src to target.
virtual Status LinkFile(const std::string& src,
const std::string& target) = 0;

// Lock the specified file. Used to prevent concurrent access to
// the same db by multiple processes. On failure, stores nullptr in
// *lock and returns non-OK.
Expand Down Expand Up @@ -747,6 +751,11 @@ class EnvWrapper : public Env {
// Forwards to the wrapped target_ Env and returns its status.
Status RenameFile(const std::string& s, const std::string& t) {
return target_->RenameFile(s, t);
}

// Forwards the hard-link request (s -> t) to the wrapped target_ Env and
// returns its status.
Status LinkFile(const std::string& s, const std::string& t) {
return target_->LinkFile(s, t);
}

// Forwards to the wrapped target_ Env and returns its status.
Status LockFile(const std::string& f, FileLock** l) {
return target_->LockFile(f, l);
}
Expand Down
4 changes: 4 additions & 0 deletions include/rocksdb/utilities/stackable_db.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ class StackableDB : public DB {
return db_->DefaultColumnFamily();
}

// Forwards the checkpoint request to the wrapped db_ and returns its status.
virtual Status CreateCheckpoint(const std::string& snapshot_dir) override {
return db_->CreateCheckpoint(snapshot_dir);
}

protected:
DB* db_;
};
Expand Down
11 changes: 11 additions & 0 deletions util/env_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,17 @@ class PosixEnv : public Env {
return result;
}

// Creates `target` as a hard link to `src` using link(2).
// Returns OK on success, NotSupported when the call fails with EXDEV
// (link across file systems), and an IOError built from errno for any
// other failure.
virtual Status LinkFile(const std::string& src, const std::string& target) {
  const bool linked = (link(src.c_str(), target.c_str()) == 0);
  if (linked) {
    return Status();
  }
  if (errno != EXDEV) {
    return IOError(src, errno);
  }
  return Status::NotSupported("No cross FS links allowed");
}

virtual Status LockFile(const std::string& fname, FileLock** lock) {
*lock = nullptr;
Status result;
Expand Down
59 changes: 59 additions & 0 deletions util/file_util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include <string>
#include <algorithm>
#include "util/file_util.h"
#include "rocksdb/env.h"
#include "db/filename.h"

namespace rocksdb {

// Copies the first `size` bytes of `source` into a newly created
// `destination`. A `size` of 0 means "copy everything": the length is then
// taken from env->GetFileSize(source).
//
// Returns the first error reported while opening, sizing, reading, writing
// or closing the files, or Corruption("file too small") if `source` yields
// no more data before `size` bytes have been copied.
Status CopyFile(Env* env, const std::string& source,
                const std::string& destination, uint64_t size) {
  const EnvOptions soptions;

  unique_ptr<SequentialFile> srcfile;
  Status s = env->NewSequentialFile(source, &srcfile, soptions);
  if (!s.ok()) {
    return s;
  }

  unique_ptr<WritableFile> destfile;
  s = env->NewWritableFile(destination, &destfile, soptions);
  if (!s.ok()) {
    return s;
  }

  if (size == 0) {
    // default argument means copy everything
    s = env->GetFileSize(source, &size);
    if (!s.ok()) {
      // Report the failure: if `size` is still 0 the copy loop below would
      // be skipped and the error would otherwise be lost.
      return s;
    }
  }

  char buffer[4096];
  Slice slice;
  while (size > 0) {
    uint64_t bytes_to_read =
        std::min(static_cast<uint64_t>(sizeof(buffer)), size);
    s = srcfile->Read(bytes_to_read, &slice, buffer);
    if (!s.ok()) {
      return s;
    }
    if (slice.size() == 0) {
      // The source ended before the requested number of bytes was copied.
      return Status::Corruption("file too small");
    }
    s = destfile->Append(slice);
    if (!s.ok()) {
      return s;
    }
    size -= slice.size();
  }

  // Close explicitly so that any error reported by Close() reaches the
  // caller instead of being unobservable when destfile is destroyed.
  return destfile->Close();
}

} // namespace rocksdb
18 changes: 18 additions & 0 deletions util/file_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include <string>

#pragma once
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/env.h"

namespace rocksdb {

extern Status CopyFile(Env* env, const std::string& source,
const std::string& destination, uint64_t size = 0);

} // namespace rocksdb
13 changes: 13 additions & 0 deletions util/mock_env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,19 @@ Status MockEnv::RenameFile(const std::string& src, const std::string& dest) {
return Status::OK();
}

// Makes `dest` an additional name for the file currently stored under `src`,
// replacing any existing entry for `dest`. Both paths are normalized first.
// Returns IOError if `src` is not present in file_map_.
Status MockEnv::LinkFile(const std::string& src, const std::string& dest) {
  auto s = NormalizePath(src);
  auto t = NormalizePath(dest);
  MutexLock lock(&mutex_);
  auto src_it = file_map_.find(s);
  if (src_it == file_map_.end()) {
    return Status::IOError(s, "File not found");
  }

  // NOTE(review): assumes file_map_ holds ref-counted file pointers that
  // DeleteFileInternal() releases -- confirm against the MemFile definition.
  // The new name takes its own reference so the file object stays valid when
  // either name is deleted later. The reference is taken before the old
  // `dest` entry is dropped so that linking a path to itself does not lose
  // the file.
  auto file = src_it->second;
  file->Ref();
  DeleteFileInternal(t);
  file_map_[t] = file;
  return Status::OK();
}

Status MockEnv::NewLogger(const std::string& fname,
shared_ptr<Logger>* result) {
auto fn = NormalizePath(fname);
Expand Down
2 changes: 2 additions & 0 deletions util/mock_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class MockEnv : public EnvWrapper {
virtual Status RenameFile(const std::string& src,
const std::string& target);

virtual Status LinkFile(const std::string& src, const std::string& target);

virtual Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result);

Expand Down

0 comments on commit 6c1b040

Please sign in to comment.