Skip to content

Commit

Permalink
Env function for bulk metadata retrieval
Browse files Browse the repository at this point in the history
Summary:
Added this new function, which returns the name and size in bytes of each
file in the provided directory. The default implementation retrieves the
attributes sequentially using existing functions. In the next diff I'll make
HdfsEnv override this function to use libhdfs's bulk get function.

This won't work on Windows because the default implementation joins paths with a hard-coded "/" separator, so WinEnv returns NotSupported instead.

Test Plan:
new unit test

  $ ./env_test --gtest_filter=EnvPosixTest.ConsistentChildrenAttributes

Reviewers: yhchiang, sdong

Reviewed By: sdong

Subscribers: IslamAbdelRahman, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D53781
  • Loading branch information
ajkr committed Feb 9, 2016
1 parent 4a8cbf4 commit 59b3ee6
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 3 deletions.
21 changes: 21 additions & 0 deletions include/rocksdb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ struct EnvOptions {

class Env {
public:
// Per-file record produced by bulk directory listings: one entry describes
// one child of the listed directory.
struct FileAttributes {
  // Name of the file.
  std::string name;

  // Length of the file, measured in bytes.
  uint64_t size_bytes;
};

// Default constructor; thread_status_updater_ starts out as nullptr.
Env() : thread_status_updater_(nullptr) {}

// Virtual so that objects of derived Env classes can be destroyed through a
// pointer to Env.
virtual ~Env();
Expand Down Expand Up @@ -177,6 +185,15 @@ class Env {
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0;

// Store in *result the attributes of the children of the specified directory.
// In case the implementation lists the directory prior to iterating the files
// and files are concurrently deleted, the deleted files will be omitted from
// result.
// The name attributes are relative to "dir".
// Original contents of *result are dropped.
// Returns a non-OK Status if the attributes could not be retrieved.
// This method is not pure virtual, so an Env subclass may override it (for
// example with a bulk lookup) but is not required to.
virtual Status GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result);

// Delete the named file.
// Pure virtual: every concrete Env must supply an implementation. The outcome
// is reported through the returned Status.
virtual Status DeleteFile(const std::string& fname) = 0;

Expand Down Expand Up @@ -789,6 +806,10 @@ class EnvWrapper : public Env {
std::vector<std::string>* r) override {
return target_->GetChildren(dir, r);
}
// Forwards the bulk attribute query to the wrapped Env (target_) with the
// arguments unchanged and returns its Status as-is.
Status GetChildrenFileAttributes(
const std::string& dir, std::vector<FileAttributes>* result) override {
return target_->GetChildrenFileAttributes(dir, result);
}
// Forwards the delete request for file f to the wrapped Env (target_) and
// returns its Status as-is.
Status DeleteFile(const std::string& f) override {
return target_->DeleteFile(f);
}
Expand Down
10 changes: 7 additions & 3 deletions port/win/env_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,11 @@ class WinEnv : public Env {
return status;
}

// Bulk attribute retrieval is not available on Windows: the generic Env
// implementation builds child paths with a hard-coded "/" separator, so this
// override reports NotSupported rather than inheriting it.
//
// The name and element type must match Env::GetChildrenFileAttributes and
// Env::FileAttributes exactly, otherwise `override` has nothing to bind to.
// The declaration is also unqualified, since a member declared inside WinEnv
// may not carry an `Env::` qualifier.
virtual Status GetChildrenFileAttributes(
    const std::string& dir, std::vector<FileAttributes>* result) override {
  return Status::NotSupported("Not supported in WinEnv");
}

virtual Status CreateDir(const std::string& name) override {
Status result;

Expand Down Expand Up @@ -1723,9 +1728,8 @@ class WinEnv : public Env {

virtual Status GetHostName(char* name, uint64_t len) override {
Status s;
DWORD nSize =
static_cast<DWORD>(std::min<uint64_t>(len,
std::numeric_limits<DWORD>::max()));
DWORD nSize = static_cast<DWORD>(
std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));

if (!::GetComputerNameA(name, &nSize)) {
auto lastError = GetLastError();
Expand Down
26 changes: 26 additions & 0 deletions util/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,32 @@ Status Env::ReuseWritableFile(const std::string& fname,
return NewWritableFile(fname, result, options);
}

// Default implementation of the bulk attribute query, built on the per-file
// primitives: list the directory with GetChildren(), then ask GetFileSize()
// for each child in turn.
//
// dir:    directory whose children are described.
// result: must be non-null. Its previous contents are always dropped. On
//         success it holds one entry per child that still existed when it was
//         inspected, with `name` relative to dir. On failure it is left empty
//         so callers never observe a partially populated listing.
//
// Returns OK on success, otherwise the first error reported by GetChildren()
// or GetFileSize().
Status Env::GetChildrenFileAttributes(const std::string& dir,
                                      std::vector<FileAttributes>* result) {
  assert(result != nullptr);
  // Drop the old contents up front so every exit path honors the contract.
  result->clear();

  std::vector<std::string> child_fnames;
  Status s = GetChildren(dir, &child_fnames);
  if (!s.ok()) {
    return s;
  }

  result->reserve(child_fnames.size());
  for (auto& child_fname : child_fnames) {
    const std::string path = dir + "/" + child_fname;
    uint64_t size_bytes = 0;
    s = GetFileSize(path, &size_bytes);
    if (!s.ok()) {
      if (FileExists(path).IsNotFound()) {
        // The file may have been deleted since we listed the directory
        continue;
      }
      // A real failure: do not hand back a half-built listing.
      result->clear();
      return s;
    }
    FileAttributes attrs;
    // The name is no longer needed in child_fnames, so steal its buffer.
    attrs.name = std::move(child_fname);
    attrs.size_bytes = size_bytes;
    result->push_back(std::move(attrs));
  }
  return Status::OK();
}

SequentialFile::~SequentialFile() {
}

Expand Down
36 changes: 36 additions & 0 deletions util/env_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,42 @@ TEST_F(EnvPosixTest, Preallocation) {
ASSERT_EQ(last_allocated_block, 7UL);
}

// Test that the two ways to get children file attributes (in bulk or
// individually) behave consistently.
TEST_F(EnvPosixTest, ConsistentChildrenAttributes) {
  const EnvOptions soptions;
  const int kNumChildren = 10;

  // File i receives the payload built up so far, i.e. i copies of "test",
  // so its expected size is exactly 4 * i bytes.
  std::string data;
  for (int i = 0; i < kNumChildren; ++i) {
    std::ostringstream oss;
    oss << test::TmpDir() << "/testfile_" << i;
    const std::string path = oss.str();
    unique_ptr<WritableFile> file;
    ASSERT_OK(env_->NewWritableFile(path, &file, soptions));
    // A failed write would otherwise only surface later as a confusing size
    // mismatch, so check it here.
    ASSERT_OK(file->Append(data));
    data.append("test");
  }

  std::vector<Env::FileAttributes> file_attrs;
  ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(), &file_attrs));
  for (int i = 0; i < kNumChildren; ++i) {
    std::ostringstream oss;
    oss << "testfile_" << i;
    const std::string name = oss.str();
    const std::string path = test::TmpDir() + "/" + name;

    // The bulk listing may contain other directory entries, so look the file
    // up by name rather than relying on position.
    auto file_attrs_iter = std::find_if(
        file_attrs.begin(), file_attrs.end(),
        [&name](const Env::FileAttributes& fm) { return fm.name == name; });
    ASSERT_TRUE(file_attrs_iter != file_attrs.end());
    uint64_t size;
    ASSERT_OK(env_->GetFileSize(path, &size));
    // Compare like with like: size is unsigned 64-bit, 4 * i is an int.
    ASSERT_EQ(size, static_cast<uint64_t>(4 * i));
    ASSERT_EQ(size, file_attrs_iter->size_bytes);
  }
}

// Test that all WritableFileWrapper forwards all calls to WritableFile.
TEST_F(EnvPosixTest, WritableFileWrapper) {
class Base : public WritableFile {
Expand Down

0 comments on commit 59b3ee6

Please sign in to comment.