Skip to content

Commit

Permalink
Make ldb automagically determine the file type and use the correct du…
Browse files Browse the repository at this point in the history
…mping function

Summary:
This set of changes implements the following design: `ldb` accepts a new `--path` parameter that specifies a file name. The tool then applies a heuristic to determine how to output the data properly. The design decision is not to probe the file content, but to use the file name to determine which dumping function to call.

Usage examples:

Understands that path points to a manifest file and dumps it.
`./ldb --path=/tmp/test_db/MANIFEST-000023 dump`

Understands that path points to a WAL file and dumps it.
`./ldb --path=/tmp/test_db/000024.log dump --header`

Understands that path points to an SST file and dumps it.
`./ldb --path=/tmp/test_db/000007.sst dump`

Figures out that none of the supported file types are applicable and outputs
an appropriate error message.
`./ldb --path=/tmp/cron.log dump`

Test Plan:
Basics:

git diff
make clean
make -j 32 commit-prereq
arc lint

More specific testing (done as part of commit-prereq, but can be iterated separately when making isolated changes):

make clean
make ldb
python tools/ldb_test.py
make rocksdb_dump
make rocksdb_undump
sh tools/rocksdb_dump_test.sh

Reviewers: rven, IslamAbdelRahman, yhchiang, kradhakrishnan, anthony, igor, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D52269
  • Loading branch information
Gunnar Kudrjavets committed Jan 6, 2016
1 parent ba83447 commit b1a3b4c
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 42 deletions.
5 changes: 3 additions & 2 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* Introduce CompactionJobInfo::compaction_reason; this field includes the reason the compaction was triggered.
* After slow down is triggered, if estimated pending compaction bytes keep increasing, slowdown more.
* Increase default options.delayed_write_rate to 2MB/s.
* Added a new parameter --path to the ldb tool. --path accepts the name of a MANIFEST, SST, or WAL file. Either --db or --path can be used when calling ldb.

## 4.3.0 (12/8/2015)
### New Features
Expand All @@ -21,8 +22,8 @@

## 4.2.0 (11/9/2015)
### New Features
* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
* Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list of RocksDB instances.

### Public API Changes
Expand Down
130 changes: 97 additions & 33 deletions tools/ldb_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace rocksdb {
using namespace std;

const string LDBCommand::ARG_DB = "db";
const string LDBCommand::ARG_PATH = "path";
const string LDBCommand::ARG_HEX = "hex";
const string LDBCommand::ARG_KEY_HEX = "key_hex";
const string LDBCommand::ARG_VALUE_HEX = "value_hex";
Expand All @@ -62,6 +63,14 @@ const string LDBCommand::ARG_CREATE_IF_MISSING = "create_if_missing";

const char* LDBCommand::DELIM = " ==> ";

// Forward declarations of file-local dump helpers. They are declared here so
// that DBDumperCommand::DoCommand() can call them when --path names a WAL or
// an SST file. NOTE(review): the definitions are not visible in this excerpt;
// parameter descriptions below are inferred from the parameter names and the
// call sites and should be confirmed against the definitions.
namespace {

// Dumps the WAL file named by `wal_file`. `print_header` / `print_values`
// presumably toggle the header line and the value output; `exec_state` is an
// out-parameter through which the caller passes its command result object.
void DumpWalFile(std::string wal_file, bool print_header, bool print_values,
LDBCommandExecuteResult* exec_state);

// Dumps the SST file named by `filename`. `output_hex` and `show_properties`
// presumably select hex key output and printing of table properties.
void DumpSstFile(std::string filename, bool output_hex, bool show_properties);
};

LDBCommand* LDBCommand::InitFromCmdLineArgs(
int argc,
char** argv,
Expand Down Expand Up @@ -394,8 +403,10 @@ bool LDBCommand::ValidateCmdLineOptions() {
}
}

if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end()) {
fprintf(stderr, "%s must be specified\n", ARG_DB.c_str());
if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end() &&
option_map_.find(ARG_PATH) == option_map_.end()) {
fprintf(stderr, "Either %s or %s must be specified.\n", ARG_DB.c_str(),
ARG_PATH.c_str());
return false;
}

Expand Down Expand Up @@ -733,21 +744,20 @@ const string InternalDumpCommand::ARG_INPUT_KEY_HEX = "input_key_hex";

InternalDumpCommand::InternalDumpCommand(const vector<string>& params,
const map<string, string>& options,
const vector<string>& flags) :
LDBCommand(options, flags, true,
BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX,
ARG_FROM, ARG_TO, ARG_MAX_KEYS,
ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_INPUT_KEY_HEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
delim_("."),
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {

const vector<string>& flags)
: LDBCommand(
options, flags, true,
BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
delim_("."),
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {
has_from_ = ParseStringOption(options, ARG_FROM, &from_);
has_to_ = ParseStringOption(options, ARG_TO, &to_);

Expand Down Expand Up @@ -891,21 +901,20 @@ const string DBDumperCommand::ARG_STATS = "stats";
const string DBDumperCommand::ARG_TTL_BUCKET = "bucket";

DBDumperCommand::DBDumperCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
LDBCommand(options, flags, true,
BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX,
ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_TTL_START,
ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {

const map<string, string>& options,
const vector<string>& flags)
: LDBCommand(options, flags, true,
BuildCmdLineOptions(
{ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM,
ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP, ARG_PATH})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {
map<string, string>::const_iterator itr = options.find(ARG_FROM);
if (itr != options.end()) {
null_from_ = false;
Expand Down Expand Up @@ -954,6 +963,11 @@ DBDumperCommand::DBDumperCommand(const vector<string>& params,
to_ = HexToString(to_);
}
}

itr = options.find(ARG_PATH);
if (itr != options.end()) {
path_ = itr->second;
}
}

void DBDumperCommand::Help(string& ret) {
Expand All @@ -969,13 +983,63 @@ void DBDumperCommand::Help(string& ret) {
ret.append(" [--" + ARG_TTL_BUCKET + "=<N>]");
ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]");
ret.append("\n");
}

/**
* Handles two separate cases:
*
* 1) --db is specified - just dump the database.
*
* 2) --path is specified - determine based on file extension what dumping
* function to call. Please note that we intentionally use the extension
* and avoid probing the file contents under the assumption that renaming
* the files is not a supported scenario.
*
*/
void DBDumperCommand::DoCommand() {
if (!db_) {
return;
assert(!path_.empty());
string fileName = GetFileNameFromPath(path_);
uint64_t number;
FileType type;

exec_state_ = LDBCommandExecuteResult::Succeed("");

if (!ParseFileName(fileName, &number, &type)) {
exec_state_ =
LDBCommandExecuteResult::Failed("Can't parse file type: " + path_);
return;
}

switch (type) {
case kLogFile:
DumpWalFile(path_, /* print_header_ */ true, /* print_values_ */ true,
&exec_state_);
break;
case kTableFile:
DumpSstFile(path_, is_key_hex_, /* show_properties */ true);
break;
case kDescriptorFile:
DumpManifestFile(path_, /* verbose_ */ false, is_key_hex_,
/* json_ */ false);
break;
default:
exec_state_ = LDBCommandExecuteResult::Failed(
"File type not supported: " + path_);
break;
}

} else {
DoDumpCommand();
}
}

void DBDumperCommand::DoDumpCommand() {
assert(nullptr != db_);
assert(path_.empty());

// Parse command line args
uint64_t count = 0;
if (print_stats_) {
Expand Down
30 changes: 23 additions & 7 deletions tools/ldb_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class LDBCommand {

// Command-line arguments
static const string ARG_DB;
static const string ARG_PATH;
static const string ARG_HEX;
static const string ARG_KEY_HEX;
static const string ARG_VALUE_HEX;
Expand Down Expand Up @@ -90,10 +91,8 @@ class LDBCommand {
}

// Releases the DB handle owned by this command, if any.
virtual ~LDBCommand() {
  // `delete` on a null pointer is a no-op, so no explicit null check is
  // needed before releasing the handle.
  delete db_;
  db_ = nullptr;
}

/* Run the command, and return the execute result. */
Expand All @@ -104,12 +103,12 @@ class LDBCommand {

if (db_ == nullptr && !NoDBOpen()) {
OpenDB();
if (!exec_state_.IsNotStarted()) {
return;
}
}

// We'll intentionally proceed even if the DB can't be opened because users
// can also specify a filename, not just a directory.
DoCommand();

if (exec_state_.IsNotStarted()) {
exec_state_ = LDBCommandExecuteResult::Succeed("");
}
Expand Down Expand Up @@ -441,6 +440,22 @@ class DBDumperCommand: public LDBCommand {
virtual void DoCommand() override;

private:
/**
 * Returns the last component of a path, i.e. everything after the final
 * forward slash (/) or backslash (\). Both separators are recognised so that
 * paths from different operating systems are handled. A path without any
 * separator is returned unchanged; a path ending in a separator yields an
 * empty string.
 */
static string GetFileNameFromPath(const string& s) {
  const std::size_t last_separator = s.find_last_of("/\\");
  return (last_separator == std::string::npos) ? s
                                               : s.substr(last_separator + 1);
}

void DoDumpCommand();

bool null_from_;
string from_;
bool null_to_;
Expand All @@ -450,6 +465,7 @@ class DBDumperCommand: public LDBCommand {
bool count_only_;
bool count_delim_;
bool print_stats_;
string path_;

static const string ARG_COUNT_ONLY;
static const string ARG_COUNT_DELIM;
Expand Down
53 changes: 53 additions & 0 deletions tools/ldb_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,12 @@ def testDumpLiveFiles(self):
# Returns the list of paths in `directory` whose names match "MANIFEST-*".
def getManifests(self, directory):
return glob.glob(directory + "/MANIFEST-*")

# Returns the list of paths in `directory` whose names match "*.sst".
def getSSTFiles(self, directory):
return glob.glob(directory + "/*.sst")

# Returns the list of paths in `directory` whose names match "*.log".
def getWALFiles(self, directory):
return glob.glob(directory + "/*.log")

# Runs "cp <src> <dest>" through run_err_null and returns True when that call
# returns 0. NOTE(review): the command is built by plain string concatenation,
# so paths containing spaces would presumably break it - confirm with callers.
def copyManifests(self, src, dest):
return 0 == run_err_null("cp " + src + " " + dest)

Expand Down Expand Up @@ -439,6 +445,53 @@ def testManifestDump(self):
% (dbPath, manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)
# Make sure that using the dump with --path will result in identical
# output as just using manifest_dump.
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)

# Checks that "dump --path=<file>" works when the path names an SST file:
# writes two keys, reads one back, then runs the dump against the first
# "*.sst" file found in the DB directory and matches its output against a
# pattern.
def testSSTDump(self):
print "Running testSSTDump..."

dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK")
self.assertRunOK("put sst2 sst2_val", "OK")
self.assertRunOK("get sst1", "sst1_val")

# Pattern to expect from SST dump.
regex = ".*Sst file format:.*"
expected_pattern = re.compile(regex)

# The test requires at least one SST file to exist at this point.
sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1)
# Only --path is passed here (no --db).
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (sst_files[0]),
expected_pattern, unexpected=False,
isPattern=True)

# Checks that "dump --path=<file>" works when the path names a WAL file:
# writes two keys, reads one back, then runs the dump against the first
# "*.log" file found in the DB directory and matches its output against a
# pattern.
def testWALDump(self):
print "Running testWALDump..."

dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put wal1 wal1_val --create_if_missing", "OK")
self.assertRunOK("put wal2 wal2_val", "OK")
self.assertRunOK("get wal1", "wal1_val")

# Pattern to expect from WAL dump.
# The pattern is anchored at the start and describes a comma-separated
# column header line.
regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*"
expected_pattern = re.compile(regex)

# The test requires at least one WAL file to exist at this point.
wal_files = self.getWALFiles(dbPath)
self.assertTrue(len(wal_files) >= 1)
# Only --path is passed here (no --db).
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (wal_files[0]),
expected_pattern, unexpected=False,
isPattern=True)

def testListColumnFamilies(self):
print "Running testListColumnFamilies..."
Expand Down

0 comments on commit b1a3b4c

Please sign in to comment.