Reviewer note: the C++ template arguments in this patch (for example
vector<BlockId>, unique_ptr<fs::ReadableBlock>, unique_ptr<Action>) were
reconstructed from how each variable is used; confirm them against the tree.

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 6d2327fc7b..1dd63a193d 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -48,6 +48,7 @@
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/strip.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/integration-tests/cluster_itest_util.h"
 #include "kudu/integration-tests/external_mini_cluster.h"
@@ -151,11 +152,11 @@ class ToolTest : public KuduTest {
     Status s = Subprocess::Call(args, "", &out, &err);
     if (stdout) {
       *stdout = out;
-      StripWhiteSpace(stdout);
+      StripTrailingNewline(stdout);
     }
     if (stderr) {
       *stderr = err;
-      StripWhiteSpace(stderr);
+      StripTrailingNewline(stderr);
     }
     if (stdout_lines) {
       *stdout_lines = strings::Split(out, "\n", strings::SkipEmpty());
@@ -386,6 +387,7 @@ TEST_F(ToolTest, TestModeHelp) {
   {
     const vector<string> kLocalReplicaModeRegexes = {
         "cmeta.*Operate on a local tablet replica's consensus",
+        "data_size.*Summarize the data size",
         "dump.*Dump a Kudu filesystem",
         "copy_from_remote.*Copy a tablet replica",
         "delete.*Delete a tablet replica from the local filesystem",
@@ -1108,6 +1110,59 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
     ASSERT_STR_CONTAINS(stdout, "Superblock:");
     ASSERT_STR_CONTAINS(stdout, debug_str);
   }
+  // Verify the table printed by 'local_replica data_size'.
+  {
+    string stdout;
+    NO_FATALS(RunActionStdoutString(
+        Substitute("local_replica data_size $0 $1",
+                   kTestTablet, fs_paths), &stdout));
+    SCOPED_TRACE(stdout);
+
+    string expected = R"(
+    table id     |  tablet id  | rowset id |    block type    | size
+-----------------+-------------+-----------+------------------+------
+ KuduTableTestId | test-tablet | 0         | c10 (key)        | 164B
+ KuduTableTestId | test-tablet | 0         | c11 (int_val)    | 113B
+ KuduTableTestId | test-tablet | 0         | c12 (string_val) | 138B
+ KuduTableTestId | test-tablet | 0         | REDO             | 0B
+ KuduTableTestId | test-tablet | 0         | UNDO             | 169B
+ KuduTableTestId | test-tablet | 0         | BLOOM            | 4.1K
+ KuduTableTestId | test-tablet | 0         | PK               | 0B
+ KuduTableTestId | test-tablet | 0         | *                | 4.6K
+ KuduTableTestId | test-tablet | *         | c10 (key)        | 164B
+ KuduTableTestId | test-tablet | *         | c11 (int_val)    | 113B
+ KuduTableTestId | test-tablet | *         | c12 (string_val) | 138B
+ KuduTableTestId | test-tablet | *         | REDO             | 0B
+ KuduTableTestId | test-tablet | *         | UNDO             | 169B
+ KuduTableTestId | test-tablet | *         | BLOOM            | 4.1K
+ KuduTableTestId | test-tablet | *         | PK               | 0B
+ KuduTableTestId | test-tablet | *         | *                | 4.6K
+ KuduTableTestId | *           | *         | c10 (key)        | 164B
+ KuduTableTestId | *           | *         | c11 (int_val)    | 113B
+ KuduTableTestId | *           | *         | c12 (string_val) | 138B
+ KuduTableTestId | *           | *         | REDO             | 0B
+ KuduTableTestId | *           | *         | UNDO             | 169B
+ KuduTableTestId | *           | *         | BLOOM            | 4.1K
+ KuduTableTestId | *           | *         | PK               | 0B
+ KuduTableTestId | *           | *         | *                | 4.6K
+)";
+    // Preprocess stdout and our expected table so that we are less
+    // sensitive to small variations in encodings, id assignment, etc.
+    for (string* p : {&stdout, &expected}) {
+      // Replace any string of digits with a single '#'.
+      StripString(p, "0123456789.", '#');
+      StripDupCharacters(p, '#', 0);
+      // Collapse whitespace to a single space.
+      StripDupCharacters(p, ' ', 0);
+      // Strip the leading and trailing whitespace.
+      StripWhiteSpace(p);
+      // Collapse '-'s to a single '-' so that different width columns
+      // don't change the width of the header line.
+      StripDupCharacters(p, '-', 0);
+    }
+
+    EXPECT_EQ(stdout, expected);
+  }
   {
     string stdout;
     NO_FATALS(RunActionStdoutString(Substitute("local_replica list $0",
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 0cb438c423..0c92eeb8c4 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -131,6 +131,11 @@ const char* const kSeparatorLine =
 
 const char* const kTermArg = "term";
 
+const char* const kTabletIdGlobArg = "tablet_id_pattern";
+const char* const kTabletIdGlobArgDesc = "Tablet identifier pattern. "
+    "This argument supports basic glob syntax: '*' matches 0 or more wildcard "
+    "characters.";
+
 string Indent(int indent) {
   return string(indent, ' ');
 }
@@ -371,6 +376,141 @@ Status DeleteLocalReplica(const RunnerContext& context) {
   return Status::OK();
 }
 
+// Adds the size reported by each block in 'blocks' to '*running_sum'.
+//
+// 'block_type' only labels the per-block lines written to stdout when
+// VLOG level 1 is enabled. If any block cannot be opened or sized, returns
+// the error and leaves '*running_sum' unchanged.
+Status SummarizeSize(FsManager* fs,
+                     const vector<BlockId>& blocks,
+                     StringPiece block_type,
+                     int64_t* running_sum) {
+  int64_t local_sum = 0;
+  for (const auto& b : blocks) {
+    unique_ptr<fs::ReadableBlock> rb;
+    RETURN_NOT_OK_PREPEND(fs->OpenBlock(b, &rb),
+                          Substitute("could not open block $0", b.ToString()));
+    uint64_t size = 0;
+    RETURN_NOT_OK_PREPEND(rb->Size(&size),
+                          Substitute("could not get size for block $0", b.ToString()));
+    local_sum += size;
+    if (VLOG_IS_ON(1)) {
+      cout << Substitute("$0 block $1: $2 bytes $3",
+                         block_type, b.ToString(),
+                         size, HumanReadableNumBytes::ToString(size)) << endl;
+    }
+  }
+  *running_sum += local_sum;
+  return Status::OK();
+}
+
+namespace {
+// Byte counts for one rowset, or the sum over several, split by block type.
+struct TabletSizeStats {
+  int64_t redo_bytes = 0;
+  int64_t undo_bytes = 0;
+  int64_t bloom_bytes = 0;
+  int64_t pk_index_bytes = 0;
+  // Column data bytes, keyed by a "c<column id> (<column name>)" label.
+  map<string, int64_t> column_bytes;
+
+  // Accumulates every counter of 'other' into this object.
+  void Add(const TabletSizeStats& other) {
+    redo_bytes += other.redo_bytes;
+    undo_bytes += other.undo_bytes;
+    bloom_bytes += other.bloom_bytes;
+    pk_index_bytes += other.pk_index_bytes;
+    for (const auto& p : other.column_bytes) {
+      column_bytes[p.first] += p.second;
+    }
+  }
+
+  // Appends one row per block type to 'table' (columns in key order, then
+  // REDO, UNDO, BLOOM and PK), followed by a "*" row holding their total.
+  void AddToTable(const string& table_id,
+                  const string& tablet_id,
+                  const string& rowset_id,
+                  DataTable* table) const {
+    vector<pair<string, int64_t>> to_print(column_bytes.begin(), column_bytes.end());
+    to_print.emplace_back("REDO", redo_bytes);
+    to_print.emplace_back("UNDO", undo_bytes);
+    to_print.emplace_back("BLOOM", bloom_bytes);
+    to_print.emplace_back("PK", pk_index_bytes);
+
+    int64_t total = 0;
+    for (const auto& e : to_print) {
+      table->AddRow({table_id, tablet_id, rowset_id, e.first,
+                     HumanReadableNumBytes::ToString(e.second)});
+      total += e.second;
+    }
+    table->AddRow({table_id, tablet_id, rowset_id, "*", HumanReadableNumBytes::ToString(total)});
+  }
+};
+} // anonymous namespace
+
+// Prints a table of block sizes for every local tablet whose id matches
+// the required 'tablet_id_pattern' argument. Rows are emitted per rowset,
+// per tablet and per table, with "*" standing in for "all".
+Status SummarizeDataSize(const RunnerContext& context) {
+  const string& tablet_id_pattern = FindOrDie(context.required_args, kTabletIdGlobArg);
+  unique_ptr<FsManager> fs;
+  RETURN_NOT_OK(FsInit(&fs));
+
+  vector<string> tablets;
+  RETURN_NOT_OK(fs->ListTabletIds(&tablets));
+
+  unordered_map<string, TabletSizeStats> size_stats_by_table_id;
+
+  DataTable output_table({ "table id", "tablet id", "rowset id", "block type", "size" });
+
+  for (const string& tablet_id : tablets) {
+    // Skip tablets that were not asked for before loading their metadata.
+    if (!MatchPattern(tablet_id, tablet_id_pattern)) continue;
+    TabletSizeStats tablet_stats;
+    scoped_refptr<TabletMetadata> meta;
+    RETURN_NOT_OK_PREPEND(TabletMetadata::Load(fs.get(), tablet_id, &meta),
+                          Substitute("could not load tablet metadata for $0", tablet_id));
+    const string& table_id = meta->table_id();
+    for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) {
+      TabletSizeStats rowset_stats;
+      RETURN_NOT_OK(SummarizeSize(fs.get(), rs_meta->redo_delta_blocks(),
+                                  "REDO", &rowset_stats.redo_bytes));
+      RETURN_NOT_OK(SummarizeSize(fs.get(), rs_meta->undo_delta_blocks(),
+                                  "UNDO", &rowset_stats.undo_bytes));
+      RETURN_NOT_OK(SummarizeSize(fs.get(), { rs_meta->bloom_block() },
+                                  "Bloom", &rowset_stats.bloom_bytes));
+      if (rs_meta->has_adhoc_index_block()) {
+        RETURN_NOT_OK(SummarizeSize(fs.get(), { rs_meta->adhoc_index_block() },
+                                    "PK index", &rowset_stats.pk_index_bytes));
+      }
+      const auto& column_blocks_by_id = rs_meta->GetColumnBlocksById();
+      for (const auto& e : column_blocks_by_id) {
+        const auto& col_id = e.first;
+        const auto& block = e.second;
+        const auto& col_idx = meta->schema().find_column_by_id(col_id);
+        // Label with "?" when the column id is not in the tablet's schema.
+        string col_key = Substitute(
+            "c$0 ($1)", col_id,
+            (col_idx != Schema::kColumnNotFound) ?
+                meta->schema().column(col_idx).name() : "?");
+        RETURN_NOT_OK(SummarizeSize(
+            fs.get(), { block }, col_key, &rowset_stats.column_bytes[col_key]));
+      }
+      rowset_stats.AddToTable(table_id, tablet_id, std::to_string(rs_meta->id()), &output_table);
+      tablet_stats.Add(rowset_stats);
+    }
+    tablet_stats.AddToTable(table_id, tablet_id, "*", &output_table);
+    size_stats_by_table_id[table_id].Add(tablet_stats);
+  }
+  for (const auto& e : size_stats_by_table_id) {
+    const auto& table_id = e.first;
+    const auto& stats = e.second;
+    stats.AddToTable(table_id, "*", "*", &output_table);
+  }
+  RETURN_NOT_OK(output_table.PrintTo(cout));
+  return Status::OK();
+}
+
 Status DumpWals(const RunnerContext& context) {
   unique_ptr<FsManager> fs_manager;
   RETURN_NOT_OK(FsInit(&fs_manager));
@@ -847,10 +987,20 @@ unique_ptr<Mode> BuildLocalReplicaMode() {
       .AddOptionalParameter("clean_unsafe")
       .Build();
 
+  unique_ptr<Action> data_size =
+      ActionBuilder("data_size", &SummarizeDataSize)
+      .Description("Summarize the data size/space usage of the given local replica(s).")
+      .AddRequiredParameter({ kTabletIdGlobArg, kTabletIdGlobArgDesc })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .AddOptionalParameter("format")
+      .Build();
+
   return ModeBuilder("local_replica")
       .Description("Operate on local tablet replicas via the local filesystem")
       .AddMode(std::move(cmeta))
       .AddAction(std::move(copy_from_remote))
+      .AddAction(std::move(data_size))
       .AddAction(std::move(delete_local_replica))
       .AddAction(std::move(list))
       .AddMode(BuildDumpMode())