From cf0eb4ddae4fbcff23cc1a39171ce7ae997f1997 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 25 Aug 2016 18:23:50 -0700 Subject: [PATCH] ksck: colorize and clean up output Dan and I were looking at some ksck output earlier and found it somewhat hard to read. This colorizes the output and also cleans up the format to be slightly less messy. The output now goes to stdout instead of stderr as well, so it's easier to pipe to a file or 'less', which is a common use case. Example output: Tablet 0e4e79de2c2547c091779b13735d2b73 of table 'my_table' is under-replicated: 1 replica(s) not RUNNING 63c4785c3b6f47f5a1c23ffbf6e52b71 (ts-023.foo.com:7050): RUNNING cd20c68cd23c4cf986eda0936a5691cc (ts-004.foo.com:7050): RUNNING [LEADER] 5edf82f0516b4897b3a7991a7e67d71c (ts-028.foo.com:7050): bad state State: NOT_STARTED Data state: TABLET_DATA_COPYING Last status: TabletCopy: Downloading block 4611685629730158289 (3491/9569) Change-Id: Icc5196d63cbcbcbb2a9aba1ff17377b678c104bd Reviewed-on: http://gerrit.cloudera.org:8080/4129 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo --- src/kudu/tools/CMakeLists.txt | 2 + src/kudu/tools/color.cc | 80 +++++++++ src/kudu/tools/color.h | 37 ++++ src/kudu/tools/ksck-test.cc | 70 ++++---- src/kudu/tools/ksck.cc | 247 ++++++++++++++------------ src/kudu/tools/ksck.h | 9 +- src/kudu/tools/tool_action_cluster.cc | 1 + 7 files changed, 299 insertions(+), 147 deletions(-) create mode 100644 src/kudu/tools/color.cc create mode 100644 src/kudu/tools/color.h diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt index 614bf11b3b..fe95bad67d 100644 --- a/src/kudu/tools/CMakeLists.txt +++ b/src/kudu/tools/CMakeLists.txt @@ -30,6 +30,7 @@ set(LINK_LIBS ) add_library(kudu_tools_util + color.cc data_gen_util.cc) target_link_libraries(kudu_tools_util ${LINK_LIBS}) @@ -75,6 +76,7 @@ add_library(ksck ) target_link_libraries(ksck consensus + kudu_tools_util master_proto server_base_proto tserver_proto diff --git a/src/kudu/tools/color.cc b/src/kudu/tools/color.cc new file mode 100644 index 0000000000..e90761d4a5 --- /dev/null +++ b/src/kudu/tools/color.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/tools/color.h" + +#include +#include +#include + +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/flag_tags.h" + +DEFINE_string(color, "auto", + "Specifies whether output should be colorized. The default value " + "'auto' colorizes output if the output is a terminal. The other " + "valid values are 'always' or 'never'."); +TAG_FLAG(color, stable); + +static bool ValidateColorFlag(const char* flagname, const string& value) { + if (value == "always" || + value == "auto" || + value == "never") { + return true; + } + LOG(ERROR) << "option 'color' expects \"always\", \"auto\", or \"never\""; + return false; +} +static bool dummy = google::RegisterFlagValidator( + &FLAGS_color, &ValidateColorFlag); + + +namespace kudu { +namespace tools { + +namespace { +bool UseColor() { + if (FLAGS_color == "never") return false; + if (FLAGS_color == "always") return true; + return isatty(STDOUT_FILENO); +} + +const char* StringForCode(AnsiCode color) { + if (!UseColor()) return ""; + + // Codes from: https://en.wikipedia.org/wiki/ANSI_escape_code + switch (color) { + case AnsiCode::RED: return "\x1b[31m"; + case AnsiCode::GREEN: return "\x1b[32m"; + case AnsiCode::YELLOW: return "\x1b[33m"; + case AnsiCode::BLUE: return "\x1b[34m"; + case AnsiCode::RESET: return "\x1b[m"; + } + LOG(FATAL); + return ""; +} +} // anonymous namespace + +string Color(AnsiCode color, StringPiece s) { + return strings::Substitute("$0$1$2", + StringForCode(color), + s, + StringForCode(AnsiCode::RESET)); +} + +} // namespace tools +} // namespace kudu diff --git a/src/kudu/tools/color.h b/src/kudu/tools/color.h new file mode 100644 index 0000000000..ef0afbe2ec --- /dev/null +++ b/src/kudu/tools/color.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include + +#include "kudu/gutil/strings/stringpiece.h" + +namespace kudu { +namespace tools { + +enum class AnsiCode { + RED, + YELLOW, + GREEN, + BLUE, + RESET +}; + +std::string Color(AnsiCode color, StringPiece s); + +} // namespace tools +} // namespace kudu diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc index 1a7a040923..aae4656a40 100644 --- a/src/kudu/tools/ksck-test.cc +++ b/src/kudu/tools/ksck-test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -25,6 +26,8 @@ #include "kudu/util/scoped_cleanup.h" #include "kudu/util/test_util.h" +DECLARE_string(color); + namespace kudu { namespace tools { @@ -112,6 +115,7 @@ class KsckTest : public KuduTest { : master_(new MockKsckMaster()), cluster_(new KsckCluster(static_pointer_cast(master_))), ksck_(new Ksck(cluster_)) { + FLAGS_color = "never"; unordered_map> tablet_servers; for (int i = 0; i < 3; i++) { string name = Substitute("ts-id-$0", i); @@ -289,25 +293,22 @@ TEST_F(KsckTest, TestBadTabletServer) { "ts-id-1 (): Network error: Network failure"); ASSERT_STR_CONTAINS( err_stream_.str(), - "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n" - "------------------------------------------------------------\n" - "WARNING: Should have a replica on TS ts-id-1 (), but TS is unavailable\n" - "INFO: OK state on TS ts-id-0 (): RUNNING\n" - "INFO: OK state on TS ts-id-2 (): RUNNING\n"); + "Tablet tablet-id-0 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" + " ts-id-0 (): RUNNING [LEADER]\n" + " ts-id-1 (): TS unavailable\n" + " ts-id-2 (): RUNNING\n"); ASSERT_STR_CONTAINS( err_stream_.str(), - "WARNING: Detected problems with Tablet tablet-id-1 of table 'test'\n" - "------------------------------------------------------------\n" - "WARNING: Should have a replica on TS ts-id-1 (), but TS is unavailable\n" - "INFO: OK state on TS ts-id-0 (): RUNNING\n" - "INFO: OK state on TS ts-id-2 (): RUNNING\n"); + "Tablet tablet-id-1 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" + " ts-id-0 (): RUNNING [LEADER]\n" + " ts-id-1 (): TS unavailable\n" + " ts-id-2 (): RUNNING\n"); ASSERT_STR_CONTAINS( err_stream_.str(), - "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n" - "------------------------------------------------------------\n" - "WARNING: Should have a replica on TS ts-id-1 (), but TS is unavailable\n" - "INFO: OK state on TS ts-id-0 (): RUNNING\n" - "INFO: OK state on TS ts-id-2 (): RUNNING\n"); + "Tablet tablet-id-2 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n" + " ts-id-0 (): RUNNING [LEADER]\n" + " ts-id-1 (): TS unavailable\n" + " ts-id-2 (): RUNNING\n"); } TEST_F(KsckTest, TestZeroTabletReplicasCheck) { @@ -340,7 +341,7 @@ TEST_F(KsckTest, TestOneSmallReplicatedTable) { Status s = ksck_->ChecksumData(ChecksumOptions()); EXPECT_EQ("Not found: No table found. Filter: table_filters=xyz", s.ToString()); ASSERT_STR_CONTAINS(err_stream_.str(), - "INFO: The cluster doesn't have any matching tables"); + "The cluster doesn't have any matching tables"); // Test filtering with a matching table pattern. err_stream_.str(""); @@ -365,7 +366,8 @@ TEST_F(KsckTest, TestOneOneTabletBrokenTable) { Status s = RunKsck(); EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); ASSERT_STR_CONTAINS(err_stream_.str(), - "Tablet tablet-id-1 of table 'test' has 2 instead of 3 replicas"); + "Tablet tablet-id-1 of table 'test' is under-replicated: " + "configuration has 2 replicas vs desired 3"); } TEST_F(KsckTest, TestMismatchedAssignments) { @@ -377,9 +379,11 @@ TEST_F(KsckTest, TestMismatchedAssignments) { Status s = RunKsck(); EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); ASSERT_STR_CONTAINS(err_stream_.str(), - "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n" - "------------------------------------------------------------\n" - "WARNING: Missing a tablet replica on tablet server ts-id-0 ()\n"); + "Tablet tablet-id-2 of table 'test' is under-replicated: " + "1 replica(s) not RUNNING\n" + " ts-id-0 (): missing [LEADER]\n" + " ts-id-1 (): RUNNING\n" + " ts-id-2 (): RUNNING\n"); } TEST_F(KsckTest, TestTabletNotRunning) { @@ -389,19 +393,19 @@ TEST_F(KsckTest, TestTabletNotRunning) { EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString()); ASSERT_STR_CONTAINS( err_stream_.str(), - "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n" - "------------------------------------------------------------\n" - "WARNING: Bad state on TS ts-id-0 (): FAILED\n" - " Last status: \n" - " Data state: TABLET_DATA_UNKNOWN\n" - "WARNING: Bad state on TS ts-id-1 (): FAILED\n" - " Last status: \n" - " Data state: TABLET_DATA_UNKNOWN\n" - "WARNING: Bad state on TS ts-id-2 (): FAILED\n" - " Last status: \n" - " Data state: TABLET_DATA_UNKNOWN\n" - "ERROR: Tablet tablet-id-0 of table 'test' does not have a majority of " - "replicas in RUNNING state\n"); + "Tablet tablet-id-0 of table 'test' is unavailable: 3 replica(s) not RUNNING\n" + " ts-id-0 (): bad state [LEADER]\n" + " State: FAILED\n" + " Data state: TABLET_DATA_UNKNOWN\n" + " Last status: \n" + " ts-id-1 (): bad state\n" + " State: FAILED\n" + " Data state: TABLET_DATA_UNKNOWN\n" + " Last status: \n" + " ts-id-2 (): bad state\n" + " State: FAILED\n" + " Data state: TABLET_DATA_UNKNOWN\n" + " Last status: \n"); } } // namespace tools diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc index 8d7e4f5c1e..7165280e47 100644 --- a/src/kudu/tools/ksck.cc +++ b/src/kudu/tools/ksck.cc @@ -18,8 +18,10 @@ #include "kudu/tools/ksck.h" #include +#include #include #include +#include #include #include "kudu/consensus/quorum_util.h" @@ -29,6 +31,7 @@ #include "kudu/gutil/strings/human_readable.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/gutil/strings/util.h" +#include "kudu/tools/color.h" #include "kudu/util/atomic.h" #include "kudu/util/blocking_queue.h" #include "kudu/util/locks.h" @@ -38,7 +41,6 @@ namespace kudu { namespace tools { -using std::cerr; using std::cout; using std::endl; using std::ostream; @@ -60,26 +62,23 @@ DEFINE_uint64(checksum_snapshot_timestamp, ChecksumOptions::kCurrentTimestamp, DEFINE_int32(fetch_replica_info_concurrency, 20, "Number of concurrent tablet servers to fetch replica info from."); -// The stream to write output to. If this is NULL, defaults to cerr. +// The stream to write output to. If this is NULL, defaults to cout. // This is used by tests to capture output. ostream* g_err_stream = NULL; -// Print an informational message to cerr. +// Print an informational message to cout. static ostream& Out() { - return (g_err_stream ? *g_err_stream : cerr); -} -static ostream& Info() { - return Out() << "INFO: "; + return (g_err_stream ? *g_err_stream : cout); } -// Print a warning message to cerr. +// Print a warning message to cout. static ostream& Warn() { - return Out() << "WARNING: "; + return Out() << Color(AnsiCode::YELLOW, "WARNING: "); } -// Print an error message to cerr. +// Print an error message to cout. static ostream& Error() { - return Out() << "ERROR: "; + return Out() << Color(AnsiCode::RED, "WARNING: "); } namespace { @@ -151,7 +150,7 @@ Status Ksck::CheckMasterRunning() { VLOG(1) << "Connecting to the Master"; Status s = cluster_->master()->Connect(); if (s.ok()) { - Info() << "Connected to the Master" << endl; + Out() << "Connected to the Master" << endl; } return s; } @@ -188,7 +187,7 @@ Status Ksck::FetchInfoFromTabletServers() { pool->Wait(); if (bad_servers.Load() == 0) { - Info() << Substitute("Fetched info from all $0 Tablet Servers", servers_count) << endl; + Out() << Substitute("Fetched info from all $0 Tablet Servers", servers_count) << endl; return Status::OK(); } else { Warn() << Substitute("Fetched info from $0 Tablet Servers, $1 weren't reachable", @@ -221,15 +220,16 @@ Status Ksck::CheckTablesConsistency() { if (!VerifyTable(table)) { bad_tables_count++; } + Out() << endl; } if (tables_checked == 0) { - Info() << "The cluster doesn't have any matching tables" << endl; + Out() << "The cluster doesn't have any matching tables" << endl; return Status::OK(); } if (bad_tables_count == 0) { - Info() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked) << endl; + Out() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked) << endl; return Status::OK(); } else { Warn() << Substitute("$0 out of $1 table(s) are not in a healthy state", @@ -289,7 +289,7 @@ class ChecksumResultReporter : public RefCountedThreadSafe& table) { - bool good_table = true; const auto all_tablets = table->tablets(); vector> tablets; std::copy_if(all_tablets.begin(), all_tablets.end(), std::back_inserter(tablets), @@ -555,124 +554,146 @@ bool Ksck::VerifyTable(const shared_ptr& table) { }); int table_num_replicas = table->num_replicas(); - VLOG(1) << Substitute("Verifying $0 tablets for table $1 configured with num_replicas = $2", + VLOG(1) << Substitute("Verifying $0 tablet(s) for table $1 configured with num_replicas = $2", tablets.size(), table->name(), table_num_replicas); - int bad_tablets_count = 0; - for (const shared_ptr &tablet : tablets) { - if (!VerifyTablet(tablet, table_num_replicas)) { - bad_tablets_count++; - } + map result_counts; + for (const auto& tablet : tablets) { + auto tablet_result = VerifyTablet(tablet, table_num_replicas); + result_counts[tablet_result]++; } - if (bad_tablets_count == 0) { - Info() << Substitute("Table $0 is HEALTHY ($1 tablets checked)", - table->name(), tablets.size()) << endl; + if (result_counts[CheckResult::OK] == tablets.size()) { + Out() << Substitute("Table $0 is $1 ($2 tablet(s) checked)", + table->name(), + Color(AnsiCode::GREEN, "HEALTHY"), + tablets.size()) << endl; + return true; } else { - Warn() << Substitute("Table $0 has $1 bad tablets", table->name(), bad_tablets_count) << endl; - good_table = false; + if (result_counts[CheckResult::UNAVAILABLE] > 0) { + Out() << Substitute("Table $0 has $1 $2 tablet(s)", + table->name(), + result_counts[CheckResult::UNAVAILABLE], + Color(AnsiCode::RED, "unavailable")) << endl; + } + if (result_counts[CheckResult::UNDER_REPLICATED] > 0) { + Out() << Substitute("Table $0 has $1 $2 tablet(s)", + table->name(), + result_counts[CheckResult::UNDER_REPLICATED], + Color(AnsiCode::YELLOW, "under-replicated")) << endl; + } + return false; } - return good_table; } -bool Ksck::VerifyTablet(const shared_ptr& tablet, int table_num_replicas) { - string tablet_str = Substitute("Tablet $0 of table '$1'", +Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr& tablet, int table_num_replicas) { + const string tablet_str = Substitute("Tablet $0 of table '$1'", tablet->id(), tablet->table()->name()); - vector > replicas = tablet->replicas(); - vector warnings, errors, infos; - if (check_replica_count_ && replicas.size() != table_num_replicas) { - warnings.push_back(Substitute("$0 has $1 instead of $2 replicas", - tablet_str, replicas.size(), table_num_replicas)); - } - int leaders_count = 0; - int followers_count = 0; - int alive_count = 0; - int running_count = 0; - for (const shared_ptr replica : replicas) { + + // Consolidate the state of each replica into a simple struct for easier analysis. + struct ReplicaState { + KsckTabletReplica* replica; + KsckTabletServer* ts = nullptr; + tablet::TabletStatePB state = tablet::UNKNOWN; + boost::optional status_pb; + }; + + vector replica_states; + for (const shared_ptr replica : tablet->replicas()) { + replica_states.emplace_back(); + auto* repl_state = &replica_states.back(); + repl_state->replica = replica.get(); + VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1", tablet->id(), replica->ts_uuid()); // Check for agreement on tablet assignment and state between the master // and the tablet server. auto ts = FindPtrOrNull(cluster_->tablet_servers(), replica->ts_uuid()); - if (ts && ts->is_healthy()) { - alive_count++; - auto state = ts->ReplicaState(tablet->id()); - if (state != tablet::UNKNOWN) { - VLOG(1) << Substitute("Tablet server $0 agrees that it hosts a replica of $1", - ts->ToString(), tablet_str); - } + repl_state->ts = ts.get(); - switch (state) { - case tablet::RUNNING: - VLOG(1) << Substitute("Tablet replica for $0 on TS $1 is RUNNING", - tablet_str, ts->ToString()); - running_count++; - infos.push_back(Substitute("OK state on TS $0: $1", - ts->ToString(), tablet::TabletStatePB_Name(state))); - break; - - case tablet::UNKNOWN: - warnings.push_back(Substitute("Missing a tablet replica on tablet server $0", - ts->ToString())); - break; - - default: { - const auto& status_pb = ts->tablet_status_map().at(tablet->id()); - warnings.push_back( - Substitute("Bad state on TS $0: $1\n" - " Last status: $2\n" - " Data state: $3", - ts->ToString(), tablet::TabletStatePB_Name(state), - status_pb.last_status(), - tablet::TabletDataState_Name(status_pb.tablet_data_state()))); - break; - } + if (ts && ts->is_healthy()) { + repl_state->state = ts->ReplicaState(tablet->id()); + if (ContainsKey(ts->tablet_status_map(), tablet->id())) { + repl_state->status_pb = ts->tablet_status_map().at(tablet->id()); } - } else { - // no TS or unhealthy TS - warnings.push_back(Substitute("Should have a replica on TS $0, but TS is unavailable", - ts ? ts->ToString() : replica->ts_uuid())); - } - if (replica->is_leader()) { - VLOG(1) << Substitute("Replica at $0 is a LEADER", replica->ts_uuid()); - leaders_count++; - } else if (replica->is_follower()) { - VLOG(1) << Substitute("Replica at $0 is a FOLLOWER", replica->ts_uuid()); - followers_count++; } } - if (leaders_count == 0) { - errors.push_back("No leader detected"); - } - VLOG(1) << Substitute("$0 has $1 leader and $2 followers", - tablet_str, leaders_count, followers_count); - int majority_size = consensus::MajoritySize(table_num_replicas); - if (alive_count < majority_size) { - errors.push_back(Substitute("$0 does not have a majority of replicas on live tablet servers", - tablet_str)); - } else if (running_count < majority_size) { - errors.push_back(Substitute("$0 does not have a majority of replicas in RUNNING state", - tablet_str)); - } - - bool has_issues = !warnings.empty() || !errors.empty(); - if (has_issues) { - Warn() << "Detected problems with " << tablet_str << endl - << "------------------------------------------------------------" << endl; - for (const auto& s : warnings) { - Warn() << s << endl; + + // Summarize the states. + int leaders_count = 0; + int running_count = 0; + for (const auto& r : replica_states) { + if (r.replica->is_leader()) { + leaders_count++; } - for (const auto& s : errors) { - Error() << s << endl; + if (r.state == tablet::RUNNING) { + running_count++; } - // We only print the 'INFO' messages on tablets that have some issues. - // Otherwise, it's a bit verbose. - for (const auto& s : infos) { - Info() << s << endl; + } + + // Determine the overall health state of the tablet. + CheckResult result = CheckResult::OK; + int num_voters = replica_states.size(); + int majority_size = consensus::MajoritySize(num_voters); + if (running_count < majority_size) { + Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING", + tablet_str, + Color(AnsiCode::RED, "unavailable"), + num_voters - running_count) << endl; + result = CheckResult::UNAVAILABLE; + } else if (running_count < num_voters) { + Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING", + tablet_str, + Color(AnsiCode::YELLOW, "under-replicated"), + num_voters - running_count) << endl; + result = CheckResult::UNDER_REPLICATED; + } else if (check_replica_count_ && num_voters < table_num_replicas) { + Out() << Substitute("$0 is $1: configuration has $2 replicas vs desired $3", + tablet_str, + Color(AnsiCode::YELLOW, "under-replicated"), + num_voters, + table_num_replicas) << endl; + result = CheckResult::UNDER_REPLICATED; + } else if (leaders_count != 1) { + Out() << Substitute("$0 is $1: expected one LEADER replica", + tablet_str, Color(AnsiCode::RED, "unavailable")) << endl; + result = CheckResult::UNAVAILABLE; + } + + // In the case that we found something wrong, dump info on all the replicas + // to make it easy to debug. + if (result != CheckResult::OK) { + for (const ReplicaState& r : replica_states) { + string ts_str = r.ts ? r.ts->ToString() : r.replica->ts_uuid(); + const char* leader_str = r.replica->is_leader() ? " [LEADER]" : ""; + + Out() << " " << ts_str << ": "; + if (!r.ts || !r.ts->is_healthy()) { + Out() << Color(AnsiCode::YELLOW, "TS unavailable") << leader_str << endl; + continue; + } + if (r.state == tablet::RUNNING) { + Out() << Color(AnsiCode::GREEN, "RUNNING") << leader_str << endl; + continue; + } + if (r.status_pb == boost::none) { + Out() << Color(AnsiCode::YELLOW, "missing") << leader_str << endl; + continue; + } + + Out() << Color(AnsiCode::YELLOW, "bad state") << leader_str << endl; + Out() << Substitute( + " State: $0\n" + " Data state: $1\n" + " Last status: $2\n", + Color(AnsiCode::BLUE, tablet::TabletStatePB_Name(r.state)), + Color(AnsiCode::BLUE, tablet::TabletDataState_Name(r.status_pb->tablet_data_state())), + Color(AnsiCode::BLUE, r.status_pb->last_status())); } + Out() << endl; } - return !has_issues; + return result; } } // namespace tools diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h index cb78686f9c..c51283114b 100644 --- a/src/kudu/tools/ksck.h +++ b/src/kudu/tools/ksck.h @@ -375,11 +375,18 @@ class Ksck { Status ChecksumData(const ChecksumOptions& options); private: + enum class CheckResult { + OK, + UNDER_REPLICATED, + UNAVAILABLE + }; + bool VerifyTable(const std::shared_ptr& table); bool VerifyTableWithTimeout(const std::shared_ptr& table, const MonoDelta& timeout, const MonoDelta& retry_interval); - bool VerifyTablet(const std::shared_ptr& tablet, int table_num_replicas); + CheckResult VerifyTablet(const std::shared_ptr& tablet, + int table_num_replicas); const std::shared_ptr cluster_; diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc index d198a9bbbc..93ed5db72a 100644 --- a/src/kudu/tools/tool_action_cluster.cc +++ b/src/kudu/tools/tool_action_cluster.cc @@ -135,6 +135,7 @@ unique_ptr BuildClusterMode() { .AddRequiredParameter({ "master_address", "Kudu Master RPC address of form hostname:port" }) .AddOptionalParameter("checksum_scan") .AddOptionalParameter("checksum_snapshot") + .AddOptionalParameter("color") .AddOptionalParameter("tables") .AddOptionalParameter("tablets") .Build();