Skip to content

Commit

Permalink
[ksck] KUDU-2964 Remove per server warn message when GetFlags call fails
Browse files Browse the repository at this point in the history
Logging individual master/tablet server warning message on
failure to fetch flags is unnecessarily verbose since
the cause is almost always the same, either the server is unavailable
or GetFlags RPC is not available because server is old.

There is already a summary of number of masters/tablet servers
that failed to respond to GetFlags request. So remove the per server
warning message.

Tests:
- Invoked ksck against KUDU version 1.4.0.

Warnings:
==================
master flag check error: 1 of 1 masters' flags were not available
tserver flag check error: 2 of 2 tservers' flags were not available

Change-Id: I06dd417b9bf5274f90bbdb9338600ea518e82809
Reviewed-on: http://gerrit.cloudera.org:8080/14460
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <[email protected]>
  • Loading branch information
bbhavsar authored and andrwng committed Oct 18, 2019
1 parent 524ed47 commit f5e657d
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 32 deletions.
78 changes: 65 additions & 13 deletions src/kudu/tools/ksck-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,15 @@ namespace tools {

class MockKsckMaster : public KsckMaster {
public:
explicit MockKsckMaster(const string& address, const string& uuid)
explicit MockKsckMaster(const string& address, const string& uuid, bool is_get_flags_available)
: KsckMaster(address),
fetch_info_status_(Status::OK()) {
fetch_info_status_(Status::OK()),
is_get_flags_available_(is_get_flags_available) {
uuid_ = uuid;
version_ = "mock-version";
flags_ = GetFlagsResponsePB{};
if (is_get_flags_available_) {
flags_ = GetFlagsResponsePB{};
}
}

Status Init() override {
Expand All @@ -109,8 +112,12 @@ class MockKsckMaster : public KsckMaster {
}

Status FetchUnusualFlags() override {
flags_state_ = KsckFetchState::FETCHED;
return Status::OK();
if (is_get_flags_available_) {
flags_state_ = KsckFetchState::FETCHED;
return Status::OK();
}
flags_state_ = KsckFetchState::FETCH_FAILED;
return Status::RemoteError("GetFlags not available");
}

// Public because the unit tests mutate these variables directly.
Expand All @@ -120,17 +127,22 @@ class MockKsckMaster : public KsckMaster {
using KsckMaster::cstate_;
using KsckMaster::flags_;
using KsckMaster::version_;
private:
const bool is_get_flags_available_;
};

class MockKsckTabletServer : public KsckTabletServer {
public:
explicit MockKsckTabletServer(const string& uuid)
explicit MockKsckTabletServer(const string& uuid, bool is_get_flags_available)
: KsckTabletServer(uuid),
fetch_info_status_(Status::OK()),
fetch_info_health_(ServerHealth::HEALTHY),
address_("<mock>") {
address_("<mock>"),
is_get_flags_available_(is_get_flags_available) {
version_ = "mock-version";
flags_ = GetFlagsResponsePB{};
if (is_get_flags_available_) {
flags_ = GetFlagsResponsePB{};
}
}

Status FetchInfo(ServerHealth* health) override {
Expand All @@ -150,8 +162,12 @@ class MockKsckTabletServer : public KsckTabletServer {
}

Status FetchUnusualFlags() override {
flags_state_ = KsckFetchState::FETCHED;
return Status::OK();
if (is_get_flags_available_) {
flags_state_ = KsckFetchState::FETCHED;
return Status::OK();
}
flags_state_ = KsckFetchState::FETCH_FAILED;
return Status::RemoteError("GetFlags not available");
}

void FetchCurrentTimestampAsync() override {}
Expand Down Expand Up @@ -189,6 +205,7 @@ class MockKsckTabletServer : public KsckTabletServer {

private:
const string address_;
const bool is_get_flags_available_;
};

class MockKsckCluster : public KsckCluster {
Expand Down Expand Up @@ -230,7 +247,9 @@ class KsckTest : public KuduTest {
: cluster_(new MockKsckCluster()),
ksck_(new Ksck(cluster_, &err_stream_)) {
FLAGS_color = "never";
}

void SetUp() override {
// Set up the master consensus state.
consensus::ConsensusStatePB cstate;
cstate.set_current_term(0);
Expand All @@ -244,15 +263,15 @@ class KsckTest : public KuduTest {
for (int i = 0; i < 3; i++) {
const string uuid = Substitute("master-id-$0", i);
const string addr = Substitute("master-$0", i);
shared_ptr<MockKsckMaster> master = std::make_shared<MockKsckMaster>(addr, uuid);
auto master = std::make_shared<MockKsckMaster>(addr, uuid, IsGetFlagsAvailable());
master->cstate_ = cstate;
cluster_->masters_.push_back(master);
}

KsckCluster::TSMap tablet_servers;
for (int i = 0; i < 3; i++) {
string name = Substitute("ts-id-$0", i);
shared_ptr<MockKsckTabletServer> ts(new MockKsckTabletServer(name));
auto ts = std::make_shared<MockKsckTabletServer>(name, IsGetFlagsAvailable());
InsertOrDie(&tablet_servers, ts->uuid(), ts);
}
cluster_->tablet_servers_.swap(tablet_servers);
Expand Down Expand Up @@ -425,6 +444,10 @@ class KsckTest : public KuduTest {
return json_stream.str();
}

virtual bool IsGetFlagsAvailable() const {
return true;
}

shared_ptr<MockKsckCluster> cluster_;
shared_ptr<Ksck> ksck_;
// This is used as a stack. First the unit test is responsible to create a plan to follow, that
Expand All @@ -437,6 +460,13 @@ class KsckTest : public KuduTest {
std::ostringstream err_stream_;
};

class GetFlagsUnavailableKsckTest : public KsckTest {
protected:
bool IsGetFlagsAvailable() const override {
return false;
}
};

// Helpful macros for checking JSON fields vs. expected values.
// In all cases, the meaning of the parameters are as follows:
// 'reader' is the JsonReader that owns the parsed JSON data.
Expand Down Expand Up @@ -887,6 +917,12 @@ void CheckJsonStringVsKsckResults(const string& json,
CheckJsonVsErrors(r, "errors", results.error_messages);
}

void CheckMessageNotPresent(const vector<Status>& messages, const string& msg) {
for (const auto& status : messages) {
ASSERT_STR_NOT_CONTAINS(status.ToString(), msg);
}
}

TEST_F(KsckTest, TestServersOk) {
ASSERT_OK(RunKsck());
const string err_string = err_stream_.str();
Expand Down Expand Up @@ -1076,6 +1112,14 @@ TEST_F(KsckTest, TestMasterFlagCheck) {
" hidden_unsafe_flag | 1 | hidden,unsafe | master-1");
}

TEST_F(GetFlagsUnavailableKsckTest, TestMasterFlagsUnavailable) {
ASSERT_OK(ksck_->CheckMasterHealth());
ASSERT_TRUE(ksck_->CheckMasterUnusualFlags().IsIncomplete());

static const string flags_msg = "unable to get flag information for master";
CheckMessageNotPresent(ksck_->results().warning_messages, flags_msg);
}

TEST_F(KsckTest, TestWrongUUIDTabletServer) {
CreateOneTableOneTablet();

Expand Down Expand Up @@ -1202,6 +1246,14 @@ TEST_F(KsckTest, TestTserverFlagCheck) {
" hidden_unsafe_flag | 1 | hidden,unsafe | <mock>");
}

TEST_F(GetFlagsUnavailableKsckTest, TestTserverFlagsUnavailable) {
ASSERT_OK(ksck_->FetchInfoFromTabletServers());
ASSERT_TRUE(ksck_->CheckTabletServerUnusualFlags().IsIncomplete());

static const string flags_msg = "unable to get flag information for tablet server";
CheckMessageNotPresent(ksck_->results().warning_messages, flags_msg);
}

TEST_F(KsckTest, TestOneTableCheck) {
CreateOneTableOneTablet();
FLAGS_checksum_scan = true;
Expand Down Expand Up @@ -1613,7 +1665,7 @@ TEST_F(KsckTest, TestChecksumWithAllPeersUnhealthy) {
const char* const new_uuid = "new";
EmplaceOrDie(&cluster_->tablet_servers_,
new_uuid,
std::make_shared<MockKsckTabletServer>(new_uuid));
std::make_shared<MockKsckTabletServer>(new_uuid, IsGetFlagsAvailable()));

// The checksum should fail for tablet because none of its replicas are
// available to provide a timestamp.
Expand Down
26 changes: 7 additions & 19 deletions src/kudu/tools/ksck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <glog/logging.h>

#include "kudu/consensus/quorum_util.h"
#include "kudu/gutil/basictypes.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/strings/join.h"
Expand Down Expand Up @@ -205,15 +206,9 @@ Status Ksck::CheckMasterHealth() {
}

// Fetch the flags information.
// Failing to gather flags is only a warning.
s = master->FetchUnusualFlags();
if (!s.ok()) {
std::lock_guard<simple_spinlock> lock(master_summaries_lock);
results_.warning_messages.emplace_back(s.CloneAndPrepend(Substitute(
"unable to get flag information for master $0 ($1)",
master->uuid(),
master->address())));
}
// Flag retrieval is not supported by older versions; failure is tracked in
// CheckMasterUnusualFlags().
ignore_result(master->FetchUnusualFlags());
}));
}
pool_->Wait();
Expand Down Expand Up @@ -376,16 +371,9 @@ Status Ksck::FetchInfoFromTabletServers() {
}

// Fetch the flags information.
// Failing to gather flags is only a warning since fetching the flags
// is not supported in older versions.
s = ts->FetchUnusualFlags();
if (!s.ok()) {
std::lock_guard<simple_spinlock> lock(tablet_server_summaries_lock);
results_.warning_messages.emplace_back(s.CloneAndPrepend(Substitute(
"unable to get flag information for tablet server $0 ($1)",
ts->uuid(),
ts->address())));
}
// Flag retrieval is not supported by older versions; failure is tracked in
// CheckTabletServerUnusualFlags().
ignore_result(ts->FetchUnusualFlags());
}));
}
pool_->Wait();
Expand Down

0 comments on commit f5e657d

Please sign in to comment.