diff --git a/src/kudu/master/auto_rebalancer.cc b/src/kudu/master/auto_rebalancer.cc index 72eb7dc4b1..55b9f5ceac 100644 --- a/src/kudu/master/auto_rebalancer.cc +++ b/src/kudu/master/auto_rebalancer.cc @@ -155,7 +155,8 @@ AutoRebalancerTask::AutoRebalancerTask(CatalogManager* catalog_manager, /*run_policy_fixer*/true, /*run_cross_location_rebalancing*/true, /*run_intra_location_rebalancing*/true, - FLAGS_auto_rebalancing_load_imbalance_threshold))), + FLAGS_auto_rebalancing_load_imbalance_threshold, + /*force_rebalance_replicas_on_maintenance_tservers*/false))), random_generator_(random_device_()), number_of_loop_iterations_for_test_(0), moves_scheduled_this_round_for_test_(0) { diff --git a/src/kudu/rebalance/rebalancer.cc b/src/kudu/rebalance/rebalancer.cc index 7b74b30ef1..9f2e0a2b87 100644 --- a/src/kudu/rebalance/rebalancer.cc +++ b/src/kudu/rebalance/rebalancer.cc @@ -60,20 +60,20 @@ using strings::Substitute; namespace kudu { namespace rebalance { -Rebalancer::Config::Config( - vector ignored_tservers_param, - vector master_addresses, - vector table_filters, - size_t max_moves_per_server, - size_t max_staleness_interval_sec, - int64_t max_run_time_sec, - bool move_replicas_from_ignored_tservers, - bool move_rf1_replicas, - bool output_replica_distribution_details, - bool run_policy_fixer, - bool run_cross_location_rebalancing, - bool run_intra_location_rebalancing, - double load_imbalance_threshold) +Rebalancer::Config::Config(vector ignored_tservers_param, + vector master_addresses, + vector table_filters, + size_t max_moves_per_server, + size_t max_staleness_interval_sec, + int64_t max_run_time_sec, + bool move_replicas_from_ignored_tservers, + bool move_rf1_replicas, + bool output_replica_distribution_details, + bool run_policy_fixer, + bool run_cross_location_rebalancing, + bool run_intra_location_rebalancing, + double load_imbalance_threshold, + bool force_rebalance_replicas_on_maintenance_tservers) : ignored_tservers(ignored_tservers_param.begin(), ignored_tservers_param.end()), master_addresses(std::move(master_addresses)), table_filters(std::move(table_filters)), @@ -86,7 +86,9 @@ Rebalancer::Config::Config( run_policy_fixer(run_policy_fixer), run_cross_location_rebalancing(run_cross_location_rebalancing), run_intra_location_rebalancing(run_intra_location_rebalancing), - load_imbalance_threshold(load_imbalance_threshold) { + load_imbalance_threshold(load_imbalance_threshold), + force_rebalance_replicas_on_maintenance_tservers( + force_rebalance_replicas_on_maintenance_tservers) { DCHECK_GE(max_moves_per_server, 0); } diff --git a/src/kudu/rebalance/rebalancer.h b/src/kudu/rebalance/rebalancer.h index edf292ca42..31d3364bd3 100644 --- a/src/kudu/rebalance/rebalancer.h +++ b/src/kudu/rebalance/rebalancer.h @@ -42,6 +42,7 @@ struct ClusterRawInfo { std::vector tserver_summaries; std::vector table_summaries; std::vector tablet_summaries; + std::unordered_set tservers_in_maintenance_mode; }; // A class implementing logic for Kudu cluster rebalancing. @@ -66,7 +67,8 @@ class Rebalancer { bool run_policy_fixer = true, bool run_cross_location_rebalancing = true, bool run_intra_location_rebalancing = true, - double load_imbalance_threshold = kLoadImbalanceThreshold); + double load_imbalance_threshold = kLoadImbalanceThreshold, + bool force_rebalance_replicas_on_maintenance_tservers = false); // UUIDs of ignored servers. If empty, run the rebalancing on // all tablet servers in the cluster only when all tablet servers @@ -132,6 +134,8 @@ class Rebalancer { // The per-table location load imbalance threshold for the cross-location // balancing algorithm. double load_imbalance_threshold; + + bool force_rebalance_replicas_on_maintenance_tservers; }; // Represents a concrete move of a replica from one tablet server to another. diff --git a/src/kudu/tools/rebalancer_tool-test.cc b/src/kudu/tools/rebalancer_tool-test.cc index d2aed3d96f..37437c758c 100644 --- a/src/kudu/tools/rebalancer_tool-test.cc +++ b/src/kudu/tools/rebalancer_tool-test.cc @@ -279,6 +279,71 @@ TEST_P(RebalanceStartCriteriaTest, UnknownIgnoredTServer) { } } +TEST_P(RebalanceStartCriteriaTest, TabletServerInMaintenanceMode) { + const bool is_343_scheme = (GetParam() == Kudu1097::Enable); + const vector kMasterFlags = { + Substitute("--raft_prepare_replacement_before_eviction=$0", is_343_scheme), + }; + const vector kTserverFlags = { + Substitute("--raft_prepare_replacement_before_eviction=$0", is_343_scheme), + }; + + FLAGS_num_tablet_servers = 5; + NO_FATALS(BuildAndStart(kTserverFlags, kMasterFlags)); + + auto* ts = cluster_->tablet_server(0); + string master_addr = cluster_->master()->bound_rpc_addr().ToString(); + + // Set the tserver into maintenance mode. + { + string out; + string err; + Status s = + RunKuduTool({"tserver", "state", "enter_maintenance", master_addr, ts->uuid()}, &out, &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + } + + // Rebalancer doesn't start if any tserver is in maintenance mode. + { + string out; + string err; + Status s = RunKuduTool({"cluster", "rebalance", master_addr}, &out, &err); + ASSERT_TRUE(s.IsRuntimeError()) << ToolRunInfo(s, out, err); + ASSERT_STR_CONTAINS(err, "unacceptable state MAINTENANCE_MODE"); + } + + // Rebalancer starts when specifying the maintenance tserver in '--ignored_tservers'. + { + string out; + string err; + Status s = RunKuduTool( + {"cluster", "rebalance", master_addr, "--ignored_tservers=" + ts->uuid()}, &out, &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + } + + // Rebalancer starts when specifying '--force_rebalance_replicas_on_maintenance_tservers'. + { + string out; + string err; + Status s = RunKuduTool( + {"cluster", "rebalance", master_addr, "--force_rebalance_replicas_on_maintenance_tservers"}, + &out, + &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + } + + // Now exit maintenance mode and rebalancer could start. + { + string out; + string err; + Status s = + RunKuduTool({"tserver", "state", "exit_maintenance", master_addr, ts->uuid()}, &out, &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + s = RunKuduTool({"cluster", "rebalance", master_addr}, &out, &err); + ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); + } +} + // Make sure the rebalancer doesn't start if specified too many ignored tservers. class RebalanceStartSafetyTest : public AdminCliTest, @@ -874,12 +939,8 @@ TEST_F(IgnoredTserverGoesDownDuringRebalancingTest, TserverDown) { "--max_moves_per_server=1", }, &out, &err); ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err); - // The rebalancer tool should not crash. ASSERT_STR_NOT_CONTAINS(s.ToString(), kExitOnSignalStr); - // The rebalancer tool would log some information of the unhealthy server. - ASSERT_STR_CONTAINS(err, Substitute("ignoring unhealthy tablet server $0", - cluster_->tablet_server(ignored_tserver_idx)->uuid())); // Restart the ignored tablet server ASSERT_OK(cluster_->tablet_server(ignored_tserver_idx)->Restart()); diff --git a/src/kudu/tools/rebalancer_tool.cc b/src/kudu/tools/rebalancer_tool.cc index 3008ac836b..45d22b44e3 100644 --- a/src/kudu/tools/rebalancer_tool.cc +++ b/src/kudu/tools/rebalancer_tool.cc @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -37,19 +36,15 @@ #include #include -#include "kudu/common/wire_protocol.h" -#include "kudu/common/wire_protocol.pb.h" #include "kudu/gutil/basictypes.h" #include "kudu/gutil/map-util.h" #include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/master.pb.h" -#include "kudu/master/master.proxy.h" #include "kudu/rebalance/cluster_status.h" #include "kudu/rebalance/placement_policy_util.h" #include "kudu/rebalance/rebalance_algo.h" #include "kudu/rebalance/rebalancer.h" -#include "kudu/rpc/response_callback.h" #include "kudu/tools/ksck.h" #include "kudu/tools/ksck_remote.h" #include "kudu/tools/ksck_results.h" @@ -62,9 +57,7 @@ using kudu::cluster_summary::ServerHealth; using kudu::cluster_summary::ServerHealthSummary; using kudu::cluster_summary::TableSummary; using kudu::cluster_summary::TabletSummary; -using kudu::master::ListTabletServersRequestPB; -using kudu::master::ListTabletServersResponsePB; -using kudu::master::MasterServiceProxy; +using kudu::master::TServerStatePB; using kudu::rebalance::BuildTabletExtraInfoMap; using kudu::rebalance::ClusterInfo; using kudu::rebalance::ClusterRawInfo; @@ -110,9 +103,6 @@ Status RebalancerTool::PrintStats(ostream& out) { ClusterInfo ci; RETURN_NOT_OK(BuildClusterInfo(raw_info, MovesInProgress(), &ci)); - // Print information about replica count of healthy ignored tservers. - RETURN_NOT_OK(PrintIgnoredTserversStats(ci, out)); - const auto& ts_id_by_location = ci.locality.servers_by_location; if (ts_id_by_location.empty()) { // Nothing to report about: there are no tablet servers reported. @@ -120,6 +110,9 @@ Status RebalancerTool::PrintStats(ostream& out) { return Status::OK(); } + // Print information about replica count of healthy ignored tservers. + RETURN_NOT_OK(PrintIgnoredTserversStats(ci, out)); + if (ts_id_by_location.size() == 1) { // That's about printing information about the whole cluster. return PrintLocationBalanceStats(ts_id_by_location.begin()->first, @@ -169,8 +162,7 @@ Status RebalancerTool::Run(RunStatus* result_status, size_t* moves_count) { } ClusterRawInfo raw_info; - RETURN_NOT_OK( - KsckResultsToClusterRawInfo(boost::none, ksck_->results(), &raw_info)); + RETURN_NOT_OK(KsckResultsToClusterRawInfo(boost::none, ksck_->results(), &raw_info)); ClusterInfo ci; RETURN_NOT_OK(BuildClusterInfo(raw_info, MovesInProgress(), &ci)); @@ -263,10 +255,9 @@ Status RebalancerTool::Run(RunStatus* result_status, size_t* moves_count) { return Status::OK(); } -Status RebalancerTool::KsckResultsToClusterRawInfo( - const boost::optional& location, - const KsckResults& ksck_info, - ClusterRawInfo* raw_info) { +Status RebalancerTool::KsckResultsToClusterRawInfo(const boost::optional& location, + const KsckResults& ksck_info, + ClusterRawInfo* raw_info) { DCHECK(raw_info); const auto& cluster_status = ksck_info.cluster_status; @@ -344,9 +335,17 @@ Status RebalancerTool::KsckResultsToClusterRawInfo( } } + unordered_set tservers_in_maintenance_mode; + for (const auto& ts : tserver_summaries) { + if (ContainsKeyValuePair(ksck_info.ts_states, ts.uuid, TServerStatePB::MAINTENANCE_MODE)) { + tservers_in_maintenance_mode.emplace(ts.uuid); + } + } + raw_info->tserver_summaries = std::move(tserver_summaries); raw_info->table_summaries = std::move(table_summaries); raw_info->tablet_summaries = std::move(tablet_summaries); + raw_info->tservers_in_maintenance_mode = std::move(tservers_in_maintenance_mode); return Status::OK(); } @@ -588,9 +587,9 @@ Status RebalancerTool::RunWith(Runner* runner, RunStatus* result_status) { break; } - auto has_errors = false; + bool has_errors = false; while (!is_timed_out) { - auto is_scheduled = runner->ScheduleNextMove(&has_errors, &is_timed_out); + bool is_scheduled = runner->ScheduleNextMove(&has_errors, &is_timed_out); resync_state |= has_errors; if (resync_state || is_timed_out) { break; @@ -610,10 +609,9 @@ Status RebalancerTool::RunWith(Runner* runner, RunStatus* result_status) { // Poll for the status of pending operations. If some of the in-flight // operations are completed, it might be possible to schedule new ones // by calling Runner::ScheduleNextMove(). - auto has_pending_moves = false; - auto has_updates = runner->UpdateMovesInProgressStatus(&has_errors, - &is_timed_out, - &has_pending_moves); + bool has_pending_moves = false; + bool has_updates = + runner->UpdateMovesInProgressStatus(&has_errors, &is_timed_out, &has_pending_moves); if (has_updates) { // Reset the start of the staleness interval: there was some updates // on the status of scheduled move operations. @@ -721,6 +719,38 @@ void RebalancerTool::BaseRunner::UpdateOnMoveCompleted(const string& ts_uuid) { DCHECK(ts_per_op_count_updated); } +Status RebalancerTool::BaseRunner::CheckTabletServers(const ClusterRawInfo& raw_info) { + // For simplicity, allow to run the rebalancing only when all tablet servers + // are in good shape (except those specified in 'ignored_tservers'). + // Otherwise, the rebalancing might interfere with the + // automatic re-replication or get unexpected errors while moving replicas. + for (const auto& s : raw_info.tserver_summaries) { + if (s.health != ServerHealth::HEALTHY && !ContainsKey(ignored_tservers_, s.uuid)) { + return Status::IllegalState(Substitute("tablet server $0 ($1): unacceptable health status $2", + s.uuid, + s.address, + ServerHealthToString(s.health))); + } + } + if (rebalancer_->config_.force_rebalance_replicas_on_maintenance_tservers) { + return Status::OK(); + } + // Avoid moving replicas to tablet servers that are set maintenance mode. + for (const string& ts_uuid : raw_info.tservers_in_maintenance_mode) { + if (!ContainsKey(ignored_tservers_, ts_uuid)) { + return Status::IllegalState( + Substitute("tablet server $0: unacceptable state MAINTENANCE_MODE.\n" + "You can continue rebalancing in one of the following ways:\n" + "1. Set the tserver uuid into the '--ignored_tservers' flag to ignored it.\n" + "2. Set '--force_rebalance_replicas_on_maintenance_tservers' to force " + "rebalancing replicas among all known tservers.\n" + "3. Exit maintenance mode on the tserver.", + ts_uuid)); + } + } + return Status::OK(); +} + RebalancerTool::AlgoBasedRunner::AlgoBasedRunner( RebalancerTool* rebalancer, std::unordered_set ignored_tservers, @@ -923,23 +953,7 @@ Status RebalancerTool::AlgoBasedRunner::GetNextMovesImpl( const auto& loc = location(); ClusterRawInfo raw_info; RETURN_NOT_OK(rebalancer_->GetClusterRawInfo(loc, &raw_info)); - - // For simplicity, allow to run the rebalancing only when all tablet servers - // are in good shape (except those specified in 'ignored_tservers_'). - // Otherwise, the rebalancing might interfere with the - // automatic re-replication or get unexpected errors while moving replicas. - for (const auto& s : raw_info.tserver_summaries) { - if (s.health != ServerHealth::HEALTHY) { - if (ContainsKey(ignored_tservers_, s.uuid)) { - LOG(INFO) << Substitute("ignoring unhealthy tablet server $0 ($1).", - s.uuid, s.address); - continue; - } - return Status::IllegalState( - Substitute("tablet server $0 ($1): unacceptable health status $2", - s.uuid, s.address, ServerHealthToString(s.health))); - } - } + RETURN_NOT_OK(CheckTabletServers(raw_info)); TabletsPlacementInfo tpi; if (!loc) { @@ -1235,24 +1249,8 @@ Status RebalancerTool::ReplaceBasedRunner::GetNextMovesImpl( vector* replica_moves) { ClusterRawInfo raw_info; RETURN_NOT_OK(rebalancer_->GetClusterRawInfo(boost::none, &raw_info)); + RETURN_NOT_OK(CheckTabletServers(raw_info)); - // For simplicity, allow to run the rebalancing only when all tablet servers - // are in good shape (except those specified in 'ignored_tservers_'). - // Otherwise, the rebalancing might interfere with the - // automatic re-replication or get unexpected errors while moving replicas. - // TODO(aserbin): move it somewhere else? - for (const auto& s : raw_info.tserver_summaries) { - if (s.health != ServerHealth::HEALTHY) { - if (ContainsKey(ignored_tservers_, s.uuid)) { - LOG(INFO) << Substitute("ignoring unhealthy tablet server $0 ($1).", - s.uuid, s.address); - continue; - } - return Status::IllegalState( - Substitute("tablet server $0 ($1): unacceptable health status $2", - s.uuid, s.address, ServerHealthToString(s.health))); - } - } ClusterInfo ci; RETURN_NOT_OK(rebalancer_->BuildClusterInfo(raw_info, scheduled_moves_, &ci)); return GetReplaceMoves(ci, raw_info, replica_moves); @@ -1437,23 +1435,9 @@ Status RebalancerTool::IgnoredTserversRunner::CheckIgnoredTServers(const Cluster max_replication_factor)); } - LeaderMasterProxy proxy(client_); - ListTabletServersRequestPB req; - ListTabletServersResponsePB resp; - req.set_include_states(true); - RETURN_NOT_OK((proxy.SyncRpc( - req, &resp, "ListTabletServers", &MasterServiceProxy::ListTabletServersAsync))); - if (resp.has_error()) { - return StatusFromPB(resp.error().status()); - } - - const auto& servers = resp.servers(); - for (const auto& server : servers) { - const auto& ts_uuid = server.instance_id().permanent_uuid(); - if (!ContainsKey(ci.tservers_to_empty, ts_uuid)) { - continue; - } - if (server.state() != master::TServerStatePB::MAINTENANCE_MODE) { + for (const auto& it : ci.tservers_to_empty) { + const string& ts_uuid = it.first; + if (!ContainsKey(raw_info.tservers_in_maintenance_mode, ts_uuid)) { return Status::IllegalState(Substitute( "You should set maintenance mode for tablet server $0 first", ts_uuid)); } diff --git a/src/kudu/tools/rebalancer_tool.h b/src/kudu/tools/rebalancer_tool.h index c44172fb41..fb601b4c67 100644 --- a/src/kudu/tools/rebalancer_tool.h +++ b/src/kudu/tools/rebalancer_tool.h @@ -97,6 +97,10 @@ class RebalancerTool : public rebalance::Rebalancer { // (i.e. succeeded or failed). void UpdateOnMoveCompleted(const std::string& ts_uuid); + // Check if all the tablets servers (excluding those specified in 'ignored_tservers') + // are healthy and available for replica placement. + Status CheckTabletServers(const rebalance::ClusterRawInfo& raw_info); + // A pointer to the Rebalancer object. RebalancerTool* rebalancer_; diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc index 38fb4a4ce5..1c70264c4b 100644 --- a/src/kudu/tools/tool_action_cluster.cc +++ b/src/kudu/tools/tool_action_cluster.cc @@ -153,6 +153,15 @@ DEFINE_double(load_imbalance_threshold, "proven to be a good choice between 'ideal' and 'good enough' " "replica distributions."); +DEFINE_bool(force_rebalance_replicas_on_maintenance_tservers, false, + "This flag only takes effect in the case that some tservers are set maintenance " + "mode but not specified in 'ignored_tservers'. If set true, the tool would rebalance " + "all known replicas among all known tservers. The side effect of this is new " + "replicas may be moved to maintenance tservers which are likely to be restarted or " + "decommissioned soon. It is generally more recommended to specify all maintenance " + "tservers in 'ignored_tservers', so that the rebalancer tool would ignore these " + "tservers' states and replicas on them when planning replica moves."); + static bool ValidateMoveSingleReplicas(const char* flag_name, const string& flag_value) { const vector allowed_values = { "auto", "enabled", "disabled" }; @@ -321,7 +330,8 @@ Status RunRebalance(const RunnerContext& context) { !FLAGS_disable_policy_fixer, !FLAGS_disable_cross_location_rebalancing, !FLAGS_disable_intra_location_rebalancing, - FLAGS_load_imbalance_threshold)); + FLAGS_load_imbalance_threshold, + FLAGS_force_rebalance_replicas_on_maintenance_tservers)); // Print info on pre-rebalance distribution of replicas. RETURN_NOT_OK(rebalancer.PrintStats(cout)); @@ -413,6 +423,7 @@ unique_ptr BuildClusterMode() { .AddOptionalParameter("disable_intra_location_rebalancing") .AddOptionalParameter("disable_policy_fixer") .AddOptionalParameter("fetch_info_concurrency") + .AddOptionalParameter("force_rebalance_replicas_on_maintenance_tservers") .AddOptionalParameter("ignored_tservers") .AddOptionalParameter("load_imbalance_threshold") .AddOptionalParameter("max_moves_per_server") @@ -432,4 +443,3 @@ unique_ptr BuildClusterMode() { } // namespace tools } // namespace kudu -