From 6e7f02da5bb5eb120725d643b948366ac1cfd8ed Mon Sep 17 00:00:00 2001 From: Alexey Serbin Date: Sat, 1 May 2021 18:54:00 -0700 Subject: [PATCH] [tests] fix flakiness in TableLocationsCacheMultiMasterTest The TableLocationsCacheMultiMasterTest.ResetCache scenario showed signs of flakiness. It turned out that the root cause of the issue was the absence of Raft leader leases. This patch fixes the issue. I ran the scenario using dist-test before and after this patch (DEBUG builds): before: 4 out of 256 failed http://dist-test.cloudera.org/job?job_id=aserbin.1619919271.11533 after : 0 out of 256 failed http://dist-test.cloudera.org/job?job_id=aserbin.1619919744.16115 Change-Id: Id53f0c537bdf1ec65da7d4bbf695864011f3e2ae Reviewed-on: http://gerrit.cloudera.org:8080/17384 Tested-by: Alexey Serbin Reviewed-by: Grant Henke --- src/kudu/integration-tests/table_locations-itest.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/kudu/integration-tests/table_locations-itest.cc b/src/kudu/integration-tests/table_locations-itest.cc index c5ecebba74..3338fae594 100644 --- a/src/kudu/integration-tests/table_locations-itest.cc +++ b/src/kudu/integration-tests/table_locations-itest.cc @@ -40,9 +40,9 @@ #include "kudu/common/common.pb.h" #include "kudu/common/partial_row.h" #include "kudu/common/row_operations.h" +#include "kudu/common/row_operations.pb.h" #include "kudu/common/schema.h" #include "kudu/common/wire_protocol.h" -#include "kudu/common/wire_protocol.pb.h" #include "kudu/gutil/ref_counted.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/integration-tests/cluster_itest_util.h" @@ -1265,6 +1265,7 @@ TEST_F(TableLocationsCacheMultiMasterTest, ResetCache) { int former_leader_master_idx; ASSERT_OK(cluster_->GetLeaderMasterIndex(&former_leader_master_idx)); + int leader_master_idx = -1; ASSERT_EVENTUALLY([&] { // Induce a change in master leadership (maybe, even few of them, up to the // number of masters in the cluster). 
@@ -1278,15 +1279,14 @@ TEST_F(TableLocationsCacheMultiMasterTest, ResetCache) { 3 * kRaftHeartbeatIntervalMs)); ASSERT_OK(cluster_->master(idx)->Resume()); } - int leader_master_idx; ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx)); ASSERT_NE(former_leader_master_idx, leader_master_idx); }); + // An extra sanity check. + ASSERT_NE(-1, leader_master_idx); // Make sure all the cache's metrics are reset once master just has become // a leader. - int leader_master_idx; - ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_master_idx)); NO_FATALS(CheckCacheMetricsReset(leader_master_idx)); }