From 475dc29d53dca0a30ab87d67aed62dc97cd88dcc Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 8 Apr 2020 12:20:03 +0800 Subject: [PATCH] executor/inspect: Add a `status_address` of `inspection_result` table to indicate the `status address`. (#15589) --- executor/inspection_result.go | 104 ++++++---- executor/inspection_result_test.go | 299 +++++++++++++++-------------- infoschema/tables.go | 1 + 3 files changed, 217 insertions(+), 187 deletions(-) diff --git a/executor/inspection_result.go b/executor/inspection_result.go index fdd5974bdfb7d..3ae1c7479245b 100644 --- a/executor/inspection_result.go +++ b/executor/inspection_result.go @@ -37,8 +37,9 @@ import ( type ( // inspectionResult represents a abnormal diagnosis result inspectionResult struct { - tp string - instance string + tp string + instance string + statusAddress string // represents the diagnostics item, e.g: `ddl.lease` `raftstore.cpuusage` item string // diagnosis result value base on current cluster status @@ -106,9 +107,11 @@ var inspectionRules = []inspectionRule{ type inspectionResultRetriever struct { dummyCloser - retrieved bool - extractor *plannercore.InspectionResultTableExtractor - timeRange plannercore.QueryTimeRange + retrieved bool + extractor *plannercore.InspectionResultTableExtractor + timeRange plannercore.QueryTimeRange + instanceToStatusAddress map[string]string + statusToInstanceAddress map[string]string } func (e *inspectionResultRetriever) retrieve(ctx context.Context, sctx sessionctx.Context) ([][]types.Datum, error) { @@ -134,6 +137,24 @@ func (e *inspectionResultRetriever) retrieve(ctx context.Context, sctx sessionct } }) + if e.instanceToStatusAddress == nil { + // Get cluster info. + e.instanceToStatusAddress = make(map[string]string) + e.statusToInstanceAddress = make(map[string]string) + sql := "select instance,status_address from information_schema.cluster_info;" + rows, _, err := sctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(sql) + if err != nil { + sctx.GetSessionVars().StmtCtx.AppendWarning(fmt.Errorf("get cluster info failed: %v", err)) + } + for _, row := range rows { + if row.Len() < 2 { + continue + } + e.instanceToStatusAddress[row.GetString(0)] = row.GetString(1) + e.statusToInstanceAddress[row.GetString(1)] = row.GetString(0) + } + } + rules := inspectionFilter{set: e.extractor.Rules} items := inspectionFilter{set: e.extractor.Items, timeRange: e.timeRange} var finalRows [][]types.Datum @@ -163,11 +184,18 @@ func (e *inspectionResultRetriever) retrieve(ctx context.Context, sctx sessionct return results[i].instance < results[j].instance }) for _, result := range results { + if len(result.instance) == 0 { + result.instance = e.statusToInstanceAddress[result.statusAddress] + } + if len(result.statusAddress) == 0 { + result.statusAddress = e.instanceToStatusAddress[result.instance] + } finalRows = append(finalRows, types.MakeDatums( name, result.item, result.tp, result.instance, + result.statusAddress, result.actual, result.expected, result.severity, @@ -579,13 +607,13 @@ func (criticalErrorInspection) inspectError(ctx context.Context, sctx sessionctx result := inspectionResult{ tp: rule.tp, // NOTE: all tables which can be inspected here whose first label must be `instance` - instance: row.GetString(0), - item: rule.item, - actual: actual, - expected: "0", - severity: "critical", - detail: detail, - degree: degree, + statusAddress: row.GetString(0), + item: rule.item, + actual: actual, + expected: "0", + severity: "critical", + detail: detail, + degree: degree, } results = 
append(results, result) } @@ -614,14 +642,14 @@ func (criticalErrorInspection) inspectForServerDown(ctx context.Context, sctx se } detail := fmt.Sprintf("%s %s disconnect with prometheus around time '%s'", row.GetString(0), row.GetString(1), row.GetTime(2)) result := inspectionResult{ - tp: row.GetString(0), - instance: row.GetString(1), - item: item, - actual: "", - expected: "", - severity: "critical", - detail: detail, - degree: 10000 + float64(len(results)), + tp: row.GetString(0), + statusAddress: row.GetString(1), + item: item, + actual: "", + expected: "", + severity: "critical", + detail: detail, + degree: 10000 + float64(len(results)), } results = append(results, result) } @@ -748,8 +776,8 @@ func (thresholdCheckInspection) inspectThreshold1(ctx context.Context, sctx sess var sql string if len(rule.configKey) > 0 { - sql = fmt.Sprintf("select t2.instance, t1.cpu, (t2.value * %[2]f) as threshold, t2.value from "+ - "(select instance as status_address, max(value) as cpu from metrics_schema.tikv_thread_cpu %[4]s and name like '%[1]s' group by instance) as t1 join "+ + sql = fmt.Sprintf("select t1.status_address, t1.cpu, (t2.value * %[2]f) as threshold, t2.value from "+ + "(select status_address, max(sum_value) as cpu from (select instance as status_address, sum(value) as sum_value from metrics_schema.tikv_thread_cpu %[4]s and name like '%[1]s' group by instance, time) as tmp group by tmp.status_address) as t1 join "+ "(select instance, value from information_schema.cluster_config where type='tikv' and `key` = '%[3]s') as t2 join "+ "(select instance,status_address from information_schema.cluster_info where type='tikv') as t3 "+ "on t1.status_address=t3.status_address and t2.instance=t3.instance where t1.cpu > (t2.value * %[2]f)", rule.component, rule.threshold, rule.configKey, condition) @@ -774,14 +802,14 @@ func (thresholdCheckInspection) inspectThreshold1(ctx context.Context, sctx sess } detail := fmt.Sprintf("the '%s' max cpu-usage of %s tikv is too high", rule.item, row.GetString(0)) result := inspectionResult{ - tp: "tikv", - instance: row.GetString(0), - item: rule.item, - actual: actual, - expected: expected, - severity: "warning", - detail: detail, - degree: degree, + tp: "tikv", + statusAddress: row.GetString(0), + item: rule.item, + actual: actual, + expected: expected, + severity: "warning", + detail: detail, + degree: degree, } results = append(results, result) } @@ -950,14 +978,14 @@ func (thresholdCheckInspection) inspectThreshold2(ctx context.Context, sctx sess detail = fmt.Sprintf(detail, row.GetString(0)) } result := inspectionResult{ - tp: rule.tp, - instance: row.GetString(0), - item: rule.item, - actual: actual, - expected: expected, - severity: "warning", - detail: detail, - degree: degree, + tp: rule.tp, + statusAddress: row.GetString(0), + item: rule.item, + actual: actual, + expected: expected, + severity: "warning", + detail: detail, + degree: degree, } results = append(results, result) } diff --git a/executor/inspection_result_test.go b/executor/inspection_result_test.go index 97f2e9c34fc09..354f3f7d588aa 100644 --- a/executor/inspection_result_test.go +++ b/executor/inspection_result_test.go @@ -200,8 +200,12 @@ func (s *inspectionResultSuite) setupForInspection(c *C, mockData map[string][][ // mock cluster information configurations[infoschema.TableClusterInfo] = variable.TableSnapshot{ Rows: [][]types.Datum{ - types.MakeDatums("tikv", "tikv-0", "tikv-0", "4.0", "a234c", "", ""), - types.MakeDatums("tikv", "tikv-1", "tikv-1", "4.0", "a234c", "", ""), + 
types.MakeDatums("pd", "pd-0", "pd-0", "4.0", "a234c", "", ""), + types.MakeDatums("tidb", "tidb-0", "tidb-0s", "4.0", "a234c", "", ""), + types.MakeDatums("tidb", "tidb-1", "tidb-1s", "4.0", "a234c", "", ""), + types.MakeDatums("tikv", "tikv-0", "tikv-0s", "4.0", "a234c", "", ""), + types.MakeDatums("tikv", "tikv-1", "tikv-1s", "4.0", "a234c", "", ""), + types.MakeDatums("tikv", "tikv-2", "tikv-2s", "4.0", "a234c", "", ""), }, } } @@ -229,19 +233,22 @@ func (s *inspectionResultSuite) TestThresholdCheckInspection(c *C) { mockData := map[string][][]types.Datum{ // columns: time, instance, name, value "tikv_thread_cpu": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_normal0", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-1", "cop_normal0", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_normal1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_high1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_low1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "grpc_1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "raftstore_1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "apply_0", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_norm1", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_high2", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_low0", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "sched_2", 10.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "split_check", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_normal0", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_normal1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-1s", "cop_normal0", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_high1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_high2", 10.0), + types.MakeDatums(datetime("2020-02-14 05:21:00"), "tikv-0s", "cop_high1", 5.0), + types.MakeDatums(datetime("2020-02-14 05:22:00"), "tikv-0s", "cop_high1", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_low1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "grpc_1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "raftstore_1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "apply_0", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_norm1", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_high2", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_low0", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "sched_2", 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "split_check", 10.0), }, "pd_tso_wait_duration": {}, "tidb_get_token_duration": {}, @@ -263,48 +270,48 @@ func (s *inspectionResultSuite) TestThresholdCheckInspection(c *C) { ctx := s.setupForInspection(c, mockData, nil) defer s.tearDownForInspection(c) - rs, err := tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item") + rs, err := 
tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance,status_address, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item") c.Assert(err, IsNil) result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) result.Check(testkit.Rows( - "apply-cpu tikv tikv-0 10.00 < 1.60, config: raftstore.apply-pool-size=2 the 'apply-cpu' max cpu-usage of tikv-0 tikv is too high", - "coprocessor-high-cpu tikv tikv-0 10.00 < 3.60, config: readpool.coprocessor.high-concurrency=4 the 'coprocessor-high-cpu' max cpu-usage of tikv-0 tikv is too high", - "coprocessor-low-cpu tikv tikv-0 10.00 < 3.60, config: readpool.coprocessor.low-concurrency=4 the 'coprocessor-low-cpu' max cpu-usage of tikv-0 tikv is too high", - "coprocessor-normal-cpu tikv tikv-0 10.00 < 3.60, config: readpool.coprocessor.normal-concurrency=4 the 'coprocessor-normal-cpu' max cpu-usage of tikv-0 tikv is too high", - "coprocessor-normal-cpu tikv tikv-1 10.00 < 7.20, config: readpool.coprocessor.normal-concurrency=8 the 'coprocessor-normal-cpu' max cpu-usage of tikv-1 tikv is too high", - "grpc-cpu tikv tikv-0 10.00 < 7.20, config: server.grpc-concurrency=8 the 'grpc-cpu' max cpu-usage of tikv-0 tikv is too high", - "raftstore-cpu tikv tikv-0 10.00 < 1.60, config: raftstore.store-pool-size=2 the 'raftstore-cpu' max cpu-usage of tikv-0 tikv is too high", - "scheduler-worker-cpu tikv tikv-0 10.00 < 5.10, config: storage.scheduler-worker-pool-size=6 the 'scheduler-worker-cpu' max cpu-usage of tikv-0 tikv is too high", - "split-check-cpu tikv tikv-0 10.00 < 0.00 the 'split-check-cpu' max cpu-usage of tikv-0 tikv is too high", - "storage-readpool-high-cpu tikv tikv-0 10.00 < 3.60, config: readpool.storage.high-concurrency=4 the 'storage-readpool-high-cpu' max cpu-usage of tikv-0 tikv is too high", - "storage-readpool-low-cpu tikv tikv-0 10.00 < 3.60, config: readpool.storage.low-concurrency=4 the 'storage-readpool-low-cpu' max cpu-usage of tikv-0 tikv is too high", - "storage-readpool-normal-cpu tikv tikv-0 10.00 < 3.60, config: readpool.storage.normal-concurrency=4 the 'storage-readpool-normal-cpu' max cpu-usage of tikv-0 tikv is too high", + "apply-cpu tikv tikv-0 tikv-0s 10.00 < 1.60, config: raftstore.apply-pool-size=2 the 'apply-cpu' max cpu-usage of tikv-0s tikv is too high", + "coprocessor-high-cpu tikv tikv-0 tikv-0s 20.00 < 3.60, config: readpool.coprocessor.high-concurrency=4 the 'coprocessor-high-cpu' max cpu-usage of tikv-0s tikv is too high", + "coprocessor-low-cpu tikv tikv-0 tikv-0s 10.00 < 3.60, config: readpool.coprocessor.low-concurrency=4 the 'coprocessor-low-cpu' max cpu-usage of tikv-0s tikv is too high", + "coprocessor-normal-cpu tikv tikv-0 tikv-0s 20.00 < 3.60, config: readpool.coprocessor.normal-concurrency=4 the 'coprocessor-normal-cpu' max cpu-usage of tikv-0s tikv is too high", + "coprocessor-normal-cpu tikv tikv-1 tikv-1s 10.00 < 7.20, config: readpool.coprocessor.normal-concurrency=8 the 'coprocessor-normal-cpu' max cpu-usage of tikv-1s tikv is too high", + "grpc-cpu tikv tikv-0 tikv-0s 10.00 < 7.20, config: server.grpc-concurrency=8 the 'grpc-cpu' max cpu-usage of tikv-0s tikv is too high", + "raftstore-cpu tikv tikv-0 tikv-0s 10.00 < 1.60, config: raftstore.store-pool-size=2 the 'raftstore-cpu' max cpu-usage of tikv-0s 
tikv is too high", + "scheduler-worker-cpu tikv tikv-0 tikv-0s 10.00 < 5.10, config: storage.scheduler-worker-pool-size=6 the 'scheduler-worker-cpu' max cpu-usage of tikv-0s tikv is too high", + "split-check-cpu tikv tikv-0 tikv-0s 10.00 < 0.00 the 'split-check-cpu' max cpu-usage of tikv-0s tikv is too high", + "storage-readpool-high-cpu tikv tikv-0 tikv-0s 10.00 < 3.60, config: readpool.storage.high-concurrency=4 the 'storage-readpool-high-cpu' max cpu-usage of tikv-0s tikv is too high", + "storage-readpool-low-cpu tikv tikv-0 tikv-0s 10.00 < 3.60, config: readpool.storage.low-concurrency=4 the 'storage-readpool-low-cpu' max cpu-usage of tikv-0s tikv is too high", + "storage-readpool-normal-cpu tikv tikv-0 tikv-0s 10.00 < 3.60, config: readpool.storage.normal-concurrency=4 the 'storage-readpool-normal-cpu' max cpu-usage of tikv-0s tikv is too high", )) // construct some mock normal data mockData["tikv_thread_cpu"] = [][]types.Datum{ // columns: time, instance, name, value - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_normal0", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_high1", 0.1), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "cop_low1", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "grpc_1", 7.21), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "grpc_2", 0.21), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "raftstore_1", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "apply_0", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_norm1", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_high2", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "store_read_low0", 1.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "sched_2", 0.3), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "split_check", 0.5), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_normal0", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_high1", 0.1), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "cop_low1", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "grpc_1", 7.21), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "grpc_2", 0.21), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "raftstore_1", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "apply_0", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_norm1", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_high2", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "store_read_low0", 1.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "sched_2", 0.3), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "split_check", 0.5), } ctx = context.WithValue(ctx, "__mockMetricsTableData", mockData) - rs, err = tk.Se.Execute(ctx, "select item, type, instance, value, reference from information_schema.inspection_result where rule='threshold-check' order by item") + rs, err = tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance,status_address, value, reference from information_schema.inspection_result where rule='threshold-check' order by item") c.Assert(err, IsNil) result = tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL 
failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) - result.Check(testkit.Rows("grpc-cpu tikv tikv-0 7.21 < 7.20, config: server.grpc-concurrency=8")) + result.Check(testkit.Rows("grpc-cpu tikv tikv-0 tikv-0s 7.42 < 7.20, config: server.grpc-concurrency=8")) } func (s *inspectionResultSuite) TestThresholdCheckInspection2(c *C) { @@ -321,43 +328,43 @@ func (s *inspectionResultSuite) TestThresholdCheckInspection2(c *C) { types.MakeDatums(datetime("2020-02-14 05:20:00"), "pd-0", 0.999, 0.06), }, "tidb_get_token_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tidb-0", 0.999, 0.02*10e5), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tidb-0s", 0.999, 0.02*10e5), }, "tidb_load_schema_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tidb-0", 0.99, 2.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tidb-0s", 0.99, 2.0), }, "tikv_scheduler_command_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "get", 0.99, 2.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "write", 0.99, 5.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "get", 0.99, 2.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "write", 0.99, 5.0), }, "tikv_handle_snapshot_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "gen", 0.999, 40.0), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "read", 0.999, 10.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "gen", 0.999, 40.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "read", 0.999, 10.0), }, "tikv_storage_async_request_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "write", 0.999, 0.2), - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "snapshot", 0.999, 0.06), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "write", 0.999, 0.2), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "snapshot", 0.999, 0.06), }, "tikv_engine_write_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "write_max", "kv", 0.2*10e5), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "write_max", "kv", 0.2*10e5), }, "tikv_engine_max_get_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "get_max", "kv", 0.06*10e5), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "get_max", "kv", 0.06*10e5), }, "tikv_engine_max_seek_duration": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "seek_max", "raft", 0.06*10e5), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "seek_max", "raft", 0.06*10e5), }, "tikv_scheduler_pending_commands": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", 1001.0), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", 1001.0), }, "tikv_block_index_cache_hit": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "kv", 0.94), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "kv", 0.94), }, "tikv_block_data_cache_hit": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "kv", 0.79), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "kv", 0.79), }, "tikv_block_filter_cache_hit": { - types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0", "kv", 0.93), + types.MakeDatums(datetime("2020-02-14 05:20:00"), "tikv-0s", "kv", 0.93), }, "tikv_thread_cpu": 
{}, "pd_scheduler_store_status": {}, @@ -367,25 +374,25 @@ func (s *inspectionResultSuite) TestThresholdCheckInspection2(c *C) { ctx := s.setupForInspection(c, mockData, nil) defer s.tearDownForInspection(c) - rs, err := tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item") + rs, err := tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance, status_address, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item") c.Assert(err, IsNil) result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) result.Check(testkit.Rows( - "data-block-cache-hit tikv tikv-0 0.790 > 0.800 min data-block-cache-hit rate of tikv-0 tikv is too low", - "filter-block-cache-hit tikv tikv-0 0.930 > 0.950 min filter-block-cache-hit rate of tikv-0 tikv is too low", - "get-token-duration tidb tidb-0 0.020 < 0.001 max duration of tidb-0 tidb get-token-duration is too slow", - "handle-snapshot-duration tikv tikv-0 40.000 < 30.000 max duration of tikv-0 tikv handle-snapshot-duration is too slow", - "index-block-cache-hit tikv tikv-0 0.940 > 0.950 min index-block-cache-hit rate of tikv-0 tikv is too low", - "load-schema-duration tidb tidb-0 2.000 < 1.000 max duration of tidb-0 tidb load-schema-duration is too slow", - "rocksdb-get-duration tikv tikv-0 0.060 < 0.050 max duration of tikv-0 tikv rocksdb-get-duration is too slow", - "rocksdb-seek-duration tikv tikv-0 0.060 < 0.050 max duration of tikv-0 tikv rocksdb-seek-duration is too slow", - "rocksdb-write-duration tikv tikv-0 0.200 < 0.100 max duration of tikv-0 tikv rocksdb-write-duration is too slow", - "scheduler-cmd-duration tikv tikv-0 5.000 < 0.100 max duration of tikv-0 tikv scheduler-cmd-duration is too slow", - "scheduler-pending-cmd-count tikv tikv-0 1001.000 < 1000.000 tikv-0 tikv scheduler has too many pending commands", - "storage-snapshot-duration tikv tikv-0 0.060 < 0.050 max duration of tikv-0 tikv storage-snapshot-duration is too slow", - "storage-write-duration tikv tikv-0 0.200 < 0.100 max duration of tikv-0 tikv storage-write-duration is too slow", - "tso-duration tidb pd-0 0.060 < 0.050 max duration of pd-0 tidb tso-duration is too slow", + "data-block-cache-hit tikv tikv-0 tikv-0s 0.790 > 0.800 min data-block-cache-hit rate of tikv-0s tikv is too low", + "filter-block-cache-hit tikv tikv-0 tikv-0s 0.930 > 0.950 min filter-block-cache-hit rate of tikv-0s tikv is too low", + "get-token-duration tidb tidb-0 tidb-0s 0.020 < 0.001 max duration of tidb-0s tidb get-token-duration is too slow", + "handle-snapshot-duration tikv tikv-0 tikv-0s 40.000 < 30.000 max duration of tikv-0s tikv handle-snapshot-duration is too slow", + "index-block-cache-hit tikv tikv-0 tikv-0s 0.940 > 0.950 min index-block-cache-hit rate of tikv-0s tikv is too low", + "load-schema-duration tidb tidb-0 tidb-0s 2.000 < 1.000 max duration of tidb-0s tidb load-schema-duration is too slow", + "rocksdb-get-duration tikv tikv-0 tikv-0s 0.060 < 0.050 max duration of tikv-0s tikv rocksdb-get-duration is too slow", + "rocksdb-seek-duration tikv tikv-0 tikv-0s 0.060 < 0.050 max duration of tikv-0s tikv rocksdb-seek-duration is too 
slow", + "rocksdb-write-duration tikv tikv-0 tikv-0s 0.200 < 0.100 max duration of tikv-0s tikv rocksdb-write-duration is too slow", + "scheduler-cmd-duration tikv tikv-0 tikv-0s 5.000 < 0.100 max duration of tikv-0s tikv scheduler-cmd-duration is too slow", + "scheduler-pending-cmd-count tikv tikv-0 tikv-0s 1001.000 < 1000.000 tikv-0s tikv scheduler has too many pending commands", + "storage-snapshot-duration tikv tikv-0 tikv-0s 0.060 < 0.050 max duration of tikv-0s tikv storage-snapshot-duration is too slow", + "storage-write-duration tikv tikv-0 tikv-0s 0.200 < 0.100 max duration of tikv-0s tikv storage-write-duration is too slow", + "tso-duration tidb pd-0 pd-0 0.060 < 0.050 max duration of pd-0 tidb tso-duration is too slow", )) } @@ -428,20 +435,20 @@ func (s *inspectionResultSuite) TestThresholdCheckInspection3(c *C) { defer s.tearDownForInspection(c) rs, err := tk.Se.Execute(ctx, `select /*+ time_range('2020-02-14 04:20:00','2020-02-14 05:23:00') */ - item, type, instance, value, reference, details from information_schema.inspection_result + item, type, instance,status_address, value, reference, details from information_schema.inspection_result where rule='threshold-check' and item in ('leader-score-balance','region-score-balance','region-count','region-health','store-available-balance','leader-drop') order by item`) c.Assert(err, IsNil) result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) result.Check(testkit.Rows( - "leader-drop tikv tikv-2 10000 <= 50 tikv-2 tikv has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 0", - "leader-drop tikv tikv-0 5000 <= 50 tikv-0 tikv has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 5000", - "leader-score-balance tikv tikv-1 50.00% < 5.00% tikv-0 max leader_score is 100.00, much more than tikv-1 min leader_score 50.00", - "region-count tikv tikv-0 20001.00 <= 20000 tikv-0 tikv has too many regions", - "region-health pd pd-0 110.00 < 100 the count of extra-perr and learner-peer and pending-peer are 110, it means the scheduling is too frequent or too slow", - "region-score-balance tikv tikv-1 10.00% < 5.00% tikv-0 max region_score is 100.00, much more than tikv-1 min region_score 90.00", - "store-available-balance tikv tikv-1 30.00% < 20.00% tikv-0 max store_available is 100.00, much more than tikv-1 min store_available 70.00")) + "leader-drop tikv tikv-2 tikv-2s 10000 <= 50 tikv-2 tikv has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 0", + "leader-drop tikv tikv-0 tikv-0s 5000 <= 50 tikv-0 tikv has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 5000", + "leader-score-balance tikv tikv-1 tikv-1s 50.00% < 5.00% tikv-0 max leader_score is 100.00, much more than tikv-1 min leader_score 50.00", + "region-count tikv tikv-0 tikv-0s 20001.00 <= 20000 tikv-0 tikv has too many regions", + "region-health pd pd-0 pd-0 110.00 < 100 the count of extra-perr and learner-peer and pending-peer are 110, it means the scheduling is too frequent or too slow", + "region-score-balance tikv tikv-1 tikv-1s 10.00% < 5.00% tikv-0 max region_score is 100.00, much more than tikv-1 min region_score 90.00", + "store-available-balance tikv tikv-1 tikv-1s 30.00% < 20.00% tikv-0 max store_available is 
100.00, much more than tikv-1 min store_available 70.00")) } func (s *inspectionResultSuite) TestCriticalErrorInspection(c *C) { @@ -463,10 +470,6 @@ func (s *inspectionResultSuite) TestCriticalErrorInspection(c *C) { c.Assert(failpoint.Enable(fpName2, fmt.Sprintf(`return("%s")`, fpExpr)), IsNil) defer func() { c.Assert(failpoint.Disable(fpName2), IsNil) }() - fpName := "github.com/pingcap/tidb/executor/mockMetricsTableData" - c.Assert(failpoint.Enable(fpName, "return"), IsNil) - defer func() { c.Assert(failpoint.Disable(fpName), IsNil) }() - datetime := func(str string) types.Time { return s.parseTime(c, tk.Se, str) } @@ -475,98 +478,96 @@ func (s *inspectionResultSuite) TestCriticalErrorInspection(c *C) { mockData := map[string][][]types.Datum{ // columns: time, instance, type, value "tikv_critical_error_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "type1", 0.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", "type1", 1.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-2", "type2", 5.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "type1", 0.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1s", "type1", 1.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-2s", "type2", 5.0), }, // columns: time, instance, value "tidb_panic_count_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 4.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 0.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1", 1.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0s", 4.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0s", 0.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1s", 1.0), }, // columns: time, instance, value "tidb_binlog_error_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-1", 4.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-2", 0.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-3", 1.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-1s", 4.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-2s", 0.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-3s", 1.0), }, // columns: time, instance, db, type, stage, value "tikv_scheduler_is_busy_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "db1", "type1", "stage1", 1.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "db2", "type1", "stage2", 2.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1", "db1", "type2", "stage1", 3.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "db1", "type1", "stage2", 4.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "db2", "type1", "stage1", 5.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "db1", "type2", "stage2", 6.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "db1", "type1", "stage1", 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0s", "db2", "type1", "stage2", 2.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1s", "db1", "type2", "stage1", 3.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0s", "db1", "type1", "stage2", 4.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0s", "db2", "type1", "stage1", 5.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1s", "db1", "type2", "stage2", 6.0), }, // columns: time, instance, db, value 
"tikv_coprocessor_is_busy_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "db1", 1.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "db2", 2.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1", "db1", 3.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "db1", 4.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "db2", 5.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "db1", 6.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "db1", 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0s", "db2", 2.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1s", "db1", 3.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0s", "db1", 4.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0s", "db2", 5.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1s", "db1", 6.0), }, // columns: time, instance, db, type, value "tikv_channel_full_total_count": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "db1", "type1", 1.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "db2", "type1", 2.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1", "db1", "type2", 3.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0", "db1", "type1", 4.0), - types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0", "db2", "type1", 5.0), - types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1", "db1", "type2", 6.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "db1", "type1", 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0s", "db2", "type1", 2.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1s", "db1", "type2", 3.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tikv-0s", "db1", "type1", 4.0), + types.MakeDatums(datetime("2020-02-12 10:39:00"), "tikv-0s", "db2", "type1", 5.0), + types.MakeDatums(datetime("2020-02-12 10:40:00"), "tikv-1s", "db1", "type2", 6.0), }, // columns: time, instance, db, value "tikv_engine_write_stall": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "kv", 1.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "raft", 2.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1", "reason3", 3.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "kv", 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0s", "raft", 2.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tikv-1s", "reason3", 3.0), }, // columns: time, instance, job, value "up": { - types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", "tikv", 1.0), - types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", "tikv", 0.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-0", "tidb", 0.0), - types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1", "tidb", 0.0), - types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-1", "tidb", 1.0), + types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0s", "tikv", 1.0), + types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0s", "tikv", 0.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-0s", "tidb", 0.0), + types.MakeDatums(datetime("2020-02-12 10:37:00"), "tidb-1s", "tidb", 0.0), + types.MakeDatums(datetime("2020-02-12 10:38:00"), "tidb-1s", "tidb", 1.0), }, } - ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData) - ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) 
bool { - return fpname == fpName - }) + ctx := s.setupForInspection(c, mockData, nil) + defer s.tearDownForInspection(c) - rs, err := tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, instance, value, details from information_schema.inspection_result where rule='critical-error'") + rs, err := tk.Se.Execute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, instance,status_address, value, details from information_schema.inspection_result where rule='critical-error'") c.Assert(err, IsNil) result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) result.Check(testkit.Rows( - "server-down tikv-0 tikv tikv-0 disconnect with prometheus around time '2020-02-12 10:36:00.000000'", - "server-down tidb-1 tidb tidb-1 disconnect with prometheus around time '2020-02-12 10:37:00.000000'", - "channel-is-full tikv-1 9.00(db1, type2) the total number of errors about 'channel-is-full' is too many", - "coprocessor-is-busy tikv-1 9.00(db1) the total number of errors about 'coprocessor-is-busy' is too many", - "channel-is-full tikv-0 7.00(db2, type1) the total number of errors about 'channel-is-full' is too many", - "coprocessor-is-busy tikv-0 7.00(db2) the total number of errors about 'coprocessor-is-busy' is too many", - "scheduler-is-busy tikv-1 6.00(db1, type2, stage2) the total number of errors about 'scheduler-is-busy' is too many", - "channel-is-full tikv-0 5.00(db1, type1) the total number of errors about 'channel-is-full' is too many", - "coprocessor-is-busy tikv-0 5.00(db1) the total number of errors about 'coprocessor-is-busy' is too many", - "critical-error tikv-2 5.00(type2) the total number of errors about 'critical-error' is too many", - "scheduler-is-busy tikv-0 5.00(db2, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many", - "binlog-error tidb-1 4.00 the total number of errors about 'binlog-error' is too many", - "panic-count tidb-0 4.00 the total number of errors about 'panic-count' is too many", - "scheduler-is-busy tikv-0 4.00(db1, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many", - "scheduler-is-busy tikv-1 3.00(db1, type2, stage1) the total number of errors about 'scheduler-is-busy' is too many", - "tikv_engine_write_stall tikv-1 3.00(reason3) the total number of errors about 'tikv_engine_write_stall' is too many", - "scheduler-is-busy tikv-0 2.00(db2, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many", - "tikv_engine_write_stall tikv-0 2.00(raft) the total number of errors about 'tikv_engine_write_stall' is too many", - "binlog-error tidb-3 1.00 the total number of errors about 'binlog-error' is too many", - "critical-error tikv-1 1.00(type1) the total number of errors about 'critical-error' is too many", - "panic-count tidb-1 1.00 the total number of errors about 'panic-count' is too many", - "scheduler-is-busy tikv-0 1.00(db1, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many", - "tikv_engine_write_stall tikv-0 1.00(kv) the total number of errors about 'tikv_engine_write_stall' is too many", + "server-down tikv-0 tikv-0s tikv tikv-0s disconnect with prometheus around time '2020-02-12 10:36:00.000000'", + "server-down tidb-1 tidb-1s tidb tidb-1s disconnect with prometheus around time '2020-02-12 
10:37:00.000000'", + "channel-is-full tikv-1 tikv-1s 9.00(db1, type2) the total number of errors about 'channel-is-full' is too many", + "coprocessor-is-busy tikv-1 tikv-1s 9.00(db1) the total number of errors about 'coprocessor-is-busy' is too many", + "channel-is-full tikv-0 tikv-0s 7.00(db2, type1) the total number of errors about 'channel-is-full' is too many", + "coprocessor-is-busy tikv-0 tikv-0s 7.00(db2) the total number of errors about 'coprocessor-is-busy' is too many", + "scheduler-is-busy tikv-1 tikv-1s 6.00(db1, type2, stage2) the total number of errors about 'scheduler-is-busy' is too many", + "channel-is-full tikv-0 tikv-0s 5.00(db1, type1) the total number of errors about 'channel-is-full' is too many", + "coprocessor-is-busy tikv-0 tikv-0s 5.00(db1) the total number of errors about 'coprocessor-is-busy' is too many", + "critical-error tikv-2 tikv-2s 5.00(type2) the total number of errors about 'critical-error' is too many", + "scheduler-is-busy tikv-0 tikv-0s 5.00(db2, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many", + "binlog-error tidb-1 tidb-1s 4.00 the total number of errors about 'binlog-error' is too many", + "panic-count tidb-0 tidb-0s 4.00 the total number of errors about 'panic-count' is too many", + "scheduler-is-busy tikv-0 tikv-0s 4.00(db1, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many", + "scheduler-is-busy tikv-1 tikv-1s 3.00(db1, type2, stage1) the total number of errors about 'scheduler-is-busy' is too many", + "tikv_engine_write_stall tikv-1 tikv-1s 3.00(reason3) the total number of errors about 'tikv_engine_write_stall' is too many", + "scheduler-is-busy tikv-0 tikv-0s 2.00(db2, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many", + "tikv_engine_write_stall tikv-0 tikv-0s 2.00(raft) the total number of errors about 'tikv_engine_write_stall' is too many", + "binlog-error tidb-3s 1.00 the total number of errors about 'binlog-error' is too many", + "critical-error tikv-1 tikv-1s 1.00(type1) the total number of errors about 'critical-error' is too many", + "panic-count tidb-1 tidb-1s 1.00 the total number of errors about 'panic-count' is too many", + "scheduler-is-busy tikv-0 tikv-0s 1.00(db1, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many", + "tikv_engine_write_stall tikv-0 tikv-0s 1.00(kv) the total number of errors about 'tikv_engine_write_stall' is too many", )) } @@ -679,7 +680,7 @@ func (s *inspectionResultSuite) TestConfigCheckOfStorageBlockCacheSize(c *C) { result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect SQL failed")) c.Assert(tk.Se.GetSessionVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetSessionVars().StmtCtx.GetWarnings())) result.Check(testkit.Rows( - "config storage.block-cache.capacity tikv 192.168.3.34 1099511627776 < 24159191040 warning There are 1 TiKV server in 192.168.3.34 node, the total 'storage.block-cache.capacity' of TiKV is more than (0.45 * total node memory)", - "config storage.block-cache.capacity tikv 192.168.3.33 32212254720 < 24159191040 warning There are 2 TiKV server in 192.168.3.33 node, the total 'storage.block-cache.capacity' of TiKV is more than (0.45 * total node memory)", + "config storage.block-cache.capacity tikv 192.168.3.34 1099511627776 < 24159191040 warning There are 1 TiKV server in 192.168.3.34 node, the total 'storage.block-cache.capacity' of TiKV is more than (0.45 * total node memory)", + "config 
storage.block-cache.capacity tikv 192.168.3.33 32212254720 < 24159191040 warning There are 2 TiKV server in 192.168.3.33 node, the total 'storage.block-cache.capacity' of TiKV is more than (0.45 * total node memory)", )) } diff --git a/infoschema/tables.go b/infoschema/tables.go index b80a790148ba8..d3bf7ad447683 100644 --- a/infoschema/tables.go +++ b/infoschema/tables.go @@ -935,6 +935,7 @@ var tableInspectionResultCols = []columnInfo{ {name: "ITEM", tp: mysql.TypeVarchar, size: 64}, {name: "TYPE", tp: mysql.TypeVarchar, size: 64}, {name: "INSTANCE", tp: mysql.TypeVarchar, size: 64}, + {name: "STATUS_ADDRESS", tp: mysql.TypeVarchar, size: 64}, {name: "VALUE", tp: mysql.TypeVarchar, size: 64}, {name: "REFERENCE", tp: mysql.TypeVarchar, size: 64}, {name: "SEVERITY", tp: mysql.TypeVarchar, size: 64},
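
A minimal usage sketch of the new column (not part of the diff; the time_range values below are illustrative, copied from the tests above): after this patch, information_schema.inspection_result exposes STATUS_ADDRESS next to INSTANCE, so a diagnosis row can be located by either address:

    select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */
           item, type, instance, status_address, value, reference, details
    from information_schema.inspection_result
    where rule = 'critical-error'
    order by item;

Rows built from metrics whose first label is a status address (for example the critical-error items) initially carry only statusAddress; retrieve() fills in INSTANCE from the instance/status_address pairs read from information_schema.cluster_info, and rows keyed by instance get STATUS_ADDRESS from the same mapping.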