From f2d4e4724778dee01e42ba5d7362fa5a048d7e81 Mon Sep 17 00:00:00 2001 From: "Zhuomin(Charming) Liu" Date: Tue, 29 Sep 2020 14:39:23 +0800 Subject: [PATCH] distsql: add metrics for coprocessor cache (#19979) --- distsql/select_result.go | 7 +++ executor/inspection_summary.go | 1 + infoschema/metric_table_def.go | 6 +++ metrics/distsql.go | 8 +++ metrics/grafana/tidb.json | 95 ++++++++++++++++++++++++++++++++++ metrics/metrics.go | 1 + store/tikv/coprocessor.go | 10 +++- 7 files changed, 126 insertions(+), 2 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index 42cc246f4c536..f9840aed95f4c 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -43,6 +43,9 @@ import ( var ( errQueryInterrupted = terror.ClassExecutor.NewStd(errno.ErrQueryInterrupted) + + coprCacheHistogramHit = metrics.DistSQLCoprCacheHistogram.WithLabelValues("hit") + coprCacheHistogramMiss = metrics.DistSQLCoprCacheHistogram.WithLabelValues("miss") ) var ( @@ -155,6 +158,10 @@ func (r *selectResult) fetchResp(ctx context.Context) error { break } } + if r.stats != nil { + coprCacheHistogramHit.Observe(float64(r.stats.CoprCacheHitNum)) + coprCacheHistogramMiss.Observe(float64(len(r.stats.copRespTime) - int(r.stats.CoprCacheHitNum))) + } return nil } diff --git a/executor/inspection_summary.go b/executor/inspection_summary.go index 8ff8b7a4c64d2..37aef042a62f9 100644 --- a/executor/inspection_summary.go +++ b/executor/inspection_summary.go @@ -140,6 +140,7 @@ var inspectionSummaryRules = map[string][]string{ "tidb_distsql_partial_scan_key_num", "tidb_distsql_qps", "tidb_distsql_scan_key_num", + "tidb_distsql_copr_cache", "tidb_region_cache_ops", "tidb_batch_client_pending_req_count", "tidb_batch_client_unavailable_duration", diff --git a/infoschema/metric_table_def.go b/infoschema/metric_table_def.go index 8e2be18d3d0a8..f74820d6f1055 100644 --- a/infoschema/metric_table_def.go +++ b/infoschema/metric_table_def.go @@ -2495,6 +2495,12 @@ var MetricTableMap = map[string]MetricTableDef{ Labels: []string{"instance", "type"}, Comment: "The total time of distsql execution(second)", }, + "tidb_distsql_copr_cache": { + Comment: "The quantile of TiDB distsql coprocessor cache", + PromQL: "histogram_quantile($QUANTILE, sum(rate(tidb_distsql_copr_cache_buckets{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (type,instance))", + Labels: []string{"instance", "type"}, + Quantile: 0.95, + }, "tidb_execute_total_count": { PromQL: "sum(increase(tidb_session_execute_duration_seconds_count{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (instance,sql_type)", Labels: []string{"instance", "sql_type"}, diff --git a/metrics/distsql.go b/metrics/distsql.go index 6074b63c3e82f..9eef1c62f0310 100644 --- a/metrics/distsql.go +++ b/metrics/distsql.go @@ -52,4 +52,12 @@ var ( Help: "number of partial results for each query.", }, ) + DistSQLCoprCacheHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "tidb", + Subsystem: "distsql", + Name: "copr_cache", + Help: "coprocessor cache hit, evict and miss number", + Buckets: prometheus.ExponentialBuckets(1, 2, 16), + }, []string{LblType}) ) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index abe270bfcf874..acd1539761e5f 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -6161,6 +6161,101 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiDB coprocessor cache hit, evict and miss number", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 163 + }, + "id": 175, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(1, sum(rate(tidb_distsql_copr_cache_buckets[1m])) by (type))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Coprocessor Cache", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, diff --git a/metrics/metrics.go b/metrics/metrics.go index ab6c6d84ed1d9..be843ad6f02f0 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -77,6 +77,7 @@ func RegisterMetrics() { prometheus.MustRegister(DDLWorkerHistogram) prometheus.MustRegister(DeploySyncerHistogram) prometheus.MustRegister(DistSQLPartialCountHistogram) + prometheus.MustRegister(DistSQLCoprCacheHistogram) prometheus.MustRegister(DistSQLQueryHistogram) prometheus.MustRegister(DistSQLScanKeysHistogram) prometheus.MustRegister(DistSQLScanKeysPartialHistogram) diff --git a/store/tikv/coprocessor.go b/store/tikv/coprocessor.go index c914d068fca2a..e730cdf43cc85 100644 --- a/store/tikv/coprocessor.go +++ b/store/tikv/coprocessor.go @@ -45,8 +45,11 @@ import ( "go.uber.org/zap" ) -var tikvTxnRegionsNumHistogramWithCoprocessor = metrics.TiKVTxnRegionsNumHistogram.WithLabelValues("coprocessor") -var tikvTxnRegionsNumHistogramWithBatchCoprocessor = metrics.TiKVTxnRegionsNumHistogram.WithLabelValues("batch_coprocessor") +var ( + tikvTxnRegionsNumHistogramWithCoprocessor = metrics.TiKVTxnRegionsNumHistogram.WithLabelValues("coprocessor") + tikvTxnRegionsNumHistogramWithBatchCoprocessor = metrics.TiKVTxnRegionsNumHistogram.WithLabelValues("batch_coprocessor") + coprCacheHistogramEvict = metrics.DistSQLCoprCacheHistogram.WithLabelValues("evict") +) // CopClient is coprocessor client. type CopClient struct { @@ -777,6 +780,9 @@ func (worker *copIteratorWorker) handleTask(ctx context.Context, task *copTask, remainTasks = remainTasks[1:] } } + if worker.store.coprCache != nil && worker.store.coprCache.cache.Metrics != nil { + coprCacheHistogramEvict.Observe(float64(worker.store.coprCache.cache.Metrics.KeysEvicted())) + } } // handleTaskOnce handles single copTask, successful results are send to channel.