Skip to content

Commit

Permalink
planner: add a panel to display the fine-grained operation duration o…
Browse files Browse the repository at this point in the history
…f the Plan Cache (pingcap#57070)

ref pingcap#54057
  • Loading branch information
qw4990 authored Nov 4, 2024
1 parent 538839b commit f2621fa
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 2 deletions.
133 changes: 132 additions & 1 deletion pkg/metrics/grafana/tidb.json
Original file line number Diff line number Diff line change
Expand Up @@ -8834,6 +8834,137 @@
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"decimals": null,
"description": "The time cost of plan cache operations, broken down by operation type",
"editable": true,
"error": false,
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 37
},
"hiddenSeries": false,
"id": 23763572017,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"hideEmpty": true,
"hideZero": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": null,
"sortDesc": null,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.17",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tidb_server_plan_cache_process_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", sql_type!=\"internal\"}[1m])) by (le, type) / sum(rate(tidb_server_plan_cache_process_duration_seconds_count{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", sql_type!=\"internal\"}[1m])) by (le, type)",
"format": "time_series",
"instant": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "avg-{{type}}",
"refId": "A",
"step": 30
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(tidb_server_plan_cache_process_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", sql_type!=\"internal\"}[1m])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "p99-{{type}}",
"refId": "B"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.80, sum(rate(tidb_server_plan_cache_process_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", sql_type!=\"internal\"}[1m])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "p80-{{type}}",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Plan Cache Process Duration",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:65",
"decimals": null,
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:66",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
Expand All @@ -8851,7 +8982,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 37
"y": 45
},
"hiddenSeries": false,
"id": 311,
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ func RegisterMetrics() {
prometheus.MustRegister(PlanCacheMissCounter)
prometheus.MustRegister(PlanCacheInstanceMemoryUsage)
prometheus.MustRegister(PlanCacheInstancePlanNumCounter)
prometheus.MustRegister(PlanCacheProcessDuration)
prometheus.MustRegister(PseudoEstimation)
prometheus.MustRegister(PacketIOCounter)
prometheus.MustRegister(QueryDurationHistogram)
Expand Down
10 changes: 10 additions & 0 deletions pkg/metrics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
PlanCacheMissCounter *prometheus.CounterVec
PlanCacheInstanceMemoryUsage *prometheus.GaugeVec
PlanCacheInstancePlanNumCounter *prometheus.GaugeVec
PlanCacheProcessDuration *prometheus.HistogramVec
ReadFromTableCacheCounter prometheus.Counter
HandShakeErrorCounter prometheus.Counter
GetTokenDurationHistogram prometheus.Histogram
Expand Down Expand Up @@ -207,6 +208,15 @@ func InitServerMetrics() {
Help: "Counter of plan of all prepared plan cache in a instance",
}, []string{LblType})

PlanCacheProcessDuration = NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "tidb",
Subsystem: "server",
Name: "plan_cache_process_duration_seconds",
Help: "Bucketed histogram of processing time (s) of plan cache operations.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days
}, []string{LblType})

ReadFromTableCacheCounter = NewCounter(
prometheus.CounterOpts{
Namespace: "tidb",
Expand Down
12 changes: 12 additions & 0 deletions pkg/planner/core/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ var (
instancePlanCacheInstancePlanNumCounter prometheus.Gauge
instancePlanCacheInstanceMemoryUsage prometheus.Gauge
instancePlanCacheInstanceNumEvict prometheus.Gauge
sessionPlanCacheLookupDuration prometheus.Observer
instancePlanCacheLookupDuration prometheus.Observer
)

func init() {
Expand All @@ -54,6 +56,8 @@ func InitMetricsVars() {
instancePlanCacheInstancePlanNumCounter = metrics.PlanCacheInstancePlanNumCounter.WithLabelValues(" instance-plan-cache")
instancePlanCacheInstanceMemoryUsage = metrics.PlanCacheInstanceMemoryUsage.WithLabelValues(" instance-plan-cache")
instancePlanCacheInstanceNumEvict = metrics.PlanCacheInstancePlanNumCounter.WithLabelValues(" instance-plan-cache-last-evict")
sessionPlanCacheLookupDuration = metrics.PlanCacheProcessDuration.WithLabelValues(" session-plan-cache-lookup")
instancePlanCacheLookupDuration = metrics.PlanCacheProcessDuration.WithLabelValues(" instance-plan-cache-lookup")
}

// GetPlanCacheHitCounter get different plan cache hit counter
Expand Down Expand Up @@ -93,6 +97,14 @@ func GetPlanCacheInstanceMemoryUsage(instancePlanCache bool) prometheus.Gauge {
return sessionPlanCacheInstanceMemoryUsage
}

// GetPlanCacheLookupDuration returns the observer used to record plan cache
// lookup durations: the instance-level observer when instancePlanCache is
// true, otherwise the session-level one.
func GetPlanCacheLookupDuration(instancePlanCache bool) prometheus.Observer {
	if !instancePlanCache {
		return sessionPlanCacheLookupDuration
	}
	return instancePlanCacheLookupDuration
}

// GetPlanCacheInstanceEvict get instance plan cache evict counter.
func GetPlanCacheInstanceEvict() prometheus.Gauge {
return instancePlanCacheInstanceNumEvict
Expand Down
9 changes: 8 additions & 1 deletion pkg/planner/core/plan_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package core

import (
"context"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/domain"
Expand Down Expand Up @@ -243,7 +244,13 @@ func instancePlanCacheEnabled(ctx context.Context) bool {
}

func lookupPlanCache(ctx context.Context, sctx sessionctx.Context, cacheKey string, paramTypes []*types.FieldType) (cachedVal *PlanCacheValue, hit bool) {
if instancePlanCacheEnabled(ctx) {
useInstanceCache := instancePlanCacheEnabled(ctx)
defer func(begin time.Time) {
if hit {
core_metrics.GetPlanCacheLookupDuration(useInstanceCache).Observe(time.Since(begin).Seconds())
}
}(time.Now())
if useInstanceCache {
if v, hit := domain.GetDomain(sctx).GetInstancePlanCache().Get(cacheKey, paramTypes); hit {
cachedVal = v.(*PlanCacheValue)
return cachedVal.CloneForInstancePlanCache(ctx, sctx.GetPlanCtx()) // clone the value to solve concurrency problem
Expand Down

0 comments on commit f2621fa

Please sign in to comment.