From 883c1a10d555d932fce52bef2d09c1030153d10d Mon Sep 17 00:00:00 2001
From: JaySon
Date: Mon, 11 May 2020 12:42:54 +0800
Subject: [PATCH] Add options to analyze tables after tpc-h data loaded (#41)

* Add options to analyze tables after tpc-h data loaded

Signed-off-by: JaySon-Huang

* Update README

Signed-off-by: JaySon-Huang
---
 README.md          |  2 ++
 cmd/go-tpc/tpch.go | 18 ++++++++++++++++++
 tpch/ddl.go        |  9 +++++----
 tpch/workload.go   | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 60be2b5..41b5abf 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,8 @@ If you want to import tpcc data into TiDB, please refer to [import-to-tidb](docs
 ```bash
 # Prepare data with scale factor 1
 ./bin/go-tpc tpch --sf=1 prepare
+# Prepare data with scale factor 1, create a TiFlash replica, and analyze the tables after the data is loaded
+./bin/go-tpc tpch --sf=1 --analyze --tiflash prepare
 # Run TPCH workloads with result checking
 ./bin/go-tpc tpch --sf=1 --check=true run
 # Run TPCH workloads without result checking
diff --git a/cmd/go-tpc/tpch.go b/cmd/go-tpc/tpch.go
index 92adc8f..aae37fe 100644
--- a/cmd/go-tpc/tpch.go
+++ b/cmd/go-tpc/tpch.go
@@ -57,6 +57,24 @@ func registerTpch(root *cobra.Command) {
 		false,
 		"Create tiflash replica")
 
+	cmdPrepare.PersistentFlags().BoolVar(&tpchConfig.AnalyzeTable.Enable,
+		"analyze",
+		false,
+		"After data loaded, analyze table to collect column statistics")
+	// https://pingcap.com/docs/stable/reference/performance/statistics/#control-analyze-concurrency
+	cmdPrepare.PersistentFlags().IntVar(&tpchConfig.AnalyzeTable.BuildStatsConcurrency,
+		"tidb_build_stats_concurrency",
+		4,
+		"tidb_build_stats_concurrency param for analyze jobs")
+	cmdPrepare.PersistentFlags().IntVar(&tpchConfig.AnalyzeTable.DistsqlScanConcurrency,
+		"tidb_distsql_scan_concurrency",
+		15,
+		"tidb_distsql_scan_concurrency param for analyze jobs")
+	cmdPrepare.PersistentFlags().IntVar(&tpchConfig.AnalyzeTable.IndexSerialScanConcurrency,
+		"tidb_index_serial_scan_concurrency",
+		1,
+		"tidb_index_serial_scan_concurrency param for analyze jobs")
+
 	var cmdRun = &cobra.Command{
 		Use:   "run",
 		Short: "Run workload",
diff --git a/tpch/ddl.go b/tpch/ddl.go
index a70e8dd..acf807e 100644
--- a/tpch/ddl.go
+++ b/tpch/ddl.go
@@ -5,6 +5,10 @@ import (
 	"fmt"
 )
 
+// allTables lists the TPC-H tables managed by this workload; routines that
+// must visit each table (such as dropTable and analyzeTables) iterate over it.
+var allTables = []string{"lineitem", "partsupp", "supplier", "part", "orders", "customer", "region", "nation"}
+
 func (w *Workloader) createTableDDL(ctx context.Context, query string, tableName string, action string) error {
 	s := w.getState(ctx)
 	fmt.Printf("%s %s\n", action, tableName)
@@ -154,11 +158,8 @@ CREATE TABLE IF NOT EXISTS lineitem (
 
 func (w *Workloader) dropTable(ctx context.Context) error {
 	s := w.getState(ctx)
-	tables := []string{
-		"lineitem", "partsupp", "supplier", "part", "orders", "customer", "region", "nation",
-	}
 
-	for _, tbl := range tables {
+	for _, tbl := range allTables {
 		fmt.Printf("DROP TABLE IF EXISTS %s\n", tbl)
 		if _, err := s.Conn.ExecContext(ctx, fmt.Sprintf("DROP TABLE IF EXISTS %s", tbl)); err != nil {
 			return err
diff --git a/tpch/workload.go b/tpch/workload.go
index 270542e..220a579 100644
--- a/tpch/workload.go
+++ b/tpch/workload.go
@@ -16,6 +16,15 @@ type contextKey string
 
 const stateKey = contextKey("tpch")
 
+// analyzeConfig is the configuration for analyzing tables after data is loaded.
+// The concurrency fields carry the values for the matching TiDB session variables.
+type analyzeConfig struct {
+	Enable                     bool
+	BuildStatsConcurrency      int
+	DistsqlScanConcurrency     int
+	IndexSerialScanConcurrency int
+}
+
 // Config is the configuration for tpch workload
 type Config struct {
 	DBName               string
@@ -24,6 +33,7 @@ type Config struct {
 	ScaleFactor          int
 	EnableOutputCheck    bool
 	CreateTiFlashReplica bool
+	AnalyzeTable         analyzeConfig
 }
 
 type tpchState struct {
@@ -98,7 +108,42 @@ func (w Workloader) Prepare(ctx context.Context, threadID int) error {
 		dbgen.TRegion: newRegionLoader(ctx, s.Conn),
 	}
 	dbgen.InitDbGen(int64(w.cfg.ScaleFactor))
-	return dbgen.DbGen(sqlLoader)
+	if err := dbgen.DbGen(sqlLoader); err != nil {
+		return err
+	}
+
+	// After the data is loaded, analyze the tables to speed up queries.
+	if w.cfg.AnalyzeTable.Enable {
+		if err := w.analyzeTables(ctx, w.cfg.AnalyzeTable); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// analyzeTables runs ANALYZE TABLE on every table in allTables, one at a time,
+// on the connection held in the workload state for ctx. The concurrency values
+// in acfg are sent as TiDB session variable assignments in the same Exec call
+// as each ANALYZE. It stops at the first failure and returns an error that
+// names the failing table.
+//
+// NOTE(review): sending several ';'-separated statements in one Exec presumably
+// relies on the driver accepting multi-statement queries; confirm the DSN
+// enables that.
+func (w Workloader) analyzeTables(ctx context.Context, acfg analyzeConfig) error {
+	s := w.getState(ctx)
+	// The session settings are the same for every table, so format them once.
+	setVars := fmt.Sprintf(
+		"SET @@session.tidb_build_stats_concurrency=%d; SET @@session.tidb_distsql_scan_concurrency=%d; SET @@session.tidb_index_serial_scan_concurrency=%d; ",
+		acfg.BuildStatsConcurrency, acfg.DistsqlScanConcurrency, acfg.IndexSerialScanConcurrency)
+	for _, tbl := range allTables {
+		fmt.Printf("analyzing table %s\n", tbl)
+		if _, err := s.Conn.ExecContext(ctx, setVars+"ANALYZE TABLE "+tbl); err != nil {
+			return fmt.Errorf("analyze table %s: %w", tbl, err)
+		}
+		fmt.Printf("analyze table %s done\n", tbl)
+	}
+	return nil
 }
 
 // CheckPrepare checks prepare