plan, executor: give special treat for index columns when analyze tab…

…le (pingcap#2436) * plan/statistics: statistics support add index info
Jnig · Jan 26, 2017 · 4d9b70f · 4d9b70f
1 parent 7e92364
commit 4d9b70f
Show file tree

Hide file tree

Showing 16 changed files with 519 additions and 144 deletions.
diff --git a/executor/analyze.go b/executor/analyze.go
@@ -0,0 +1,170 @@
+// Copyright 2017 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package executor
+
+import (
+	"math/rand"
+
+	"github.com/juju/errors"
+	"github.com/pingcap/tidb/ast"
+	"github.com/pingcap/tidb/context"
+	"github.com/pingcap/tidb/expression"
+	"github.com/pingcap/tidb/meta"
+	"github.com/pingcap/tidb/model"
+	"github.com/pingcap/tidb/plan/statistics"
+	"github.com/pingcap/tidb/plan/statscache"
+	"github.com/pingcap/tidb/util/types"
+)
+
+var _ Executor = &AnalyzeExec{}
+
+// AnalyzeExec represents Analyze executor.
+type AnalyzeExec struct {
+	schema     *expression.Schema
+	tblInfo    *model.TableInfo
+	ctx        context.Context
+	idxOffsets []int
+	colOffsets []int
+	pkOffset   int
+	Srcs       []Executor
+}
+
+const (
+	maxSampleCount     = 10000
+	defaultBucketCount = 256
+)
+
+// Schema implements the Executor Schema interface.
+func (e *AnalyzeExec) Schema() *expression.Schema {
+	return e.schema
+}
+
+// Close implements the Executor Close interface.
+func (e *AnalyzeExec) Close() error {
+	for _, src := range e.Srcs {
+		err := src.Close()
+		if err != nil {
+			return errors.Trace(err)
+		}
+	}
+	return nil
+}
+
+// Next implements the Executor Next interface.
+func (e *AnalyzeExec) Next() (*Row, error) {
+	for _, src := range e.Srcs {
+		ae := src.(*AnalyzeExec)
+		var count int64 = -1
+		var sampleRows []*ast.Row
+		if ae.colOffsets != nil {
+			rs := &recordSet{executor: ae.Srcs[len(ae.Srcs)-1]}
+			var err error
+			count, sampleRows, err = collectSamples(rs)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+		}
+		columnSamples := rowsToColumnSamples(sampleRows)
+		var pkRS ast.RecordSet
+		if ae.pkOffset != -1 {
+			offset := len(ae.Srcs) - 1
+			if ae.colOffsets != nil {
+				offset--
+			}
+			pkRS = &recordSet{executor: ae.Srcs[offset]}
+		}
+		idxRS := make([]ast.RecordSet, 0, len(ae.idxOffsets))
+		for i := range ae.idxOffsets {
+			idxRS = append(idxRS, &recordSet{executor: ae.Srcs[i]})
+		}
+		err := ae.buildStatisticsAndSaveToKV(count, columnSamples, idxRS, pkRS)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+	}
+	return nil, nil
+}
+
+func (e *AnalyzeExec) buildStatisticsAndSaveToKV(count int64, columnSamples [][]types.Datum, idxRS []ast.RecordSet, pkRS ast.RecordSet) error {
+	txn := e.ctx.Txn()
+	statBuilder := &statistics.Builder{
+		Sc:            e.ctx.GetSessionVars().StmtCtx,
+		TblInfo:       e.tblInfo,
+		StartTS:       int64(txn.StartTS()),
+		Count:         count,
+		NumBuckets:    defaultBucketCount,
+		ColumnSamples: columnSamples,
+		ColOffsets:    e.colOffsets,
+		IdxRecords:    idxRS,
+		IdxOffsets:    e.idxOffsets,
+		PkRecords:     pkRS,
+		PkOffset:      e.pkOffset,
+	}
+	t, err := statBuilder.NewTable()
+	if err != nil {
+		return errors.Trace(err)
+	}
+	statscache.SetStatisticsTableCache(e.tblInfo.ID, t)
+	tpb, err := t.ToPB()
+	if err != nil {
+		return errors.Trace(err)
+	}
+	m := meta.NewMeta(txn)
+	err = m.SetTableStats(e.tblInfo.ID, tpb)
+	if err != nil {
+		return errors.Trace(err)
+	}
+	return nil
+}
+
+// collectSamples collects sample from the result set, using Reservoir Sampling algorithm.
+// See https://en.wikipedia.org/wiki/Reservoir_sampling
+func collectSamples(e ast.RecordSet) (count int64, samples []*ast.Row, err error) {
+	for {
+		row, err := e.Next()
+		if err != nil {
+			return count, samples, errors.Trace(err)
+		}
+		if row == nil {
+			break
+		}
+		if len(samples) < maxSampleCount {
+			samples = append(samples, row)
+		} else {
+			shouldAdd := rand.Int63n(count) < maxSampleCount
+			if shouldAdd {
+				idx := rand.Intn(maxSampleCount)
+				samples[idx] = row
+			}
+		}
+		count++
+	}
+	return count, samples, nil
+}
+
+func rowsToColumnSamples(rows []*ast.Row) [][]types.Datum {
+	if len(rows) == 0 {
+		return nil
+	}
+	columnSamples := make([][]types.Datum, len(rows[0].Data))
+	for i := range columnSamples {
+		columnSamples[i] = make([]types.Datum, len(rows))
+	}
+	for j, row := range rows {
+		for i, val := range row.Data {
+			columnSamples[i][j] = val
+		}
+	}
+	return columnSamples
+}
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
@@ -0,0 +1,40 @@
+// Copyright 2017 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package executor_test
+
+import (
+	"fmt"
+	"strings"
+
+	. "github.com/pingcap/check"
+	"github.com/pingcap/tidb/util/testkit"
+	"github.com/pingcap/tidb/util/testleak"
+)
+
+func (s *testSuite) TestAnalyzeTable(c *C) {
+	defer testleak.AfterTest(c)()
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1")
+	tk.MustExec("create table t1 (a int)")
+	tk.MustExec("create index ind_a on t1 (a)")
+	tk.MustExec("insert into t1 (a) values (1)")
+	result := tk.MustQuery("explain select * from t1 where t1.a = 1")
+	rowStr := fmt.Sprintf("%s", result.Rows())
+	c.Check(strings.Split(rowStr, "{")[0], Equals, "[[IndexScan_5 ")
+	tk.MustExec("analyze table t1")
+	result = tk.MustQuery("explain select * from t1 where t1.a = 1")
+	rowStr = fmt.Sprintf("%s", result.Rows())
+	c.Check(strings.Split(rowStr, "{")[0], Equals, "[[TableScan_4 ")
+}
diff --git a/executor/builder.go b/executor/builder.go
@@ -117,6 +117,8 @@ func (b *executorBuilder) build(p plan.Plan) Executor {
 		return b.buildDummyScan(v)
 	case *plan.Cache:
 		return b.buildCache(v)
+	case *plan.Analyze:
+		return b.buildAnalyze(v)
 	default:
 		b.err = ErrUnknownPlan.Gen("Unknown Plan %T", p)
 		return nil
@@ -679,3 +681,24 @@ func (b *executorBuilder) buildCache(v *plan.Cache) Executor {
 		Src:    src,
 	}
 }
+
+func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
+	var tblInfo *model.TableInfo
+	if v.Table != nil {
+		tblInfo = v.Table.TableInfo
+	}
+	e := &AnalyzeExec{
+		schema:     v.Schema(),
+		tblInfo:    tblInfo,
+		ctx:        b.ctx,
+		idxOffsets: v.IdxOffsets,
+		colOffsets: v.ColOffsets,
+		pkOffset:   v.PkOffset,
+		Srcs:       make([]Executor, len(v.Children())),
+	}
+	for i, child := range v.Children() {
+		childExec := b.build(child)
+		e.Srcs[i] = childExec
+	}
+	return e
+}
diff --git a/executor/executor_simple.go b/executor/executor_simple.go
@@ -15,7 +15,6 @@ package executor
 
 import (
 	"fmt"
-	"math/rand"
 	"strings"
 
 	"github.com/juju/errors"
@@ -24,16 +23,12 @@ import (
 	"github.com/pingcap/tidb/context"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/infoschema"
-	"github.com/pingcap/tidb/meta"
 	"github.com/pingcap/tidb/model"
 	"github.com/pingcap/tidb/mysql"
-	"github.com/pingcap/tidb/plan/statistics"
-	"github.com/pingcap/tidb/plan/statscache"
 	"github.com/pingcap/tidb/sessionctx/variable"
 	"github.com/pingcap/tidb/terror"
 	"github.com/pingcap/tidb/util"
 	"github.com/pingcap/tidb/util/sqlexec"
-	"github.com/pingcap/tidb/util/types"
 )
 
 // SimpleExec represents simple statement executor.
@@ -78,8 +73,6 @@ func (e *SimpleExec) Next() (*Row, error) {
 		err = e.executeDropUser(x)
 	case *ast.SetPwdStmt:
 		err = e.executeSetPwd(x)
-	case *ast.AnalyzeTableStmt:
-		err = e.executeAnalyzeTable(x)
 	case *ast.BinlogStmt:
 		// We just ignore it.
 		return nil, nil
@@ -313,115 +306,3 @@ func (e *SimpleExec) executeFlushTable(s *ast.FlushTableStmt) error {
 	// TODO: A dummy implement
 	return nil
 }
-
-func (e *SimpleExec) executeAnalyzeTable(s *ast.AnalyzeTableStmt) error {
-	for _, table := range s.TableNames {
-		err := e.createStatisticsForTable(table)
-		if err != nil {
-			return errors.Trace(err)
-		}
-	}
-	return nil
-}
-
-const (
-	maxSampleCount     = 10000
-	defaultBucketCount = 256
-)
-
-func (e *SimpleExec) createStatisticsForTable(tn *ast.TableName) error {
-	var tableName string
-	if tn.Schema.L == "" {
-		tableName = tn.Name.L
-	} else {
-		tableName = tn.Schema.L + "." + tn.Name.L
-	}
-	sql := "select * from " + tableName
-	result, err := e.ctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(e.ctx, sql)
-	if err != nil {
-		return errors.Trace(err)
-	}
-	count, samples, err := e.collectSamples(result)
-	result.Close()
-	if err != nil {
-		return errors.Trace(err)
-	}
-	err = e.buildStatisticsAndSaveToKV(tn, count, samples)
-	if err != nil {
-		return errors.Trace(err)
-	}
-	return nil
-}
-
-// collectSamples collects sample from the result set, using Reservoir Sampling algorithm.
-// See https://en.wikipedia.org/wiki/Reservoir_sampling
-func (e *SimpleExec) collectSamples(result ast.RecordSet) (count int64, samples []*ast.Row, err error) {
-	for {
-		var row *ast.Row
-		row, err = result.Next()
-		if err != nil {
-			return count, samples, errors.Trace(err)
-		}
-		if row == nil {
-			break
-		}
-		if len(samples) < maxSampleCount {
-			samples = append(samples, row)
-		} else {
-			shouldAdd := rand.Int63n(count) < maxSampleCount
-			if shouldAdd {
-				idx := rand.Intn(maxSampleCount)
-				samples[idx] = row
-			}
-		}
-		count++
-	}
-	return count, samples, nil
-}
-
-func (e *SimpleExec) buildStatisticsAndSaveToKV(tn *ast.TableName, count int64, sampleRows []*ast.Row) error {
-	txn := e.ctx.Txn()
-	statBuilder := &statistics.Builder{
-		Sc:            e.ctx.GetSessionVars().StmtCtx,
-		TblInfo:       tn.TableInfo,
-		StartTS:       int64(txn.StartTS()),
-		Count:         count,
-		NumBuckets:    defaultBucketCount,
-		ColumnSamples: rowsToColumnSamples(sampleRows),
-		PkOffset:      -1,
-	}
-	for i := range statBuilder.ColumnSamples {
-		statBuilder.ColOffsets = append(statBuilder.ColOffsets, i)
-	}
-	t, err := statBuilder.NewTable()
-	if err != nil {
-		return errors.Trace(err)
-	}
-	statscache.SetStatisticsTableCache(tn.TableInfo.ID, t)
-	tpb, err := t.ToPB()
-	if err != nil {
-		return errors.Trace(err)
-	}
-	m := meta.NewMeta(txn)
-	err = m.SetTableStats(tn.TableInfo.ID, tpb)
-	if err != nil {
-		return errors.Trace(err)
-	}
-	return nil
-}
-
-func rowsToColumnSamples(rows []*ast.Row) [][]types.Datum {
-	if len(rows) == 0 {
-		return nil
-	}
-	columnSamples := make([][]types.Datum, len(rows[0].Data))
-	for i := range columnSamples {
-		columnSamples[i] = make([]types.Datum, len(rows))
-	}
-	for j, row := range rows {
-		for i, val := range row.Data {
-			columnSamples[i][j] = val
-		}
-	}
-	return columnSamples
-}