From 93f4780f409efaa3b781b2f61138384ace61abfd Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 24 Apr 2017 17:07:31 +0800 Subject: [PATCH] plan: add variable `countReliable` to physicalPlanInfo (#3011) --- domain/domain.go | 5 ++ executor/explain_test.go | 32 ++++---- plan/logical_plan_test.go | 28 +++++++ plan/match_property.go | 47 +++++++---- plan/physical_plan_builder.go | 112 ++++++++++++++++----------- plan/physical_plan_test.go | 82 ++++++++++++++++++++ plan/plan.go | 4 + session.go | 1 + sessionctx/variable/session.go | 4 + sessionctx/variable/sysvar.go | 1 + sessionctx/variable/tidb_vars.go | 6 ++ sessionctx/varsutil/varsutil.go | 2 + sessionctx/varsutil/varsutil_test.go | 5 ++ statistics/builder.go | 8 +- statistics/histogram.go | 12 +-- statistics/statscache.go | 8 +- statistics/table.go | 10 +-- 17 files changed, 270 insertions(+), 97 deletions(-) diff --git a/domain/domain.go b/domain/domain.go index 4f2a808fcfb40..8e6fb85ce6fba 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -441,6 +441,11 @@ func (do *Domain) StatsHandle() *statistics.Handle { return do.statsHandle } +// CreateStatsHandle is used only for test. +func (do *Domain) CreateStatsHandle(ctx context.Context) { + do.statsHandle = statistics.NewHandle(ctx) +} + // UpdateTableStatsLoop creates a goroutine loads stats info and updates stats info in a loop. It // should be called only once in BootstrapSession. func (do *Domain) UpdateTableStatsLoop(ctx context.Context) error { diff --git a/executor/explain_test.go b/executor/explain_test.go index c682144248f18..96860740e2101 100644 --- a/executor/explain_test.go +++ b/executor/explain_test.go @@ -174,10 +174,10 @@ func (s *testSuite) TestExplain(c *C) { { "select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1", []string{ - "TableScan_8", "TableScan_10", "HashLeftJoin_7", + "TableScan_7", "TableScan_10", "HashLeftJoin_9", }, []string{ - "HashLeftJoin_7", "HashLeftJoin_7", "", + "HashLeftJoin_9", "HashLeftJoin_9", "", }, []string{ `{ @@ -213,7 +213,7 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "TableScan_8", + "leftPlan": "TableScan_7", "rightPlan": "TableScan_10" }`, }, @@ -282,10 +282,10 @@ func (s *testSuite) TestExplain(c *C) { { "select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1", []string{ - "TableScan_11", "TableScan_12", "HashAgg_13", "HashLeftJoin_10", "Projection_9", + "TableScan_16", "TableScan_10", "HashAgg_11", "HashLeftJoin_15", "Projection_9", }, []string{ - "HashLeftJoin_10", "HashAgg_13", "HashLeftJoin_10", "Projection_9", "", + "HashLeftJoin_15", "HashAgg_11", "HashLeftJoin_15", "Projection_9", "", }, []string{`{ "db": "test", @@ -327,7 +327,7 @@ func (s *testSuite) TestExplain(c *C) { "GroupByItems": [ "[b.c2]" ], - "child": "TableScan_12" + "child": "TableScan_10" }`, `{ "eqCond": [ @@ -336,14 +336,14 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "TableScan_11", - "rightPlan": "HashAgg_13" + "leftPlan": "TableScan_16", + "rightPlan": "HashAgg_11" }`, `{ "exprs": [ "cast(join_agg_0)" ], - "child": "HashLeftJoin_10" + "child": "HashLeftJoin_15" }`, }, }, @@ -619,8 +619,8 @@ func (s *testSuite) TestExplain(c *C) { }, { "select s.c1 from t2 s left outer join t2 t on s.c2 = t.c2 limit 10", - []string{"TableScan_7", "Limit_8", "TableScan_9", "HashLeftJoin_6", "Limit_10", "Projection_4"}, - []string{"Limit_8", "HashLeftJoin_6", "HashLeftJoin_6", "Limit_10", "Projection_4", ""}, + 
[]string{"TableScan_6", "Limit_7", "TableScan_10", "HashLeftJoin_9", "Limit_11", "Projection_4"}, + []string{"Limit_7", "HashLeftJoin_9", "HashLeftJoin_9", "Limit_11", "Projection_4", ""}, []string{ `{ "db": "test", @@ -637,7 +637,7 @@ func (s *testSuite) TestExplain(c *C) { `{ "limit": 10, "offset": 0, - "child": "TableScan_7" + "child": "TableScan_6" }`, `{ "db": "test", @@ -658,19 +658,19 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "Limit_8", - "rightPlan": "TableScan_9" + "leftPlan": "Limit_7", + "rightPlan": "TableScan_10" }`, `{ "limit": 10, "offset": 0, - "child": "HashLeftJoin_6" + "child": "HashLeftJoin_9" }`, `{ "exprs": [ "s.c1" ], - "child": "Limit_10" + "child": "Limit_11" }`, }, }, diff --git a/plan/logical_plan_test.go b/plan/logical_plan_test.go index 52a3cdc265120..4d4b55a236af7 100644 --- a/plan/logical_plan_test.go +++ b/plan/logical_plan_test.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/terror" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/testleak" @@ -362,10 +363,37 @@ func mockContext() context.Context { client: &mockClient{}, } do := &domain.Domain{} + do.CreateStatsHandle(ctx) sessionctx.BindDomain(ctx, do) return ctx } +func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table { + statsTbl := &statistics.Table{ + TableID: tbl.ID, + Count: rowCount, + Columns: make(map[int64]*statistics.Column, len(tbl.Columns)), + Indices: make(map[int64]*statistics.Index, len(tbl.Indices)), + } + return statsTbl +} + +// mockStatsHistogram will create a statistics.Histogram, of which the data is uniform distribution. 
+func mockStatsHistogram(id int64, values []types.Datum, repeat int64) *statistics.Histogram { + ndv := len(values) + histogram := &statistics.Histogram{ + ID: id, + NDV: int64(ndv), + Buckets: make([]statistics.Bucket, ndv), + } + for i := 0; i < ndv; i++ { + histogram.Buckets[i].Repeats = repeat + histogram.Buckets[i].Count = repeat * int64(i+1) + histogram.Buckets[i].Value = values[i] + } + return histogram +} + func (s *testPlanSuite) TestPredicatePushDown(c *C) { defer testleak.AfterTest(c)() tests := []struct { diff --git a/plan/match_property.go b/plan/match_property.go index 5fbec550679b1..8c66946068955 100644 --- a/plan/match_property.go +++ b/plan/match_property.go @@ -31,7 +31,11 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy newTS := ts.Copy().(*PhysicalTableScan) newTS.addLimit(prop.limit) p := newTS.tryToAddUnionScan(newTS) - return enforceProperty(prop, &physicalPlanInfo{p: p, cost: cost, count: infos[0].count}) + return enforceProperty(prop, &physicalPlanInfo{ + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } if len(prop.props) == 1 && ts.pkCol != nil && ts.pkCol.Equal(prop.props[0].col, ts.ctx) { sortedTS := ts.Copy().(*PhysicalTableScan) @@ -44,9 +48,10 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy } p := sortedTS.tryToAddUnionScan(sortedTS) return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } if prop.limit != nil { sortedTS := ts.Copy().(*PhysicalTableScan) @@ -59,9 +64,10 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy sortedTS.KeepOrder = true p := sortedTS.tryToAddUnionScan(sortedTS) return enforceProperty(prop, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } return &physicalPlanInfo{p: nil, cost: math.MaxFloat64, count: infos[0].count} } @@ -109,7 +115,11 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy } if len(prop.props) == 0 { p := is.tryToAddUnionScan(is) - return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{p: p, cost: cost, count: infos[0].count}) + return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } matchedIdx := 0 matchedList := make([]bool, len(prop.props)) @@ -141,9 +151,10 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy sortedIS.addLimit(prop.limit) p := sortedIS.tryToAddUnionScan(sortedIS) return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ - p: p, - cost: sortedCost, - count: infos[0].count}) + p: p, + cost: sortedCost, + count: infos[0].count, + reliable: infos[0].reliable}) } } if prop.limit != nil { @@ -157,9 +168,10 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy sortedIS.OutOfOrder = true p := sortedIS.tryToAddUnionScan(sortedIS) return enforceProperty(prop, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } return &physicalPlanInfo{p: nil, cost: math.MaxFloat64, count: infos[0].count} } @@ -223,13 +235,15 @@ func (p *Union) matchProperty(_ *requiredProperty, childPlanInfo ...*physicalPla children 
:= make([]Plan, 0, len(childPlanInfo)) cost := float64(0) count := float64(0) + reliable := true for _, res := range childPlanInfo { children = append(children, res.p) cost += res.cost count += res.count + reliable = reliable && res.reliable } np.SetChildren(children...) - return &physicalPlanInfo{p: np, cost: cost, count: count} + return &physicalPlanInfo{p: np, cost: cost, count: count, reliable: reliable} } // matchProperty implements PhysicalPlan matchProperty interface. @@ -254,6 +268,7 @@ func (p *PhysicalUnionScan) matchProperty(prop *requiredProperty, childPlanInfo res.p = np if limit != nil { res = addPlanToResponse(limit, res) + res.reliable = true } return res } @@ -262,7 +277,7 @@ func (p *PhysicalUnionScan) matchProperty(prop *requiredProperty, childPlanInfo func (p *Projection) matchProperty(_ *requiredProperty, childPlanInfo ...*physicalPlanInfo) *physicalPlanInfo { np := p.Copy() np.SetChildren(childPlanInfo[0].p) - return &physicalPlanInfo{p: np, cost: childPlanInfo[0].cost} + return &physicalPlanInfo{p: np, cost: childPlanInfo[0].cost, reliable: childPlanInfo[0].reliable} } // matchProperty implements PhysicalPlan matchProperty interface. diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index dc2f27d484c4f..3cf9b7cabef4a 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -102,7 +102,7 @@ func (p *DataSource) convert2TableScan(prop *requiredProperty) (*physicalPlanInf if ts.TableConditionPBExpr != nil { rowCount = rowCount * selectionFactor } - return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount}), nil + return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount, reliable: !statsTbl.Pseudo}), nil } func (p *DataSource) convert2IndexScan(prop *requiredProperty, index *model.IndexInfo) (*physicalPlanInfo, error) { @@ -164,7 +164,7 @@ func (p *DataSource) convert2IndexScan(prop *requiredProperty, index *model.Inde is.Ranges = rb.buildIndexRanges(fullRange, types.NewFieldType(mysql.TypeNull)) } is.DoubleRead = !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) - return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount}), nil + return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount, reliable: !statsTbl.Pseudo}), nil } func isCoveringIndex(columns []*model.ColumnInfo, indexColumns []*model.IndexColumn, pkIsHandle bool) bool { @@ -279,7 +279,12 @@ func (p *DataSource) tryToConvert2DummyScan(prop *requiredProperty) (*physicalPl func addPlanToResponse(parent PhysicalPlan, info *physicalPlanInfo) *physicalPlanInfo { np := parent.Copy() np.SetChildren(info.p) - return &physicalPlanInfo{p: np, cost: info.cost, count: info.count} + ret := &physicalPlanInfo{p: np, cost: info.cost, count: info.count, reliable: info.reliable} + if _, ok := parent.(*MaxOneRow); ok { + ret.count = 1 + ret.reliable = true + } + return ret } // enforceProperty creates a *physicalPlanInfo that satisfies the required property by adding @@ -303,12 +308,14 @@ func enforceProperty(prop *requiredProperty, info *physicalPlanInfo) *physicalPl count := info.count if prop.limit != nil { count = float64(prop.limit.Offset + prop.limit.Count) + info.reliable = true } info.cost += sortCost(count) } else if prop.limit != nil { limit := Limit{Offset: prop.limit.Offset, Count: prop.limit.Count}.init(info.p.Allocator(), info.p.context()) limit.SetSchema(info.p.Schema()) info = addPlanToResponse(limit, info) + info.reliable = true } if prop.limit != nil && float64(prop.limit.Count) < 
info.count { info.count = float64(prop.limit.Count) @@ -602,6 +609,18 @@ func (p *LogicalJoin) buildSelectionWithConds(leftAsOuter bool) (*Selection, []* return selection, corCols } +// outerTableCouldINLJ will check the whether is forced to build index nested loop join or outer info is reliable +// and the count satisfies the condition. +func (p *LogicalJoin) outerTableCouldINLJ(outerInfo *physicalPlanInfo, leftAsOuter bool) bool { + var forced bool + if leftAsOuter { + forced = (p.preferINLJ&preferLeftAsOuter) > 0 && p.hasEqualConds() + } else { + forced = (p.preferINLJ&preferRightAsOuter) > 0 && p.hasEqualConds() + } + return forced || (outerInfo.reliable && outerInfo.count <= float64(p.ctx.GetSessionVars().MaxRowCountForINLJ)) +} + func (p *LogicalJoin) convert2IndexNestedLoopJoinLeft(prop *requiredProperty, innerJoin bool) (*physicalPlanInfo, error) { lChild := p.children[0].(LogicalPlan) switch x := p.children[1].(type) { @@ -638,6 +657,11 @@ func (p *LogicalJoin) convert2IndexNestedLoopJoinLeft(prop *requiredProperty, in if lInfo.p == nil { return nil, nil } + // If the outer table's row count is reliable and don't exceed the MaxRowCountForINLJ or we use hint to force + // choosing index nested loop join, we will continue building. Otherwise we just break and return nil. + if !p.outerTableCouldINLJ(lInfo, true) { + return nil, nil + } selection, corCols := p.buildSelectionWithConds(true) if selection == nil { return nil, nil @@ -709,6 +733,11 @@ func (p *LogicalJoin) convert2IndexNestedLoopJoinRight(prop *requiredProperty, i if rInfo.p == nil { return nil, nil } + // If the outer table's row count is reliable and don't exceed the MaxRowCountForINLJ or we use hint to force + // choosing index nested loop join, we will continue building. Otherwise we just break and return nil. 
+ if !p.outerTableCouldINLJ(rInfo, false) { + return nil, nil + } selection, corCols := p.buildSelectionWithConds(false) if selection == nil { return nil, nil @@ -936,15 +965,13 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla break } } - if (p.preferINLJ&preferLeftAsOuter) > 0 && p.hasEqualConds() { - nljInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, false) - if err != nil { - return nil, errors.Trace(err) - } - if nljInfo != nil { - info = nljInfo - break - } + nljInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, false) + if err != nil { + return nil, errors.Trace(err) + } + if nljInfo != nil { + info = nljInfo + break } // Otherwise fall into hash join info, err = p.convert2PhysicalPlanLeft(prop, false) @@ -961,15 +988,13 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla break } } - if (p.preferINLJ&preferRightAsOuter) > 0 && p.hasEqualConds() { - nljInfo, err := p.convert2IndexNestedLoopJoinRight(prop, false) - if err != nil { - return nil, errors.Trace(err) - } - if nljInfo != nil { - info = nljInfo - break - } + nljInfo, err := p.convert2IndexNestedLoopJoinRight(prop, false) + if err != nil { + return nil, errors.Trace(err) + } + if nljInfo != nil { + info = nljInfo + break } info, err = p.convert2PhysicalPlanRight(prop, false) if err != nil { @@ -984,28 +1009,22 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla } break } - if p.preferINLJ > 0 && p.hasEqualConds() { - if (p.preferINLJ & preferLeftAsOuter) > 0 { - lNLJInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, true) - if err != nil { - return nil, errors.Trace(err) - } - if lNLJInfo != nil { - info = lNLJInfo - } - } - if (p.preferINLJ & preferRightAsOuter) > 0 { - rNLJInfo, err := p.convert2IndexNestedLoopJoinRight(prop, true) - if err != nil { - return nil, errors.Trace(err) - } - if info == nil || (rNLJInfo != nil && info.cost > rNLJInfo.cost) { - info = rNLJInfo - } - } - if info != nil { - break - } + lNLJInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, true) + if err != nil { + return nil, errors.Trace(err) + } + rNLJInfo, err := p.convert2IndexNestedLoopJoinRight(prop, true) + if err != nil { + return nil, errors.Trace(err) + } + if lNLJInfo != nil { + info = lNLJInfo + } + if info == nil || (rNLJInfo != nil && info.cost > rNLJInfo.cost) { + info = rNLJInfo + } + if info != nil { + break } // fall back to hash join lInfo, err := p.convert2PhysicalPlanLeft(prop, true) @@ -1314,9 +1333,10 @@ func (p *Selection) appendSelToInfo(info *physicalPlanInfo) *physicalPlanInfo { np := p.Copy().(*Selection) np.SetChildren(info.p) return &physicalPlanInfo{ - p: np, - cost: info.cost, - count: info.count * selectionFactor, + p: np, + cost: info.cost, + count: info.count * selectionFactor, + reliable: info.reliable, } } diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index 858999ccf367c..bbaf103776635 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -21,7 +21,9 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/testleak" "github.com/pingcap/tidb/util/types" @@ -1147,3 +1149,83 @@ func (s *testPlanSuite) TestJoinAlgorithm(c *C) { c.Assert(ToString(pp), Equals, tt.ans, Commentf("for %s", tt.sql)) } } + +func (s *testPlanSuite) 
TestAutoJoinChosen(c *C) { + defer testleak.AfterTest(c)() + cases := []struct { + sql string + ans string + genStatsTbl bool + }{ + { + sql: "select * from (select * from t limit 0, 128) t1 join t t2 on t1.a = t2.a", + ans: "Apply{Table(t)->Limit->Table(t)->Selection}", + }, + { + sql: "select * from (select * from t limit 0, 129) t1 join t t2 on t1.a = t2.a", + ans: "RightHashJoin{Table(t)->Limit->Table(t)}(t1.a,t2.a)", + }, + { + sql: "select * from (select * from t limit 0, 10 union select * from t limit 10, 100) t1 join t t2 on t1.a = t2.a", + ans: "Apply{UnionAll{Table(t)->Limit->Projection->Table(t)->Limit->Projection}->HashAgg->Table(t)->Selection}", + }, + { + sql: "select * from (select * from t limit 0, 29 union all select * from t limit 0, 100) t1 join t t2 on t1.a = t2.a", + ans: "RightHashJoin{UnionAll{Table(t)->Limit->Table(t)->Limit}->Table(t)}(t1.a,t2.a)", + }, + { + sql: "select * from t t1 join t t2 on t1.f = t2.f and t1.a < 5", + ans: "Apply{Table(t)->Index(t.f)[]->Selection}", + genStatsTbl: true, + }, + { + sql: "select * from t t1 join t t2 on t1.f = t2.f and t1.a < 19", + ans: "RightHashJoin{Table(t)->Table(t)}(t1.f,t2.f)", + genStatsTbl: true, + }, + } + for _, ca := range cases { + comment := Commentf("for %s", ca.sql) + stmt, err := s.ParseOneStmt(ca.sql, "", "") + c.Assert(err, IsNil, comment) + ast.SetFlag(stmt) + + is, err := mockResolve(stmt) + c.Assert(err, IsNil) + + ctx := mockContext() + + if ca.genStatsTbl { + handle := sessionctx.GetDomain(ctx).StatsHandle() + tb, _ := is.TableByID(0) + tbl := tb.Meta() + // generate 40 distinct values for pk. + pkValues := make([]types.Datum, 40) + for i := 0; i < 40; i++ { + pkValues[i] = types.NewIntDatum(int64(i)) + } + // make the statistic col info for pk, every distinct value occurs 10 times. + pkStatsCol := &statistics.Column{Histogram: *mockStatsHistogram(1, pkValues, 10)} + // mock the statistic table and set the value of pk column. + statsTbl := mockStatsTable(tbl, 400) + statsTbl.Columns[1] = pkStatsCol + handle.UpdateTableStats([]*statistics.Table{statsTbl}, nil) + } + + builder := &planBuilder{ + allocator: new(idAllocator), + ctx: ctx, + colMapper: make(map[*ast.ColumnNameExpr]int), + is: is, + } + p := builder.build(stmt) + c.Assert(builder.err, IsNil) + pp, err := doOptimize(builder.optFlag, p.(LogicalPlan), builder.ctx, builder.allocator) + c.Assert(err, IsNil) + c.Assert(ToString(pp), Equals, ca.ans, Commentf("for %s", ca.sql)) + + if ca.genStatsTbl { + sessionctx.GetDomain(ctx).StatsHandle().Clear() + } + } +} diff --git a/plan/plan.go b/plan/plan.go index dbafd83cead86..8a3575f0ab4f5 100644 --- a/plan/plan.go +++ b/plan/plan.go @@ -131,6 +131,10 @@ type physicalPlanInfo struct { p PhysicalPlan cost float64 count float64 + + // If the count is calculated by pseudo table, it's not reliable. Otherwise it's reliable. + // But if we has limit or maxOneRow, the count is reliable. + reliable bool } // LogicalPlan is a tree of logical operators. 
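
To make the intent of the new `reliable` flag concrete: a scan's row count is reliable only when it comes from real (non-pseudo) statistics, a Limit or MaxOneRow on top makes the count reliable regardless of the child, and a Union is reliable only if every child is. The following is a minimal sketch of those rules, not taken from the patch itself; all names here are hypothetical.

package sketch

// planInfo mirrors the cost/count/reliable triple that physicalPlanInfo now carries.
type planInfo struct {
	cost     float64
	count    float64
	reliable bool
}

// fromScan: a table or index scan's estimate is reliable only when the
// statistics table is not a pseudo table (reliable: !statsTbl.Pseudo).
func fromScan(rowCount float64, pseudoStats bool) planInfo {
	return planInfo{count: rowCount, reliable: !pseudoStats}
}

// applyLimit: a Limit caps the row count, so the result is reliable no matter
// what the child was; MaxOneRow behaves the same with an implicit limit of 1.
func applyLimit(child planInfo, limitCount float64) planInfo {
	if limitCount < child.count {
		child.count = limitCount
	}
	child.reliable = true
	return child
}

// union: counts and costs add up, and the result is reliable only if every
// child's count is reliable.
func union(children ...planInfo) planInfo {
	res := planInfo{reliable: true}
	for _, c := range children {
		res.cost += c.cost
		res.count += c.count
		res.reliable = res.reliable && c.reliable
	}
	return res
}

Operators that only reshape or filter rows (Projection, Selection) simply pass the flag through; Selection additionally scales the count by the selection factor, as appendSelToInfo does in the patch.
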
diff --git a/session.go b/session.go index 886e1ef9fe692..39d371b1b2065 100644 --- a/session.go +++ b/session.go @@ -981,6 +981,7 @@ const loadCommonGlobalVarsSQL = "select * from mysql.global_variables where vari variable.TiDBIndexLookupSize + quoteCommaQuote + variable.TiDBIndexLookupConcurrency + quoteCommaQuote + variable.TiDBIndexSerialScanConcurrency + quoteCommaQuote + + variable.TiDBMaxRowCountForINLJ + quoteCommaQuote + variable.TiDBDistSQLScanConcurrency + "')" // LoadCommonGlobalVariableIfNeeded loads and applies commonly used global variables for the session. diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 1d766358222aa..25c78ddfefab3 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -197,6 +197,9 @@ type SessionVars struct { // Should we split insert data into multiple batches. BatchInsert bool + + // Max row count that the outer table of index nested loop join could be without force hint. + MaxRowCountForINLJ int } // NewSessionVars creates a session vars object. @@ -217,6 +220,7 @@ func NewSessionVars() *SessionVars { IndexLookupConcurrency: DefIndexLookupConcurrency, IndexSerialScanConcurrency: DefIndexSerialScanConcurrency, DistSQLScanConcurrency: DefDistSQLScanConcurrency, + MaxRowCountForINLJ: DefMaxRowCountForINLJ, } } diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 0fc1d87659476..e3ed5436b8f08 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -600,6 +600,7 @@ var defaultSysVars = []*SysVar{ {ScopeGlobal | ScopeSession, TiDBIndexLookupSize, strconv.Itoa(DefIndexLookupSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupConcurrency, strconv.Itoa(DefIndexLookupConcurrency)}, {ScopeGlobal | ScopeSession, TiDBIndexSerialScanConcurrency, strconv.Itoa(DefIndexSerialScanConcurrency)}, + {ScopeGlobal | ScopeSession, TiDBMaxRowCountForINLJ, strconv.Itoa(DefMaxRowCountForINLJ)}, {ScopeGlobal | ScopeSession, TiDBSkipDDLWait, boolToIntStr(DefSkipDDLWait)}, {ScopeGlobal | ScopeSession, TiDBSkipUTF8Check, boolToIntStr(DefSkipUTF8Check)}, {ScopeSession, TiDBBatchInsert, boolToIntStr(DefBatchInsert)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index eeb4b80541620..5e94dc57bd260 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -87,6 +87,11 @@ const ( // tidb_batch_insert is used to enable/disable auto-split insert data. If set this option on, insert executor will automatically // insert data into multiple batches and use a single txn for each batch. This will be helpful when inserting large data. TiDBBatchInsert = "tidb_batch_insert" + + // tidb_max_row_count_for_inlj is used when do index nested loop join. + // It controls the max row count of outer table when do index nested loop join without hint. + // After the row count of the inner table is accurate, this variable will be removed. + TiDBMaxRowCountForINLJ = "tidb_max_row_count_for_inlj" ) // Default TiDB system variable values. 
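
How the new session variable is consumed: the planner picks an index nested loop join automatically only when the outer side's estimated row count is both reliable and no larger than tidb_max_row_count_for_inlj (default 128); an INLJ hint with equal conditions bypasses the check. Below is a condensed sketch of the predicate implemented by (*LogicalJoin).outerTableCouldINLJ, with made-up parameter names, not the real function.

// couldChooseINLJ sketches only the gating logic of outerTableCouldINLJ.
func couldChooseINLJ(forcedByHint, outerReliable bool, outerCount float64, maxRowCountForINLJ int) bool {
	// A hint always wins; otherwise require a trustworthy and small outer side.
	return forcedByHint || (outerReliable && outerCount <= float64(maxRowCountForINLJ))
}

Because the variable is registered with ScopeGlobal | ScopeSession, the threshold can be tuned at runtime, for example with `set @@tidb_max_row_count_for_inlj = 512` in a session (value chosen only for illustration).
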
@@ -96,6 +101,7 @@ const ( DefIndexLookupSize = 20000 DefDistSQLScanConcurrency = 10 DefBuildStatsConcurrency = 4 + DefMaxRowCountForINLJ = 128 DefSkipDDLWait = false DefSkipUTF8Check = false DefOptAggPushDown = true diff --git a/sessionctx/varsutil/varsutil.go b/sessionctx/varsutil/varsutil.go index 5d92bc59dbc41..5c035f7f2e307 100644 --- a/sessionctx/varsutil/varsutil.go +++ b/sessionctx/varsutil/varsutil.go @@ -134,6 +134,8 @@ func SetSessionSystemVar(vars *variable.SessionVars, name string, value types.Da vars.IndexSerialScanConcurrency = tidbOptPositiveInt(sVal, variable.DefIndexSerialScanConcurrency) case variable.TiDBBatchInsert: vars.BatchInsert = tidbOptOn(sVal) + case variable.TiDBMaxRowCountForINLJ: + vars.MaxRowCountForINLJ = tidbOptPositiveInt(sVal, variable.DefMaxRowCountForINLJ) } vars.Systems[name] = sVal return nil diff --git a/sessionctx/varsutil/varsutil_test.go b/sessionctx/varsutil/varsutil_test.go index a40e417f60d1e..375dea7ba11c3 100644 --- a/sessionctx/varsutil/varsutil_test.go +++ b/sessionctx/varsutil/varsutil_test.go @@ -147,6 +147,11 @@ func (s *testVarsutilSuite) TestVarsutil(c *C) { c.Assert(v.BatchInsert, IsFalse) SetSessionSystemVar(v, variable.TiDBBatchInsert, types.NewStringDatum("1")) c.Assert(v.BatchInsert, IsTrue) + + //Test case for tidb_max_row_count_for_inlj. + c.Assert(v.MaxRowCountForINLJ, Equals, 128) + SetSessionSystemVar(v, variable.TiDBMaxRowCountForINLJ, types.NewStringDatum("127")) + c.Assert(v.MaxRowCountForINLJ, Equals, 127) } type mockGlobalAccessor struct { diff --git a/statistics/builder.go b/statistics/builder.go index cc13e05583afc..d2178d1b742fe 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -35,7 +35,7 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R hg := &Histogram{ ID: id, NDV: 0, - Buckets: make([]bucket, 1, numBuckets), + Buckets: make([]Bucket, 1, numBuckets), } var valuesPerBucket, lastNumber, bucketIdx int64 = 1, 0, 0 count := int64(0) @@ -95,7 +95,7 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R } else { lastNumber = hg.Buckets[bucketIdx].Count bucketIdx++ - hg.Buckets = append(hg.Buckets, bucket{ + hg.Buckets = append(hg.Buckets, Bucket{ Count: lastNumber + 1, Value: data, Repeats: 1, @@ -123,7 +123,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int hg := &Histogram{ ID: id, NDV: ndv, - Buckets: make([]bucket, 1, numBuckets), + Buckets: make([]Bucket, 1, numBuckets), } valuesPerBucket := float64(count)/float64(numBuckets) + 1 @@ -160,7 +160,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int lastCount = hg.Buckets[bucketIdx].Count // The bucket is full, store the item in the next bucket. bucketIdx++ - hg.Buckets = append(hg.Buckets, bucket{ + hg.Buckets = append(hg.Buckets, Bucket{ Count: int64(totalCount), Value: samples[i], Repeats: int64(ndvFactor), diff --git a/statistics/histogram.go b/statistics/histogram.go index 907d0fe3b383e..c33f399b9c9c4 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -35,10 +35,10 @@ type Histogram struct { // LastUpdateVersion is the version that this histogram updated last time. LastUpdateVersion uint64 - Buckets []bucket + Buckets []Bucket } -// bucket is an element of histogram. +// Bucket is an element of histogram. // // A bucket count is the number of items stored in all previous buckets and the current bucket. // bucket numbers are always in increasing order. 
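
The cumulative-count convention described in this comment is exactly what the new mockStatsHistogram test helper relies on: for a uniform histogram with ndv distinct values each repeated `repeat` times, Buckets[i].Count is repeat*(i+1), so the last bucket's Count equals the total row count (40 values * 10 repeats = 400 in TestAutoJoinChosen). A small sketch, assuming the statistics.Bucket type exported by this patch:

package sketch

import "github.com/pingcap/tidb/statistics"

// uniformBuckets builds buckets for a uniform distribution; Count is
// cumulative: rows in this bucket plus all earlier buckets.
func uniformBuckets(ndv int, repeat int64) []statistics.Bucket {
	buckets := make([]statistics.Bucket, ndv)
	for i := 0; i < ndv; i++ {
		buckets[i].Repeats = repeat
		buckets[i].Count = repeat * int64(i+1)
	}
	return buckets
}

The Value datums are left unset in this sketch; the real helper fills them from the supplied slice of values.
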
@@ -47,7 +47,7 @@ type Histogram struct { // // Repeat is the number of repeats of the bucket value, it can be used to find popular values. // -type bucket struct { +type Bucket struct { Count int64 Value types.Datum Repeats int64 @@ -109,7 +109,7 @@ func (h *Handle) histogramFromStorage(tableID int64, colID int64, tp *types.Fiel ID: colID, NDV: distinct, LastUpdateVersion: ver, - Buckets: make([]bucket, bucketSize), + Buckets: make([]Bucket, bucketSize), } for i := 0; i < bucketSize; i++ { bucketID := rows[i].Data[0].GetInt64() @@ -124,7 +124,7 @@ func (h *Handle) histogramFromStorage(tableID int64, colID int64, tp *types.Fiel return nil, errors.Trace(err) } } - hg.Buckets[bucketID] = bucket{ + hg.Buckets[bucketID] = Bucket{ Count: count, Value: value, Repeats: repeats, @@ -254,7 +254,7 @@ func (hg *Histogram) lowerBound(sc *variable.StatementContext, target types.Datu func (hg *Histogram) mergeBuckets(bucketIdx int64) { curBuck := 0 for i := int64(0); i+1 <= bucketIdx; i += 2 { - hg.Buckets[curBuck] = bucket{ + hg.Buckets[curBuck] = Bucket{ Count: hg.Buckets[i+1].Count, Value: hg.Buckets[i+1].Value, Repeats: hg.Buckets[i+1].Repeats, diff --git a/statistics/statscache.go b/statistics/statscache.go index 4d2ed9401ceac..0151138500b09 100644 --- a/statistics/statscache.go +++ b/statistics/statscache.go @@ -98,7 +98,7 @@ func (h *Handle) Update(is infoschema.InfoSchema) error { tables = append(tables, tbl) h.LastVersion = version } - h.updateTableStats(tables, deletedTableIDs) + h.UpdateTableStats(tables, deletedTableIDs) return nil } @@ -120,11 +120,11 @@ func (h *Handle) copyFromOldCache() statsCache { return newCache } -// updateTableStats updates the statistics table cache using copy on write. -func (h *Handle) updateTableStats(tables []*Table, deletedIDs []int64) { +// UpdateTableStats updates the statistics table cache using copy on write. +func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64) { newCache := h.copyFromOldCache() for _, tbl := range tables { - id := tbl.tableID + id := tbl.TableID newCache[id] = tbl } for _, id := range deletedIDs { diff --git a/statistics/table.go b/statistics/table.go index 03cdd1c9c5463..254fec305ea0e 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -41,7 +41,7 @@ const ( // Table represents statistics for a table. type Table struct { - tableID int64 + TableID int64 Columns map[int64]*Column Indices map[int64]*Index Count int64 // Total row count in a table. @@ -50,7 +50,7 @@ type Table struct { func (t *Table) copy() *Table { nt := &Table{ - tableID: t.tableID, + TableID: t.TableID, Count: t.Count, Pseudo: t.Pseudo, Columns: make(map[int64]*Column), @@ -77,7 +77,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, count int64) // We copy it before writing to avoid race. table = table.copy() } - table.tableID = tableInfo.ID + table.TableID = tableInfo.ID table.Count = count selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version from mysql.stats_histograms where table_id = %d", tableInfo.ID) @@ -144,7 +144,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, count int64) // String implements Stringer interface. 
func (t *Table) String() string { strs := make([]string, 0, len(t.Columns)+1) - strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.tableID, t.Count)) + strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.TableID, t.Count)) for _, col := range t.Columns { strs = append(strs, col.String()) } @@ -215,7 +215,7 @@ func (t *Table) GetRowCountByIndexRanges(sc *variable.StatementContext, idxID in // PseudoTable creates a pseudo table statistics when statistic can not be found in KV store. func PseudoTable(tableID int64) *Table { - t := &Table{tableID: tableID, Pseudo: true} + t := &Table{TableID: tableID, Pseudo: true} t.Count = pseudoRowCount t.Columns = make(map[int64]*Column) t.Indices = make(map[int64]*Index)
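
Taken together, the exported statistics pieces (Table.TableID, Bucket, Handle.UpdateTableStats, plus Domain.CreateStatsHandle for tests) let a test inject real-looking statistics and exercise the reliable-count path end to end. The following is a condensed version of the wiring used by TestAutoJoinChosen; the function name and variables are illustrative, not part of the patch.

package sketch

import (
	"github.com/pingcap/tidb/context"
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/sessionctx"
	"github.com/pingcap/tidb/statistics"
	"github.com/pingcap/tidb/util/types"
)

// installMockStats condenses the statistics setup from TestAutoJoinChosen:
// 40 distinct primary key values, each repeated 10 times, for 400 rows total.
func installMockStats(ctx context.Context, tblInfo *model.TableInfo) {
	hist := &statistics.Histogram{ID: 1, NDV: 40, Buckets: make([]statistics.Bucket, 40)}
	for i := 0; i < 40; i++ {
		hist.Buckets[i].Value = types.NewIntDatum(int64(i))
		hist.Buckets[i].Repeats = 10
		hist.Buckets[i].Count = 10 * int64(i+1) // cumulative count
	}

	statsTbl := &statistics.Table{
		TableID: tblInfo.ID,
		Count:   400,
		Columns: map[int64]*statistics.Column{1: {Histogram: *hist}},
		Indices: make(map[int64]*statistics.Index),
	}

	// UpdateTableStats (exported by this patch) swaps the table into the
	// stats cache with copy-on-write, so the planner sees non-pseudo stats.
	sessionctx.GetDomain(ctx).StatsHandle().UpdateTableStats([]*statistics.Table{statsTbl}, nil)
}

With these statistics installed the outer-side estimate becomes reliable, so in the test `t1.a < 5` (roughly 5*10 = 50 rows) stays under the default threshold of 128 and an index nested loop join is chosen, while `t1.a < 19` (roughly 190 rows) exceeds it and falls back to a hash join.
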