From 93f4780f409efaa3b781b2f61138384ace61abfd Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 24 Apr 2017 17:07:31 +0800 Subject: [PATCH] plan: add variable `countReliable` to physicalPlanInfo (#3011) --- domain/domain.go | 5 ++ executor/explain_test.go | 32 ++++---- plan/logical_plan_test.go | 28 +++++++ plan/match_property.go | 47 +++++++---- plan/physical_plan_builder.go | 112 ++++++++++++++++----------- plan/physical_plan_test.go | 82 ++++++++++++++++++++ plan/plan.go | 4 + session.go | 1 + sessionctx/variable/session.go | 4 + sessionctx/variable/sysvar.go | 1 + sessionctx/variable/tidb_vars.go | 6 ++ sessionctx/varsutil/varsutil.go | 2 + sessionctx/varsutil/varsutil_test.go | 5 ++ statistics/builder.go | 8 +- statistics/histogram.go | 12 +-- statistics/statscache.go | 8 +- statistics/table.go | 10 +-- 17 files changed, 270 insertions(+), 97 deletions(-) diff --git a/domain/domain.go b/domain/domain.go index 4f2a808fcfb40..8e6fb85ce6fba 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -441,6 +441,11 @@ func (do *Domain) StatsHandle() *statistics.Handle { return do.statsHandle } +// CreateStatsHandle is used only for test. +func (do *Domain) CreateStatsHandle(ctx context.Context) { + do.statsHandle = statistics.NewHandle(ctx) +} + // UpdateTableStatsLoop creates a goroutine loads stats info and updates stats info in a loop. It // should be called only once in BootstrapSession. func (do *Domain) UpdateTableStatsLoop(ctx context.Context) error { diff --git a/executor/explain_test.go b/executor/explain_test.go index c682144248f18..96860740e2101 100644 --- a/executor/explain_test.go +++ b/executor/explain_test.go @@ -174,10 +174,10 @@ func (s *testSuite) TestExplain(c *C) { { "select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1", []string{ - "TableScan_8", "TableScan_10", "HashLeftJoin_7", + "TableScan_7", "TableScan_10", "HashLeftJoin_9", }, []string{ - "HashLeftJoin_7", "HashLeftJoin_7", "", + "HashLeftJoin_9", "HashLeftJoin_9", "", }, []string{ `{ @@ -213,7 +213,7 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "TableScan_8", + "leftPlan": "TableScan_7", "rightPlan": "TableScan_10" }`, }, @@ -282,10 +282,10 @@ func (s *testSuite) TestExplain(c *C) { { "select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1", []string{ - "TableScan_11", "TableScan_12", "HashAgg_13", "HashLeftJoin_10", "Projection_9", + "TableScan_16", "TableScan_10", "HashAgg_11", "HashLeftJoin_15", "Projection_9", }, []string{ - "HashLeftJoin_10", "HashAgg_13", "HashLeftJoin_10", "Projection_9", "", + "HashLeftJoin_15", "HashAgg_11", "HashLeftJoin_15", "Projection_9", "", }, []string{`{ "db": "test", @@ -327,7 +327,7 @@ func (s *testSuite) TestExplain(c *C) { "GroupByItems": [ "[b.c2]" ], - "child": "TableScan_12" + "child": "TableScan_10" }`, `{ "eqCond": [ @@ -336,14 +336,14 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "TableScan_11", - "rightPlan": "HashAgg_13" + "leftPlan": "TableScan_16", + "rightPlan": "HashAgg_11" }`, `{ "exprs": [ "cast(join_agg_0)" ], - "child": "HashLeftJoin_10" + "child": "HashLeftJoin_15" }`, }, }, @@ -619,8 +619,8 @@ func (s *testSuite) TestExplain(c *C) { }, { "select s.c1 from t2 s left outer join t2 t on s.c2 = t.c2 limit 10", - []string{"TableScan_7", "Limit_8", "TableScan_9", "HashLeftJoin_6", "Limit_10", "Projection_4"}, - []string{"Limit_8", "HashLeftJoin_6", "HashLeftJoin_6", "Limit_10", "Projection_4", ""}, + 
[]string{"TableScan_6", "Limit_7", "TableScan_10", "HashLeftJoin_9", "Limit_11", "Projection_4"}, + []string{"Limit_7", "HashLeftJoin_9", "HashLeftJoin_9", "Limit_11", "Projection_4", ""}, []string{ `{ "db": "test", @@ -637,7 +637,7 @@ func (s *testSuite) TestExplain(c *C) { `{ "limit": 10, "offset": 0, - "child": "TableScan_7" + "child": "TableScan_6" }`, `{ "db": "test", @@ -658,19 +658,19 @@ func (s *testSuite) TestExplain(c *C) { "leftCond": null, "rightCond": null, "otherCond": null, - "leftPlan": "Limit_8", - "rightPlan": "TableScan_9" + "leftPlan": "Limit_7", + "rightPlan": "TableScan_10" }`, `{ "limit": 10, "offset": 0, - "child": "HashLeftJoin_6" + "child": "HashLeftJoin_9" }`, `{ "exprs": [ "s.c1" ], - "child": "Limit_10" + "child": "Limit_11" }`, }, }, diff --git a/plan/logical_plan_test.go b/plan/logical_plan_test.go index 52a3cdc265120..4d4b55a236af7 100644 --- a/plan/logical_plan_test.go +++ b/plan/logical_plan_test.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/terror" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/testleak" @@ -362,10 +363,37 @@ func mockContext() context.Context { client: &mockClient{}, } do := &domain.Domain{} + do.CreateStatsHandle(ctx) sessionctx.BindDomain(ctx, do) return ctx } +func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table { + statsTbl := &statistics.Table{ + TableID: tbl.ID, + Count: rowCount, + Columns: make(map[int64]*statistics.Column, len(tbl.Columns)), + Indices: make(map[int64]*statistics.Index, len(tbl.Indices)), + } + return statsTbl +} + +// mockStatsHistogram will create a statistics.Histogram, of which the data is uniform distribution. 
+func mockStatsHistogram(id int64, values []types.Datum, repeat int64) *statistics.Histogram { + ndv := len(values) + histogram := &statistics.Histogram{ + ID: id, + NDV: int64(ndv), + Buckets: make([]statistics.Bucket, ndv), + } + for i := 0; i < ndv; i++ { + histogram.Buckets[i].Repeats = repeat + histogram.Buckets[i].Count = repeat * int64(i+1) + histogram.Buckets[i].Value = values[i] + } + return histogram +} + func (s *testPlanSuite) TestPredicatePushDown(c *C) { defer testleak.AfterTest(c)() tests := []struct { diff --git a/plan/match_property.go b/plan/match_property.go index 5fbec550679b1..8c66946068955 100644 --- a/plan/match_property.go +++ b/plan/match_property.go @@ -31,7 +31,11 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy newTS := ts.Copy().(*PhysicalTableScan) newTS.addLimit(prop.limit) p := newTS.tryToAddUnionScan(newTS) - return enforceProperty(prop, &physicalPlanInfo{p: p, cost: cost, count: infos[0].count}) + return enforceProperty(prop, &physicalPlanInfo{ + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } if len(prop.props) == 1 && ts.pkCol != nil && ts.pkCol.Equal(prop.props[0].col, ts.ctx) { sortedTS := ts.Copy().(*PhysicalTableScan) @@ -44,9 +48,10 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy } p := sortedTS.tryToAddUnionScan(sortedTS) return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } if prop.limit != nil { sortedTS := ts.Copy().(*PhysicalTableScan) @@ -59,9 +64,10 @@ func (ts *PhysicalTableScan) matchProperty(prop *requiredProperty, infos ...*phy sortedTS.KeepOrder = true p := sortedTS.tryToAddUnionScan(sortedTS) return enforceProperty(prop, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } return &physicalPlanInfo{p: nil, cost: math.MaxFloat64, count: infos[0].count} } @@ -109,7 +115,11 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy } if len(prop.props) == 0 { p := is.tryToAddUnionScan(is) - return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{p: p, cost: cost, count: infos[0].count}) + return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } matchedIdx := 0 matchedList := make([]bool, len(prop.props)) @@ -141,9 +151,10 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy sortedIS.addLimit(prop.limit) p := sortedIS.tryToAddUnionScan(sortedIS) return enforceProperty(&requiredProperty{limit: prop.limit}, &physicalPlanInfo{ - p: p, - cost: sortedCost, - count: infos[0].count}) + p: p, + cost: sortedCost, + count: infos[0].count, + reliable: infos[0].reliable}) } } if prop.limit != nil { @@ -157,9 +168,10 @@ func (is *PhysicalIndexScan) matchProperty(prop *requiredProperty, infos ...*phy sortedIS.OutOfOrder = true p := sortedIS.tryToAddUnionScan(sortedIS) return enforceProperty(prop, &physicalPlanInfo{ - p: p, - cost: cost, - count: infos[0].count}) + p: p, + cost: cost, + count: infos[0].count, + reliable: infos[0].reliable}) } return &physicalPlanInfo{p: nil, cost: math.MaxFloat64, count: infos[0].count} } @@ -223,13 +235,15 @@ func (p *Union) matchProperty(_ *requiredProperty, childPlanInfo ...*physicalPla children 
:= make([]Plan, 0, len(childPlanInfo)) cost := float64(0) count := float64(0) + reliable := true for _, res := range childPlanInfo { children = append(children, res.p) cost += res.cost count += res.count + reliable = reliable && res.reliable } np.SetChildren(children...) - return &physicalPlanInfo{p: np, cost: cost, count: count} + return &physicalPlanInfo{p: np, cost: cost, count: count, reliable: reliable} } // matchProperty implements PhysicalPlan matchProperty interface. @@ -254,6 +268,7 @@ func (p *PhysicalUnionScan) matchProperty(prop *requiredProperty, childPlanInfo res.p = np if limit != nil { res = addPlanToResponse(limit, res) + res.reliable = true } return res } @@ -262,7 +277,7 @@ func (p *PhysicalUnionScan) matchProperty(prop *requiredProperty, childPlanInfo func (p *Projection) matchProperty(_ *requiredProperty, childPlanInfo ...*physicalPlanInfo) *physicalPlanInfo { np := p.Copy() np.SetChildren(childPlanInfo[0].p) - return &physicalPlanInfo{p: np, cost: childPlanInfo[0].cost} + return &physicalPlanInfo{p: np, cost: childPlanInfo[0].cost, reliable: childPlanInfo[0].reliable} } // matchProperty implements PhysicalPlan matchProperty interface. diff --git a/plan/physical_plan_builder.go b/plan/physical_plan_builder.go index dc2f27d484c4f..3cf9b7cabef4a 100644 --- a/plan/physical_plan_builder.go +++ b/plan/physical_plan_builder.go @@ -102,7 +102,7 @@ func (p *DataSource) convert2TableScan(prop *requiredProperty) (*physicalPlanInf if ts.TableConditionPBExpr != nil { rowCount = rowCount * selectionFactor } - return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount}), nil + return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount, reliable: !statsTbl.Pseudo}), nil } func (p *DataSource) convert2IndexScan(prop *requiredProperty, index *model.IndexInfo) (*physicalPlanInfo, error) { @@ -164,7 +164,7 @@ func (p *DataSource) convert2IndexScan(prop *requiredProperty, index *model.Inde is.Ranges = rb.buildIndexRanges(fullRange, types.NewFieldType(mysql.TypeNull)) } is.DoubleRead = !isCoveringIndex(is.Columns, is.Index.Columns, is.Table.PKIsHandle) - return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount}), nil + return resultPlan.matchProperty(prop, &physicalPlanInfo{count: rowCount, reliable: !statsTbl.Pseudo}), nil } func isCoveringIndex(columns []*model.ColumnInfo, indexColumns []*model.IndexColumn, pkIsHandle bool) bool { @@ -279,7 +279,12 @@ func (p *DataSource) tryToConvert2DummyScan(prop *requiredProperty) (*physicalPl func addPlanToResponse(parent PhysicalPlan, info *physicalPlanInfo) *physicalPlanInfo { np := parent.Copy() np.SetChildren(info.p) - return &physicalPlanInfo{p: np, cost: info.cost, count: info.count} + ret := &physicalPlanInfo{p: np, cost: info.cost, count: info.count, reliable: info.reliable} + if _, ok := parent.(*MaxOneRow); ok { + ret.count = 1 + ret.reliable = true + } + return ret } // enforceProperty creates a *physicalPlanInfo that satisfies the required property by adding @@ -303,12 +308,14 @@ func enforceProperty(prop *requiredProperty, info *physicalPlanInfo) *physicalPl count := info.count if prop.limit != nil { count = float64(prop.limit.Offset + prop.limit.Count) + info.reliable = true } info.cost += sortCost(count) } else if prop.limit != nil { limit := Limit{Offset: prop.limit.Offset, Count: prop.limit.Count}.init(info.p.Allocator(), info.p.context()) limit.SetSchema(info.p.Schema()) info = addPlanToResponse(limit, info) + info.reliable = true } if prop.limit != nil && float64(prop.limit.Count) < 
info.count { info.count = float64(prop.limit.Count) @@ -602,6 +609,18 @@ func (p *LogicalJoin) buildSelectionWithConds(leftAsOuter bool) (*Selection, []* return selection, corCols } +// outerTableCouldINLJ will check the whether is forced to build index nested loop join or outer info is reliable +// and the count satisfies the condition. +func (p *LogicalJoin) outerTableCouldINLJ(outerInfo *physicalPlanInfo, leftAsOuter bool) bool { + var forced bool + if leftAsOuter { + forced = (p.preferINLJ&preferLeftAsOuter) > 0 && p.hasEqualConds() + } else { + forced = (p.preferINLJ&preferRightAsOuter) > 0 && p.hasEqualConds() + } + return forced || (outerInfo.reliable && outerInfo.count <= float64(p.ctx.GetSessionVars().MaxRowCountForINLJ)) +} + func (p *LogicalJoin) convert2IndexNestedLoopJoinLeft(prop *requiredProperty, innerJoin bool) (*physicalPlanInfo, error) { lChild := p.children[0].(LogicalPlan) switch x := p.children[1].(type) { @@ -638,6 +657,11 @@ func (p *LogicalJoin) convert2IndexNestedLoopJoinLeft(prop *requiredProperty, in if lInfo.p == nil { return nil, nil } + // If the outer table's row count is reliable and don't exceed the MaxRowCountForINLJ or we use hint to force + // choosing index nested loop join, we will continue building. Otherwise we just break and return nil. + if !p.outerTableCouldINLJ(lInfo, true) { + return nil, nil + } selection, corCols := p.buildSelectionWithConds(true) if selection == nil { return nil, nil @@ -709,6 +733,11 @@ func (p *LogicalJoin) convert2IndexNestedLoopJoinRight(prop *requiredProperty, i if rInfo.p == nil { return nil, nil } + // If the outer table's row count is reliable and don't exceed the MaxRowCountForINLJ or we use hint to force + // choosing index nested loop join, we will continue building. Otherwise we just break and return nil. 
+ if !p.outerTableCouldINLJ(rInfo, false) { + return nil, nil + } selection, corCols := p.buildSelectionWithConds(false) if selection == nil { return nil, nil @@ -936,15 +965,13 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla break } } - if (p.preferINLJ&preferLeftAsOuter) > 0 && p.hasEqualConds() { - nljInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, false) - if err != nil { - return nil, errors.Trace(err) - } - if nljInfo != nil { - info = nljInfo - break - } + nljInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, false) + if err != nil { + return nil, errors.Trace(err) + } + if nljInfo != nil { + info = nljInfo + break } // Otherwise fall into hash join info, err = p.convert2PhysicalPlanLeft(prop, false) @@ -961,15 +988,13 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla break } } - if (p.preferINLJ&preferRightAsOuter) > 0 && p.hasEqualConds() { - nljInfo, err := p.convert2IndexNestedLoopJoinRight(prop, false) - if err != nil { - return nil, errors.Trace(err) - } - if nljInfo != nil { - info = nljInfo - break - } + nljInfo, err := p.convert2IndexNestedLoopJoinRight(prop, false) + if err != nil { + return nil, errors.Trace(err) + } + if nljInfo != nil { + info = nljInfo + break } info, err = p.convert2PhysicalPlanRight(prop, false) if err != nil { @@ -984,28 +1009,22 @@ func (p *LogicalJoin) convert2PhysicalPlan(prop *requiredProperty) (*physicalPla } break } - if p.preferINLJ > 0 && p.hasEqualConds() { - if (p.preferINLJ & preferLeftAsOuter) > 0 { - lNLJInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, true) - if err != nil { - return nil, errors.Trace(err) - } - if lNLJInfo != nil { - info = lNLJInfo - } - } - if (p.preferINLJ & preferRightAsOuter) > 0 { - rNLJInfo, err := p.convert2IndexNestedLoopJoinRight(prop, true) - if err != nil { - return nil, errors.Trace(err) - } - if info == nil || (rNLJInfo != nil && info.cost > rNLJInfo.cost) { - info = rNLJInfo - } - } - if info != nil { - break - } + lNLJInfo, err := p.convert2IndexNestedLoopJoinLeft(prop, true) + if err != nil { + return nil, errors.Trace(err) + } + rNLJInfo, err := p.convert2IndexNestedLoopJoinRight(prop, true) + if err != nil { + return nil, errors.Trace(err) + } + if lNLJInfo != nil { + info = lNLJInfo + } + if info == nil || (rNLJInfo != nil && info.cost > rNLJInfo.cost) { + info = rNLJInfo + } + if info != nil { + break } // fall back to hash join lInfo, err := p.convert2PhysicalPlanLeft(prop, true) @@ -1314,9 +1333,10 @@ func (p *Selection) appendSelToInfo(info *physicalPlanInfo) *physicalPlanInfo { np := p.Copy().(*Selection) np.SetChildren(info.p) return &physicalPlanInfo{ - p: np, - cost: info.cost, - count: info.count * selectionFactor, + p: np, + cost: info.cost, + count: info.count * selectionFactor, + reliable: info.reliable, } } diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index 858999ccf367c..bbaf103776635 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -21,7 +21,9 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/variable" + "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/testleak" "github.com/pingcap/tidb/util/types" @@ -1147,3 +1149,83 @@ func (s *testPlanSuite) TestJoinAlgorithm(c *C) { c.Assert(ToString(pp), Equals, tt.ans, Commentf("for %s", tt.sql)) } } + +func (s *testPlanSuite) 
TestAutoJoinChosen(c *C) { + defer testleak.AfterTest(c)() + cases := []struct { + sql string + ans string + genStatsTbl bool + }{ + { + sql: "select * from (select * from t limit 0, 128) t1 join t t2 on t1.a = t2.a", + ans: "Apply{Table(t)->Limit->Table(t)->Selection}", + }, + { + sql: "select * from (select * from t limit 0, 129) t1 join t t2 on t1.a = t2.a", + ans: "RightHashJoin{Table(t)->Limit->Table(t)}(t1.a,t2.a)", + }, + { + sql: "select * from (select * from t limit 0, 10 union select * from t limit 10, 100) t1 join t t2 on t1.a = t2.a", + ans: "Apply{UnionAll{Table(t)->Limit->Projection->Table(t)->Limit->Projection}->HashAgg->Table(t)->Selection}", + }, + { + sql: "select * from (select * from t limit 0, 29 union all select * from t limit 0, 100) t1 join t t2 on t1.a = t2.a", + ans: "RightHashJoin{UnionAll{Table(t)->Limit->Table(t)->Limit}->Table(t)}(t1.a,t2.a)", + }, + { + sql: "select * from t t1 join t t2 on t1.f = t2.f and t1.a < 5", + ans: "Apply{Table(t)->Index(t.f)[]->Selection}", + genStatsTbl: true, + }, + { + sql: "select * from t t1 join t t2 on t1.f = t2.f and t1.a < 19", + ans: "RightHashJoin{Table(t)->Table(t)}(t1.f,t2.f)", + genStatsTbl: true, + }, + } + for _, ca := range cases { + comment := Commentf("for %s", ca.sql) + stmt, err := s.ParseOneStmt(ca.sql, "", "") + c.Assert(err, IsNil, comment) + ast.SetFlag(stmt) + + is, err := mockResolve(stmt) + c.Assert(err, IsNil) + + ctx := mockContext() + + if ca.genStatsTbl { + handle := sessionctx.GetDomain(ctx).StatsHandle() + tb, _ := is.TableByID(0) + tbl := tb.Meta() + // generate 40 distinct values for pk. + pkValues := make([]types.Datum, 40) + for i := 0; i < 40; i++ { + pkValues[i] = types.NewIntDatum(int64(i)) + } + // make the statistic col info for pk, every distinct value occurs 10 times. + pkStatsCol := &statistics.Column{Histogram: *mockStatsHistogram(1, pkValues, 10)} + // mock the statistic table and set the value of pk column. + statsTbl := mockStatsTable(tbl, 400) + statsTbl.Columns[1] = pkStatsCol + handle.UpdateTableStats([]*statistics.Table{statsTbl}, nil) + } + + builder := &planBuilder{ + allocator: new(idAllocator), + ctx: ctx, + colMapper: make(map[*ast.ColumnNameExpr]int), + is: is, + } + p := builder.build(stmt) + c.Assert(builder.err, IsNil) + pp, err := doOptimize(builder.optFlag, p.(LogicalPlan), builder.ctx, builder.allocator) + c.Assert(err, IsNil) + c.Assert(ToString(pp), Equals, ca.ans, Commentf("for %s", ca.sql)) + + if ca.genStatsTbl { + sessionctx.GetDomain(ctx).StatsHandle().Clear() + } + } +} diff --git a/plan/plan.go b/plan/plan.go index dbafd83cead86..8a3575f0ab4f5 100644 --- a/plan/plan.go +++ b/plan/plan.go @@ -131,6 +131,10 @@ type physicalPlanInfo struct { p PhysicalPlan cost float64 count float64 + + // If the count is calculated by pseudo table, it's not reliable. Otherwise it's reliable. + // But if we has limit or maxOneRow, the count is reliable. + reliable bool } // LogicalPlan is a tree of logical operators. 
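
To make the intent of the new `reliable` flag concrete: a scan's row count is reliable only when it comes from real (non-pseudo) statistics, a Limit or MaxOneRow on top makes the count reliable regardless of the child, and a Union is reliable only if every child is. The following is a minimal sketch of those rules, not taken from the patch itself; all names here are hypothetical.

package sketch

// planInfo mirrors the cost/count/reliable triple that physicalPlanInfo now carries.
type planInfo struct {
	cost     float64
	count    float64
	reliable bool
}

// fromScan: a table or index scan's estimate is reliable only when the
// statistics table is not a pseudo table (reliable: !statsTbl.Pseudo).
func fromScan(rowCount float64, pseudoStats bool) planInfo {
	return planInfo{count: rowCount, reliable: !pseudoStats}
}

// applyLimit: a Limit caps the row count, so the result is reliable no matter
// what the child was; MaxOneRow behaves the same with an implicit limit of 1.
func applyLimit(child planInfo, limitCount float64) planInfo {
	if limitCount < child.count {
		child.count = limitCount
	}
	child.reliable = true
	return child
}

// union: counts and costs add up, and the result is reliable only if every
// child's count is reliable.
func union(children ...planInfo) planInfo {
	res := planInfo{reliable: true}
	for _, c := range children {
		res.cost += c.cost
		res.count += c.count
		res.reliable = res.reliable && c.reliable
	}
	return res
}

Operators that only reshape or filter rows (Projection, Selection) simply pass the flag through; Selection additionally scales the count by the selection factor, as appendSelToInfo does in the patch.
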
diff --git a/session.go b/session.go index 886e1ef9fe692..39d371b1b2065 100644 --- a/session.go +++ b/session.go @@ -981,6 +981,7 @@ const loadCommonGlobalVarsSQL = "select * from mysql.global_variables where vari variable.TiDBIndexLookupSize + quoteCommaQuote + variable.TiDBIndexLookupConcurrency + quoteCommaQuote + variable.TiDBIndexSerialScanConcurrency + quoteCommaQuote + + variable.TiDBMaxRowCountForINLJ + quoteCommaQuote + variable.TiDBDistSQLScanConcurrency + "')" // LoadCommonGlobalVariableIfNeeded loads and applies commonly used global variables for the session. diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 1d766358222aa..25c78ddfefab3 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -197,6 +197,9 @@ type SessionVars struct { // Should we split insert data into multiple batches. BatchInsert bool + + // Max row count that the outer table of index nested loop join could be without force hint. + MaxRowCountForINLJ int } // NewSessionVars creates a session vars object. @@ -217,6 +220,7 @@ func NewSessionVars() *SessionVars { IndexLookupConcurrency: DefIndexLookupConcurrency, IndexSerialScanConcurrency: DefIndexSerialScanConcurrency, DistSQLScanConcurrency: DefDistSQLScanConcurrency, + MaxRowCountForINLJ: DefMaxRowCountForINLJ, } } diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 0fc1d87659476..e3ed5436b8f08 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -600,6 +600,7 @@ var defaultSysVars = []*SysVar{ {ScopeGlobal | ScopeSession, TiDBIndexLookupSize, strconv.Itoa(DefIndexLookupSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupConcurrency, strconv.Itoa(DefIndexLookupConcurrency)}, {ScopeGlobal | ScopeSession, TiDBIndexSerialScanConcurrency, strconv.Itoa(DefIndexSerialScanConcurrency)}, + {ScopeGlobal | ScopeSession, TiDBMaxRowCountForINLJ, strconv.Itoa(DefMaxRowCountForINLJ)}, {ScopeGlobal | ScopeSession, TiDBSkipDDLWait, boolToIntStr(DefSkipDDLWait)}, {ScopeGlobal | ScopeSession, TiDBSkipUTF8Check, boolToIntStr(DefSkipUTF8Check)}, {ScopeSession, TiDBBatchInsert, boolToIntStr(DefBatchInsert)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index eeb4b80541620..5e94dc57bd260 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -87,6 +87,11 @@ const ( // tidb_batch_insert is used to enable/disable auto-split insert data. If set this option on, insert executor will automatically // insert data into multiple batches and use a single txn for each batch. This will be helpful when inserting large data. TiDBBatchInsert = "tidb_batch_insert" + + // tidb_max_row_count_for_inlj is used when do index nested loop join. + // It controls the max row count of outer table when do index nested loop join without hint. + // After the row count of the inner table is accurate, this variable will be removed. + TiDBMaxRowCountForINLJ = "tidb_max_row_count_for_inlj" ) // Default TiDB system variable values. 
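
How the new session variable is consumed: the planner picks an index nested loop join automatically only when the outer side's estimated row count is both reliable and no larger than tidb_max_row_count_for_inlj (default 128); an INLJ hint with equal conditions bypasses the check. Below is a condensed sketch of the predicate implemented by (*LogicalJoin).outerTableCouldINLJ, with made-up parameter names, not the real function.

// couldChooseINLJ sketches only the gating logic of outerTableCouldINLJ.
func couldChooseINLJ(forcedByHint, outerReliable bool, outerCount float64, maxRowCountForINLJ int) bool {
	// A hint always wins; otherwise require a trustworthy and small outer side.
	return forcedByHint || (outerReliable && outerCount <= float64(maxRowCountForINLJ))
}

Because the variable is registered with ScopeGlobal | ScopeSession, the threshold can be tuned at runtime, for example with `set @@tidb_max_row_count_for_inlj = 512` in a session (value chosen only for illustration).
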
@@ -96,6 +101,7 @@ const ( DefIndexLookupSize = 20000 DefDistSQLScanConcurrency = 10 DefBuildStatsConcurrency = 4 + DefMaxRowCountForINLJ = 128 DefSkipDDLWait = false DefSkipUTF8Check = false DefOptAggPushDown = true diff --git a/sessionctx/varsutil/varsutil.go b/sessionctx/varsutil/varsutil.go index 5d92bc59dbc41..5c035f7f2e307 100644 --- a/sessionctx/varsutil/varsutil.go +++ b/sessionctx/varsutil/varsutil.go @@ -134,6 +134,8 @@ func SetSessionSystemVar(vars *variable.SessionVars, name string, value types.Da vars.IndexSerialScanConcurrency = tidbOptPositiveInt(sVal, variable.DefIndexSerialScanConcurrency) case variable.TiDBBatchInsert: vars.BatchInsert = tidbOptOn(sVal) + case variable.TiDBMaxRowCountForINLJ: + vars.MaxRowCountForINLJ = tidbOptPositiveInt(sVal, variable.DefMaxRowCountForINLJ) } vars.Systems[name] = sVal return nil diff --git a/sessionctx/varsutil/varsutil_test.go b/sessionctx/varsutil/varsutil_test.go index a40e417f60d1e..375dea7ba11c3 100644 --- a/sessionctx/varsutil/varsutil_test.go +++ b/sessionctx/varsutil/varsutil_test.go @@ -147,6 +147,11 @@ func (s *testVarsutilSuite) TestVarsutil(c *C) { c.Assert(v.BatchInsert, IsFalse) SetSessionSystemVar(v, variable.TiDBBatchInsert, types.NewStringDatum("1")) c.Assert(v.BatchInsert, IsTrue) + + //Test case for tidb_max_row_count_for_inlj. + c.Assert(v.MaxRowCountForINLJ, Equals, 128) + SetSessionSystemVar(v, variable.TiDBMaxRowCountForINLJ, types.NewStringDatum("127")) + c.Assert(v.MaxRowCountForINLJ, Equals, 127) } type mockGlobalAccessor struct { diff --git a/statistics/builder.go b/statistics/builder.go index cc13e05583afc..d2178d1b742fe 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -35,7 +35,7 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R hg := &Histogram{ ID: id, NDV: 0, - Buckets: make([]bucket, 1, numBuckets), + Buckets: make([]Bucket, 1, numBuckets), } var valuesPerBucket, lastNumber, bucketIdx int64 = 1, 0, 0 count := int64(0) @@ -95,7 +95,7 @@ func build4SortedColumn(ctx context.Context, numBuckets, id int64, records ast.R } else { lastNumber = hg.Buckets[bucketIdx].Count bucketIdx++ - hg.Buckets = append(hg.Buckets, bucket{ + hg.Buckets = append(hg.Buckets, Bucket{ Count: lastNumber + 1, Value: data, Repeats: 1, @@ -123,7 +123,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int hg := &Histogram{ ID: id, NDV: ndv, - Buckets: make([]bucket, 1, numBuckets), + Buckets: make([]Bucket, 1, numBuckets), } valuesPerBucket := float64(count)/float64(numBuckets) + 1 @@ -160,7 +160,7 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, ndv int64, count int lastCount = hg.Buckets[bucketIdx].Count // The bucket is full, store the item in the next bucket. bucketIdx++ - hg.Buckets = append(hg.Buckets, bucket{ + hg.Buckets = append(hg.Buckets, Bucket{ Count: int64(totalCount), Value: samples[i], Repeats: int64(ndvFactor), diff --git a/statistics/histogram.go b/statistics/histogram.go index 907d0fe3b383e..c33f399b9c9c4 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -35,10 +35,10 @@ type Histogram struct { // LastUpdateVersion is the version that this histogram updated last time. LastUpdateVersion uint64 - Buckets []bucket + Buckets []Bucket } -// bucket is an element of histogram. +// Bucket is an element of histogram. // // A bucket count is the number of items stored in all previous buckets and the current bucket. // bucket numbers are always in increasing order. 
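
The cumulative-count convention described in this comment is exactly what the new mockStatsHistogram test helper relies on: for a uniform histogram with ndv distinct values each repeated `repeat` times, Buckets[i].Count is repeat*(i+1), so the last bucket's Count equals the total row count (40 values * 10 repeats = 400 in TestAutoJoinChosen). A small sketch, assuming the statistics.Bucket type exported by this patch:

package sketch

import "github.com/pingcap/tidb/statistics"

// uniformBuckets builds buckets for a uniform distribution; Count is
// cumulative: rows in this bucket plus all earlier buckets.
func uniformBuckets(ndv int, repeat int64) []statistics.Bucket {
	buckets := make([]statistics.Bucket, ndv)
	for i := 0; i < ndv; i++ {
		buckets[i].Repeats = repeat
		buckets[i].Count = repeat * int64(i+1)
	}
	return buckets
}

The Value datums are left unset in this sketch; the real helper fills them from the supplied slice of values.
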
@@ -47,7 +47,7 @@ type Histogram struct { // // Repeat is the number of repeats of the bucket value, it can be used to find popular values. // -type bucket struct { +type Bucket struct { Count int64 Value types.Datum Repeats int64 @@ -109,7 +109,7 @@ func (h *Handle) histogramFromStorage(tableID int64, colID int64, tp *types.Fiel ID: colID, NDV: distinct, LastUpdateVersion: ver, - Buckets: make([]bucket, bucketSize), + Buckets: make([]Bucket, bucketSize), } for i := 0; i < bucketSize; i++ { bucketID := rows[i].Data[0].GetInt64() @@ -124,7 +124,7 @@ func (h *Handle) histogramFromStorage(tableID int64, colID int64, tp *types.Fiel return nil, errors.Trace(err) } } - hg.Buckets[bucketID] = bucket{ + hg.Buckets[bucketID] = Bucket{ Count: count, Value: value, Repeats: repeats, @@ -254,7 +254,7 @@ func (hg *Histogram) lowerBound(sc *variable.StatementContext, target types.Datu func (hg *Histogram) mergeBuckets(bucketIdx int64) { curBuck := 0 for i := int64(0); i+1 <= bucketIdx; i += 2 { - hg.Buckets[curBuck] = bucket{ + hg.Buckets[curBuck] = Bucket{ Count: hg.Buckets[i+1].Count, Value: hg.Buckets[i+1].Value, Repeats: hg.Buckets[i+1].Repeats, diff --git a/statistics/statscache.go b/statistics/statscache.go index 4d2ed9401ceac..0151138500b09 100644 --- a/statistics/statscache.go +++ b/statistics/statscache.go @@ -98,7 +98,7 @@ func (h *Handle) Update(is infoschema.InfoSchema) error { tables = append(tables, tbl) h.LastVersion = version } - h.updateTableStats(tables, deletedTableIDs) + h.UpdateTableStats(tables, deletedTableIDs) return nil } @@ -120,11 +120,11 @@ func (h *Handle) copyFromOldCache() statsCache { return newCache } -// updateTableStats updates the statistics table cache using copy on write. -func (h *Handle) updateTableStats(tables []*Table, deletedIDs []int64) { +// UpdateTableStats updates the statistics table cache using copy on write. +func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64) { newCache := h.copyFromOldCache() for _, tbl := range tables { - id := tbl.tableID + id := tbl.TableID newCache[id] = tbl } for _, id := range deletedIDs { diff --git a/statistics/table.go b/statistics/table.go index 03cdd1c9c5463..254fec305ea0e 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -41,7 +41,7 @@ const ( // Table represents statistics for a table. type Table struct { - tableID int64 + TableID int64 Columns map[int64]*Column Indices map[int64]*Index Count int64 // Total row count in a table. @@ -50,7 +50,7 @@ type Table struct { func (t *Table) copy() *Table { nt := &Table{ - tableID: t.tableID, + TableID: t.TableID, Count: t.Count, Pseudo: t.Pseudo, Columns: make(map[int64]*Column), @@ -77,7 +77,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, count int64) // We copy it before writing to avoid race. table = table.copy() } - table.tableID = tableInfo.ID + table.TableID = tableInfo.ID table.Count = count selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version from mysql.stats_histograms where table_id = %d", tableInfo.ID) @@ -144,7 +144,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, count int64) // String implements Stringer interface. 
func (t *Table) String() string { strs := make([]string, 0, len(t.Columns)+1) - strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.tableID, t.Count)) + strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.TableID, t.Count)) for _, col := range t.Columns { strs = append(strs, col.String()) } @@ -215,7 +215,7 @@ func (t *Table) GetRowCountByIndexRanges(sc *variable.StatementContext, idxID in // PseudoTable creates a pseudo table statistics when statistic can not be found in KV store. func PseudoTable(tableID int64) *Table { - t := &Table{tableID: tableID, Pseudo: true} + t := &Table{TableID: tableID, Pseudo: true} t.Count = pseudoRowCount t.Columns = make(map[int64]*Column) t.Indices = make(map[int64]*Index)
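
Taken together, the exported statistics pieces (Table.TableID, Bucket, Handle.UpdateTableStats, plus Domain.CreateStatsHandle for tests) let a test inject real-looking statistics and exercise the reliable-count path end to end. The following is a condensed version of the wiring used by TestAutoJoinChosen; the function name and variables are illustrative, not part of the patch.

package sketch

import (
	"github.com/pingcap/tidb/context"
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/sessionctx"
	"github.com/pingcap/tidb/statistics"
	"github.com/pingcap/tidb/util/types"
)

// installMockStats condenses the statistics setup from TestAutoJoinChosen:
// 40 distinct primary key values, each repeated 10 times, for 400 rows total.
func installMockStats(ctx context.Context, tblInfo *model.TableInfo) {
	hist := &statistics.Histogram{ID: 1, NDV: 40, Buckets: make([]statistics.Bucket, 40)}
	for i := 0; i < 40; i++ {
		hist.Buckets[i].Value = types.NewIntDatum(int64(i))
		hist.Buckets[i].Repeats = 10
		hist.Buckets[i].Count = 10 * int64(i+1) // cumulative count
	}

	statsTbl := &statistics.Table{
		TableID: tblInfo.ID,
		Count:   400,
		Columns: map[int64]*statistics.Column{1: {Histogram: *hist}},
		Indices: make(map[int64]*statistics.Index),
	}

	// UpdateTableStats (exported by this patch) swaps the table into the
	// stats cache with copy-on-write, so the planner sees non-pseudo stats.
	sessionctx.GetDomain(ctx).StatsHandle().UpdateTableStats([]*statistics.Table{statsTbl}, nil)
}

With these statistics installed the outer-side estimate becomes reliable, so in the test `t1.a < 5` (roughly 5*10 = 50 rows) stays under the default threshold of 128 and an index nested loop join is chosen, while `t1.a < 19` (roughly 190 rows) exceeds it and falls back to a hash join.
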