Skip to content

Commit

Permalink
statistics, ranger: add method to calculate the range and row count o…
Browse files Browse the repository at this point in the history
…f non pk column (pingcap#3234)
  • Loading branch information
winoros authored and shenli committed Jun 13, 2017
1 parent 439a47c commit 1ccc71a
Show file tree
Hide file tree
Showing 11 changed files with 351 additions and 15 deletions.
2 changes: 1 addition & 1 deletion executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ func (e *SelectionExec) initController() error {

switch x := e.children[0].(type) {
case *XSelectTableExec:
accessCondition, restCondtion := ranger.DetachTableScanConditions(newConds, x.tableInfo.GetPkName())
accessCondition, restCondtion := ranger.DetachColumnConditions(newConds, x.tableInfo.GetPkName())
x.where, _, _ = expression.ExpressionsToPB(sc, restCondtion, client)
ranges, err := ranger.BuildTableRange(accessCondition, sc)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion plan/decorrelate.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func (p *Selection) checkScanController() int {
}
}
if pkCol != nil {
access, _ := ranger.DetachTableScanConditions(corColConds, pkCol.ColName)
access, _ := ranger.DetachColumnConditions(corColConds, pkCol.ColName)
for _, cond := range access {
if sf, ok := cond.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.EQ {
return controlTableScan
Expand Down
2 changes: 1 addition & 1 deletion plan/new_physical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ func (p *DataSource) convertToTableScan(prop *requiredProp) (task task, err erro
for _, cond := range p.pushedDownConds {
conds = append(conds, cond.Clone())
}
ts.AccessCondition, ts.filterCondition = ranger.DetachTableScanConditions(conds, p.tableInfo.GetPkName())
ts.AccessCondition, ts.filterCondition = ranger.DetachColumnConditions(conds, p.tableInfo.GetPkName())
ts.Ranges, err = ranger.BuildTableRange(ts.AccessCondition, sc)
if err != nil {
return nil, errors.Trace(err)
Expand Down
2 changes: 1 addition & 1 deletion plan/physical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (p *DataSource) convert2TableScan(prop *requiredProperty) (*physicalPlanInf
for _, cond := range sel.Conditions {
conds = append(conds, cond.Clone())
}
ts.AccessCondition, newSel.Conditions = ranger.DetachTableScanConditions(conds, table.GetPkName())
ts.AccessCondition, newSel.Conditions = ranger.DetachColumnConditions(conds, table.GetPkName())
ts.TableConditionPBExpr, ts.tableFilterConditions, newSel.Conditions =
expression.ExpressionsToPB(sc, newSel.Conditions, client)
ranges, err := ranger.BuildTableRange(ts.AccessCondition, sc)
Expand Down
50 changes: 49 additions & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges [
if rg.LowVal == rg.HighVal {
cnt, err = c.equalRowCount(sc, types.NewIntDatum(rg.LowVal))
} else {
cnt, err = c.betweenRowCount(sc, types.NewIntDatum(rg.LowVal), types.NewIntDatum(rg.HighVal))
cnt, err = c.betweenRowCount(sc, types.NewIntDatum(rg.LowVal), types.NewIntDatum(rg.HighVal+1))
}
}
if err != nil {
Expand All @@ -376,6 +376,54 @@ func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges [
return rowCount, nil
}

// getColumnRowCount sums the estimated row counts of every given ColumnRange
// and clamps the total to the interval [0, c.totalRowCount()].
//
// A range whose two bounds compare equal is treated as a point: it contributes
// equalRowCount(Low) when both ends are inclusive and nothing otherwise. Any
// other range starts from betweenRowCount(Low, High) and is then adjusted by
// subtracting the estimate for Low when Low is excluded and adding the
// estimate for High when High is included.
//
// The first error encountered is traced and returned with a zero count.
func (c *Column) getColumnRowCount(sc *variable.StatementContext, ranges ...types.ColumnRange) (float64, error) {
	var total float64
	for _, r := range ranges {
		order, err := r.Low.CompareDatum(sc, r.High)
		if err != nil {
			return 0, errors.Trace(err)
		}

		// Point range: only a fully inclusive point can match any row.
		if order == 0 {
			if r.LowExcl || r.HighExcl {
				continue
			}
			pointCnt, err := c.equalRowCount(sc, r.Low)
			if err != nil {
				return 0, errors.Trace(err)
			}
			total += pointCnt
			continue
		}

		// Interval range: start from the between estimate, then correct for
		// the boundary flags.
		intervalCnt, err := c.betweenRowCount(sc, r.Low, r.High)
		if err != nil {
			return 0, errors.Trace(err)
		}
		if r.LowExcl {
			atLow, err := c.equalRowCount(sc, r.Low)
			if err != nil {
				return 0, errors.Trace(err)
			}
			intervalCnt -= atLow
		}
		if !r.HighExcl {
			atHigh, err := c.equalRowCount(sc, r.High)
			if err != nil {
				return 0, errors.Trace(err)
			}
			intervalCnt += atHigh
		}
		total += intervalCnt
	}

	// Keep the estimate within what the histogram can actually hold.
	switch {
	case total > c.totalRowCount():
		total = c.totalRowCount()
	case total < 0:
		total = 0
	}
	return total, nil
}

// Index represents an index histogram.
type Index struct {
Histogram
Expand Down
33 changes: 33 additions & 0 deletions statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
count, err = col.lessAndEqRowCount(sc, types.NewIntDatum(99999))
c.Check(err, IsNil)
c.Check(int(count), Equals, 100000)
count, err = col.lessAndEqRowCount(sc, types.Datum{})
c.Check(err, IsNil)
c.Check(int(count), Equals, 256)
count, err = col.greaterRowCount(sc, types.NewIntDatum(1001))
c.Check(err, IsNil)
c.Check(int(count), Equals, 99231)
Expand Down Expand Up @@ -212,3 +215,33 @@ func (s *testStatisticsSuite) TestPseudoTable(c *C) {
c.Assert(err, IsNil)
c.Assert(int(count), Equals, 250000)
}

// TestColumnRange verifies getColumnRowCount on a column histogram built from
// the suite's samples. It exercises three shapes of range: one whose bounds
// are both the zero-value Datum, the interval 1000..2000 with both ends
// excluded, and the same interval with both ends included.
func (s *testStatisticsSuite) TestColumnRange(c *C) {
	bucketCount := int64(256)
	_, ndv, _ := buildFMSketch(s.rc.(*recordSet).data, 1000)
	ctx := mock.NewContext()
	sc := ctx.GetSessionVars().StmtCtx

	hg, err := BuildColumn(ctx, bucketCount, 5, ndv, s.count, 0, s.samples)
	// Assert, not Check: hg is dereferenced on the next line, so the test must
	// stop here on failure instead of panicking on a nil histogram.
	c.Assert(err, IsNil)
	col := &Column{Histogram: *hg}

	// Both bounds are the zero-value Datum.
	ran := types.ColumnRange{
		Low:  types.Datum{},
		High: types.Datum{},
	}
	count, err := col.getColumnRowCount(sc, ran)
	c.Assert(err, IsNil)
	c.Assert(int(count), Equals, 10000)

	// Interval with both ends excluded.
	ran.Low = types.NewIntDatum(1000)
	ran.LowExcl = true
	ran.High = types.NewIntDatum(2000)
	ran.HighExcl = true
	count, err = col.getColumnRowCount(sc, ran)
	c.Assert(err, IsNil)
	c.Assert(int(count), Equals, 9964)

	// Same interval with both ends included.
	ran.LowExcl = false
	ran.HighExcl = false
	count, err = col.getColumnRowCount(sc, ran)
	c.Assert(err, IsNil)
	c.Assert(int(count), Equals, 9965)
}
24 changes: 24 additions & 0 deletions util/ranger/range.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,27 @@ func (r *Builder) BuildTableRanges(rangePoints []point) []types.IntColumnRange {
}
return tableRanges
}

// buildColumnRanges consumes points two at a time, as (start, end) pairs, and
// produces one ColumnRange per pair. Each point is first passed through
// convertPoint with tp. A pair whose start is not less than its end according
// to rangePointLess is dropped. If the comparison fails, the traced error is
// stored in r.err and nil is returned.
func (r *Builder) buildColumnRanges(points []point, tp *types.FieldType) []*types.ColumnRange {
	result := make([]*types.ColumnRange, 0, len(points)/2)
	for lo, hi := 0, 1; lo < len(points); lo, hi = lo+2, hi+2 {
		begin := r.convertPoint(points[lo], tp)
		end := r.convertPoint(points[hi], tp)

		ordered, err := rangePointLess(r.Sc, begin, end)
		if err != nil {
			r.err = errors.Trace(err)
			return nil
		}
		if ordered {
			result = append(result, &types.ColumnRange{
				Low:      begin.value,
				LowExcl:  begin.excl,
				High:     end.value,
				HighExcl: end.excl,
			})
		}
	}
	return result
}
159 changes: 159 additions & 0 deletions util/ranger/range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ import (
"github.com/pingcap/tidb/context"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/parser"
"github.com/pingcap/tidb/plan"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/testkit"
"github.com/pingcap/tidb/util/testleak"
"github.com/pingcap/tidb/util/types"
)

func TestT(t *testing.T) {
Expand Down Expand Up @@ -304,3 +306,160 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
}
}

// TestColumnRange checks the ranges that ranger.BuildColumnRange derives for
// column a of table t(a int, b int) from a variety of WHERE expressions.
//
// For every case the statement "select * from t where <expr>" is parsed,
// name-resolved and turned into a logical plan. The Selection's conditions are
// passed through expression.PushDownNot and then given to BuildColumnRange,
// and the formatted result is compared with the expected string.
func (s *testRangerSuite) TestColumnRange(c *C) {
	defer testleak.AfterTest(c)()
	store, err := newStoreWithBootstrap()
	// Check the error before scheduling Close, so a failed bootstrap is
	// reported as an assertion failure rather than by closing a bad store.
	c.Assert(err, IsNil)
	defer store.Close()
	testKit := testkit.NewTestKit(c, store)
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int, b int)")

	tests := []struct {
		exprStr   string
		resultStr string
	}{
		{exprStr: "a = 1 and b > 1", resultStr: "[[1,1]]"},
		{exprStr: "b > 1", resultStr: "[[<nil>,+inf]]"},
		{exprStr: "1 = a", resultStr: "[[1,1]]"},
		{exprStr: "a != 1", resultStr: "[[-inf,1) (1,+inf]]"},
		{exprStr: "1 != a", resultStr: "[[-inf,1) (1,+inf]]"},
		{exprStr: "a > 1", resultStr: "[(1,+inf]]"},
		{exprStr: "1 < a", resultStr: "[(1,+inf]]"},
		{exprStr: "a >= 1", resultStr: "[[1,+inf]]"},
		{exprStr: "1 <= a", resultStr: "[[1,+inf]]"},
		{exprStr: "a < 1", resultStr: "[[-inf,1)]"},
		{exprStr: "1 > a", resultStr: "[[-inf,1)]"},
		{exprStr: "a <= 1", resultStr: "[[-inf,1]]"},
		{exprStr: "1 >= a", resultStr: "[[-inf,1]]"},
		{exprStr: "(a)", resultStr: "[[-inf,0) (0,+inf]]"},
		{exprStr: "a in (1, 3, NULL, 2)", resultStr: "[[<nil>,<nil>] [1,1] [2,2] [3,3]]"},
		{exprStr: `a IN (8,8,81,45)`, resultStr: `[[8,8] [45,45] [81,81]]`},
		{exprStr: "a between 1 and 2", resultStr: "[[1,2]]"},
		{exprStr: "a not between 1 and 2", resultStr: "[[-inf,1) (2,+inf]]"},
		{exprStr: "a not between null and 0", resultStr: "[(0,+inf]]"},
		{exprStr: "a between 2 and 1", resultStr: "[]"},
		{exprStr: "a not between 2 and 1", resultStr: "[[-inf,+inf]]"},
		{exprStr: "a IS NULL", resultStr: "[[<nil>,<nil>]]"},
		{exprStr: "a IS NOT NULL", resultStr: "[[-inf,+inf]]"},
		{exprStr: "a IS TRUE", resultStr: "[[-inf,0) (0,+inf]]"},
		{exprStr: "a IS NOT TRUE", resultStr: "[[<nil>,<nil>] [0,0]]"},
		{exprStr: "a IS FALSE", resultStr: "[[0,0]]"},
		{exprStr: "a IS NOT FALSE", resultStr: "[[<nil>,0) (0,+inf]]"},
	}

	for _, tt := range tests {
		sql := "select * from t where " + tt.exprStr
		ctx := testKit.Se.(context.Context)
		stmts, err := tidb.Parse(ctx, sql)
		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt.exprStr))
		c.Assert(stmts, HasLen, 1)
		is := sessionctx.GetDomain(ctx).InfoSchema()
		// The resolve error used to be overwritten by the next call without
		// being looked at; fail here so the cause is reported directly.
		err = plan.ResolveName(stmts[0], is, ctx)
		c.Assert(err, IsNil, Commentf("error %v, for resolve name, expr %s", err, tt.exprStr))

		p, err := plan.BuildLogicalPlan(ctx, stmts[0], is)
		c.Assert(err, IsNil, Commentf("error %v, for build plan, expr %s", err, tt.exprStr))

		// Find the first Selection among the plan's direct children. The local
		// is deliberately not named "plan" so it does not shadow the package.
		var sel *plan.Selection
		for _, child := range p.Children() {
			if selection, ok := child.(*plan.Selection); ok {
				sel = selection
				break
			}
		}
		c.Assert(sel, NotNil, Commentf("expr:%v", tt.exprStr))
		ds, ok := sel.Children()[0].(*plan.DataSource)
		c.Assert(ok, IsTrue, Commentf("expr:%v", tt.exprStr))

		conds := make([]expression.Expression, 0, len(sel.Conditions))
		for _, cond := range sel.Conditions {
			conds = append(conds, expression.PushDownNot(cond, false, ctx))
		}
		// Columns[0] is column a, the first column declared in the CREATE TABLE above.
		result, _, _, err := ranger.BuildColumnRange(conds, ds.TableInfo().Columns[0].Name, new(variable.StatementContext), types.NewFieldType(mysql.TypeLonglong))
		c.Assert(err, IsNil)
		got := fmt.Sprintf("%s", result)
		c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s", tt.exprStr))
	}
}
Loading

0 comments on commit 1ccc71a

Please sign in to comment.