Skip to content

Commit

Permalink
plan, statistics: extract histColl struct (pingcap#6860)
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored and zz-jason committed Jul 13, 2018
1 parent cf8a412 commit 34d8fd8
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 102 deletions.
7 changes: 4 additions & 3 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1790,19 +1790,20 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
schema := expression.NewSchema(make([]*expression.Column, 0, len(columns))...)
for i, col := range columns {
ds.Columns = append(ds.Columns, col.ToInfo())
schema.Append(&expression.Column{
newCol := &expression.Column{
FromID: ds.id,
Position: i,
DBName: dbName,
TblName: tableInfo.Name,
ColName: col.Name,
ID: col.ID,
RetType: &col.FieldType,
})
}

if tableInfo.PKIsHandle && mysql.HasPriKeyFlag(col.Flag) {
handleCol = schema.Columns[i]
handleCol = newCol
}
schema.Append(newCol)
}
ds.SetSchema(schema)

Expand Down
10 changes: 7 additions & 3 deletions statistics/boostrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,17 @@ func initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chun
continue
}
tableInfo := table.Meta()
tbl := &Table{
TableID: tableID,
newHistColl := HistColl{
TableID: tableInfo.ID,
HaveTblID: true,
Count: row.GetInt64(3),
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64, len(tableInfo.Columns)),
colName2ID: make(map[string]int64, len(tableInfo.Columns)),
Count: row.GetInt64(3),
}
tbl := &Table{
HistColl: newHistColl,
ModifyCount: row.GetInt64(2),
Version: row.GetUint64(0),
}
Expand Down
12 changes: 8 additions & 4 deletions statistics/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,15 @@ func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable)

// LoadStatsFromJSONToTable loads statistics from a JSONTable and returns the resulting statistics Table.
func (h *Handle) LoadStatsFromJSONToTable(tableInfo *model.TableInfo, jsonTbl *JSONTable) (*Table, error) {
newHistColl := HistColl{
TableID: tableInfo.ID,
HaveTblID: true,
Count: jsonTbl.Count,
Columns: make(map[int64]*Column, len(jsonTbl.Columns)),
Indices: make(map[int64]*Index, len(jsonTbl.Indices)),
}
tbl := &Table{
TableID: tableInfo.ID,
Columns: make(map[int64]*Column, len(jsonTbl.Columns)),
Indices: make(map[int64]*Index, len(jsonTbl.Indices)),
Count: jsonTbl.Count,
HistColl: newHistColl,
ModifyCount: jsonTbl.ModifyCount,
}
for id, jsonIdx := range jsonTbl.Indices {
Expand Down
4 changes: 2 additions & 2 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,10 +684,10 @@ func (q *QueryFeedback) recalculateExpectCount(h *Handle) error {
}
isIndex := q.hist.tp.Tp == mysql.TypeBlob
id := q.hist.ID
if isIndex && (t.Indices[id] == nil || t.Indices[id].IsPseudo() == false) {
if isIndex && (t.Indices[id] == nil || t.Indices[id].NotAccurate() == false) {
return nil
}
if !isIndex && (t.Columns[id] == nil || t.Columns[id].IsPseudo() == false) {
if !isIndex && (t.Columns[id] == nil || t.Columns[id].NotAccurate() == false) {
return nil
}

Expand Down
4 changes: 2 additions & 2 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,9 +676,9 @@ type ErrorRate struct {
// then the column is not pseudo.
const MaxErrorRate = 0.25

// IsPseudo is true when the total of query is zero or the average error
// NotAccurate is true when the total number of queries is zero or the average error
// rate is greater than MaxErrorRate.
func (e *ErrorRate) IsPseudo() bool {
func (e *ErrorRate) NotAccurate() bool {
if e.QueryTotal == 0 {
return true
}
Expand Down
49 changes: 22 additions & 27 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ func getConstantColumnID(e []expression.Expression) int64 {
return unknownColumnID
}

func pseudoSelectivity(t *Table, exprs []expression.Expression) float64 {
func pseudoSelectivity(coll *HistColl, exprs []expression.Expression) float64 {
minFactor := selectionFactor
uniqueCol := make(map[string]bool)
colExists := make(map[string]bool)
for _, expr := range exprs {
fun, ok := expr.(*expression.ScalarFunction)
if !ok {
Expand All @@ -80,41 +80,36 @@ func pseudoSelectivity(t *Table, exprs []expression.Expression) float64 {
switch fun.FuncName.L {
case ast.EQ, ast.NullEQ, ast.In:
minFactor = math.Min(minFactor, 1.0/pseudoEqualRate)
col, ok := t.Columns[colID]
col, ok := coll.Columns[colID]
if !ok {
continue
}
colExists[col.Info.Name.L] = true
if mysql.HasUniKeyFlag(col.Info.Flag) {
uniqueCol[col.Info.Name.L] = true
}
if mysql.HasPriKeyFlag(col.Info.Flag) {
if t.PKIsHandle {
return 1.0 / float64(t.Count)
}
uniqueCol[col.Info.Name.L] = true
return 1.0 / float64(coll.Count)
}
case ast.GE, ast.GT, ast.LE, ast.LT:
minFactor = math.Min(minFactor, 1.0/pseudoLessRate)
// FIXME: To resolve the between case.
}
}
if len(uniqueCol) == 0 {
if len(colExists) == 0 {
return minFactor
}
// use the unique key info
for _, idx := range t.Indices {
for _, idx := range coll.Indices {
if !idx.Info.Unique {
continue
}
unique := true
for _, col := range idx.Info.Columns {
if !uniqueCol[col.Name.L] {
if !colExists[col.Name.L] {
unique = false
break
}
}
if unique {
return 1.0 / float64(t.Count)
return 1.0 / float64(coll.Count)
}
}
return minFactor
Expand Down Expand Up @@ -145,15 +140,15 @@ func isColEqCorCol(filter expression.Expression) *expression.Column {
// Also, exprs must be in CNF for now; in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` must hold when you call this.
// TODO: support expressions that the top layer is a DNF.
// Currently the time complexity is O(n^2).
func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error) {
func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, error) {
// If table's count is zero or conditions are empty, we should return 100% selectivity.
if t.Count == 0 || len(exprs) == 0 {
if coll.Count == 0 || len(exprs) == 0 {
return 1, nil
}
// TODO: If len(exprs) is bigger than 63, we could use bitset structure to replace the int64.
// This will simplify some code and speed up if we use this rather than a boolean slice.
if len(exprs) > 63 || (len(t.Columns) == 0 && len(t.Indices) == 0) {
return pseudoSelectivity(t, exprs), nil
if len(exprs) > 63 || (len(coll.Columns) == 0 && len(coll.Indices) == 0) {
return pseudoSelectivity(coll, exprs), nil
}
ret := 1.0
var sets []*exprSet
Expand All @@ -163,8 +158,8 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio

// Deal with the correlated column.
for _, expr := range exprs {
if c := isColEqCorCol(expr); c != nil && !t.ColumnIsInvalid(sc, c.ID) {
colHist := t.Columns[c.ID]
if c := isColEqCorCol(expr); c != nil && !coll.ColumnIsInvalid(sc, c.ID) {
colHist := coll.Columns[c.ID]
if colHist.NDV > 0 {
ret *= 1 / float64(colHist.NDV)
}
Expand All @@ -173,9 +168,9 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
}
}

extractedCols := make([]*expression.Column, 0, len(t.Columns))
extractedCols := make([]*expression.Column, 0, len(coll.Columns))
extractedCols = expression.ExtractColumnsFromExpressions(extractedCols, remainedExprs, nil)
for _, colInfo := range t.Columns {
for _, colInfo := range coll.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
if col != nil {
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
Expand All @@ -188,7 +183,7 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
}
}
}
for _, idxInfo := range t.Indices {
for _, idxInfo := range coll.Indices {
idxCols, lengths := expression.IndexInfo2Cols(extractedCols, idxInfo.Info)
if len(idxCols) > 0 {
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
Expand All @@ -209,16 +204,16 @@ func (t *Table) Selectivity(ctx sessionctx.Context, exprs []expression.Expressio
)
switch set.tp {
case pkType:
rowCount, err = t.GetRowCountByIntColumnRanges(sc, set.ID, set.ranges)
rowCount, err = coll.GetRowCountByIntColumnRanges(sc, set.ID, set.ranges)
case colType:
rowCount, err = t.GetRowCountByColumnRanges(sc, set.ID, set.ranges)
rowCount, err = coll.GetRowCountByColumnRanges(sc, set.ID, set.ranges)
case indexType:
rowCount, err = t.GetRowCountByIndexRanges(sc, set.ID, set.ranges)
rowCount, err = coll.GetRowCountByIndexRanges(sc, set.ID, set.ranges)
}
if err != nil {
return 0, errors.Trace(err)
}
ret *= rowCount / float64(t.Count)
ret *= rowCount / float64(coll.Count)
}
// If there are still conditions that cannot be calculated, we multiply the result by selectionFactor.
if mask > 0 {
Expand Down
12 changes: 8 additions & 4 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,15 @@ func mockStatsHistogram(id int64, values []types.Datum, repeat int64, tp *types.
}

func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table {
histColl := statistics.HistColl{
TableID: tbl.ID,
HaveTblID: true,
Count: rowCount,
Columns: make(map[int64]*statistics.Column, len(tbl.Columns)),
Indices: make(map[int64]*statistics.Index, len(tbl.Indices)),
}
statsTbl := &statistics.Table{
TableID: tbl.ID,
Count: rowCount,
Columns: make(map[int64]*statistics.Column, len(tbl.Columns)),
Indices: make(map[int64]*statistics.Index, len(tbl.Indices)),
HistColl: histColl,
}
return statsTbl
}
Expand Down
18 changes: 12 additions & 6 deletions statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,8 +455,10 @@ func (s *testStatisticsSuite) TestColumnRange(c *C) {
c.Check(err, IsNil)
col := &Column{Histogram: *hg, CMSketch: buildCMSketch(s.rc.(*recordSet).data), Info: &model.ColumnInfo{}}
tbl := &Table{
Count: int64(col.totalRowCount()),
Columns: make(map[int64]*Column),
HistColl: HistColl{
Count: int64(col.totalRowCount()),
Columns: make(map[int64]*Column),
},
}
ran := []*ranger.Range{{
LowVal: []types.Datum{{}},
Expand Down Expand Up @@ -522,8 +524,10 @@ func (s *testStatisticsSuite) TestIntColumnRanges(c *C) {
c.Check(rowCount, Equals, int64(100000))
col := &Column{Histogram: *hg, Info: &model.ColumnInfo{}}
tbl := &Table{
Count: int64(col.totalRowCount()),
Columns: make(map[int64]*Column),
HistColl: HistColl{
Count: int64(col.totalRowCount()),
Columns: make(map[int64]*Column),
},
}
ran := []*ranger.Range{{
LowVal: []types.Datum{types.NewIntDatum(math.MinInt64)},
Expand Down Expand Up @@ -612,8 +616,10 @@ func (s *testStatisticsSuite) TestIndexRanges(c *C) {
idxInfo := &model.IndexInfo{Columns: []*model.IndexColumn{{Offset: 0}}}
idx := &Index{Histogram: *hg, CMSketch: cms, Info: idxInfo}
tbl := &Table{
Count: int64(idx.totalRowCount()),
Indices: make(map[int64]*Index),
HistColl: HistColl{
Count: int64(idx.totalRowCount()),
Indices: make(map[int64]*Index),
},
}
ran := []*ranger.Range{{
LowVal: []types.Datum{types.MinNotNullDatum()},
Expand Down
Loading

0 comments on commit 34d8fd8

Please sign in to comment.