Skip to content

Commit

Permalink
statistics: estimate range result with increase factor. (pingcap#3432)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanfei1991 authored and XuHuaiyu committed Jun 9, 2017
1 parent ddc2ffc commit 1ac2be8
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 19 deletions.
6 changes: 2 additions & 4 deletions statistics/ddl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl := do.StatsHandle().GetTableStats(tableInfo.ID)
c.Assert(statsTbl.Pseudo, IsFalse)
sc := new(variable.StatementContext)
count, err := statsTbl.ColumnLessRowCount(sc, types.NewIntDatum(0), tableInfo.Columns[2])
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(0))
c.Assert(statsTbl.ColumnIsInvalid(tableInfo.Columns[2]), IsTrue)

testKit.MustExec("alter table t add column c3 int NOT NULL")
err = h.HandleDDLEvent(<-h.DDLEventCh())
Expand All @@ -142,7 +140,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl = do.StatsHandle().GetTableStats(tableInfo.ID)
c.Assert(statsTbl.Pseudo, IsFalse)
sc = new(variable.StatementContext)
count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(0), tableInfo.Columns[3])
count, err := statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(0), tableInfo.Columns[3])
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(2))
count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3])
Expand Down
12 changes: 11 additions & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (hg *Histogram) SaveToStorage(ctx context.Context, tableID int64, count int
if err != nil {
return errors.Trace(err)
}
replaceSQL = fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, null_count) values (%d, %d, %d, %d, %d)", tableID, isIndex, hg.ID, hg.NDV, hg.NullCount)
replaceSQL = fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count) values (%d, %d, %d, %d, %d, %d)", tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount)
_, err = exec.Execute(replaceSQL)
if err != nil {
return errors.Trace(err)
Expand Down Expand Up @@ -323,6 +323,16 @@ func (hg *Histogram) mergeBuckets(bucketIdx int64) {
return
}

// getIncreaseFactor returns the ratio between totalCount and the number of
// rows this histogram accounts for, i.e. a factor describing how much the data
// has increased since the last analysis. The accounted row count is the Count
// of the last bucket plus NullCount.
//
// It returns 1.0 (no scaling) when the histogram accounts for no rows, which
// avoids dividing by zero. A histogram without any buckets is tolerated rather
// than panicking on the last-bucket lookup; only NullCount is counted then.
func (hg *Histogram) getIncreaseFactor(totalCount int64) float64 {
	columnCount := hg.NullCount
	if n := len(hg.Buckets); n > 0 {
		// NOTE(review): presumably bucket counts are cumulative, so the last
		// bucket holds the total of non-null rows — confirm against the builder.
		columnCount += hg.Buckets[n-1].Count
	}
	if columnCount == 0 {
		// avoid dividing by 0
		return 1.0
	}
	return float64(totalCount) / float64(columnCount)
}

// Column represents a column histogram.
type Column struct {
Histogram
Expand Down
40 changes: 27 additions & 13 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,45 +155,57 @@ func (t *Table) String() string {
return strings.Join(strs, "\n")
}

// columnIsInvalid checks if this column is invalid.
func (t *Table) columnIsInvalid(colInfo *model.ColumnInfo) bool {
// ColumnIsInvalid checks if this column is invalid.
func (t *Table) ColumnIsInvalid(colInfo *model.ColumnInfo) bool {
if t.Pseudo {
return true
}
_, ok := t.Columns[colInfo.ID]
return !ok
col, ok := t.Columns[colInfo.ID]
return !ok || len(col.Buckets) == 0
}

// ColumnGreaterRowCount estimates the row count where the column greater than value.
func (t *Table) ColumnGreaterRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error) {
if t.columnIsInvalid(colInfo) {
if t.ColumnIsInvalid(colInfo) {
return float64(t.Count) / pseudoLessRate, nil
}
return t.Columns[colInfo.ID].greaterRowCount(sc, value)
hist := t.Columns[colInfo.ID]
result, err := hist.greaterRowCount(sc, value)
result *= hist.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}

// ColumnLessRowCount estimates the row count where the column less than value.
func (t *Table) ColumnLessRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error) {
if t.columnIsInvalid(colInfo) {
if t.ColumnIsInvalid(colInfo) {
return float64(t.Count) / pseudoLessRate, nil
}
return t.Columns[colInfo.ID].lessRowCount(sc, value)
hist := t.Columns[colInfo.ID]
result, err := hist.lessRowCount(sc, value)
result *= hist.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}

// ColumnBetweenRowCount estimates the row count where column greater or equal to a and less than b.
func (t *Table) ColumnBetweenRowCount(sc *variable.StatementContext, a, b types.Datum, colInfo *model.ColumnInfo) (float64, error) {
if t.columnIsInvalid(colInfo) {
if t.ColumnIsInvalid(colInfo) {
return float64(t.Count) / pseudoBetweenRate, nil
}
return t.Columns[colInfo.ID].betweenRowCount(sc, a, b)
hist := t.Columns[colInfo.ID]
result, err := hist.betweenRowCount(sc, a, b)
result *= hist.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}

// ColumnEqualRowCount estimates the row count where the column equals to value.
func (t *Table) ColumnEqualRowCount(sc *variable.StatementContext, value types.Datum, colInfo *model.ColumnInfo) (float64, error) {
if t.columnIsInvalid(colInfo) {
if t.ColumnIsInvalid(colInfo) {
return float64(t.Count) / pseudoEqualRate, nil
}
return t.Columns[colInfo.ID].equalRowCount(sc, value)
hist := t.Columns[colInfo.ID]
result, err := hist.equalRowCount(sc, value)
result *= hist.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
Expand All @@ -211,7 +223,9 @@ func (t *Table) GetRowCountByIndexRanges(sc *variable.StatementContext, idxID in
if t.Pseudo || idx == nil || len(idx.Buckets) == 0 {
return getPseudoRowCountByIndexRanges(sc, indexRanges, inAndEQCnt, float64(t.Count))
}
return idx.getRowCount(sc, indexRanges, inAndEQCnt)
result, err := idx.getRowCount(sc, indexRanges, inAndEQCnt)
result *= idx.getIncreaseFactor(t.Count)
return result, errors.Trace(err)
}

// PseudoTable creates a pseudo table statistics when statistic can not be found in KV store.
Expand Down
9 changes: 8 additions & 1 deletion statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
. "github.com/pingcap/check"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/util/testkit"
"github.com/pingcap/tidb/util/types"
)

var _ = Suite(&testStatsUpdateSuite{})
Expand Down Expand Up @@ -61,6 +62,7 @@ func (s *testStatsUpdateSuite) TestSingleSessionInsert(c *C) {
stats2 := h.GetTableStats(tableInfo2.ID)
c.Assert(stats2.Count, Equals, int64(rowCount2))

testKit.MustExec("analyze table t1")
// Test update in a txn.
for i := 0; i < rowCount1; i++ {
testKit.MustExec("insert into t1 values(1, 2)")
Expand All @@ -70,6 +72,11 @@ func (s *testStatsUpdateSuite) TestSingleSessionInsert(c *C) {
stats1 = h.GetTableStats(tableInfo1.ID)
c.Assert(stats1.Count, Equals, int64(rowCount1*2))

// Test IncreaseFactor.
count, err := stats1.ColumnEqualRowCount(testKit.Se.GetSessionVars().StmtCtx, types.NewIntDatum(1), tableInfo1.Columns[0])
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(rowCount1*2))

testKit.MustExec("begin")
for i := 0; i < rowCount1; i++ {
testKit.MustExec("insert into t1 values(1, 2)")
Expand Down Expand Up @@ -99,7 +106,7 @@ func (s *testStatsUpdateSuite) TestSingleSessionInsert(c *C) {
c.Assert(stats2.Count, Equals, int64(rowCount2))

rs := testKit.MustQuery("select modify_count from mysql.stats_meta")
rs.Check(testkit.Rows("50", "40"))
rs.Check(testkit.Rows("40", "40"))
}

func (s *testStatsUpdateSuite) TestMultiSession(c *C) {
Expand Down

0 comments on commit 1ac2be8

Please sign in to comment.