stats: log detailed stats info for query feedback (pingcap#7293)
alivxxx authored and zz-jason committed Aug 15, 2018
1 parent 9fc67b9 commit 29addca
Showing 10 changed files with 351 additions and 59 deletions.
14 changes: 14 additions & 0 deletions statistics/boostrap.go
@@ -14,6 +14,8 @@
package statistics

import (
"fmt"

"github.com/juju/errors"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/model"
@@ -49,6 +51,7 @@ func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache
tbl := &Table{
HistColl: newHistColl,
Version: row.GetUint64(0),
name: getFullTableName(is, tableInfo),
}
tables[physicalID] = tbl
}
@@ -257,3 +260,14 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) error {
h.statsCache.Store(tables)
return nil
}

func getFullTableName(is infoschema.InfoSchema, tblInfo *model.TableInfo) string {
for _, schema := range is.AllSchemas() {
if t, err := is.TableByName(schema.Name, tblInfo.Name); err == nil {
if t.Meta().ID == tblInfo.ID {
return schema.Name.O + "." + tblInfo.Name.O
}
}
}
return fmt.Sprintf("%d", tblInfo.ID)
}
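
For illustration only (the schema name, table name, and ID below are assumed, not taken from this commit), the new helper resolves a table that is still visible in the information schema to its qualified name and otherwise falls back to the numeric table ID:

name := getFullTableName(is, tblInfo)
// -> "test.t" while some schema still contains a table whose ID matches tblInfo.ID
// -> "45"     if no such table is found, so only the numeric ID can be reported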
137 changes: 135 additions & 2 deletions statistics/feedback.go
@@ -16,6 +16,7 @@ package statistics
import (
"bytes"
"encoding/gob"
"fmt"
"math"
"math/rand"
"sort"
@@ -102,11 +103,11 @@ func (q *QueryFeedback) DecodeToRanges(isIndex bool) ([]*ranger.Range, error) {
if isIndex {
var err error
// As we do not know the original length, just use a custom value here.
lowVal, err = codec.Decode(low.GetBytes(), 4)
lowVal, err = codec.DecodeRange(low.GetBytes(), 4)
if err != nil {
return nil, errors.Trace(err)
}
highVal, err = codec.Decode(high.GetBytes(), 4)
highVal, err = codec.DecodeRange(high.GetBytes(), 4)
if err != nil {
return nil, errors.Trace(err)
}
@@ -759,3 +760,135 @@ func splitFeedbackByQueryType(feedbacks []feedback) ([]feedback, []feedback) {
}
return eqFB, ranFB
}

// formatBuckets formats the buckets from lowBkt to highBkt.
func formatBuckets(hg *Histogram, lowBkt, highBkt, idxCols int) string {
if lowBkt == highBkt {
return hg.bucketToString(lowBkt, idxCols)
}
if lowBkt+1 == highBkt {
return fmt.Sprintf("%s, %s", hg.bucketToString(lowBkt, 0), hg.bucketToString(highBkt, 0))
}
// we do not care about the middle buckets
return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.bucketToString(lowBkt, 0),
highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.bucketToString(highBkt, 0))
}

func colRangeToStr(c *Column, ran *ranger.Range, actual int64, factor float64) string {
lowCount, lowBkt := c.lessRowCountWithBktIdx(ran.LowVal[0])
highCount, highBkt := c.lessRowCountWithBktIdx(ran.HighVal[0])
return fmt.Sprintf("range: %s, actual: %d, expected: %d, buckets: {%s}", ran.String(), actual,
int64((highCount-lowCount)*factor), formatBuckets(&c.Histogram, lowBkt, highBkt, 0))
}

func logForPK(prefix string, c *Column, ranges []*ranger.Range, actual []int64, factor float64) {
for i, ran := range ranges {
if ran.LowVal[0].GetInt64()+1 >= ran.HighVal[0].GetInt64() {
continue
}
log.Debugf("%s column: %s, %s", prefix, c.Info.Name, colRangeToStr(c, ran, actual[i], factor))
}
}

func logForIndexRange(idx *Index, ran *ranger.Range, actual int64, factor float64) string {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
lb, err := codec.EncodeKey(sc, nil, ran.LowVal...)
if err != nil {
return ""
}
rb, err := codec.EncodeKey(sc, nil, ran.HighVal...)
if err != nil {
return ""
}
if idx.CMSketch != nil && bytes.Compare(kv.Key(lb).PrefixNext(), rb) >= 0 {
str, err := types.DatumsToString(ran.LowVal, true)
if err != nil {
return ""
}
return fmt.Sprintf("value: %s, actual: %d, expected: %d", str, actual, int64(float64(idx.QueryBytes(lb))*factor))
}
l, r := types.NewBytesDatum(lb), types.NewBytesDatum(rb)
lowCount, lowBkt := idx.lessRowCountWithBktIdx(l)
highCount, highBkt := idx.lessRowCountWithBktIdx(r)
return fmt.Sprintf("range: %s, actual: %d, expected: %d, histogram: {%s}", ran.String(), actual,
int64((highCount-lowCount)*factor), formatBuckets(&idx.Histogram, lowBkt, highBkt, len(idx.Info.Columns)))
}

func logForIndex(prefix string, t *Table, idx *Index, ranges []*ranger.Range, actual []int64, factor float64) {
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
if idx.CMSketch == nil || idx.statsVer != version1 {
for i, ran := range ranges {
log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor))
}
return
}
for i, ran := range ranges {
rangePosition := getOrdinalOfRangeCond(sc, ran)
// the range contains only a range query or only an equality query
if rangePosition == 0 || rangePosition == len(ran.LowVal) {
log.Debugf("%s index: %s, %s", prefix, idx.Info.Name.O, logForIndexRange(idx, ran, actual[i], factor))
continue
}
equalityString, err := types.DatumsToString(ran.LowVal[:rangePosition], true)
if err != nil {
continue
}
bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
if err != nil {
continue
}
equalityCount := idx.CMSketch.QueryBytes(bytes)
rang := ranger.Range{
LowVal: []types.Datum{ran.LowVal[rangePosition]},
HighVal: []types.Datum{ran.HighVal[rangePosition]},
}
colName := idx.Info.Columns[rangePosition].Name.L
var rangeString string
// prefer index stats over column stats
if idx, ok := t.colName2Idx[colName]; ok {
if t.Indices[idx] == nil {
return
}
rangeString = logForIndexRange(t.Indices[idx], &rang, -1, factor)
} else {
id := t.colName2ID[colName]
if t.Columns[id] == nil {
return
}
rangeString = colRangeToStr(t.Columns[t.colName2ID[colName]], &rang, -1, factor)
}
log.Debugf("%s index: %s, actual: %d, equality: %s, expected equality: %d, %s", prefix, idx.Info.Name.O,
actual[i], equalityString, equalityCount, rangeString)
}
}

func (q *QueryFeedback) logDetailedInfo(h *Handle) {
t, ok := h.statsCache.Load().(statsCache)[q.tableID]
if !ok {
return
}
isIndex := q.hist.isIndexHist()
ranges, err := q.DecodeToRanges(isIndex)
if err != nil {
log.Debug(err)
return
}
actual := make([]int64, 0, len(q.feedback))
for _, fb := range q.feedback {
actual = append(actual, fb.count)
}
logPrefix := fmt.Sprintf("[stats-feedback] %s,", t.name)
if isIndex {
idx := t.Indices[q.hist.ID]
if idx == nil {
return
}
logForIndex(logPrefix, t, idx, ranges, actual, idx.getIncreaseFactor(t.Count))
} else {
c := t.Columns[q.hist.ID]
if c == nil {
return
}
logForPK(logPrefix, c, ranges, actual, c.getIncreaseFactor(t.Count))
}
}
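
For illustration, the debug lines emitted by logDetailedInfo follow the formats built above; with assumed table, index, and column names and made-up counts, an index line and a primary-key column line might look like:

[stats-feedback] test.t, index: idx_a, range: [1,10], actual: 120, expected: 100, histogram: {num: 30 lower_bound: 1 upper_bound: 5 repeats: 0, num: 70 lower_bound: 6 upper_bound: 10 repeats: 2}
[stats-feedback] test.t, column: a, range: [1,10), actual: 120, expected: 100, buckets: {num: 30 lower_bound: 1 upper_bound: 5 repeats: 0, num: 70 lower_bound: 6 upper_bound: 10 repeats: 2}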
56 changes: 28 additions & 28 deletions statistics/feedback_test.go
@@ -71,13 +71,13 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
defer func() { defaultBucketCount = originBucketCount }()
c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals,
"column:0 ndv:0 totColSize:0\n"+
"num: 10000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 10008\tlower_bound: 2\tupper_bound: 7\trepeats: 0\n"+
"num: 10019\tlower_bound: 8\tupper_bound: 19\trepeats: 0\n"+
"num: 10019\tlower_bound: 20\tupper_bound: 20\trepeats: 0\n"+
"num: 10037\tlower_bound: 21\tupper_bound: 39\trepeats: 0\n"+
"num: 10055\tlower_bound: 40\tupper_bound: 58\trepeats: 0\n"+
"num: 10057\tlower_bound: 59\tupper_bound: 60\trepeats: 0")
"num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
"num: 8 lower_bound: 2 upper_bound: 7 repeats: 0\n"+
"num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+
"num: 0 lower_bound: 20 upper_bound: 20 repeats: 0\n"+
"num: 18 lower_bound: 21 upper_bound: 39 repeats: 0\n"+
"num: 18 lower_bound: 40 upper_bound: 58 repeats: 0\n"+
"num: 2 lower_bound: 59 upper_bound: 60 repeats: 0")
}

func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
@@ -91,12 +91,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0 totColSize:0\n"+
"num: 1\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 1\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
"num: 1\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
"num: 6\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+
"num: 6\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+
"num: 6\tlower_bound: 30\tupper_bound: 50\trepeats: 0")
"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
c.Assert(totalCount, Equals, int64(6))

@@ -110,12 +110,12 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0 totColSize:0\n"+
"num: 100000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 100000\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
"num: 100000\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
"num: 100001\tlower_bound: 10\tupper_bound: 15\trepeats: 0\n"+
"num: 100001\tlower_bound: 16\tupper_bound: 20\trepeats: 0\n"+
"num: 100001\tlower_bound: 30\tupper_bound: 50\trepeats: 0")
"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
c.Assert(totalCount, Equals, int64(100001))

@@ -132,7 +132,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0 totColSize:0\n"+
"num: 1000000\tlower_bound: 0\tupper_bound: 1000000\trepeats: 0")
"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0")
c.Assert(isNewBuckets, DeepEquals, []bool{false})
c.Assert(totalCount, Equals, int64(1000000))

@@ -148,8 +148,8 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0 totColSize:0\n"+
"num: 1\tlower_bound: 0\tupper_bound: 10\trepeats: 0\n"+
"num: 1\tlower_bound: 11\tupper_bound: 1000000\trepeats: 0")
"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+
"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0")
c.Assert(isNewBuckets, DeepEquals, []bool{true, true})
c.Assert(totalCount, Equals, int64(1))
}
@@ -169,16 +169,16 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
counts: []int64{1},
isNewBuckets: []bool{false},
bucketCount: 1,
result: "column:0 ndv:0 totColSize:0\nnum: 1\tlower_bound: 1\tupper_bound: 2\trepeats: 0",
result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0",
},
{
points: []int64{1, 2, 2, 3, 3, 4},
counts: []int64{100000, 1, 1},
isNewBuckets: []bool{false, false, false},
bucketCount: 2,
result: "column:0 ndv:0 totColSize:0\n" +
"num: 100000\tlower_bound: 1\tupper_bound: 2\trepeats: 0\n" +
"num: 100002\tlower_bound: 2\tupper_bound: 4\trepeats: 0",
"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" +
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0",
},
// test do not merge if the result bucket count is too large
{
Expand All @@ -187,9 +187,9 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
isNewBuckets: []bool{false, false, false, false},
bucketCount: 3,
result: "column:0 ndv:0 totColSize:0\n" +
"num: 2\tlower_bound: 1\tupper_bound: 3\trepeats: 0\n" +
"num: 100002\tlower_bound: 3\tupper_bound: 4\trepeats: 0\n" +
"num: 200002\tlower_bound: 4\tupper_bound: 5\trepeats: 0",
"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" +
"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" +
"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0",
},
}
for _, t := range tests {
1 change: 1 addition & 0 deletions statistics/handle.go
@@ -159,6 +159,7 @@ func (h *Handle) Update(is infoschema.InfoSchema) error {
tbl.Version = version
tbl.Count = count
tbl.ModifyCount = modifyCount
tbl.name = getFullTableName(is, tableInfo)
tables = append(tables, tbl)
}
h.mu.Lock()
33 changes: 21 additions & 12 deletions statistics/histogram.go
@@ -341,7 +341,7 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) {
if idxCols == 0 {
return value.ToString()
}
decodedVals, err := codec.Decode(value.GetBytes(), idxCols)
decodedVals, err := codec.DecodeRange(value.GetBytes(), idxCols)
if err != nil {
return "", errors.Trace(err)
}
@@ -352,6 +352,14 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) {
return str, nil
}

func (hg *Histogram) bucketToString(bktID, idxCols int) string {
upperVal, err := ValueToString(hg.GetUpper(bktID), idxCols)
terror.Log(errors.Trace(err))
lowerVal, err := ValueToString(hg.GetLower(bktID), idxCols)
terror.Log(errors.Trace(err))
return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat)
}

// ToString gets the string representation for the histogram.
func (hg *Histogram) ToString(idxCols int) string {
strs := make([]string, 0, hg.Len()+1)
@@ -361,11 +369,7 @@ func (hg *Histogram) ToString(idxCols int) string {
strs = append(strs, fmt.Sprintf("column:%d ndv:%d totColSize:%d", hg.ID, hg.NDV, hg.TotColSize))
}
for i := 0; i < hg.Len(); i++ {
upperVal, err := ValueToString(hg.GetUpper(i), idxCols)
terror.Log(errors.Trace(err))
lowerVal, err := ValueToString(hg.GetLower(i), idxCols)
terror.Log(errors.Trace(err))
strs = append(strs, fmt.Sprintf("num: %d\tlower_bound: %s\tupper_bound: %s\trepeats: %d", hg.Buckets[i].Count, lowerVal, upperVal, hg.Buckets[i].Repeat))
strs = append(strs, hg.bucketToString(i, idxCols))
}
return strings.Join(strs, "\n")
}
@@ -405,14 +409,14 @@ func (hg *Histogram) greaterAndEqRowCount(value types.Datum) float64 {
}

// lessRowCount estimates the row count where the column less than value.
func (hg *Histogram) lessRowCount(value types.Datum) float64 {
func (hg *Histogram) lessRowCountWithBktIdx(value types.Datum) (float64, int) {
// all the values are null
if hg.Bounds == nil {
return 0
return 0, 0
}
index, match := hg.Bounds.LowerBound(0, &value)
if index == hg.Bounds.NumRows() {
return hg.totalRowCount()
return hg.totalRowCount(), hg.Len() - 1
}
// Since we store the lower and upper bounds together, dividing the index by 2 gives the bucket index.
bucketIdx := index / 2
@@ -423,11 +427,16 @@ func (hg *Histogram) lessRowCount(value types.Datum) float64 {
}
if index%2 == 1 {
if match {
return curCount - curRepeat
return curCount - curRepeat, bucketIdx
}
return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount)
return preCount + hg.calcFraction(bucketIdx, &value)*(curCount-curRepeat-preCount), bucketIdx
}
return preCount
return preCount, bucketIdx
}

func (hg *Histogram) lessRowCount(value types.Datum) float64 {
result, _ := hg.lessRowCountWithBktIdx(value)
return result
}

// lessAndEqRowCount estimates the row count where the column less than or equal to value.
(Diffs for the remaining changed files in this commit are not shown here.)
