Skip to content

Commit

Permalink
statistics: avoid mutate global variable (pingcap#28678)
Browse files Browse the repository at this point in the history
  • Loading branch information
tisonkun authored Oct 9, 2021
1 parent 501e87e commit 219c02e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 23 deletions.
25 changes: 15 additions & 10 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,8 @@ func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt buc
const (
// NOTE(review): presumably the default number of pieces a bucket is split into — confirm against getSplitCount, whose body is not visible here.
defaultSplitCount = 10
// NOTE(review): presumably scales the split count by the number of feedbacks — confirm against getSplitCount.
splitPerFeedback = 10
// defaultBucketCount is the default number of buckets a column histogram has.
// UpdateHistogram passes it as the bucket count when no custom count is given.
defaultBucketCount = 256
)

// getSplitCount gets the split count for the histogram. It is based on the intuition that:
Expand Down Expand Up @@ -686,11 +688,8 @@ func getBucketScore(bkts []bucket, totalCount float64, id int) bucketScore {
return bucketScore{id, math.Abs(err / (preCount + count))}
}

// defaultBucketCount is the number of buckets a column histogram has.
var defaultBucketCount = 256

func mergeBuckets(bkts []bucket, isNewBuckets []bool, totalCount float64) []bucket {
mergeCount := len(bkts) - defaultBucketCount
func mergeBuckets(bkts []bucket, isNewBuckets []bool, bucketCount int, totalCount float64) []bucket {
mergeCount := len(bkts) - bucketCount
if mergeCount <= 0 {
return bkts
}
Expand Down Expand Up @@ -726,11 +725,11 @@ func mergeBuckets(bkts []bucket, isNewBuckets []bool, totalCount float64) []buck
}

// splitBuckets splits the histogram buckets according to the feedback.
func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int64) {
func splitBuckets(h *Histogram, feedback *QueryFeedback, bucketCount int) ([]bucket, []bool, int64) {
bktID2FB, numTotalFBs := buildBucketFeedback(h, feedback)
buckets := make([]bucket, 0, h.Len())
isNewBuckets := make([]bool, 0, h.Len())
splitCount := getSplitCount(numTotalFBs, defaultBucketCount-h.Len())
splitCount := getSplitCount(numTotalFBs, bucketCount-h.Len())
for i := 0; i < h.Len(); i++ {
bktFB, ok := bktID2FB[i]
// No feedback, just use the original one.
Expand Down Expand Up @@ -760,14 +759,20 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6

// UpdateHistogram updates the histogram according to the feedback, using
// defaultBucketCount as the bucket count. It delegates to
// UpdateHistogramWithBucketCount and returns its result.
func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram {
return UpdateHistogramWithBucketCount(h, feedback, statsVer, defaultBucketCount)
}

// UpdateHistogramWithBucketCount updates the histogram according to the feedback,
// with a customized bucketCount for testing.
func UpdateHistogramWithBucketCount(h *Histogram, feedback *QueryFeedback, statsVer int, bucketCount int) *Histogram {
if statsVer < Version2 {
// If it's the stats we haven't maintain the bucket NDV yet. Reset the ndv.
// If it's the stats we haven't maintained the bucket NDV yet. Reset the ndv.
for i := range feedback.Feedback {
feedback.Feedback[i].Ndv = 0
}
}
buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount))
buckets, isNewBuckets, totalCount := splitBuckets(h, feedback, bucketCount)
buckets = mergeBuckets(buckets, isNewBuckets, bucketCount, float64(totalCount))
hist := buildNewHistogram(h, buckets)
// Update the NDV of primary key column.
if feedback.Tp == PkType {
Expand Down
20 changes: 7 additions & 13 deletions statistics/feedback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ func TestUpdateHistogram(t *testing.T) {

q := NewQueryFeedback(0, genHistogram(), 0, false)
q.Feedback = feedbacks
originBucketCount := defaultBucketCount
defaultBucketCount = 7
defer func() { defaultBucketCount = originBucketCount }()
require.Equal(t,
"column:0 ndv:10053 totColSize:0\n"+
"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+
Expand All @@ -79,7 +76,7 @@ func TestUpdateHistogram(t *testing.T) {
"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0 ndv: 11\n"+
"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0 ndv: 19\n"+
"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0 ndv: 11",
UpdateHistogram(q.Hist, q, Version2).ToString(0))
UpdateHistogramWithBucketCount(q.Hist, q, Version2, 7).ToString(0))
}

func TestSplitBuckets(t *testing.T) {
Expand All @@ -100,7 +97,7 @@ func TestSplitBuckets(t *testing.T) {
oldNdvs[i] = q.Hist.Buckets[i].NDV
}
log.Warn("in test", zap.Int64s("ndvs", oldNdvs), zap.Int64s("cnts", oldCnts))
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q)
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q, defaultBucketCount)
ndvs := make([]int64, len(buckets))
for i := range buckets {
ndvs[i] = buckets[i].Ndv
Expand All @@ -125,7 +122,7 @@ func TestSplitBuckets(t *testing.T) {
}
q = NewQueryFeedback(0, genHistogram(), 0, false)
q.Feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
require.Equal(t,
"column:0 ndv:0 totColSize:0\n"+
"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+
Expand All @@ -149,7 +146,7 @@ func TestSplitBuckets(t *testing.T) {
}
q = NewQueryFeedback(0, h, 0, false)
q.Feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
require.Equal(t,
"column:0 ndv:0 totColSize:0\n"+
"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0 ndv: 1000000",
Expand All @@ -166,7 +163,7 @@ func TestSplitBuckets(t *testing.T) {
}
q = NewQueryFeedback(0, h, 0, false)
q.Feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
require.Equal(t,
"column:0 ndv:0 totColSize:0\n"+
"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0 ndv: 1\n"+
Expand All @@ -183,7 +180,7 @@ func TestSplitBuckets(t *testing.T) {
feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000, 1000))
q = NewQueryFeedback(0, h, 0, false)
q.Feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q, defaultBucketCount)
require.Equal(t,
"column:0 ndv:0 totColSize:0\n"+
"num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0 ndv: 5001",
Expand All @@ -194,8 +191,6 @@ func TestSplitBuckets(t *testing.T) {

func TestMergeBuckets(t *testing.T) {
t.Parallel()
originBucketCount := defaultBucketCount
defer func() { defaultBucketCount = originBucketCount }()
tests := []struct {
points []int64
counts []int64
Expand Down Expand Up @@ -244,8 +239,7 @@ func TestMergeBuckets(t *testing.T) {
bkts = append(bkts, bucket{&lower, &upper, tt.counts[i], 0, tt.ndvs[i]})
totalCount += tt.counts[i]
}
defaultBucketCount = tt.bucketCount
bkts = mergeBuckets(bkts, tt.isNewBuckets, float64(totalCount))
bkts = mergeBuckets(bkts, tt.isNewBuckets, tt.bucketCount, float64(totalCount))
result := buildNewHistogram(&Histogram{Tp: types.NewFieldType(mysql.TypeLong)}, bkts).ToString(0)
require.Equal(t, tt.result, result)
}
Expand Down

0 comments on commit 219c02e

Please sign in to comment.