Skip to content

Commit

Permalink
stats: fix estimation in between row count (pingcap#5682)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored Jan 19, 2018
1 parent 45d916b commit 384a108
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 4 additions & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package statistics
import (
"bytes"
"fmt"
"math"
"strings"
"time"

Expand Down Expand Up @@ -344,8 +345,10 @@ func (hg *Histogram) lessAndEqRowCount(value types.Datum) float64 {
// betweenRowCount estimates the number of rows whose value lies between a and b.
// The estimate is the difference of the two lessRowCount results; when that
// difference is not positive, it falls back to an average-rows-per-distinct-value
// estimate capped by lessCountB.
func (hg *Histogram) betweenRowCount(a, b types.Datum) float64 {
	lessCountA := hg.lessRowCount(a)
	lessCountB := hg.lessRowCount(b)
	// If lessCountA is not less than lessCountB, it may be that they fall into the same bucket and we cannot
	// estimate the fraction, so we use `totalCount / NDV` to estimate the row count, but the result should not
	// be greater than lessCountB.
	if lessCountA >= lessCountB {
		// Without any distinct value the division below would yield +Inf or NaN, and math.Min
		// propagates NaN, so report an empty range instead.
		// NOTE(review): assumes NDV == 0 implies the histogram holds no rows — confirm.
		if hg.NDV <= 0 {
			return 0
		}
		return math.Min(lessCountB, hg.totalRowCount()/float64(hg.NDV))
	}
	return lessCountB - lessCountA
}
Expand Down
2 changes: 2 additions & 0 deletions statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
c.Check(int(count), Equals, 19999)
count = col.betweenRowCount(encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
c.Check(int(count), Equals, 4999)
count = col.betweenRowCount(encodeKey(types.MinNotNullDatum()), encodeKey(types.NewIntDatum(0)))
c.Check(int(count), Equals, 0)
count = col.lessRowCount(encodeKey(types.NewIntDatum(0)))
c.Check(int(count), Equals, 0)

Expand Down

0 comments on commit 384a108

Please sign in to comment.