Skip to content

Commit

Permalink
planner/core: refactor deriveStats for LogicalPlan (pingcap#8753)
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka authored and zz-jason committed Dec 25, 2018
1 parent 351c4a5 commit 4ded4f6
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 106 deletions.
2 changes: 1 addition & 1 deletion planner/core/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func logicalOptimize(flag uint64, logic LogicalPlan) (LogicalPlan, error) {
}

func physicalOptimize(logic LogicalPlan) (PhysicalPlan, error) {
if _, err := logic.deriveStats(); err != nil {
if _, err := logic.recursiveDeriveStats(); err != nil {
return nil, errors.Trace(err)
}

Expand Down
7 changes: 5 additions & 2 deletions planner/core/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,11 @@ type LogicalPlan interface {
// pushDownTopN will push down the topN or limit operator during logical optimization.
pushDownTopN(topN *LogicalTopN) LogicalPlan

// deriveStats derives statistic info between plans.
deriveStats() (*property.StatsInfo, error)
// recursiveDeriveStats derives statistic info for this plan by first deriving the stats of its children recursively.
recursiveDeriveStats() (*property.StatsInfo, error)

// DeriveStats derives statistic info for the current plan node given the stats of its children.
DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error)

// preparePossibleProperties is only used for join and aggregation. For example, for group by a,b,c, every permutation of (a,b,c) is
// valid, but the ordered indices in the leaf plans are limited. So we can get all possible order properties by a pre-walking.
Expand Down
2 changes: 1 addition & 1 deletion planner/core/rule_join_reorder_dp.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (s *joinReorderDPSolver) newJoinWithEdge(leftPlan, rightPlan LogicalPlan, e
}
}
join := s.newJoin(leftPlan, rightPlan, eqConds)
_, err := join.deriveStats()
_, err := join.recursiveDeriveStats()
return join, err
}

Expand Down
2 changes: 1 addition & 1 deletion planner/core/rule_join_reorder_dp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (mj mockLogicalJoin) init(ctx sessionctx.Context) *mockLogicalJoin {
return &mj
}

func (mj *mockLogicalJoin) deriveStats() (*property.StatsInfo, error) {
func (mj *mockLogicalJoin) recursiveDeriveStats() (*property.StatsInfo, error) {
if mj.stats == nil {
mj.stats = mj.statsMap[mj.involvedNodeSet]
}
Expand Down
4 changes: 2 additions & 2 deletions planner/core/rule_join_reorder_greedy.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ type joinReorderGreedySingleGroupSolver struct {
// connect them, we make a bushy join tree to do the cartesian joins finally.
func (s *joinReorderGreedySingleGroupSolver) solve() (LogicalPlan, error) {
for _, node := range s.curJoinGroup {
_, err := node.deriveStats()
_, err := node.recursiveDeriveStats()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -106,7 +106,7 @@ func (s *joinReorderGreedySingleGroupSolver) constructConnectedJoinTree() (Logic
if newJoin == nil {
continue
}
_, err := newJoin.deriveStats()
_, err := newJoin.recursiveDeriveStats()
if err != nil {
return nil, err
}
Expand Down
148 changes: 49 additions & 99 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@ func (p *basePhysicalPlan) StatsCount() float64 {
return p.stats.RowCount
}

func (p *LogicalTableDual) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (p *LogicalTableDual) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
profile := &property.StatsInfo{
RowCount: float64(p.RowCount),
Cardinality: make([]float64, p.Schema().Len()),
Expand All @@ -40,20 +38,31 @@ func (p *LogicalTableDual) deriveStats() (*property.StatsInfo, error) {
return p.stats, nil
}

func (p *baseLogicalPlan) deriveStats() (*property.StatsInfo, error) {
func (p *baseLogicalPlan) recursiveDeriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
if len(p.children) > 1 {
panic("LogicalPlans with more than one child should implement their own deriveStats().")
childStats := make([]*property.StatsInfo, len(p.children))
for i, child := range p.children {
childProfile, err := child.recursiveDeriveStats()
if err != nil {
return nil, err
}
childStats[i] = childProfile
}
return p.self.DeriveStats(childStats)
}

if len(p.children) == 1 {
var err error
p.stats, err = p.children[0].deriveStats()
return p.stats, err
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (p *baseLogicalPlan) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
if len(childStats) == 1 {
p.stats = childStats[0]
return p.stats, nil
}
if len(childStats) > 1 {
err := ErrInternal.GenWithStack("LogicalPlans with more than one child should implement their own DeriveStats().")
return nil, err
}

profile := &property.StatsInfo{
RowCount: float64(1),
Cardinality: make([]float64, p.self.Schema().Len()),
Expand Down Expand Up @@ -90,10 +99,8 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *property.Stat
return profile.Scale(selectivity)
}

func (ds *DataSource) deriveStats() (*property.StatsInfo, error) {
if ds.stats != nil {
return ds.stats, nil
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
for i, expr := range ds.pushedDownConds {
ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false)
Expand Down Expand Up @@ -127,30 +134,18 @@ func (ds *DataSource) deriveStats() (*property.StatsInfo, error) {
return ds.stats, nil
}

func (p *LogicalSelection) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
childProfile, err := p.children[0].deriveStats()
if err != nil {
return nil, err
}
p.stats = childProfile.Scale(selectionFactor)
// DeriveStats implements the LogicalPlan DeriveStats interface.
// It scales the first child's stats by selectionFactor, stores the result in
// p.stats and returns it; the returned error is always nil.
func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
	childProfile := childStats[0]
	p.stats = childProfile.Scale(selectionFactor)
	return p.stats, nil
}

func (p *LogicalUnionAll) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (p *LogicalUnionAll) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
p.stats = &property.StatsInfo{
Cardinality: make([]float64, p.Schema().Len()),
}
for _, child := range p.children {
childProfile, err := child.deriveStats()
if err != nil {
return nil, err
}
for _, childProfile := range childStats {
p.stats.RowCount += childProfile.RowCount
for i := range p.stats.Cardinality {
p.stats.Cardinality[i] += childProfile.Cardinality[i]
Expand All @@ -159,14 +154,9 @@ func (p *LogicalUnionAll) deriveStats() (*property.StatsInfo, error) {
return p.stats, nil
}

func (p *LogicalLimit) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
childProfile, err := p.children[0].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (p *LogicalLimit) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
childProfile := childStats[0]
p.stats = &property.StatsInfo{
RowCount: math.Min(float64(p.Count), childProfile.RowCount),
Cardinality: make([]float64, len(childProfile.Cardinality)),
Expand All @@ -177,14 +167,9 @@ func (p *LogicalLimit) deriveStats() (*property.StatsInfo, error) {
return p.stats, nil
}

func (lt *LogicalTopN) deriveStats() (*property.StatsInfo, error) {
if lt.stats != nil {
return lt.stats, nil
}
childProfile, err := lt.children[0].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (lt *LogicalTopN) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
childProfile := childStats[0]
lt.stats = &property.StatsInfo{
RowCount: math.Min(float64(lt.Count), childProfile.RowCount),
Cardinality: make([]float64, len(childProfile.Cardinality)),
Expand All @@ -211,14 +196,9 @@ func getCardinality(cols []*expression.Column, schema *expression.Schema, profil
return cardinality
}

func (p *LogicalProjection) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
childProfile, err := p.children[0].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (p *LogicalProjection) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
childProfile := childStats[0]
p.stats = &property.StatsInfo{
RowCount: childProfile.RowCount,
Cardinality: make([]float64, len(p.Exprs)),
Expand All @@ -230,14 +210,9 @@ func (p *LogicalProjection) deriveStats() (*property.StatsInfo, error) {
return p.stats, nil
}

func (la *LogicalAggregation) deriveStats() (*property.StatsInfo, error) {
if la.stats != nil {
return la.stats, nil
}
childProfile, err := la.children[0].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (la *LogicalAggregation) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
childProfile := childStats[0]
gbyCols := make([]*expression.Column, 0, len(la.GroupByItems))
for _, gbyExpr := range la.GroupByItems {
cols := expression.ExtractColumns(gbyExpr)
Expand All @@ -256,25 +231,15 @@ func (la *LogicalAggregation) deriveStats() (*property.StatsInfo, error) {
return la.stats, nil
}

// deriveStats prepares property.StatsInfo.
// DeriveStats implements the LogicalPlan DeriveStats interface.
// If the type of join is SemiJoin, the selectivity of it will be same as selection's.
// If the type of join is LeftOuterSemiJoin, it will not add or remove any row. The last column is a boolean value, whose Cardinality should be two.
// If the type of join is inner/outer join, the output of join(s, t) should be N(s) * N(t) / (V(s.key) * V(t.key)) * Min(s.key, t.key).
// N(s) stands for the number of rows in relation s. V(s.key) means the Cardinality of join key in s.
// This is quite a simple strategy: we assume every bucket of a relation that participates in the join has the same number of rows, and apply a cross join for
// every matched bucket.
func (p *LogicalJoin) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
leftProfile, err := p.children[0].deriveStats()
if err != nil {
return nil, err
}
rightProfile, err := p.children[1].deriveStats()
if err != nil {
return nil, err
}
func (p *LogicalJoin) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
leftProfile, rightProfile := childStats[0], childStats[1]
if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
p.stats = &property.StatsInfo{
RowCount: leftProfile.RowCount * selectionFactor,
Expand Down Expand Up @@ -328,18 +293,9 @@ func (p *LogicalJoin) deriveStats() (*property.StatsInfo, error) {
return p.stats, nil
}

func (la *LogicalApply) deriveStats() (*property.StatsInfo, error) {
if la.stats != nil {
return la.stats, nil
}
leftProfile, err := la.children[0].deriveStats()
if err != nil {
return nil, err
}
_, err = la.children[1].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
func (la *LogicalApply) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
leftProfile := childStats[0]
la.stats = &property.StatsInfo{
RowCount: leftProfile.RowCount,
Cardinality: make([]float64, la.schema.Len()),
Expand Down Expand Up @@ -367,14 +323,8 @@ func getSingletonStats(len int) *property.StatsInfo {
return ret
}

func (p *LogicalMaxOneRow) deriveStats() (*property.StatsInfo, error) {
if p.stats != nil {
return p.stats, nil
}
_, err := p.children[0].deriveStats()
if err != nil {
return nil, err
}
// DeriveStats implements the LogicalPlan DeriveStats interface.
// The child stats are not consulted: the result is whatever getSingletonStats
// returns for the length of this plan's schema. It is stored in p.stats and
// returned with a nil error.
func (p *LogicalMaxOneRow) DeriveStats(childStats []*property.StatsInfo) (*property.StatsInfo, error) {
	width := p.Schema().Len()
	p.stats = getSingletonStats(width)
	return p.stats, nil
}

0 comments on commit 4ded4f6

Please sign in to comment.