Skip to content

Commit

Permalink
planner: don't regenerate the same group by column during decorrelation (
Browse files Browse the repository at this point in the history
  • Loading branch information
francis0407 authored Oct 31, 2019
1 parent b80b417 commit ed070c4
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 9 deletions.
15 changes: 15 additions & 0 deletions cmd/explaintest/r/subquery.result
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,18 @@ Projection_11 5.00 root Column#15
│ └─IndexScan_34 1.00 cop[tikv] table:s, index:b, c, d, range: decided by [eq(Column#6, 1) eq(Column#7, 1) eq(Column#8, Column#1)], keep order:false
└─TableReader_27 1.00 root data:TableScan_26
└─TableScan_26 1.00 cop[tikv] table:t1, range: decided by [Column#5], keep order:true
drop table if exists t;
create table t(a int, b int, c int);
explain select a from t t1 where t1.a = (select max(t2.a) from t t2 where t1.b=t2.b and t1.c=t2.b);
id count task operator info
Projection_11 7992.00 root Column#1
└─HashLeftJoin_12 7992.00 root inner join, inner:Selection_17, equal:[eq(Column#2, Column#6) eq(Column#3, Column#6) eq(Column#1, Column#9)]
├─TableReader_16 9970.03 root data:Selection_15
│ └─Selection_15 9970.03 cop[tikv] not(isnull(Column#1)), not(isnull(Column#2)), not(isnull(Column#3))
│ └─TableScan_14 10000.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─Selection_17 6393.60 root not(isnull(Column#9))
└─HashAgg_23 7992.00 root group by:Column#14, funcs:max(Column#12), firstrow(Column#14)
└─TableReader_24 7992.00 root data:HashAgg_18
└─HashAgg_18 7992.00 cop[tikv] group by:Column#6, funcs:max(Column#5)
└─Selection_22 9990.00 cop[tikv] not(isnull(Column#6))
└─TableScan_21 10000.00 cop[tikv] table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
4 changes: 4 additions & 0 deletions cmd/explaintest/t/subquery.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ create table t(a int primary key, b int, c int, d int, index idx(b,c,d));
insert into t values(1,1,1,1),(2,2,2,2),(3,2,2,2),(4,2,2,2),(5,2,2,2);
analyze table t;
explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = 1 and s.c = 1 and s.d = t.a and s.a = t1.a) from t;

drop table if exists t;
create table t(a int, b int, c int);
explain select a from t t1 where t1.a = (select max(t2.a) from t t2 where t1.b=t2.b and t1.c=t2.b);
8 changes: 5 additions & 3 deletions planner/core/rule_decorrelate.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,24 +198,26 @@ func (s *decorrelateSolver) optimize(ctx context.Context, p LogicalPlan) (Logica
sel.Conditions = remainedExpr
apply.corCols = extractCorColumnsBySchema(apply.children[1], apply.children[0].Schema())
// There's no other correlated column.
groupByCols := expression.NewSchema(agg.groupByCols...)
if len(apply.corCols) == 0 {
join := &apply.LogicalJoin
join.EqualConditions = append(join.EqualConditions, eqCondWithCorCol...)
for _, eqCond := range eqCondWithCorCol {
clonedCol := eqCond.GetArgs()[1]
clonedCol := eqCond.GetArgs()[1].(*expression.Column)
// If the join key is not in the aggregation's schema, add first row function.
if agg.schema.ColumnIndex(eqCond.GetArgs()[1].(*expression.Column)) == -1 {
newFunc, err := aggregation.NewAggFuncDesc(apply.ctx, ast.AggFuncFirstRow, []expression.Expression{clonedCol}, false)
if err != nil {
return nil, err
}
agg.AggFuncs = append(agg.AggFuncs, newFunc)
agg.schema.Append(clonedCol.(*expression.Column))
agg.schema.Append(clonedCol)
agg.schema.Columns[agg.schema.Len()-1].RetType = newFunc.RetTp
}
// If group by cols don't contain the join key, add it into this.
if agg.getGbyColIndex(eqCond.GetArgs()[1].(*expression.Column)) == -1 {
if !groupByCols.Contains(clonedCol) {
agg.GroupByItems = append(agg.GroupByItems, clonedCol)
groupByCols.Append(clonedCol)
}
}
agg.collectGroupByColumns()
Expand Down
8 changes: 2 additions & 6 deletions planner/core/rule_predicate_push_down.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,18 +390,14 @@ func (p *LogicalUnionAll) PredicatePushDown(predicates []expression.Expression)
return nil, p
}

// getGbyColIndex looks up col among the aggregation's group-by columns and
// returns the index reported by the schema's ColumnIndex. Callers in this
// file treat a result of -1 as "col is not a group-by column".
func (la *LogicalAggregation) getGbyColIndex(col *expression.Column) int {
	// A temporary schema is built over groupByCols on every call so the
	// schema's own lookup can be reused.
	gbySchema := expression.NewSchema(la.groupByCols...)
	return gbySchema.ColumnIndex(col)
}

// PredicatePushDown implements LogicalPlan PredicatePushDown interface.
func (la *LogicalAggregation) PredicatePushDown(predicates []expression.Expression) (ret []expression.Expression, retPlan LogicalPlan) {
var condsToPush []expression.Expression
exprsOriginal := make([]expression.Expression, 0, len(la.AggFuncs))
for _, fun := range la.AggFuncs {
exprsOriginal = append(exprsOriginal, fun.Args[0])
}
groupByColumns := expression.NewSchema(la.groupByCols...)
for _, cond := range predicates {
switch cond.(type) {
case *expression.Constant:
Expand All @@ -414,7 +410,7 @@ func (la *LogicalAggregation) PredicatePushDown(predicates []expression.Expressi
extractedCols := expression.ExtractColumns(cond)
ok := true
for _, col := range extractedCols {
if la.getGbyColIndex(col) == -1 {
if !groupByColumns.Contains(col) {
ok = false
break
}
Expand Down

0 comments on commit ed070c4

Please sign in to comment.