Skip to content

Commit

Permalink
support join reorder. (pingcap#1522)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanfei1991 authored Aug 1, 2016
1 parent 5cae3c3 commit 9574322
Show file tree
Hide file tree
Showing 11 changed files with 598 additions and 82 deletions.
94 changes: 94 additions & 0 deletions executor/executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,100 @@ func (s *testSuite) TestJoin(c *C) {

}

func (s *testSuite) TestMultiJoin(c *C) {
defer testleak.AfterTest(c)()
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t35(a35 int primary key, b35 int, x35 int)")
tk.MustExec("create table t40(a40 int primary key, b40 int, x40 int)")
tk.MustExec("create table t14(a14 int primary key, b14 int, x14 int)")
tk.MustExec("create table t42(a42 int primary key, b42 int, x42 int)")
tk.MustExec("create table t15(a15 int primary key, b15 int, x15 int)")
tk.MustExec("create table t7(a7 int primary key, b7 int, x7 int)")
tk.MustExec("create table t64(a64 int primary key, b64 int, x64 int)")
tk.MustExec("create table t19(a19 int primary key, b19 int, x19 int)")
tk.MustExec("create table t9(a9 int primary key, b9 int, x9 int)")
tk.MustExec("create table t8(a8 int primary key, b8 int, x8 int)")
tk.MustExec("create table t57(a57 int primary key, b57 int, x57 int)")
tk.MustExec("create table t37(a37 int primary key, b37 int, x37 int)")
tk.MustExec("create table t44(a44 int primary key, b44 int, x44 int)")
tk.MustExec("create table t38(a38 int primary key, b38 int, x38 int)")
tk.MustExec("create table t18(a18 int primary key, b18 int, x18 int)")
tk.MustExec("create table t62(a62 int primary key, b62 int, x62 int)")
tk.MustExec("create table t4(a4 int primary key, b4 int, x4 int)")
tk.MustExec("create table t48(a48 int primary key, b48 int, x48 int)")
tk.MustExec("create table t31(a31 int primary key, b31 int, x31 int)")
tk.MustExec("create table t16(a16 int primary key, b16 int, x16 int)")
tk.MustExec("create table t12(a12 int primary key, b12 int, x12 int)")
tk.MustExec("insert into t35 values(1,1,1)")
tk.MustExec("insert into t40 values(1,1,1)")
tk.MustExec("insert into t14 values(1,1,1)")
tk.MustExec("insert into t42 values(1,1,1)")
tk.MustExec("insert into t15 values(1,1,1)")
tk.MustExec("insert into t7 values(1,1,1)")
tk.MustExec("insert into t64 values(1,1,1)")
tk.MustExec("insert into t19 values(1,1,1)")
tk.MustExec("insert into t9 values(1,1,1)")
tk.MustExec("insert into t8 values(1,1,1)")
tk.MustExec("insert into t57 values(1,1,1)")
tk.MustExec("insert into t37 values(1,1,1)")
tk.MustExec("insert into t44 values(1,1,1)")
tk.MustExec("insert into t38 values(1,1,1)")
tk.MustExec("insert into t18 values(1,1,1)")
tk.MustExec("insert into t62 values(1,1,1)")
tk.MustExec("insert into t4 values(1,1,1)")
tk.MustExec("insert into t48 values(1,1,1)")
tk.MustExec("insert into t31 values(1,1,1)")
tk.MustExec("insert into t16 values(1,1,1)")
tk.MustExec("insert into t12 values(1,1,1)")
tk.MustExec("insert into t35 values(7,7,7)")
tk.MustExec("insert into t40 values(7,7,7)")
tk.MustExec("insert into t14 values(7,7,7)")
tk.MustExec("insert into t42 values(7,7,7)")
tk.MustExec("insert into t15 values(7,7,7)")
tk.MustExec("insert into t7 values(7,7,7)")
tk.MustExec("insert into t64 values(7,7,7)")
tk.MustExec("insert into t19 values(7,7,7)")
tk.MustExec("insert into t9 values(7,7,7)")
tk.MustExec("insert into t8 values(7,7,7)")
tk.MustExec("insert into t57 values(7,7,7)")
tk.MustExec("insert into t37 values(7,7,7)")
tk.MustExec("insert into t44 values(7,7,7)")
tk.MustExec("insert into t38 values(7,7,7)")
tk.MustExec("insert into t18 values(7,7,7)")
tk.MustExec("insert into t62 values(7,7,7)")
tk.MustExec("insert into t4 values(7,7,7)")
tk.MustExec("insert into t48 values(7,7,7)")
tk.MustExec("insert into t31 values(7,7,7)")
tk.MustExec("insert into t16 values(7,7,7)")
tk.MustExec("insert into t12 values(7,7,7)")
result := tk.MustQuery(`SELECT x4,x8,x38,x44,x31,x9,x57,x48,x19,x40,x14,x12,x7,x64,x37,x18,x62,x35,x42,x15,x16 FROM
t35,t40,t14,t42,t15,t7,t64,t19,t9,t8,t57,t37,t44,t38,t18,t62,t4,t48,t31,t16,t12
WHERE b48=a57
AND a4=b19
AND a14=b16
AND b37=a48
AND a40=b42
AND a31=7
AND a15=b40
AND a38=b8
AND b15=a31
AND b64=a18
AND b12=a44
AND b7=a8
AND b35=a16
AND a12=b14
AND a64=b57
AND b62=a7
AND a35=b38
AND b9=a19
AND a62=b18
AND b4=a37
AND b44=a42`)
result.Check(testkit.Rows("7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7"))
}

func (s *testSuite) TestIndexScan(c *C) {
defer testleak.AfterTest(c)()
tk := testkit.NewTestKit(c, s.store)
Expand Down
1 change: 1 addition & 0 deletions expression/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ func ScalarFuncs2Exprs(funcs []*ScalarFunction) []Expression {
// DeepCopy implements Expression interface.
func (sf *ScalarFunction) DeepCopy() Expression {
newFunc := &ScalarFunction{FuncName: sf.FuncName, Function: sf.Function, RetType: sf.RetType}
newFunc.Args = make([]Expression, 0, len(sf.Args))
for _, arg := range sf.Args {
newFunc.Args = append(newFunc.Args, arg.DeepCopy())
}
Expand Down
206 changes: 206 additions & 0 deletions plan/join_reorder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package plan

import (
"sort"

"github.com/ngaut/log"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
)

// tryToGetJoinGroup tries to fetch a whole join group, which all joins is cartesian join.
func tryToGetJoinGroup(j *Join) ([]LogicalPlan, bool) {
if j.reordered || !j.cartesianJoin {
return nil, false
}
lChild := j.GetChildByIndex(0).(LogicalPlan)
rChild := j.GetChildByIndex(1).(LogicalPlan)
if nj, ok := lChild.(*Join); ok {
plans, valid := tryToGetJoinGroup(nj)
return append(plans, rChild), valid
}
return []LogicalPlan{lChild, rChild}, true
}

func findColumnIndexByGroup(groups []LogicalPlan, col *expression.Column) int {
for i, plan := range groups {
idx := plan.GetSchema().GetIndex(col)
if idx != -1 {
return i
}
}
log.Errorf("Unknown columns %s, from id %s, position %d", col.ToString(), col.FromID, col.Position)
return -1
}

type joinReOrderSolver struct {
graph []edgeList
group []LogicalPlan
visited []bool
resultJoin LogicalPlan
groupRank []*rankInfo
allocator *idAllocator
}

type edgeList []*rankInfo

func (l edgeList) Len() int {
return len(l)
}

func (l edgeList) Less(i, j int) bool {
return l[i].rate < l[j].rate
}

func (l edgeList) Swap(i, j int) {
l[i], l[j] = l[j], l[i]
}

type rankInfo struct {
nodeID int
rate float64
}

func (e *joinReOrderSolver) Less(i, j int) bool {
return e.groupRank[i].rate < e.groupRank[j].rate
}

func (e *joinReOrderSolver) Swap(i, j int) {
e.groupRank[i], e.groupRank[j] = e.groupRank[j], e.groupRank[i]
}

func (e *joinReOrderSolver) Len() int {
return len(e.groupRank)
}

// reorderJoin implements a simple join reorder algorithm. It will extract all the equal conditions and compose them to a graph.
// Then walk through the graph and pick the nodes connected by some edges to compose a join tree.
// We will pick the node with least result set as early as possible.
func (e *joinReOrderSolver) reorderJoin(group []LogicalPlan, conds []expression.Expression) {
e.graph = make([]edgeList, len(group))
e.group = group
e.visited = make([]bool, len(group))
e.resultJoin = nil
e.groupRank = make([]*rankInfo, len(group))
for i := 0; i < len(e.groupRank); i++ {
e.groupRank[i] = &rankInfo{
nodeID: i,
rate: 1.0,
}
}
for _, cond := range conds {
if f, ok := cond.(*expression.ScalarFunction); ok {
if f.FuncName.L == ast.EQ {
lCol, lok := f.Args[0].(*expression.Column)
rCol, rok := f.Args[1].(*expression.Column)
if lok && rok && !lCol.Correlated && !rCol.Correlated {
lID := findColumnIndexByGroup(group, lCol)
rID := findColumnIndexByGroup(group, rCol)
if lID != rID {
e.graph[lID] = append(e.graph[lID], &rankInfo{nodeID: rID})
e.graph[rID] = append(e.graph[rID], &rankInfo{nodeID: lID})
continue
}
}
}
id := -1
rate := 1.0
cols, _ := extractColumn(f, nil, nil)
for _, col := range cols {
idx := findColumnIndexByGroup(group, col)
if id == -1 {
switch f.FuncName.L {
case ast.EQ:
rate *= 0.1
case ast.LT, ast.LE, ast.GE, ast.GT:
rate *= 0.3
// TODO: Estimate it more precisely in future.
default:
rate *= 0.9
}
id = idx
} else {
id = -1
break
}
}
if id != -1 {
e.groupRank[id].rate *= rate
}
}
}
for _, node := range e.graph {
for _, edge := range node {
edge.rate = e.groupRank[edge.nodeID].rate
}
}
sort.Sort(e)
for _, edge := range e.graph {
sort.Sort(edge)
}
var cartesianJoinGroup []LogicalPlan
for j := 0; j < len(e.groupRank); j++ {
i := e.groupRank[j].nodeID
if !e.visited[i] {
e.resultJoin = e.group[i]
e.walkGraphAndComposeJoin(i)
cartesianJoinGroup = append(cartesianJoinGroup, e.resultJoin)
}
}
e.makeBushyJoin(cartesianJoinGroup)
}

// Make cartesian join as bushy tree.
func (e *joinReOrderSolver) makeBushyJoin(cartesianJoinGroup []LogicalPlan) {
for len(cartesianJoinGroup) > 1 {
resultJoinGroup := make([]LogicalPlan, 0, len(cartesianJoinGroup))
for i := 0; i < len(cartesianJoinGroup); i += 2 {
if i+1 == len(cartesianJoinGroup) {
resultJoinGroup = append(resultJoinGroup, cartesianJoinGroup[i])
break
}
resultJoinGroup = append(resultJoinGroup, e.newJoin(cartesianJoinGroup[i], cartesianJoinGroup[i+1]))
}
cartesianJoinGroup = resultJoinGroup
}
e.resultJoin = cartesianJoinGroup[0]
}

func (e *joinReOrderSolver) newJoin(lChild, rChild LogicalPlan) *Join {
join := &Join{
JoinType: InnerJoin,
reordered: true,
baseLogicalPlan: newBaseLogicalPlan(Jn, e.allocator),
}
join.initID()
join.SetChildren(lChild, rChild)
join.SetSchema(append(lChild.GetSchema().DeepCopy(), rChild.GetSchema().DeepCopy()...))
lChild.SetParents(join)
rChild.SetParents(join)
return join
}

// walkGraph implements a dfs algorithm. Each time it picks a edge with lowest rate, which has been sorted before.
func (e *joinReOrderSolver) walkGraphAndComposeJoin(u int) {
e.visited[u] = true
for _, edge := range e.graph[u] {
v := edge.nodeID
if !e.visited[v] {
e.resultJoin = e.newJoin(e.resultJoin, e.group[v])
e.walkGraphAndComposeJoin(v)
}
}
}
2 changes: 2 additions & 0 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ func (b *planBuilder) buildNewJoin(join *ast.Join) LogicalPlan {
joinPlan.LeftConditions = leftCond
joinPlan.RightConditions = rightCond
joinPlan.OtherConditions = otherCond
} else if joinPlan.JoinType == InnerJoin {
joinPlan.cartesianJoin = true
}
if join.Tp == ast.LeftJoin {
joinPlan.JoinType = LeftOuterJoin
Expand Down
10 changes: 5 additions & 5 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@ import (
type JoinType int

const (
// CrossJoin means Cartesian Product, but not used now.
CrossJoin JoinType = iota
// InnerJoin means inner join.
InnerJoin
InnerJoin JoinType = iota
// LeftOuterJoin means left join.
LeftOuterJoin
// RightOuterJoin means right join.
Expand All @@ -43,8 +41,10 @@ const (
type Join struct {
baseLogicalPlan

JoinType JoinType
anti bool
JoinType JoinType
anti bool
reordered bool
cartesianJoin bool

EqualConditions []*expression.ScalarFunction
LeftConditions []expression.Expression
Expand Down
Loading

0 comments on commit 9574322

Please sign in to comment.