Skip to content

Commit

Permalink
Optimizer rule for TopN (vesoft-inc#279)
Browse files Browse the repository at this point in the history
* add TopN PlanNode and Executor

* add topn rule

* fix topn rule

* fix auto

* add topn rule test

* fix indent

* fix conflict

* fix conflict

* fix ;

* add newline

* fix colNames

* update code

* update code

* fix static function naming style

* fix name

* fix name

* check result data in test_optimizer

* fix
  • Loading branch information
jievince authored Oct 19, 2020
1 parent 3392dc7 commit fafb0b2
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ nebula_add_library(
OptRule.cpp
rule/PushFilterDownGetNbrsRule.cpp
rule/IndexScanRule.cpp
rule/TopNRule.cpp
)

nebula_add_subdirectory(test)
4 changes: 4 additions & 0 deletions src/optimizer/OptRule.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ class Pattern final {
class OptRule {
public:
struct TransformResult {
// Returns a shared result meaning "the rule did not change the plan":
// both erase flags are false and the list of new group expressions is empty.
// The same object is handed back on every call.
static const TransformResult &noTransform() {
    static const TransformResult kUnchanged{false, false, {}};
    return kUnchanged;
}
bool eraseCurr{false};
bool eraseAll{false};
std::vector<OptGroupExpr *> newGroupExprs;
Expand Down
4 changes: 2 additions & 2 deletions src/optimizer/rule/PushFilterDownGetNbrsRule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform(
graph::ExtractFilterExprVisitor visitor;
condition->accept(&visitor);
if (!visitor.ok()) {
return TransformResult{false, false, {}};
return TransformResult::noTransform();
}

auto pool = qctx->objPool();
Expand Down Expand Up @@ -91,8 +91,8 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform(
}

TransformResult result;
result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr);
result.eraseCurr = true;
result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr);
return result;
}

Expand Down
2 changes: 2 additions & 0 deletions src/optimizer/rule/PushFilterDownGetNbrsRule.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ namespace opt {
class PushFilterDownGetNbrsRule final : public OptRule {
public:
const Pattern &pattern() const override;

StatusOr<TransformResult> transform(graph::QueryContext *qctx,
const MatchedResult &matched) const override;

std::string toString() const override;

private:
Expand Down
75 changes: 75 additions & 0 deletions src/optimizer/rule/TopNRule.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include "optimizer/rule/TopNRule.h"

#include "common/expression/BinaryExpression.h"
#include "common/expression/ConstantExpression.h"
#include "common/expression/Expression.h"
#include "common/expression/FunctionCallExpression.h"
#include "common/expression/LogicalExpression.h"
#include "common/expression/UnaryExpression.h"
#include "optimizer/OptGroup.h"
#include "planner/PlanNode.h"
#include "planner/Query.h"
#include "visitor/ExtractFilterExprVisitor.h"

using nebula::graph::Limit;
using nebula::graph::PlanNode;
using nebula::graph::QueryContext;
using nebula::graph::Sort;
using nebula::graph::TopN;

namespace nebula {
namespace opt {

std::unique_ptr<OptRule> TopNRule::kInstance = std::unique_ptr<TopNRule>(new TopNRule());

// Registers this rule with the query rule set as soon as it is constructed.
TopNRule::TopNRule() {
    auto &&queryRules = RuleSet::QueryRules();
    queryRules.addRule(this);
}

// Describes the plan shape this rule applies to: a Limit node whose
// dependency is a Sort node. The pattern is built once, on first use.
const Pattern &TopNRule::pattern() const {
    using Kind = graph::PlanNode::Kind;
    static Pattern kLimitOverSort = Pattern::create(Kind::kLimit, {Pattern::create(Kind::kSort)});
    return kLimitOverSort;
}

// Replaces a matched Limit-over-Sort pair with a single TopN plan node.
//
// `matched.node` is treated as the Limit group expression and the first entry
// of `matched.dependencies` as the Sort group expression (the shape requested
// by pattern()). When the limit has a non-zero offset the plan is left as is.
// Otherwise the returned result carries the new TopN group expression and has
// both erase flags set.
StatusOr<OptRule::TransformResult> TopNRule::transform(QueryContext *qctx,
                                                       const MatchedResult &matched) const {
    auto limitGroupExpr = matched.node;
    auto sortGroupExpr = matched.dependencies.front().node;
    auto limitNode = static_cast<const Limit *>(limitGroupExpr->node());
    auto sortNode = static_cast<const Sort *>(sortGroupExpr->node());

    // The total amount of input data is not known at this point, so the
    // rewrite is restricted to limits that start at offset 0.
    const auto offset = limitNode->offset();
    if (offset != 0) {
        return TransformResult::noTransform();
    }

    // The TopN node takes the sort factors from Sort and the row count from
    // Limit. It reads what Sort used to read, writes where Limit used to
    // write, and keeps Sort's column names.
    auto topnNode = TopN::make(qctx, nullptr, sortNode->factors(), offset, limitNode->count());
    topnNode->setOutputVar(limitNode->outputVar());
    topnNode->setInputVar(sortNode->inputVar());
    topnNode->setColNames(sortNode->colNames());

    // The new group expression lives in the Limit's group and inherits every
    // dependency the Sort had.
    auto topnGroupExpr = OptGroupExpr::create(qctx, topnNode, limitGroupExpr->group());
    for (auto sortDep : sortGroupExpr->dependencies()) {
        topnGroupExpr->dependsOn(sortDep);
    }

    TransformResult transformed;
    transformed.eraseCurr = true;
    transformed.eraseAll = true;
    transformed.newGroupExprs.emplace_back(topnGroupExpr);
    return transformed;
}

// Returns the rule's name, "TopNRule".
std::string TopNRule::toString() const {
    return std::string{"TopNRule"};
}

} // namespace opt
} // namespace nebula
35 changes: 35 additions & 0 deletions src/optimizer/rule/TopNRule.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#ifndef OPTIMIZER_RULE_TOPNRULE_H_
#define OPTIMIZER_RULE_TOPNRULE_H_

#include <memory>

#include "optimizer/OptRule.h"

namespace nebula {
namespace opt {

/**
 * Optimizer rule that turns a Limit node placed directly on top of a Sort
 * node into a single TopN node.
 *
 * The class cannot be constructed from outside: its only instance is the
 * static `kInstance` member.
 */
class TopNRule final : public OptRule {
public:
    // The plan shape this rule matches.
    const Pattern &pattern() const override;

    // Builds the replacement for a matched plan fragment, or reports that
    // nothing was changed.
    StatusOr<TransformResult> transform(graph::QueryContext *qctx,
                                        const MatchedResult &matched) const override;

    // Name of the rule.
    std::string toString() const override;

private:
    // Private so that `kInstance` is the only object of this type.
    TopNRule();

    static std::unique_ptr<OptRule> kInstance;
};

} // namespace opt
} // namespace nebula

#endif // OPTIMIZER_RULE_TOPNRULE_H_
1 change: 0 additions & 1 deletion src/planner/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,6 @@ class TopN final : public SingleInputNode {
int64_t count_{-1};
};


/**
* Do Aggregation with the given set of records,
* such as AVG(), COUNT()...
Expand Down
110 changes: 97 additions & 13 deletions tests/query/v2/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,53 +18,61 @@ def prepare(cls):

def test_PushFilterDownGetNbrsRule(self):
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
WHERE $^.player.age > 18 YIELD $^.player.name AS name
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [[2003], [2005], [2008], [2012], [2016]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER like REVERSELY
WHERE $^.player.age > 18 YIELD $^.player.name AS name
GO 1 STEPS FROM "James Harden" OVER like REVERSELY
WHERE $^.player.age > 18 YIELD like.likeness as likeness
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [[90], [80], [99]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
WHERE serve.start_year > 2002 YIELD $^.player.name AS name
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE serve.start_year > 2005 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['(serve.start_year>2002)']],
["GetNeighbors", [2], ['(serve.start_year>2005)']],
["Start", []]
]
expected_data = [[2008], [2012], [2016]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

resp = self.execute_query('''
GO 1 STEPS FROM "Lakerys" OVER serve REVERSELY
WHERE serve.start_year > 2002 YIELD $^.player.name AS name
GO 1 STEPS FROM "Lakers" OVER serve REVERSELY
WHERE serve.start_year < 2017 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['(serve.start_year>2002)']],
["GetNeighbors", [2], ['(serve.start_year<2017)']],
["Start", []]
]
expected_data = [[2012], [1996], [2008], [1996], [2012]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

@pytest.mark.skip(reason="Depends on other opt rules to eliminate duplicate project nodes")
def test_PushFilterDownGetNbrsRule_Failed(self):
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 AND $$.team.name == "Lakers"
YIELD $^.player.name AS name
''')
Expand All @@ -74,10 +82,12 @@ def test_PushFilterDownGetNbrsRule_Failed(self):
["GetNeighbors", [3], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 OR $$.team.name == "Lakers"
YIELD $^.player.name AS name
''')
Expand All @@ -87,11 +97,13 @@ def test_PushFilterDownGetNbrsRule_Failed(self):
["GetNeighbors", [3]],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

# fail to optimize cases
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve \
GO 1 STEPS FROM "Boris Diaw" OVER serve \
WHERE $$.team.name == "Lakers" YIELD $^.player.name AS name
''')
expected_plan = [
Expand All @@ -100,4 +112,76 @@ def test_PushFilterDownGetNbrsRule_Failed(self):
["GetNeighbors", [3]],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)

def test_TopNRule(self):
    """Queries ending in ORDER BY + LIMIT (no offset) should be planned with TopN.

    Every case runs one query, then checks the execution plan and the
    returned rows (order-sensitive).
    """
    # Each entry is (query, expected plan, expected rows).
    cases = [
        (
            '''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness AS likeness
| ORDER BY likeness
| LIMIT 2
''',
            [
                ["DataCollect", [1]],
                ["TopN", [2]],
                ["Project", [3]],
                ["GetNeighbors", [4]],
                ["Start", []]
            ],
            [[50], [55]],
        ),
        (
            '''
GO 1 STEPS FROM "Marco Belinelli" OVER like REVERSELY
YIELD like.likeness AS likeness |
ORDER BY likeness |
LIMIT 1
''',
            [
                ["DataCollect", [1]],
                ["TopN", [2]],
                ["Project", [3]],
                ["GetNeighbors", [4]],
                ["Start", []]
            ],
            [[83]],
        ),
    ]
    for query, plan, rows in cases:
        response = self.execute_query(query)
        self.check_exec_plan(response, plan)
        self.check_result(response, rows)

def test_TopNRule_Failed(self):
    """Queries the TopN rule must leave alone keep their Sort (and Limit) nodes.

    Covers a LIMIT with a non-zero offset and an ORDER BY without any LIMIT.
    Every case runs one query, then checks the execution plan and the
    returned rows (order-sensitive).
    """
    # Each entry is (query, expected plan, expected rows).
    cases = [
        (
            '''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness as likeness
| ORDER BY likeness
| LIMIT 2, 3
''',
            [
                ["DataCollect", [1]],
                ["Limit", [2]],
                ["Sort", [3]],
                ["Project", [4]],
                ["GetNeighbors", [5]],
                ["Start", []]
            ],
            [[60]],
        ),
        (
            '''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness AS likeness
| ORDER BY likeness
''',
            [
                ["DataCollect", [1]],
                ["Sort", [2]],
                ["Project", [3]],
                ["GetNeighbors", [4]],
                ["Start", []]
            ],
            [[50], [55], [60]],
        ),
    ]
    for query, plan, rows in cases:
        response = self.execute_query(query)
        self.check_exec_plan(response, plan)
        self.check_result(response, rows)

0 comments on commit fafb0b2

Please sign in to comment.