Skip to content

Commit

Permalink
HIVE-5771 : Constant propagation optimizer for Hive (Ted Xu via Ashutosh Chauhan)
Browse files Browse the repository at this point in the history

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1613661 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
ashutoshc committed Jul 26, 2014
1 parent dbeb2c5 commit 44c4e54
Show file tree
Hide file tree
Showing 204 changed files with 4,172 additions and 1,910 deletions.
3 changes: 2 additions & 1 deletion common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,8 @@ public static enum ConfVars {
"Whether to transitively replicate predicate filters over equijoin conditions."),
HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true,
"Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."),
// Constant propagation optimizer
HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true, ""),
HIVENULLSCANOPTIMIZE("hive.optimize.null.scan", true, "Dont scan relations which are guaranteed to not generate any rows"),
HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true,
Expand Down Expand Up @@ -2250,7 +2252,6 @@ public static URL getHiveServer2SiteLocation() {
return hiveServer2SiteUrl;
}


/**
* @return the user name set in hadoop.job.ugi param or the current user from System
* @throws IOException
Expand Down
12 changes: 10 additions & 2 deletions conf/hive-default.xml.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
Expand All @@ -14,7 +16,8 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--><configuration>
-->
<configuration>
<!-- WARNING!!! This file is auto generated for documentation purposes ONLY! -->
<!-- WARNING!!! Any changes you make to this file will be ignored by Hive. -->
<!-- WARNING!!! You must make your changes in hive-site.xml instead. -->
Expand Down Expand Up @@ -1702,6 +1705,11 @@
<value>true</value>
<description>Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false.</description>
</property>
<property>
<name>hive.optimize.constant.propagation</name>
<value>true</value>
<description>Whether to enable constant propagation optimizer</description>
</property>
<property>
<name>hive.optimize.metadataonly</name>
<value>true</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: example_add(1, 2) (type: int), example_add(1, 2, 3) (type: int), example_add(1, 2, 3, 4) (type: int), example_add(1.1, 2.2) (type: double), example_add(1.1, 2.2, 3.3) (type: double), example_add(1.1, 2.2, 3.3, 4.4) (type: double), example_add(1, 2, 3, 4.4) (type: double)
expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
Limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: example_format('abc') (type: string), example_format('%1$s', 1.1) (type: string), example_format('%1$s %2$e', 1.1, 1.2) (type: string), example_format('%1$x %2$o %3$d', 10, 10, 10) (type: string)
expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE
Limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: hbase_pushdown
filterExpr: (key >= UDFToString((40 + 50))) (type: boolean)
filterExpr: (key >= '90') (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Filter Operator
predicate: (key >= UDFToString((40 + 50))) (type: boolean)
predicate: (key >= '90') (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
Expand Down
6 changes: 3 additions & 3 deletions hbase-handler/src/test/results/positive/hbase_pushdown.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ STAGE PLANS:
predicate: (key = 90) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
expressions: 90 (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
Expand Down Expand Up @@ -235,7 +235,7 @@ STAGE PLANS:
predicate: (((key = 80) and (key = 90)) and (value like '%90%')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
expressions: 90 (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
Expand Down Expand Up @@ -398,7 +398,7 @@ STAGE PLANS:
predicate: (key = 90) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
expressions: 90 (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ STAGE PLANS:
predicate: (((key >= 9) and (key < 17)) and (key = 11)) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
expressions: 11 (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
Expand Down
4 changes: 4 additions & 0 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,8 @@ public boolean equals(Object obj) {

return true;
}

/**
 * Replaces this column's ObjectInspector with the given one.
 * NOTE(review): presumably added by this commit so the constant-propagation
 * optimizer can swap in a writable-constant inspector after folding — confirm
 * against callers. Method name breaks camelCase ("setObjectinspector" rather
 * than "setObjectInspector"); renaming would need a caller sweep.
 *
 * @param writableObjectInspector the inspector to install for this column
 */
public void setObjectinspector(ObjectInspector writableObjectInspector) {
this.objectInspector = writableObjectInspector;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,9 @@ public boolean isStateful() {
public ExprNodeEvaluator[] getChildren() {
return null;
}

/**
 * Returns a human-readable form for logging/debugging: the wrapped
 * expression descriptor in brackets, e.g. {@code ExprNodeEvaluator[col1]}.
 */
@Override
public String toString() {
return "ExprNodeEvaluator[" + expr + "]";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ protected List<ObjectInspector> getValueObjectInspectors(
int bigPos = conf.getPosBigTable();
List<ObjectInspector> valueOI = new ArrayList<ObjectInspector>();
for (int i = 0; i < valueIndex.length; i++) {
if (valueIndex[i] >= 0) {
if (valueIndex[i] >= 0 && !joinKeysObjectInspectors[bigPos].isEmpty()) {
valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i]));
} else {
valueOI.add(inspectors.get(i));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ public void processOp(Object row, int tag) throws HiveException {
// TODO: this is fishy - we init object inspectors based on first tag. We
// should either init for each tag, or if rowInspector doesn't really
// matter, then we can create this in ctor and get rid of firstRow.
LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " + conf.getNumDistributionKeys());
keyObjectInspector = initEvaluatorsAndReturnStruct(keyEval,
distinctColIndices,
conf.getOutputKeyColumnNames(), numDistributionKeys, rowInspector);
Expand Down
13 changes: 5 additions & 8 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@
/**
* Select operator implementation.
*/
public class SelectOperator extends Operator<SelectDesc> implements
Serializable {
public class SelectOperator extends Operator<SelectDesc> implements Serializable {

private static final long serialVersionUID = 1L;
protected transient ExprNodeEvaluator[] eval;
Expand All @@ -60,10 +59,9 @@ protected void initializeOp(Configuration hconf) throws HiveException {
}
}
output = new Object[eval.length];
LOG.info("SELECT "
+ ((StructObjectInspector) inputObjInspectors[0]).getTypeName());
outputObjInspector = initEvaluatorsAndReturnStruct(eval, conf
.getOutputColumnNames(), inputObjInspectors[0]);
LOG.info("SELECT " + ((StructObjectInspector) inputObjInspectors[0]).getTypeName());
outputObjInspector = initEvaluatorsAndReturnStruct(eval, conf.getOutputColumnNames(),
inputObjInspectors[0]);
initializeChildren(hconf);
}

Expand All @@ -81,8 +79,7 @@ public void processOp(Object row, int tag) throws HiveException {
} catch (HiveException e) {
throw e;
} catch (RuntimeException e) {
throw new HiveException("Error evaluating "
+ conf.getColList().get(i).getExprString(), e);
throw new HiveException("Error evaluating " + conf.getColList().get(i).getExprString(), e);
}
forward(output, outputObjInspector);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
Expand Down Expand Up @@ -87,6 +88,7 @@ private ColumnPrunerProcFactory() {
* Node Processor for Column Pruning on Filter Operators.
*/
public static class ColumnPrunerFilterProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
FilterOperator op = (FilterOperator) nd;
Expand Down Expand Up @@ -120,6 +122,7 @@ public static ColumnPrunerFilterProc getFilterProc() {
* Node Processor for Column Pruning on Group By Operators.
*/
public static class ColumnPrunerGroupByProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
GroupByOperator op = (GroupByOperator) nd;
Expand Down Expand Up @@ -154,6 +157,7 @@ public static ColumnPrunerGroupByProc getGroupByProc() {
}

public static class ColumnPrunerScriptProc implements NodeProcessor {
@Override
@SuppressWarnings("unchecked")
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
Expand Down Expand Up @@ -224,6 +228,7 @@ public static ColumnPrunerScriptProc getScriptProc() {
* and update the RR & signature on the PTFOp.
*/
public static class ColumnPrunerPTFProc extends ColumnPrunerScriptProc {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {

Expand Down Expand Up @@ -327,6 +332,7 @@ public static ColumnPrunerPTFProc getPTFProc() {
* The Default Node Processor for Column Pruning.
*/
public static class ColumnPrunerDefaultProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
Expand All @@ -351,6 +357,7 @@ public static ColumnPrunerDefaultProc getDefaultProc() {
* store needed columns in tableScanDesc.
*/
public static class ColumnPrunerTableScanProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
TableScanOperator scanOp = (TableScanOperator) nd;
Expand Down Expand Up @@ -426,6 +433,7 @@ public static ColumnPrunerTableScanProc getTableScanProc() {
* The Node Processor for Column Pruning on Reduce Sink Operators.
*/
public static class ColumnPrunerReduceSinkProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
ReduceSinkOperator op = (ReduceSinkOperator) nd;
Expand All @@ -435,6 +443,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,

List<String> colLists = new ArrayList<String>();
ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
for (ExprNodeDesc key : keys) {
colLists = Utilities.mergeUniqElems(colLists, key.getCols());
}
Expand All @@ -456,7 +465,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,

if (childCols != null) {
boolean[] flags = new boolean[valCols.size()];
Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap();

for (String childCol : childCols) {
int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
Expand Down Expand Up @@ -497,6 +505,7 @@ public static ColumnPrunerReduceSinkProc getReduceSinkProc() {
* The Node Processor for Column Pruning on Lateral View Join Operators.
*/
public static class ColumnPrunerLateralViewJoinProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
LateralViewJoinOperator op = (LateralViewJoinOperator) nd;
Expand Down Expand Up @@ -585,6 +594,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
* The Node Processor for Column Pruning on Select Operators.
*/
public static class ColumnPrunerSelectProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
SelectOperator op = (SelectOperator) nd;
Expand Down Expand Up @@ -748,6 +758,12 @@ private static void pruneReduceSinkOperator(boolean[] retainFlags,
nm = oldRR.reverseLookup(outputCol);
}

// In case there are multiple columns referenced to the same column name, we won't
// do row resolve once more because the ColumnInfo in row resolver is already removed
if (nm == null) {
continue;
}

// Only remove information of a column if it is not a key,
// i.e. this column is not appearing in keyExprs of the RS
if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
Expand Down Expand Up @@ -795,6 +811,7 @@ public static ColumnPrunerLateralViewForwardProc getLateralViewForwardProc() {
* The Node Processor for Column Pruning on Join Operators.
*/
public static class ColumnPrunerJoinProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
JoinOperator op = (JoinOperator) nd;
Expand All @@ -817,9 +834,10 @@ public static ColumnPrunerJoinProc getJoinProc() {
* The Node Processor for Column Pruning on Map Join Operators.
*/
public static class ColumnPrunerMapJoinProc implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
MapJoinOperator op = (MapJoinOperator) nd;
AbstractMapJoinOperator<MapJoinDesc> op = (AbstractMapJoinOperator<MapJoinDesc>) nd;
pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), op
.getConf().getRetainList(), true);
return null;
Expand Down Expand Up @@ -878,6 +896,7 @@ private static void pruneJoinOperator(NodeProcessorCtx ctx,
List<Operator<? extends OperatorDesc>> childOperators = op
.getChildOperators();

LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
List<String> childColLists = cppCtx.genColLists(op);
if (childColLists == null) {
return;
Expand Down Expand Up @@ -985,6 +1004,7 @@ private static void pruneJoinOperator(NodeProcessorCtx ctx,
rs.add(col);
}

LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
op.setColumnExprMap(newColExprMap);
conf.setOutputColumnNames(outputCols);
op.getSchema().setSignature(rs);
Expand Down
Loading

0 comments on commit 44c4e54

Please sign in to comment.