Skip to content

Commit

Permalink
Represent min and max statistics in SPI as double
Browse files Browse the repository at this point in the history
Min and max statistics for non-numeric types were simply ignored by the optimizer,
even though SHOW STATS used to print them for strings. Now that min and max are
represented as doubles, the SHOW STATS command no longer prints these statistics
for such types, which better reflects the statistics that the optimizer actually
takes into account.
  • Loading branch information
arhimondr committed Sep 19, 2018
1 parent 5b9a58f commit ea3a814
Show file tree
Hide file tree
Showing 15 changed files with 374 additions and 194 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.predicate.NullableValue;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.DoubleRange;
import com.facebook.presto.spi.statistics.Estimate;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.facebook.presto.spi.type.DecimalType;
import com.facebook.presto.spi.type.Decimals;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeManager;
import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -59,6 +61,8 @@
import static com.facebook.presto.spi.predicate.Utils.nativeValueToBlock;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.Decimals.isLongDecimal;
import static com.facebook.presto.spi.type.Decimals.isShortDecimal;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.type.RealType.REAL;
Expand All @@ -72,6 +76,8 @@
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Maps.immutableEntry;
import static com.google.common.hash.Hashing.murmur3_128;
import static java.lang.Double.parseDouble;
import static java.lang.Float.intBitsToFloat;
import static java.util.Collections.unmodifiableList;
import static java.util.Objects.requireNonNull;

Expand Down Expand Up @@ -165,8 +171,11 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
Block rightBlock = nativeValueToBlock(prestoType, rightValue);
return prestoType.compareTo(leftBlock, 0, rightBlock, 0);
};
columnStatistics.setLowValue(lowValueCandidates.stream().min(comparator));
columnStatistics.setHighValue(highValueCandidates.stream().max(comparator));
Optional<Object> min = lowValueCandidates.stream().min(comparator);
Optional<Object> max = highValueCandidates.stream().max(comparator);
if (min.isPresent() && max.isPresent()) {
columnStatistics.setRange(createPrestoRange(prestoType, min.get(), max.get()));
}
columnStatistics.setDataSize(dataSize);

columnStatistics.setNullsFraction(nullsFraction);
Expand All @@ -192,6 +201,38 @@ private boolean isLowHighSupportedForType(Type type)
return false;
}

/**
 * Builds a {@link DoubleRange} from a pair of native Presto values.
 *
 * <p>Both bounds are converted with
 * {@code convertPrestoValueToStatsRepresentation}, lower bound first.
 *
 * @param type the Presto type shared by {@code min} and {@code max}
 * @param min the native value to use as the lower bound
 * @param max the native value to use as the upper bound
 * @return a range holding the two converted bounds
 * @throws IllegalArgumentException if the conversion does not support {@code type}
 */
public static DoubleRange createPrestoRange(Type type, Object min, Object max)
{
    double lowerBound = convertPrestoValueToStatsRepresentation(type, min);
    double upperBound = convertPrestoValueToStatsRepresentation(type, max);
    return new DoubleRange(lowerBound, upperBound);
}

/**
 * Converts a native Presto value into the {@code double} used for min/max statistics.
 *
 * <p>Handled types, checked in this order:
 * <ul>
 * <li>BIGINT, INTEGER, SMALLINT, TINYINT: the value is cast to {@code Long} and widened;</li>
 * <li>DOUBLE: the value is cast to {@code Double};</li>
 * <li>REAL: the value is cast to {@code Long}, narrowed to {@code int}, and those bits
 *     are decoded as a {@code float};</li>
 * <li>{@code DecimalType}: the value is rendered to text with the type's scale and then
 *     parsed as a double (short decimals are cast to {@code Long}, long decimals to
 *     {@code Slice});</li>
 * <li>DATE: the value is cast to {@code Long} and widened.</li>
 * </ul>
 *
 * @param type the Presto type of {@code value}
 * @param value the native value to convert
 * @return the value expressed as a double
 * @throws IllegalArgumentException if {@code type} is none of the types listed above, or is
 *         a decimal type that is neither short nor long
 */
private static double convertPrestoValueToStatsRepresentation(Type type, Object value)
{
    boolean integralType = type.equals(BIGINT)
            || type.equals(INTEGER)
            || type.equals(SMALLINT)
            || type.equals(TINYINT);
    if (integralType) {
        return (Long) value;
    }

    if (type.equals(DOUBLE)) {
        return (Double) value;
    }

    if (type.equals(REAL)) {
        // The float's raw bits arrive inside a Long; only the int part is decoded.
        int floatBits = ((Long) value).intValue();
        return intBitsToFloat(floatBits);
    }

    if (type instanceof DecimalType) {
        DecimalType decimal = (DecimalType) type;
        int scale = decimal.getScale();
        if (isShortDecimal(decimal)) {
            String text = Decimals.toString((Long) value, scale);
            return parseDouble(text);
        }
        if (isLongDecimal(decimal)) {
            String text = Decimals.toString((Slice) value, scale);
            return parseDouble(text);
        }
        throw new IllegalArgumentException("Unexpected decimal type: " + decimal);
    }

    if (type.equals(DATE)) {
        return (Long) value;
    }

    throw new IllegalArgumentException("Unsupported type: " + type);
}

private OptionalDouble calculateRowsPerPartition(Map<String, PartitionStatistics> statisticsSample)
{
return statisticsSample.values().stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2799,8 +2799,8 @@ public void testInsertMultipleColumnsFromSameChannel()

assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar_1 = '2' AND p_varchar_2 = '2')", tableName),
"SELECT * FROM VALUES " +
"('c_bigint_1', null, 1.0E0, 0.0E0, null, 1, 1), " +
"('c_bigint_2', null, 1.0E0, 0.0E0, null, 1, 1), " +
"('c_bigint_1', null, 1.0E0, 0.0E0, null, '1', '1'), " +
"('c_bigint_2', null, 1.0E0, 0.0E0, null, '1', '1'), " +
"('p_varchar_1', 1.0E0, 1.0E0, 0.0E0, null, null, null), " +
"('p_varchar_2', 1.0E0, 1.0E0, 0.0E0, null, null, null), " +
"(null, null, null, null, 1.0E0, null, null)");
Expand All @@ -2813,8 +2813,8 @@ public void testInsertMultipleColumnsFromSameChannel()

assertQuery(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_varchar_1 = 'O' AND p_varchar_2 = 'O')", tableName),
"SELECT * FROM VALUES " +
"('c_bigint_1', null, 1.0E0, 0.0E0, null, 15008, 15008), " +
"('c_bigint_2', null, 1.0E0, 0.0E0, null, 15008, 15008), " +
"('c_bigint_1', null, 1.0E0, 0.0E0, null, '15008', '15008'), " +
"('c_bigint_2', null, 1.0E0, 0.0E0, null, '15008', '15008'), " +
"('p_varchar_1', 1.0E0, 1.0E0, 0.0E0, null, null, null), " +
"('p_varchar_2', 1.0E0, 1.0E0, 0.0E0, null, null, null), " +
"(null, null, null, null, 1.0E0, null, null)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.sql.planner.Symbol;
import com.facebook.presto.sql.planner.TypeProvider;
import com.facebook.presto.sql.planner.iterative.Lookup;
Expand All @@ -29,13 +28,9 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalDouble;

import static com.facebook.presto.cost.StatsUtil.toStatsRepresentation;
import static com.facebook.presto.cost.SymbolStatsEstimate.UNKNOWN_STATS;
import static com.facebook.presto.sql.planner.plan.Patterns.tableScan;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.POSITIVE_INFINITY;
import static java.util.Objects.requireNonNull;

public class TableScanStatsRule
Expand Down Expand Up @@ -68,9 +63,8 @@ protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsP

for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) {
Symbol symbol = entry.getKey();
Type symbolType = types.get(symbol);
Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue()));
outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics, session, symbolType)).orElse(UNKNOWN_STATS));
outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics)).orElse(UNKNOWN_STATS));
}

return Optional.of(PlanNodeStatsEstimate.builder()
Expand All @@ -79,24 +73,19 @@ protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsP
.build());
}

private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics, Session session, Type type)
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics)
{
double nullsFraction = columnStatistics.getNullsFraction().getValue();
double nonNullRowsCount = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction);
double averageRowSize = nonNullRowsCount == 0 ? 0 : columnStatistics.getDataSize().getValue() / nonNullRowsCount;
return SymbolStatsEstimate.builder()
.setLowValue(asDouble(session, type, columnStatistics.getLowValue()).orElse(NEGATIVE_INFINITY))
.setHighValue(asDouble(session, type, columnStatistics.getHighValue()).orElse(POSITIVE_INFINITY))
.setNullsFraction(nullsFraction)
.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue())
.setAverageRowSize(averageRowSize)
.build();
}

private OptionalDouble asDouble(Session session, Type type, Optional<Object> optionalValue)
{
return optionalValue
.map(value -> toStatsRepresentation(metadata, session, type, value))
.orElseGet(OptionalDouble::empty);
SymbolStatsEstimate.Builder result = SymbolStatsEstimate.builder();
result.setNullsFraction(nullsFraction);
result.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
result.setAverageRowSize(averageRowSize);
columnStatistics.getRange().ifPresent(range -> {
result.setLowValue(range.getMin());
result.setHighValue(range.getMax());
});
return result.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,26 @@
package com.facebook.presto.sql.rewrite;

import com.facebook.presto.Session;
import com.facebook.presto.metadata.FunctionRegistry;
import com.facebook.presto.metadata.Metadata;
import com.facebook.presto.metadata.QualifiedObjectName;
import com.facebook.presto.metadata.Signature;
import com.facebook.presto.metadata.TableHandle;
import com.facebook.presto.metadata.TableMetadata;
import com.facebook.presto.security.AccessControl;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.DoubleRange;
import com.facebook.presto.spi.statistics.Estimate;
import com.facebook.presto.spi.statistics.TableStatistics;
import com.facebook.presto.spi.type.BigintType;
import com.facebook.presto.spi.type.DecimalType;
import com.facebook.presto.spi.type.DoubleType;
import com.facebook.presto.spi.type.IntegerType;
import com.facebook.presto.spi.type.RealType;
import com.facebook.presto.spi.type.SmallintType;
import com.facebook.presto.spi.type.TinyintType;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.VarcharType;
import com.facebook.presto.sql.InterpretedFunctionInvoker;
import com.facebook.presto.sql.QueryUtil;
import com.facebook.presto.sql.analyzer.QueryExplainer;
import com.facebook.presto.sql.analyzer.SemanticException;
Expand Down Expand Up @@ -63,13 +67,14 @@
import com.facebook.presto.sql.tree.TableSubquery;
import com.facebook.presto.sql.tree.Values;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;

import java.time.LocalDate;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.metadata.MetadataUtil.createQualifiedObjectName;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.StandardTypes.DOUBLE;
import static com.facebook.presto.spi.type.StandardTypes.VARCHAR;
import static com.facebook.presto.sql.QueryUtil.aliased;
Expand All @@ -80,7 +85,7 @@
import static com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Collections.singletonList;
import static java.lang.Math.round;
import static java.util.Objects.requireNonNull;

public class ShowStatsRewrite
Expand All @@ -91,7 +96,6 @@ public class ShowStatsRewrite

private static final Expression NULL_DOUBLE = new Cast(new NullLiteral(), DOUBLE);
private static final Expression NULL_VARCHAR = new Cast(new NullLiteral(), VARCHAR);
private static final int MAX_LOW_HIGH_LENGTH = 32;

@Override
public Statement rewrite(Session session, Metadata metadata, SqlParser parser, Optional<QueryExplainer> queryExplainer, Statement node, List<Expression> parameters, AccessControl accessControl)
Expand Down Expand Up @@ -300,12 +304,12 @@ private Row createColumnStatsRow(String columnName, Type type, ColumnStatistics
{
ImmutableList.Builder<Expression> rowValues = ImmutableList.builder();
rowValues.add(new StringLiteral(columnName));
rowValues.add(createStatisticValueOrNull(columnStatistics.getDataSize()));
rowValues.add(createStatisticValueOrNull(columnStatistics.getDistinctValuesCount()));
rowValues.add(createStatisticValueOrNull(columnStatistics.getNullsFraction()));
rowValues.add(createEstimateRepresentation(columnStatistics.getDataSize()));
rowValues.add(createEstimateRepresentation(columnStatistics.getDistinctValuesCount()));
rowValues.add(createEstimateRepresentation(columnStatistics.getNullsFraction()));
rowValues.add(NULL_DOUBLE);
rowValues.add(lowHighAsLiteral(type, columnStatistics.getLowValue()));
rowValues.add(lowHighAsLiteral(type, columnStatistics.getHighValue()));
rowValues.add(toStringLiteral(type, columnStatistics.getRange().map(DoubleRange::getMin)));
rowValues.add(toStringLiteral(type, columnStatistics.getRange().map(DoubleRange::getMax)));
return new Row(rowValues.build());
}

Expand All @@ -329,34 +333,40 @@ private static Row createTableStatsRow(TableStatistics tableStatistics)
rowValues.add(NULL_DOUBLE);
rowValues.add(NULL_DOUBLE);
rowValues.add(NULL_DOUBLE);
rowValues.add(createStatisticValueOrNull(tableStatistics.getRowCount()));
rowValues.add(createEstimateRepresentation(tableStatistics.getRowCount()));
rowValues.add(NULL_VARCHAR);
rowValues.add(NULL_VARCHAR);
return new Row(rowValues.build());
}

private Expression lowHighAsLiteral(Type valueType, Optional<Object> value)
private static Expression createEstimateRepresentation(Estimate estimate)
{
if (!value.isPresent()) {
return new Cast(new NullLiteral(), VARCHAR);
}
FunctionRegistry functionRegistry = metadata.getFunctionRegistry();
InterpretedFunctionInvoker functionInvoker = new InterpretedFunctionInvoker(functionRegistry);
Signature castSignature = functionRegistry.getCoercion(valueType, VarcharType.createUnboundedVarcharType());
Slice varcharValue = (Slice) functionInvoker.invoke(castSignature, session.toConnectorSession(), singletonList(value.get()));
String stringValue = varcharValue.toStringUtf8();
if (stringValue.length() > MAX_LOW_HIGH_LENGTH) {
stringValue = stringValue.substring(0, MAX_LOW_HIGH_LENGTH) + "...";
if (estimate.isUnknown()) {
return NULL_DOUBLE;
}
return new StringLiteral(stringValue);
return new DoubleLiteral(Double.toString(estimate.getValue()));
}

private static Expression createStatisticValueOrNull(Estimate estimate)
private static Expression toStringLiteral(Type type, Optional<Double> optionalValue)
{
if (estimate.isUnknown()) {
return NULL_DOUBLE;
return optionalValue.map(value -> toStringLiteral(type, value)).orElse(NULL_VARCHAR);
}

private static Expression toStringLiteral(Type type, double value)
{
if (type.equals(BigintType.BIGINT) || type.equals(IntegerType.INTEGER) || type.equals(SmallintType.SMALLINT) || type.equals(TinyintType.TINYINT)) {
return new StringLiteral(Long.toString(round(value)));
}
return new DoubleLiteral(Double.toString(estimate.getValue()));
if (type.equals(DoubleType.DOUBLE) || type instanceof DecimalType) {
return new StringLiteral(Double.toString(value));
}
if (type.equals(RealType.REAL)) {
return new StringLiteral(Float.toString((float) value));
}
if (type.equals(DATE)) {
return new StringLiteral(LocalDate.ofEpochDay(round(value)).toString());
}
throw new IllegalArgumentException("Unexpected type: " + type);
}
}
}
Loading

0 comments on commit ea3a814

Please sign in to comment.