Skip to content

Commit

Permalink
Disable ORC String and Date statistics which are broken
Browse files Browse the repository at this point in the history
The merge logic for ORC String and Date statistics is broken, so the statistics
for stripes and files in ORC cannot be used for String and Date columns.
  • Loading branch information
dain committed Nov 5, 2014
1 parent dd70ef0 commit c46af91
Showing 1 changed file with 20 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public StripeStatistics apply(OrcProto.StripeStatistics type)

private static StripeStatistics toStripeStatistics(OrcProto.StripeStatistics stripeStatistics)
{
return new StripeStatistics(toColumnStatistics(stripeStatistics.getColStatsList()));
return new StripeStatistics(toColumnStatistics(stripeStatistics.getColStatsList(), false));
}

@Override
Expand All @@ -88,7 +88,7 @@ public Footer readFooter(InputStream inputStream)
footer.getRowIndexStride(),
toStripeInformation(footer.getStripesList()),
toType(footer.getTypesList()),
toColumnStatistics(footer.getStatisticsList()));
toColumnStatistics(footer.getStatisticsList(), false));
}

private static List<StripeInformation> toStripeInformation(List<OrcProto.StripeInformation> types)
Expand Down Expand Up @@ -184,21 +184,21 @@ private static RowGroupIndex toRowGroupIndex(RowIndexEntry rowIndexEntry)

positions.add(intPosition);
}
return new RowGroupIndex(positions.build(), toColumnStatistics(rowIndexEntry.getStatistics()));
return new RowGroupIndex(positions.build(), toColumnStatistics(rowIndexEntry.getStatistics(), true));
}

private static ColumnStatistics toColumnStatistics(OrcProto.ColumnStatistics statistics)
private static ColumnStatistics toColumnStatistics(OrcProto.ColumnStatistics statistics, boolean isRowGroup)
{
return new ColumnStatistics(
statistics.getNumberOfValues(),
toBooleanStatistics(statistics.getBucketStatistics()),
toIntegerStatistics(statistics.getIntStatistics()),
toDoubleStatistics(statistics.getDoubleStatistics()),
toStringStatistics(statistics.getStringStatistics()),
toDateStatistics(statistics.getDateStatistics()));
toStringStatistics(statistics.getStringStatistics(), isRowGroup),
toDateStatistics(statistics.getDateStatistics(), isRowGroup));
}

private static List<ColumnStatistics> toColumnStatistics(List<OrcProto.ColumnStatistics> columnStatistics)
private static List<ColumnStatistics> toColumnStatistics(List<OrcProto.ColumnStatistics> columnStatistics, final boolean isRowGroup)
{
if (columnStatistics == null) {
return ImmutableList.of();
Expand All @@ -208,7 +208,7 @@ private static List<ColumnStatistics> toColumnStatistics(List<OrcProto.ColumnSta
@Override
public ColumnStatistics apply(OrcProto.ColumnStatistics columnStatistics)
{
return toColumnStatistics(columnStatistics);
return toColumnStatistics(columnStatistics, isRowGroup);
}
}));
}
Expand Down Expand Up @@ -244,8 +244,13 @@ private static DoubleStatistics toDoubleStatistics(OrcProto.DoubleStatistics dou
doubleStatistics.hasMaximum() ? doubleStatistics.getMaximum() : null);
}

private static StringStatistics toStringStatistics(OrcProto.StringStatistics stringStatistics)
private static StringStatistics toStringStatistics(OrcProto.StringStatistics stringStatistics, boolean isRowGroup)
{
// TODO remove this when string statistics in ORC are fixed https://issues.apache.org/jira/browse/HIVE-8732
if (!isRowGroup) {
return null;
}

if (!stringStatistics.hasMinimum() && !stringStatistics.hasMaximum()) {
return null;
}
Expand All @@ -255,8 +260,13 @@ private static StringStatistics toStringStatistics(OrcProto.StringStatistics str
stringStatistics.hasMaximum() ? stringStatistics.getMaximum() : null);
}

private static DateStatistics toDateStatistics(OrcProto.DateStatistics dateStatistics)
private static DateStatistics toDateStatistics(OrcProto.DateStatistics dateStatistics, boolean isRowGroup)
{
// TODO remove this when date statistics in ORC are fixed https://issues.apache.org/jira/browse/HIVE-8732
if (!isRowGroup) {
return null;
}

if (!dateStatistics.hasMinimum() && !dateStatistics.hasMaximum()) {
return null;
}
Expand Down

0 comments on commit c46af91

Please sign in to comment.