Skip to content

Commit

Permalink
Fix failure from reading parquet column index for INT96
Browse files Browse the repository at this point in the history
org.apache.parquet.format.converter.ParquetMetadataConverter#fromParquetColumnIndex
returns null if the Parquet primitive type of a column does not support min/max statistics.
The query failure is fixed by skipping the column index read in such cases.
  • Loading branch information
raunaqmorarka committed Nov 24, 2023
1 parent ee27de4 commit 08baad3
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.ColumnOrder;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
Expand Down Expand Up @@ -207,6 +208,10 @@ public boolean matches(Map<ColumnDescriptor, Long> valueCounts, ColumnIndexStore
continue;
}

// ParquetMetadataConverter#fromParquetColumnIndex returns null if the parquet primitive type does not support min/max stats
if (!isColumnIndexStatsSupported(column.getPrimitiveType())) {
continue;
}
ColumnIndex columnIndex = columnIndexStore.getColumnIndex(ColumnPath.get(column.getPath()));
if (columnIndex == null) {
continue;
Expand Down Expand Up @@ -685,6 +690,11 @@ private FilterPredicate convertToParquetFilter(DateTimeZone timeZone)
continue;
}

// ParquetMetadataConverter#fromParquetColumnIndex returns null if the parquet primitive type does not support min/max stats
if (!isColumnIndexStatsSupported(column.getPrimitiveType())) {
continue;
}

FilterPredicate columnFilter = FilterApi.userDefined(
new TrinoIntColumn(ColumnPath.get(column.getPath())),
new DomainUserDefinedPredicate<>(column, domain, timeZone));
Expand Down Expand Up @@ -808,4 +818,10 @@ private static final class TrinoIntColumn
super(columnPath, Integer.class);
}
}

/**
 * Reports whether column index statistics can be used for the given primitive type.
 * <p>
 * Copy of org.apache.parquet.format.converter.ParquetMetadataConverter#isMinMaxStatsSupported:
 * the type qualifies only when its column order is {@code TYPE_DEFINED_ORDER}.
 *
 * @param type the parquet primitive type of the column being checked
 * @return {@code true} when the column order name of {@code type} is {@code TYPE_DEFINED_ORDER}
 */
private static boolean isColumnIndexStatsSupported(PrimitiveType type)
{
    ColumnOrder.ColumnOrderName orderName = type.columnOrder().getColumnOrderName();
    // Enum constants are singletons, so an identity comparison is sufficient
    return ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER == orderName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,30 @@ public void testFilteringOnColumnNameWithDot()
assertUpdate("DROP TABLE " + tableName);
}

/**
 * Regression test for https://github.com/trinodb/trino/issues/16801.
 * Creates an external PARQUET table over the bundled {@code parquet_page_skipping/unsupported_column_index}
 * resource and checks that filtered queries on it return the expected rows.
 */
@Test
public void testUnsupportedColumnIndex()
        throws URISyntaxException
{
    String table = "test_unsupported_column_index_" + randomNameSuffix();

    // Resolve the test resource directory that backs the external table
    File dataLocation = new File(Resources.getResource("parquet_page_skipping/unsupported_column_index").toURI());
    String createTableSql = format(
            "CREATE TABLE %s (stime timestamp(3), btime timestamp(3), detail varchar) WITH (format = 'PARQUET', external_location = '%s')",
            table,
            dataLocation.getAbsolutePath());
    assertUpdate(createTableSql);

    // Range predicate on the timestamp column btime
    String timestampFilterSql = "SELECT * FROM " + table + " WHERE btime >= timestamp '2023-03-27 13:30:00'";
    assertQuery(
            timestampFilterSql,
            "VALUES ('2023-03-31 18:00:00.000', '2023-03-31 18:00:00.000', 'record_1')");

    // Equality predicate on the varchar column; the expected row has a null btime
    String detailFilterSql = "SELECT * FROM " + table + " WHERE detail = 'record_2'";
    assertQuery(
            detailFilterSql,
            "VALUES ('2023-03-31 18:00:00.000', null, 'record_2')");

    String dropTableSql = "DROP TABLE " + table;
    assertUpdate(dropTableSql);
}

@Test
public void testPageSkipping()
{
Expand Down
Binary file not shown.

0 comments on commit 08baad3

Please sign in to comment.