From 6d0c9ac576bbd8a891ca5321db0908d7bad53528 Mon Sep 17 00:00:00 2001
From: Raunaq Morarka
Date: Fri, 28 Jun 2024 11:07:20 +0530
Subject: [PATCH] Convert BlockMetadata into a record
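
The record replaces the mutable JavaBean-style class, so call sites move from
getRowCount()/getColumns() to the record accessors rowCount()/columns(), and a
row group's metadata is now assembled up front instead of through setters. A
minimal sketch of the pattern (illustrative values, not a real call site from
this patch):

    // before: construct empty, then mutate
    BlockMetadata block = new BlockMetadata();
    block.setRowCount(100);
    block.addColumn(columnChunkMetadata);

    // after: construct fully formed and immutable
    BlockMetadata block = new BlockMetadata(100, ImmutableList.of(columnChunkMetadata));
    long rows = block.rowCount();
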
---
 .../io/trino/parquet/BloomFilterStore.java    |  4 +-
 .../trino/parquet/ParquetWriteValidation.java |  8 +-
 .../trino/parquet/metadata/BlockMetadata.java | 92 +------------------
 .../parquet/predicate/PredicateUtils.java     | 12 +--
 .../trino/parquet/reader/MetadataReader.java  | 10 +-
 .../trino/parquet/reader/ParquetReader.java   | 12 +--
 .../parquet/reader/TrinoColumnIndexStore.java |  4 +-
 .../trino/parquet/writer/ParquetWriter.java   |  2 +-
 .../parquet/reader/TestParquetReader.java     |  8 +-
 .../parquet/writer/TestParquetWriter.java     | 24 ++---
 .../plugin/deltalake/DeltaLakeWriter.java     |  2 +-
 .../hive/parquet/TestBloomFilterStore.java    |  2 +-
 .../iceberg/IcebergPageSourceProvider.java    |  2 +-
 .../plugin/iceberg/util/ParquetUtil.java      |  4 +-
 .../plugin/iceberg/IcebergTestUtils.java      |  2 +-
 15 files changed, 49 insertions(+), 139 deletions(-)

diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/BloomFilterStore.java b/lib/trino-parquet/src/main/java/io/trino/parquet/BloomFilterStore.java
index d6b5861d833e..1afc665c1494 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/BloomFilterStore.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/BloomFilterStore.java
@@ -56,7 +56,7 @@ public BloomFilterStore(ParquetDataSource dataSource, BlockMetadata block, Set<ColumnPath> columnsFiltered)
         ImmutableMap.Builder<ColumnPath, Long> bloomFilterOffsetBuilder = ImmutableMap.builder();
-        for (ColumnChunkMetadata column : block.getColumns()) {
+        for (ColumnChunkMetadata column : block.columns()) {
             ColumnPath path = column.getPath();
             if (hasBloomFilter(column) && columnsFiltered.contains(path)) {
                 bloomFilterOffsetBuilder.put(path, column.getBloomFilterOffset());
@@ -106,7 +106,7 @@ public static Optional<BloomFilterStore> getBloomFilterStore(
             return Optional.empty();
         }

-        boolean hasBloomFilter = blockMetadata.getColumns().stream().anyMatch(BloomFilterStore::hasBloomFilter);
+        boolean hasBloomFilter = blockMetadata.columns().stream().anyMatch(BloomFilterStore::hasBloomFilter);
         if (!hasBloomFilter) {
             return Optional.empty();
         }
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetWriteValidation.java b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetWriteValidation.java
index 558558bacc14..304ff69be662 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetWriteValidation.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetWriteValidation.java
@@ -138,14 +138,14 @@ public void validateBlocksMetadata(ParquetDataSourceId dataSourceId, List<BlockMetadata> blocks)
-            List<ColumnChunkMetadata> columnChunkMetaData = block.getColumns();
+            List<ColumnChunkMetadata> columnChunkMetaData = block.columns();
             validateParquet(
                     columnChunkMetaData.size() == rowGroup.getColumnsSize(),
                     dataSourceId,
@@ -358,7 +358,7 @@ public WriteChecksum build()
     public void validateRowGroupStatistics(ParquetDataSourceId dataSourceId, BlockMetadata blockMetaData, List actualColumnStatistics)
             throws ParquetCorruptionException
     {
-        List<ColumnChunkMetadata> columnChunks = blockMetaData.getColumns();
+        List<ColumnChunkMetadata> columnChunks = blockMetaData.columns();
         checkArgument(
                 columnChunks.size() == actualColumnStatistics.size(),
                 "Column chunk metadata count %s did not match column fields count %s",
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/metadata/BlockMetadata.java b/lib/trino-parquet/src/main/java/io/trino/parquet/metadata/BlockMetadata.java
index 710543564d3c..43defc21b834 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/metadata/BlockMetadata.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/metadata/BlockMetadata.java
@@ -13,100 +13,12 @@
  */
 package io.trino.parquet.metadata;

-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;

-public class BlockMetadata
+public record BlockMetadata(long rowCount, List<ColumnChunkMetadata> columns)
 {
-    private final List<ColumnChunkMetadata> columns = new ArrayList<>();
-    private long rowCount;
-    private long totalByteSize;
-    private String path;
-    private int ordinal;
-    private long rowIndexOffset = -1;
-
-    public void setPath(String path)
-    {
-        this.path = path;
-    }
-
-    public String getPath()
-    {
-        return path;
-    }
-
-    public long getRowCount()
-    {
-        return rowCount;
-    }
-
-    public void setRowCount(long rowCount)
-    {
-        this.rowCount = rowCount;
-    }
-
-    public long getRowIndexOffset()
-    {
-        return rowIndexOffset;
-    }
-
-    public void setRowIndexOffset(long rowIndexOffset)
-    {
-        this.rowIndexOffset = rowIndexOffset;
-    }
-
-    public long getTotalByteSize()
-    {
-        return totalByteSize;
-    }
-
-    public void setTotalByteSize(long totalByteSize)
-    {
-        this.totalByteSize = totalByteSize;
-    }
-
-    public void addColumn(ColumnChunkMetadata column)
-    {
-        columns.add(column);
-    }
-
-    public List<ColumnChunkMetadata> getColumns()
-    {
-        return Collections.unmodifiableList(columns);
-    }
-
     public long getStartingPos()
     {
-        return getColumns().getFirst().getStartingPos();
-    }
-
-    @Override
-    public String toString()
-    {
-        String rowIndexOffsetString = "";
-        if (rowIndexOffset != -1) {
-            rowIndexOffsetString = ", rowIndexOffset = " + rowIndexOffset;
-        }
-        return "BlockMetaData{" + rowCount + ", " + totalByteSize + rowIndexOffsetString + " " + columns + "}";
-    }
-
-    public long getCompressedSize()
-    {
-        long totalSize = 0;
-        for (ColumnChunkMetadata col : getColumns()) {
-            totalSize += col.getTotalSize();
-        }
-        return totalSize;
-    }
-
-    public int getOrdinal()
-    {
-        return ordinal;
-    }
-
-    public void setOrdinal(int ordinal)
-    {
-        this.ordinal = ordinal;
+        return columns().getFirst().getStartingPos();
     }
 }
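Note on the removed members: totalByteSize, path, ordinal, rowIndexOffset and
the derived getCompressedSize() are dropped without replacement, which suggests
they had no remaining callers; the record keeps only rowCount, columns, and the
getStartingPos() helper. If the compressed size were ever needed again, it can
be recomputed from the column chunks; a sketch, not code from this patch:

    long compressedSize = block.columns().stream()
            .mapToLong(ColumnChunkMetadata::getTotalSize)
            .sum();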
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/PredicateUtils.java b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/PredicateUtils.java
index 38aaa76567b9..2f3b72a4d49a 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/PredicateUtils.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/PredicateUtils.java
@@ -141,7 +141,7 @@ public static boolean predicateMatches(
             int domainCompactionThreshold)
             throws IOException
     {
-        if (block.getRowCount() == 0) {
+        if (block.rowCount() == 0) {
             return false;
         }
         Map<ColumnDescriptor, Statistics<?>> columnStatistics = getStatistics(block, descriptorsByPath);
@@ -192,7 +192,7 @@ public static List<RowGroupInfo> getFilteredRowGroups(
         long fileRowCount = 0;
         ImmutableList.Builder<RowGroupInfo> rowGroupInfoBuilder = ImmutableList.builder();
         for (BlockMetadata block : blocksMetaData) {
-            long blockStart = block.getColumns().getFirst().getStartingPos();
+            long blockStart = block.getStartingPos();
             boolean splitContainsBlock = splitStart <= blockStart && blockStart < splitStart + splitLength;
             if (splitContainsBlock) {
                 for (int i = 0; i < parquetTupleDomains.size(); i++) {
@@ -215,7 +215,7 @@ public static List<RowGroupInfo> getFilteredRowGroups(
                     }
                 }
             }
-            fileRowCount += block.getRowCount();
+            fileRowCount += block.rowCount();
         }
         return rowGroupInfoBuilder.build();
     }
@@ -223,7 +223,7 @@ private static Map<ColumnDescriptor, Statistics<?>> getStatistics(BlockMetadata blockMetadata, Map<List<String>, ColumnDescriptor> descriptorsByPath)
     {
         ImmutableMap.Builder<ColumnDescriptor, Statistics<?>> statistics = ImmutableMap.builder();
-        for (ColumnChunkMetadata columnMetaData : blockMetadata.getColumns()) {
+        for (ColumnChunkMetadata columnMetaData : blockMetadata.columns()) {
             Statistics<?> columnStatistics = columnMetaData.getStatistics();
             if (columnStatistics != null) {
                 ColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
@@ -238,7 +238,7 @@ private static Map<ColumnDescriptor, Long> getColumnValueCounts(BlockMetadata blockMetadata, Map<List<String>, ColumnDescriptor> descriptorsByPath)
     {
         ImmutableMap.Builder<ColumnDescriptor, Long> columnValueCounts = ImmutableMap.builder();
-        for (ColumnChunkMetadata columnMetaData : blockMetadata.getColumns()) {
+        for (ColumnChunkMetadata columnMetaData : blockMetadata.columns()) {
             ColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
             if (descriptor != null) {
                 columnValueCounts.put(descriptor, columnMetaData.getValueCount());
@@ -256,7 +256,7 @@ private static boolean dictionaryPredicatesMatch(
             Optional<ColumnIndexStore> columnIndexStore)
             throws IOException
     {
-        for (ColumnChunkMetadata columnMetaData : blockMetadata.getColumns()) {
+        for (ColumnChunkMetadata columnMetaData : blockMetadata.columns()) {
             ColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
             if (descriptor == null || !candidateColumns.contains(descriptor)) {
                 continue;
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/MetadataReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/MetadataReader.java
index 86c9a50be1c4..fe0635646f98 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/MetadataReader.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/MetadataReader.java
@@ -13,6 +13,7 @@
  */
 package io.trino.parquet.reader;

+import com.google.common.collect.ImmutableList;
 import io.airlift.log.Logger;
 import io.airlift.slice.Slice;
 import io.airlift.slice.Slices;
@@ -134,12 +135,10 @@ public static ParquetMetadata createParquetMetadata(FileMetaData fileMetaData, ParquetDataSourceId dataSourceId)
         List<RowGroup> rowGroups = fileMetaData.getRow_groups();
         if (rowGroups != null) {
             for (RowGroup rowGroup : rowGroups) {
-                BlockMetadata blockMetaData = new BlockMetadata();
-                blockMetaData.setRowCount(rowGroup.getNum_rows());
-                blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
                 List<ColumnChunk> columns = rowGroup.getColumns();
                 validateParquet(!columns.isEmpty(), dataSourceId, "No columns in row group: %s", rowGroup);
                 String filePath = columns.get(0).getFile_path();
+                ImmutableList.Builder<ColumnChunkMetadata> columnMetadataBuilder = ImmutableList.builderWithExpectedSize(columns.size());
                 for (ColumnChunk columnChunk : columns) {
                     validateParquet(
                             (filePath == null && columnChunk.getFile_path() == null)
@@ -167,10 +166,9 @@ public static ParquetMetadata createParquetMetadata(FileMetaData fileMetaData, ParquetDataSourceId dataSourceId)
                     column.setColumnIndexReference(toColumnIndexReference(columnChunk));
                     column.setOffsetIndexReference(toOffsetIndexReference(columnChunk));
                     column.setBloomFilterOffset(metaData.bloom_filter_offset);
-                    blockMetaData.addColumn(column);
+                    columnMetadataBuilder.add(column);
                 }
-                blockMetaData.setPath(filePath);
-                blocks.add(blockMetaData);
+                blocks.add(new BlockMetadata(rowGroup.getNum_rows(), columnMetadataBuilder.build()));
             }
         }
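The reader-side construction above distills to: accumulate the column chunks in
a pre-sized ImmutableList builder, then create the record once per row group. A
condensed sketch of that shape (toColumnChunkMetadata is a hypothetical stand-in
for the per-chunk conversion elided from the hunk):

    ImmutableList.Builder<ColumnChunkMetadata> columns =
            ImmutableList.builderWithExpectedSize(rowGroup.getColumnsSize());
    for (ColumnChunk columnChunk : rowGroup.getColumns()) {
        columns.add(toColumnChunkMetadata(columnChunk)); // hypothetical helper
    }
    blocks.add(new BlockMetadata(rowGroup.getNum_rows(), columns.build()));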
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java
index 4670476ce2e4..13c904cf585f 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java
@@ -183,7 +183,7 @@ public ParquetReader(
             int columnId = field.getId();
             ColumnChunkMetadata chunkMetadata = getColumnChunkMetaData(metadata, field.getDescriptor());
             ColumnPath columnPath = chunkMetadata.getPath();
-            long rowGroupRowCount = metadata.getRowCount();
+            long rowGroupRowCount = metadata.rowCount();
             long startingPosition = chunkMetadata.getStartingPos();
             long totalLength = chunkMetadata.getTotalSize();
             long totalDataSize = 0;
@@ -299,7 +299,7 @@ private boolean advanceToNextRowGroup()
         RowGroupInfo rowGroupInfo = rowGroups.get(currentRowGroup);
         currentBlockMetadata = rowGroupInfo.blockMetaData();
         firstRowIndexInGroup = rowGroupInfo.fileRowOffset();
-        currentGroupRowCount = currentBlockMetadata.getRowCount();
+        currentGroupRowCount = currentBlockMetadata.rowCount();
         FilteredRowRanges currentGroupRowRanges = blockRowRanges[currentRowGroup];
         log.debug("advanceToNextRowGroup dataSource %s, currentRowGroup %d, rowRanges %s, currentBlockMetadata %s", dataSource.getId(), currentRowGroup, currentGroupRowRanges, currentBlockMetadata);
         if (currentGroupRowRanges != null) {
@@ -448,12 +448,12 @@ private ColumnChunk readPrimitive(PrimitiveField field)
         int fieldId = field.getId();
         ColumnReader columnReader = columnReaders.get(fieldId);
         if (!columnReader.hasPageReader()) {
-            validateParquet(currentBlockMetadata.getRowCount() > 0, dataSource.getId(), "Row group has 0 rows");
+            validateParquet(currentBlockMetadata.rowCount() > 0, dataSource.getId(), "Row group has 0 rows");
             ColumnChunkMetadata metadata = getColumnChunkMetaData(currentBlockMetadata, columnDescriptor);
             FilteredRowRanges rowRanges = blockRowRanges[currentRowGroup];
             OffsetIndex offsetIndex = null;
             if (rowRanges != null) {
-                offsetIndex = getFilteredOffsetIndex(rowRanges, currentRowGroup, currentBlockMetadata.getRowCount(), metadata.getPath());
+                offsetIndex = getFilteredOffsetIndex(rowRanges, currentRowGroup, currentBlockMetadata.rowCount(), metadata.getPath());
             }
             ChunkedInputStream columnChunkInputStream = chunkReaders.get(new ChunkKey(fieldId, currentRowGroup));
             columnReader.setPageReader(
@@ -493,7 +493,7 @@ public Metrics getMetrics()
     private ColumnChunkMetadata getColumnChunkMetaData(BlockMetadata blockMetaData, ColumnDescriptor columnDescriptor)
             throws IOException
     {
-        for (ColumnChunkMetadata metadata : blockMetaData.getColumns()) {
+        for (ColumnChunkMetadata metadata : blockMetaData.columns()) {
             // Column paths for nested structures have common root, so we compare in reverse to find mismatch sooner
             if (arrayEqualsReversed(metadata.getPath().toArray(), columnDescriptor.getPath())) {
                 return metadata;
@@ -585,7 +585,7 @@ private static FilteredRowRanges[] calculateFilteredRowRanges(
                 continue;
             }
             BlockMetadata metadata = rowGroupInfo.blockMetaData();
-            long rowGroupRowCount = metadata.getRowCount();
+            long rowGroupRowCount = metadata.rowCount();
             FilteredRowRanges rowRanges = new FilteredRowRanges(ColumnIndexFilter.calculateRowRanges(
                     FilterCompat.get(filter.get()),
                     rowGroupColumnIndexStore.get(),
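One behavioral nicety visible in the log.debug() call above: the old class had a
hand-written toString(), which this conversion drops, so the record's generated
representation is what gets logged now. For a record that takes the standard
form (shape per the Java language spec, values illustrative):

    BlockMetadata[rowCount=100, columns=[...]]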
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TrinoColumnIndexStore.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TrinoColumnIndexStore.java
index ca5eedba8924..fa9b7ae142d5 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TrinoColumnIndexStore.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/TrinoColumnIndexStore.java
@@ -84,7 +84,7 @@ public TrinoColumnIndexStore(
         ImmutableList.Builder<ColumnIndexMetadata> columnIndexBuilder = ImmutableList.builderWithExpectedSize(columnsFiltered.size());
         ImmutableList.Builder<ColumnIndexMetadata> offsetIndexBuilder = ImmutableList.builderWithExpectedSize(columnsRead.size());
-        for (ColumnChunkMetadata column : block.getColumns()) {
+        for (ColumnChunkMetadata column : block.columns()) {
             ColumnPath path = column.getPath();
             if (column.getColumnIndexReference() != null && columnsFiltered.contains(path)) {
                 columnIndexBuilder.add(new ColumnIndexMetadata(
@@ -149,7 +149,7 @@ public static Optional<ColumnIndexStore> getColumnIndexStore(
         }

         boolean hasColumnIndex = false;
-        for (ColumnChunkMetadata column : blockMetadata.getColumns()) {
+        for (ColumnChunkMetadata column : blockMetadata.columns()) {
             if (column.getColumnIndexReference() != null && column.getOffsetIndexReference() != null) {
                 hasColumnIndex = true;
                 break;
diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
index b1696f656755..35d91be42ef7 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriter.java
@@ -274,7 +274,7 @@ private ParquetReader createParquetReader(ParquetDataSource input, ParquetMetadata parquetMetadata)
         ImmutableList.Builder<RowGroupInfo> rowGroupInfoBuilder = ImmutableList.builder();
         for (BlockMetadata block : parquetMetadata.getBlocks()) {
             rowGroupInfoBuilder.add(new RowGroupInfo(block, nextStart, Optional.empty()));
-            nextStart += block.getRowCount();
+            nextStart += block.rowCount();
         }
         return new ParquetReader(
                 Optional.ofNullable(fileMetaData.getCreatedBy()),
diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java
index 66ba1fc04f9b..2ef475a7644f 100644
--- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java
+++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestParquetReader.java
@@ -83,9 +83,9 @@ public void testColumnReaderMemoryUsage()
         assertThat(parquetMetadata.getBlocks().size()).isGreaterThan(1);
         // Verify file has only non-dictionary encodings as dictionary memory usage is already tested in TestFlatColumnReader#testMemoryUsage
         parquetMetadata.getBlocks().forEach(block -> {
-            block.getColumns()
+            block.columns()
                     .forEach(columnChunkMetaData -> assertThat(columnChunkMetaData.getEncodingStats().hasDictionaryEncodedPages()).isFalse());
-            assertThat(block.getRowCount()).isEqualTo(100);
+            assertThat(block.rowCount()).isEqualTo(100);
         });

         AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext();
@@ -105,7 +105,7 @@ public void testColumnReaderMemoryUsage()
         assertThat(currentMemoryUsage).isGreaterThan(initialMemoryUsage);

         // Memory usage does not change until next row group (1 page per row-group)
-        long rowGroupRowCount = parquetMetadata.getBlocks().get(0).getRowCount();
+        long rowGroupRowCount = parquetMetadata.getBlocks().get(0).rowCount();
         int rowsRead = page.getPositionCount();
         while (rowsRead < rowGroupRowCount) {
             rowsRead += reader.nextPage().getPositionCount();
@@ -153,7 +153,7 @@ public void testEmptyRowRangesWithColumnIndex()
         assertThat(metrics).containsKey(COLUMN_INDEX_ROWS_FILTERED);
         // Column index should filter at least the first row group
         assertThat(((Count) metrics.get(COLUMN_INDEX_ROWS_FILTERED)).getTotal())
-                .isGreaterThanOrEqualTo(parquetMetadata.getBlocks().get(0).getRowCount());
+                .isGreaterThanOrEqualTo(parquetMetadata.getBlocks().get(0).rowCount());
     }
 }
diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestParquetWriter.java b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestParquetWriter.java
index e5a4fc726eec..846080c3297a 100644
--- a/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestParquetWriter.java
+++ b/lib/trino-parquet/src/test/java/io/trino/parquet/writer/TestParquetWriter.java
@@ -129,9 +129,9 @@ public void testWrittenPageSize()
                 new ParquetReaderOptions());
         ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty());
         assertThat(parquetMetadata.getBlocks().size()).isEqualTo(1);
-        assertThat(parquetMetadata.getBlocks().get(0).getRowCount()).isEqualTo(100 * 1000);
+        assertThat(parquetMetadata.getBlocks().get(0).rowCount()).isEqualTo(100 * 1000);

-        ColumnChunkMetadata chunkMetaData = parquetMetadata.getBlocks().get(0).getColumns().get(0);
+        ColumnChunkMetadata chunkMetaData = parquetMetadata.getBlocks().get(0).columns().get(0);
         DiskRange range = new DiskRange(chunkMetaData.getStartingPos(), chunkMetaData.getTotalSize());
         Map<Integer, ChunkedInputStream> chunkReader = dataSource.planRead(ImmutableListMultimap.of(0, range), newSimpleAggregatedMemoryContext());
@@ -178,10 +178,10 @@ public void testWrittenPageValueCount()
                 new ParquetReaderOptions());
         ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty());
         assertThat(parquetMetadata.getBlocks().size()).isEqualTo(1);
-        assertThat(parquetMetadata.getBlocks().get(0).getRowCount()).isEqualTo(100 * 1000);
+        assertThat(parquetMetadata.getBlocks().get(0).rowCount()).isEqualTo(100 * 1000);

-        ColumnChunkMetadata columnAMetaData = parquetMetadata.getBlocks().get(0).getColumns().get(0);
-        ColumnChunkMetadata columnBMetaData = parquetMetadata.getBlocks().get(0).getColumns().get(1);
+        ColumnChunkMetadata columnAMetaData = parquetMetadata.getBlocks().get(0).columns().get(0);
+        ColumnChunkMetadata columnBMetaData = parquetMetadata.getBlocks().get(0).columns().get(1);
         Map<Integer, ChunkedInputStream> chunkReader = dataSource.planRead(
                 ImmutableListMultimap.of(
                         0, new DiskRange(columnAMetaData.getStartingPos(), columnAMetaData.getTotalSize()),
                         1, new DiskRange(columnBMetaData.getStartingPos(), columnBMetaData.getTotalSize())),
@@ -260,12 +260,12 @@ public void testLargeStringTruncation()
         ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty());
         BlockMetadata blockMetaData = getOnlyElement(parquetMetadata.getBlocks());

-        ColumnChunkMetadata chunkMetaData = blockMetaData.getColumns().get(0);
+        ColumnChunkMetadata chunkMetaData = blockMetaData.columns().get(0);
         assertThat(chunkMetaData.getStatistics().getMinBytes()).isEqualTo(minA.getBytes());
         Slice truncatedMax = Slices.utf8Slice("y".repeat(1023) + "z");
         assertThat(chunkMetaData.getStatistics().getMaxBytes()).isEqualTo(truncatedMax.getBytes());

-        chunkMetaData = blockMetaData.getColumns().get(1);
+        chunkMetaData = blockMetaData.columns().get(1);
         Slice truncatedMin = varcharToVarcharSaturatedFloorCast(1024, minB);
         assertThat(chunkMetaData.getStatistics().getMinBytes()).isEqualTo(truncatedMin.getBytes());
         truncatedMax = Slices.utf8Slice(maxCodePoint + "d".repeat(1016) + "e");
@@ -294,7 +294,7 @@ public void testColumnReordering()
         assertThat(parquetMetadata.getBlocks().size()).isGreaterThanOrEqualTo(10);
         for (BlockMetadata blockMetaData : parquetMetadata.getBlocks()) {
             // Verify that the columns are stored in the same order as the metadata
-            List<Long> offsets = blockMetaData.getColumns().stream()
+            List<Long> offsets = blockMetaData.columns().stream()
                     .map(ColumnChunkMetadata::getFirstDataPageOffset)
                     .collect(toImmutableList());
             assertThat(offsets).isSorted();
@@ -350,7 +350,7 @@ public void testDictionaryPageOffset()
         ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty());
         assertThat(parquetMetadata.getBlocks().size()).isGreaterThanOrEqualTo(1);
         for (BlockMetadata blockMetaData : parquetMetadata.getBlocks()) {
-            ColumnChunkMetadata chunkMetaData = getOnlyElement(blockMetaData.getColumns());
+            ColumnChunkMetadata chunkMetaData = getOnlyElement(blockMetaData.columns());
             assertThat(chunkMetaData.getDictionaryPageOffset()).isGreaterThan(0);
             int dictionaryPageSize = toIntExact(chunkMetaData.getFirstDataPageOffset() - chunkMetaData.getDictionaryPageOffset());
             assertThat(dictionaryPageSize).isGreaterThan(0);
@@ -397,9 +397,9 @@ public void testWriteBloomFilters(Type type, List<?> data)
         // Check that bloom filters are right after each other
         int bloomFilterSize = Integer.highestOneBit(BlockSplitBloomFilter.optimalNumOfBits(BLOOM_FILTER_EXPECTED_ENTRIES, DEFAULT_BLOOM_FILTER_FPP) / 8) << 1;
         for (BlockMetadata block : parquetMetadata.getBlocks()) {
-            for (int i = 1; i < block.getColumns().size(); i++) {
-                assertThat(block.getColumns().get(i - 1).getBloomFilterOffset() + bloomFilterSize + 17) // + 17 bytes for Bloom filter metadata
-                        .isEqualTo(block.getColumns().get(i).getBloomFilterOffset());
+            for (int i = 1; i < block.columns().size(); i++) {
+                assertThat(block.columns().get(i - 1).getBloomFilterOffset() + bloomFilterSize + 17) // + 17 bytes for Bloom filter metadata
+                        .isEqualTo(block.columns().get(i).getBloomFilterOffset());
             }
         }
         int rowGroupCount = parquetMetadata.getBlocks().size();
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java
index 1f14089203ff..c20412f8040c 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeWriter.java
@@ -201,7 +201,7 @@ private static DeltaLakeJsonFileStatistics readStatistics(FileMetaData fileMetaData,
         ImmutableMultimap.Builder<String, ColumnChunkMetadata> metadataForColumn = ImmutableMultimap.builder();
         for (BlockMetadata blockMetaData : parquetMetadata.getBlocks()) {
-            for (ColumnChunkMetadata columnChunkMetaData : blockMetaData.getColumns()) {
+            for (ColumnChunkMetadata columnChunkMetaData : blockMetaData.columns()) {
                 if (columnChunkMetaData.getPath().size() != 1) {
                     continue; // Only base column stats are supported
                 }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestBloomFilterStore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestBloomFilterStore.java
index 7e8a06416fc6..10e1e3378366 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestBloomFilterStore.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestBloomFilterStore.java
@@ -309,7 +309,7 @@ private static BloomFilterStore generateBloomFilterStore(ParquetTester.TempFile
         TrinoParquetDataSource dataSource = new TrinoParquetDataSource(inputFile, new ParquetReaderOptions(), new FileFormatDataSourceStats());
         ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, Optional.empty());

-        ColumnChunkMetadata columnChunkMetaData = getOnlyElement(getOnlyElement(parquetMetadata.getBlocks()).getColumns());
+        ColumnChunkMetadata columnChunkMetaData = getOnlyElement(getOnlyElement(parquetMetadata.getBlocks()).columns());
         return new BloomFilterStore(dataSource, getOnlyElement(parquetMetadata.getBlocks()), Set.of(columnChunkMetaData.getPath()));
     }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
index 4e260541063a..1b114c443a30 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
@@ -953,7 +953,7 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource(
             if (!rowGroups.isEmpty()) {
                 startRowPosition = Optional.of(rowGroups.getFirst().fileRowOffset());
                 RowGroupInfo lastRowGroup = rowGroups.getLast();
-                endRowPosition = Optional.of(lastRowGroup.fileRowOffset() + lastRowGroup.blockMetaData().getRowCount());
+                endRowPosition = Optional.of(lastRowGroup.fileRowOffset() + lastRowGroup.blockMetaData().rowCount());
             }

             MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/ParquetUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/ParquetUtil.java
index 8515b3fcfe46..0a676ca339ca 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/ParquetUtil.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/ParquetUtil.java
@@ -97,8 +97,8 @@ public static Metrics footerMetrics(
         List<BlockMetadata> blocks = metadata.getBlocks();
         for (BlockMetadata block : blocks) {
-            rowCount += block.getRowCount();
-            for (ColumnChunkMetadata column : block.getColumns()) {
+            rowCount += block.rowCount();
+            for (ColumnChunkMetadata column : block.columns()) {
                 Integer fieldId = fileSchema.aliasToId(column.getPath().toDotString());
                 if (fieldId == null) {
                     // fileSchema may contain a subset of columns present in the file
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java
index f5e16bf32e29..a0955ebad79e 100644
--- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java
@@ -140,7 +140,7 @@ public static boolean checkParquetFileSorting(TrinoInputFile inputFile, String sortColumnName)
         Comparable previousMax = null;
         verify(parquetMetadata.getBlocks().size() > 1, "Test must produce at least two row groups");
         for (BlockMetadata blockMetaData : parquetMetadata.getBlocks()) {
-            ColumnChunkMetadata columnMetadata = blockMetaData.getColumns().stream()
+            ColumnChunkMetadata columnMetadata = blockMetaData.columns().stream()
                     .filter(column -> getOnlyElement(column.getPath().iterator()).equalsIgnoreCase(sortColumnName))
                     .collect(onlyElement());
             if (previousMax != null) {