Skip to content

Commit

Permalink
Core: Validate transforms while building partition type (apache#2992)
Browse files Browse the repository at this point in the history
  • Loading branch information
aokolnychyi authored Aug 18, 2021
1 parent bde2d89 commit 56a2f3b
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
19 changes: 19 additions & 0 deletions core/src/main/java/org/apache/iceberg/Partitioning.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.iceberg.transforms.PartitionSpecVisitor;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.transforms.UnknownTransform;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StructType;

Expand Down Expand Up @@ -199,6 +200,11 @@ public Void alwaysNull(int fieldId, String sourceName, int sourceId) {
* @return the constructed common partition type
*/
public static StructType partitionType(Table table) {
// we currently don't know the output type of unknown transforms
List<Transform<?, ?>> unknownTransforms = collectUnknownTransforms(table);
ValidationException.check(unknownTransforms.isEmpty(),
"Cannot build table partition type, unknown transforms: %s", unknownTransforms);

if (table.specs().size() == 1) {
return table.spec().partitionType();
}
Expand Down Expand Up @@ -237,6 +243,19 @@ public static StructType partitionType(Table table) {
return StructType.of(sortedStructFields);
}

private static List<Transform<?, ?>> collectUnknownTransforms(Table table) {
List<Transform<?, ?>> unknownTransforms = Lists.newArrayList();

table.specs().values().forEach(spec -> {
spec.fields().stream()
.map(PartitionField::transform)
.filter(transform -> transform instanceof UnknownTransform)
.forEach(unknownTransforms::add);
});

return unknownTransforms;
}

private static boolean equivalentIgnoringNames(PartitionField field, PartitionField anotherField) {
return field.fieldId() == anotherField.fieldId() &&
field.sourceId() == anotherField.sourceId() &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,16 @@
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -254,6 +261,32 @@ public void testEntriesMetadataTable() throws ParseException {
}
}

@Test
public void testMetadataTablesWithUnknownTransforms() {
sql("CREATE TABLE %s (id bigint NOT NULL, category string, data string) USING iceberg", tableName);
initTable();

sql("INSERT INTO TABLE %s VALUES (1, 'a1', 'b1')", tableName);

Table table = validationCatalog.loadTable(tableIdent);

PartitionSpec unknownSpec = PartitionSpecParser.fromJson(table.schema(),
"{ \"spec-id\": 1, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");

// replace the table spec to include an unknown transform
TableOperations ops = ((HasTableOperations) table).operations();
TableMetadata base = ops.current();
ops.commit(base, base.updatePartitionSpec(unknownSpec));

sql("REFRESH TABLE %s", tableName);

for (MetadataTableType tableType : Arrays.asList(FILES, ALL_DATA_FILES, ENTRIES, ALL_ENTRIES)) {
AssertHelpers.assertThrows("Should complain about the partition type",
ValidationException.class, "Cannot build table partition type, unknown transforms",
() -> loadMetadataTable(tableType));
}
}

private void assertPartitions(List<Object[]> expectedPartitions, String expectedTypeAsString,
MetadataTableType tableType) throws ParseException {
Dataset<Row> df = loadMetadataTable(tableType);
Expand Down

0 comments on commit 56a2f3b

Please sign in to comment.