diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 9bcf691f5a03..44f65ff56a54 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -54,6 +54,7 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; + private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; private static final Map MIN_FORMAT_VERSIONS = ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3); @@ -586,16 +587,37 @@ private List reassignIds(List columns, TypeUtil.GetID * @param formatVersion table format version */ public static void checkCompatibility(Schema schema, int formatVersion) { - // check the type in each field + // accumulate errors as a treemap to keep them in a reasonable order + Map problems = Maps.newTreeMap(); + + // check each field's type and defaults for (NestedField field : schema.lazyIdToField().values()) { Integer minFormatVersion = MIN_FORMAT_VERSIONS.get(field.type().typeId()); - Preconditions.checkState( - minFormatVersion == null || formatVersion >= minFormatVersion, - "Invalid type in v%s schema: %s %s is not supported until v%s", - formatVersion, - schema.findColumnName(field.fieldId()), - field.type(), - minFormatVersion); + if (minFormatVersion != null && formatVersion < minFormatVersion) { + problems.put( + field.fieldId(), + String.format( + "Invalid type for %s: %s is not supported until v%s", + schema.findColumnName(field.fieldId()), field.type(), minFormatVersion)); + } + + if (field.initialDefault() != null && formatVersion < DEFAULT_VALUES_MIN_FORMAT_VERSION) { + problems.put( + field.fieldId(), + String.format( + "Invalid initial default for %s: non-null default (%s) is not supported until v%s", + schema.findColumnName(field.fieldId()), + field.initialDefault(), + DEFAULT_VALUES_MIN_FORMAT_VERSION)); + } + } + + // throw if there are any compatibility problems + if (!problems.isEmpty()) { + throw new IllegalStateException( + String.format( + "Invalid schema for v%s:\n- %s", + formatVersion, Joiner.on("\n- ").join(problems.values()))); } } } diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java new file mode 100644 index 000000000000..fec7343c5cbc --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestSchema { + private static final Schema TS_NANO_CASES = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "ts", Types.TimestampNanoType.withZone()), + Types.NestedField.optional( + 3, "arr", Types.ListType.ofRequired(4, Types.TimestampNanoType.withoutZone())), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional(6, "inner_ts", Types.TimestampNanoType.withZone()), + Types.NestedField.required(7, "data", Types.StringType.get()))), + Types.NestedField.optional( + 8, + "struct_arr", + Types.StructType.of( + Types.NestedField.optional(9, "ts", Types.TimestampNanoType.withoutZone())))); + + private static final Schema INITIAL_DEFAULT_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.required("has_default") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("--") + .withWriteDefault("--") + .build()); + + private static final Schema WRITE_DEFAULT_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.required("has_default") + .withId(2) + .ofType(Types.StringType.get()) + .withWriteDefault("--") + .build()); + + @ParameterizedTest + @ValueSource(ints = {1, 2}) + public void testUnsupportedTimestampNano(int formatVersion) { + assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid type for ts: timestamptz_ns is not supported until v3\n" + + "- Invalid type for arr.element: timestamp_ns is not supported until v3\n" + + "- Invalid type for struct.inner_ts: timestamptz_ns is not supported until v3\n" + + "- Invalid type for struct_arr.ts: timestamp_ns is not supported until v3", + formatVersion); + } + + @Test + public void testSupportedTimestampNano() { + assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)).doesNotThrowAnyException(); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2}) + public void testUnsupportedInitialDefault(int formatVersion) { + assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid initial default for has_default: " + + "non-null default (--) is not supported until v3", + formatVersion); + } + + @Test + public void testSupportedInitialDefault() { + assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3)) + .doesNotThrowAnyException(); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void testSupportedWriteDefault(int formatVersion) { + // only the initial default is a forward-incompatible change + assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) + .doesNotThrowAnyException(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 5ada35765773..71254b3abb1b 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1672,7 +1672,8 @@ public void testV3TimestampNanoTypeSupport() { unsupportedFormatVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( - "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is not supported until v3", + "Invalid schema for v%s:\n" + + "- Invalid type for struct.ts_nanos: timestamptz_ns is not supported until v3", unsupportedFormatVersion); }