Skip to content

Commit

Permalink
API: Add compatibility checks for Schemas with default values (apache…
Browse files Browse the repository at this point in the history
…#11434)


Co-authored-by: Russell Spitzer <[email protected]>
Co-authored-by: Fokko Driesprong <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent f4b36a5 commit 91e04c9
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 9 deletions.
38 changes: 30 additions & 8 deletions api/src/main/java/org/apache/iceberg/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class Schema implements Serializable {
private static final Joiner NEWLINE = Joiner.on('\n');
private static final String ALL_COLUMNS = "*";
private static final int DEFAULT_SCHEMA_ID = 0;
private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3;
private static final Map<Type.TypeID, Integer> MIN_FORMAT_VERSIONS =
ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3);

Expand Down Expand Up @@ -586,16 +587,37 @@ private List<NestedField> reassignIds(List<NestedField> columns, TypeUtil.GetID
* @param formatVersion table format version
*/
public static void checkCompatibility(Schema schema, int formatVersion) {
// check the type in each field
// accumulate errors as a treemap to keep them in a reasonable order
Map<Integer, String> problems = Maps.newTreeMap();

// check each field's type and defaults
for (NestedField field : schema.lazyIdToField().values()) {
Integer minFormatVersion = MIN_FORMAT_VERSIONS.get(field.type().typeId());
Preconditions.checkState(
minFormatVersion == null || formatVersion >= minFormatVersion,
"Invalid type in v%s schema: %s %s is not supported until v%s",
formatVersion,
schema.findColumnName(field.fieldId()),
field.type(),
minFormatVersion);
if (minFormatVersion != null && formatVersion < minFormatVersion) {
problems.put(
field.fieldId(),
String.format(
"Invalid type for %s: %s is not supported until v%s",
schema.findColumnName(field.fieldId()), field.type(), minFormatVersion));
}

if (field.initialDefault() != null && formatVersion < DEFAULT_VALUES_MIN_FORMAT_VERSION) {
problems.put(
field.fieldId(),
String.format(
"Invalid initial default for %s: non-null default (%s) is not supported until v%s",
schema.findColumnName(field.fieldId()),
field.initialDefault(),
DEFAULT_VALUES_MIN_FORMAT_VERSION));
}
}

// throw if there are any compatibility problems
if (!problems.isEmpty()) {
throw new IllegalStateException(
String.format(
"Invalid schema for v%s:\n- %s",
formatVersion, Joiner.on("\n- ").join(problems.values())));
}
}
}
111 changes: 111 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestSchema.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import static org.assertj.core.api.Assertions.assertThatCode;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class TestSchema {
private static final Schema TS_NANO_CASES =
new Schema(
Types.NestedField.required(1, "id", Types.LongType.get()),
Types.NestedField.optional(2, "ts", Types.TimestampNanoType.withZone()),
Types.NestedField.optional(
3, "arr", Types.ListType.ofRequired(4, Types.TimestampNanoType.withoutZone())),
Types.NestedField.required(
5,
"struct",
Types.StructType.of(
Types.NestedField.optional(6, "inner_ts", Types.TimestampNanoType.withZone()),
Types.NestedField.required(7, "data", Types.StringType.get()))),
Types.NestedField.optional(
8,
"struct_arr",
Types.StructType.of(
Types.NestedField.optional(9, "ts", Types.TimestampNanoType.withoutZone()))));

private static final Schema INITIAL_DEFAULT_SCHEMA =
new Schema(
Types.NestedField.required(1, "id", Types.LongType.get()),
Types.NestedField.required("has_default")
.withId(2)
.ofType(Types.StringType.get())
.withInitialDefault("--")
.withWriteDefault("--")
.build());

private static final Schema WRITE_DEFAULT_SCHEMA =
new Schema(
Types.NestedField.required(1, "id", Types.LongType.get()),
Types.NestedField.required("has_default")
.withId(2)
.ofType(Types.StringType.get())
.withWriteDefault("--")
.build());

@ParameterizedTest
@ValueSource(ints = {1, 2})
public void testUnsupportedTimestampNano(int formatVersion) {
assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion))
.isInstanceOf(IllegalStateException.class)
.hasMessage(
"Invalid schema for v%s:\n"
+ "- Invalid type for ts: timestamptz_ns is not supported until v3\n"
+ "- Invalid type for arr.element: timestamp_ns is not supported until v3\n"
+ "- Invalid type for struct.inner_ts: timestamptz_ns is not supported until v3\n"
+ "- Invalid type for struct_arr.ts: timestamp_ns is not supported until v3",
formatVersion);
}

@Test
public void testSupportedTimestampNano() {
assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)).doesNotThrowAnyException();
}

@ParameterizedTest
@ValueSource(ints = {1, 2})
public void testUnsupportedInitialDefault(int formatVersion) {
assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion))
.isInstanceOf(IllegalStateException.class)
.hasMessage(
"Invalid schema for v%s:\n"
+ "- Invalid initial default for has_default: "
+ "non-null default (--) is not supported until v3",
formatVersion);
}

@Test
public void testSupportedInitialDefault() {
assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3))
.doesNotThrowAnyException();
}

@ParameterizedTest
@ValueSource(ints = {1, 2, 3})
public void testSupportedWriteDefault(int formatVersion) {
// only the initial default is a forward-incompatible change
assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion))
.doesNotThrowAnyException();
}
}
3 changes: 2 additions & 1 deletion core/src/test/java/org/apache/iceberg/TestTableMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -1672,7 +1672,8 @@ public void testV3TimestampNanoTypeSupport() {
unsupportedFormatVersion))
.isInstanceOf(IllegalStateException.class)
.hasMessage(
"Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is not supported until v3",
"Invalid schema for v%s:\n"
+ "- Invalid type for struct.ts_nanos: timestamptz_ns is not supported until v3",
unsupportedFormatVersion);
}

Expand Down

0 comments on commit 91e04c9

Please sign in to comment.