forked from apache/iceberg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Avro: Fix BuildAvroProjection list and map handling to preserve field IDs (apache#4120)
Co-authored-by: Haizhou Zhao <[email protected]>
- Loading branch information
1 parent
9a403d6
commit 013d4ad
Showing
6 changed files
with
469 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
core/src/main/java/org/apache/iceberg/avro/MissingIds.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.avro; | ||
|
||
import java.util.List; | ||
import java.util.function.Supplier; | ||
import org.apache.avro.Schema; | ||
import org.apache.iceberg.relocated.com.google.common.collect.Iterables; | ||
|
||
/** | ||
* Returns true once the first node is found with ID property missing. Reverse of {@link HasIds} | ||
* <p> | ||
* Note: To use {@link AvroSchemaUtil#toIceberg(Schema)} on an avro schema, the avro schema need to be either | ||
* have IDs on every node or not have IDs at all. Invoke {@link AvroSchemaUtil#hasIds(Schema)} only proves | ||
* that the schema has at least one ID, and not sufficient condition for invoking | ||
* {@link AvroSchemaUtil#toIceberg(Schema)} on the schema. | ||
*/ | ||
class MissingIds extends AvroCustomOrderSchemaVisitor<Boolean, Boolean> { | ||
@Override | ||
public Boolean record(Schema record, List<String> names, Iterable<Boolean> fields) { | ||
return Iterables.any(fields, Boolean.TRUE::equals); | ||
} | ||
|
||
@Override | ||
public Boolean field(Schema.Field field, Supplier<Boolean> fieldResult) { | ||
// either this field is missing ID, or the subtree is missing ID somewhere | ||
return !AvroSchemaUtil.hasFieldId(field) || fieldResult.get(); | ||
} | ||
|
||
@Override | ||
public Boolean map(Schema map, Supplier<Boolean> value) { | ||
// either this map node is missing (key/value) ID, or the subtree is missing ID somewhere | ||
return !AvroSchemaUtil.hasProperty(map, AvroSchemaUtil.KEY_ID_PROP) || | ||
!AvroSchemaUtil.hasProperty(map, AvroSchemaUtil.VALUE_ID_PROP) || | ||
value.get(); | ||
} | ||
|
||
@Override | ||
public Boolean array(Schema array, Supplier<Boolean> element) { | ||
// either this list node is missing (elem) ID, or the subtree is missing ID somewhere | ||
return !AvroSchemaUtil.hasProperty(array, AvroSchemaUtil.ELEMENT_ID_PROP) || element.get(); | ||
} | ||
|
||
@Override | ||
public Boolean union(Schema union, Iterable<Boolean> options) { | ||
return Iterables.any(options, Boolean.TRUE::equals); | ||
} | ||
|
||
@Override | ||
public Boolean primitive(Schema primitive) { | ||
// primitive node cannot be missing ID as Iceberg do not assign primitive node IDs in the first place | ||
return false; | ||
} | ||
} |
90 changes: 90 additions & 0 deletions
90
core/src/test/java/org/apache/iceberg/avro/TestAvroSchemaProjection.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.avro; | ||
|
||
import java.util.Collections; | ||
import org.apache.avro.SchemaBuilder; | ||
import org.apache.iceberg.Schema; | ||
import org.junit.Test; | ||
|
||
import static org.junit.Assert.assertFalse; | ||
|
||
public class TestAvroSchemaProjection { | ||
|
||
@Test | ||
public void projectWithListSchemaChanged() { | ||
final org.apache.avro.Schema currentAvroSchema = SchemaBuilder.record("myrecord").namespace("unit.test").fields() | ||
.name("f1").type().nullable().array().items( | ||
SchemaBuilder.record("elem").fields() | ||
.name("f11").type().nullable().intType().noDefault().endRecord()) | ||
.noDefault().endRecord(); | ||
|
||
final org.apache.avro.Schema updatedAvroSchema = SchemaBuilder.record("myrecord").namespace("unit.test").fields() | ||
.name("f1").type().nullable().array().items( | ||
SchemaBuilder.record("elem").fields() | ||
.name("f11").type().nullable().intType().noDefault() | ||
.name("f12").type().nullable().stringType().noDefault() | ||
.endRecord()) | ||
.noDefault().endRecord(); | ||
|
||
final Schema currentIcebergSchema = AvroSchemaUtil.toIceberg(currentAvroSchema); | ||
|
||
// Getting the node ID in updatedAvroSchema allocated by converting into iceberg schema and back | ||
final org.apache.avro.Schema idAllocatedUpdatedAvroSchema = | ||
AvroSchemaUtil.convert(AvroSchemaUtil.toIceberg(updatedAvroSchema).asStruct()); | ||
|
||
final org.apache.avro.Schema projectedAvroSchema = | ||
AvroSchemaUtil.buildAvroProjection(idAllocatedUpdatedAvroSchema, currentIcebergSchema, Collections.emptyMap()); | ||
|
||
assertFalse("Result of buildAvroProjection is missing some IDs", | ||
AvroSchemaUtil.missingIds(projectedAvroSchema)); | ||
} | ||
|
||
|
||
@Test | ||
public void projectWithMapSchemaChanged() { | ||
final org.apache.avro.Schema currentAvroSchema = SchemaBuilder.record("myrecord").namespace("unit.test").fields() | ||
.name("f1").type().nullable().map().values( | ||
SchemaBuilder.record("elem").fields() | ||
.name("f11").type().nullable().intType().noDefault().endRecord()) | ||
.noDefault().endRecord(); | ||
|
||
final org.apache.avro.Schema updatedAvroSchema = SchemaBuilder.record("myrecord").namespace("unit.test").fields() | ||
.name("f1").type().nullable().map().values( | ||
SchemaBuilder.record("elem").fields() | ||
.name("f11").type().nullable().intType().noDefault() | ||
.name("f12").type().nullable().stringType().noDefault() | ||
.endRecord()) | ||
.noDefault().endRecord(); | ||
|
||
final Schema currentIcebergSchema = AvroSchemaUtil.toIceberg(currentAvroSchema); | ||
|
||
// Getting the node ID in updatedAvroSchema allocated by converting into iceberg schema and back | ||
final org.apache.avro.Schema idAllocatedUpdatedAvroSchema = | ||
AvroSchemaUtil.convert(AvroSchemaUtil.toIceberg(updatedAvroSchema).asStruct()); | ||
|
||
final org.apache.avro.Schema projectedAvroSchema = | ||
AvroSchemaUtil.buildAvroProjection(idAllocatedUpdatedAvroSchema, currentIcebergSchema, Collections.emptyMap()); | ||
|
||
assertFalse("Result of buildAvroProjection is missing some IDs", | ||
AvroSchemaUtil.missingIds(projectedAvroSchema)); | ||
} | ||
|
||
} |
Oops, something went wrong.