Skip to content

Commit

Permalink
Python: Add more tests for schema_conversion (apache#4972)
Browse files Browse the repository at this point in the history
  • Loading branch information
Fokko authored Jun 6, 2022
1 parent fe089fb commit 2d30814
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 5 deletions.
10 changes: 5 additions & 5 deletions python/src/iceberg/utils/schema_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def _convert_schema(self, avro_type: str | dict[str, Any]) -> IcebergType:
Raises:
ValueError: When there are unknown types
"""
if isinstance(avro_type, str):
if isinstance(avro_type, str) and avro_type in PRIMITIVE_FIELD_TYPE_MAPPING:
return PRIMITIVE_FIELD_TYPE_MAPPING[avro_type]
elif isinstance(avro_type, dict):
if "logicalType" in avro_type:
Expand All @@ -186,12 +186,12 @@ def _convert_schema(self, avro_type: str | dict[str, Any]) -> IcebergType:
return self._convert_map_type(avro_type)
elif type_identifier == "fixed":
return self._convert_fixed_type(avro_type)
elif isinstance(type_identifier, str):
elif isinstance(type_identifier, str) and type_identifier in PRIMITIVE_FIELD_TYPE_MAPPING:
return PRIMITIVE_FIELD_TYPE_MAPPING[type_identifier]
else:
raise ValueError(f"Unknown type: {avro_type}")
raise TypeError(f"Unknown type: {avro_type}")
else:
raise ValueError(f"Unknown type: {avro_type}")
raise TypeError(f"Unknown type: {avro_type}")

def _convert_field(self, field: dict[str, Any]) -> NestedField:
"""
Expand Down Expand Up @@ -265,7 +265,7 @@ def _convert_record_type(self, record_type: dict[str, Any]) -> StructType:
Returns:
"""
if record_type["type"] != "record":
raise ValueError(f"Expected type, got: {record_type}")
raise ValueError(f"Expected record type, got: {record_type}")

return StructType(*[self._convert_field(field) for field in record_type["fields"]])

Expand Down
130 changes: 130 additions & 0 deletions python/tests/utils/test_schema_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest

from iceberg.schema import Schema
from iceberg.types import (
BinaryType,
BooleanType,
DateType,
DecimalType,
FixedType,
IntegerType,
ListType,
LongType,
MapType,
NestedField,
StringType,
StructType,
Expand Down Expand Up @@ -229,3 +234,128 @@ def test_avro_list_required_record():
iceberg_schema = AvroSchemaConversion().avro_to_iceberg(avro_schema)

assert expected_iceberg_schema == iceberg_schema


def test_resolve_union():
    """Resolving a union of null, string and long must raise a TypeError."""
    union_branches = ["null", "string", "long"]

    # Only the call expected to raise lives inside the context manager.
    with pytest.raises(TypeError) as raised:
        AvroSchemaConversion()._resolve_union(union_branches)

    message = str(raised.value)
    assert "Non-optional types aren't part of the Iceberg specification" in message


def test_nested_type():
    """A primitive wrapped in an extra ``{"type": ...}`` layer converts to the primitive type."""
    # The string primitive is nested one level deeper than usual.
    wrapped_primitive = {"type": {"type": "string"}}
    converted = AvroSchemaConversion()._convert_schema(wrapped_primitive)
    assert converted == StringType()


def test_map_type():
    """An Avro map with ``["long", "null"]`` values converts to a MapType with an optional long value."""
    converted = AvroSchemaConversion()._convert_schema(
        {
            "type": "map",
            "values": ["long", "null"],
            "key-id": 101,
            "value-id": 102,
        }
    )

    # The expected key type is a string; the ids are carried over from the Avro schema.
    assert converted == MapType(
        key_id=101,
        key_type=StringType(),
        value_id=102,
        value_type=LongType(),
        value_is_optional=True,
    )


def test_fixed_type():
    """An Avro ``fixed`` type of size 22 converts to ``FixedType(22)``."""
    fixed_schema = {"type": "fixed", "size": 22}
    converted = AvroSchemaConversion()._convert_schema(fixed_schema)
    assert converted == FixedType(22)


def test_unknown_primitive():
    """Converting an unrecognised primitive type name must raise a TypeError naming the type."""
    unknown_name = "UnknownType"

    with pytest.raises(TypeError) as raised:
        AvroSchemaConversion()._convert_schema(unknown_name)

    # Checked after the context manager exits so the assertion is always evaluated.
    assert "Unknown type: UnknownType" in str(raised.value)


def test_unknown_complex_type():
    """Converting a dict whose ``type`` is unrecognised must raise a TypeError showing the dict."""
    unknown_schema = {"type": "UnknownType"}

    with pytest.raises(TypeError) as raised:
        AvroSchemaConversion()._convert_schema(unknown_schema)

    # Checked after the context manager exits so the assertion is always evaluated.
    assert "Unknown type: {'type': 'UnknownType'}" in str(raised.value)


def test_convert_field_without_field_id():
    """Converting a field that has no ``field-id`` key must raise a ValueError."""
    # Only name and type are supplied; "field-id" is absent on purpose.
    field_without_id = {"name": "contains_null", "type": "boolean"}

    with pytest.raises(ValueError) as raised:
        AvroSchemaConversion()._convert_field(field_without_id)

    assert "Cannot convert field, missing field-id" in str(raised.value)


def test_convert_record_type_without_record():
    """Passing a schema whose ``type`` is not ``record`` to the record converter must raise a ValueError."""
    not_a_record = {
        "type": "non-record",
        "name": "avro_schema",
        "fields": [],
    }

    with pytest.raises(ValueError) as raised:
        AvroSchemaConversion()._convert_record_type(not_a_record)

    assert "Expected record type, got" in str(raised.value)


def test_avro_list_missing_element_id():
    """Converting an array schema that carries no ``element-id`` must raise a ValueError."""
    # The inner array definition deliberately omits "element-id" (e.g. 101).
    array_definition = {
        "type": "array",
        "items": "string",
        "default": [],
    }
    field_with_array = {
        "name": "array_with_string",
        "type": array_definition,
        "field-id": 100,
    }

    with pytest.raises(ValueError) as raised:
        AvroSchemaConversion()._convert_array_type(field_with_array)

    message = str(raised.value)
    assert "Cannot convert array-type, missing element-id:" in message


def test_convert_decimal_type():
    """A ``bytes`` schema with the ``decimal`` logical type converts to a DecimalType with the same precision and scale."""
    # NOTE(review): scale (25) is larger than precision (19); the Avro spec appears to
    # require scale <= precision -- confirm this input is intentional.
    decimal_schema = {
        "type": "bytes",
        "logicalType": "decimal",
        "precision": 19,
        "scale": 25,
    }
    converted = AvroSchemaConversion()._convert_logical_type(decimal_schema)
    assert converted == DecimalType(precision=19, scale=25)


def test_convert_date_type():
    """An ``int`` schema with the ``date`` logical type converts to DateType."""
    date_schema = {"type": "int", "logicalType": "date"}
    converted = AvroSchemaConversion()._convert_logical_type(date_schema)
    assert converted == DateType()


def test_unknown_logical_type():
    """The ``date`` logical type on a ``bytes`` physical type must be rejected with a ValueError."""
    mismatched_schema = {"type": "bytes", "logicalType": "date"}

    with pytest.raises(ValueError) as raised:
        AvroSchemaConversion()._convert_logical_type(mismatched_schema)

    message = str(raised.value)
    assert "Unknown logical/physical type combination:" in message


def test_logical_map_with_invalid_fields():
    """A logical map whose item record has a third field besides key and value must raise a ValueError."""
    # Three fields are supplied: key, value, and an extra "other" field.
    record_fields = [
        {"name": "key", "type": "int", "field-id": 101},
        {"name": "value", "type": "string", "field-id": 102},
        {"name": "other", "type": "bytes", "field-id": 103},
    ]
    logical_map_schema = {
        "type": "array",
        "logicalType": "map",
        "items": {
            "type": "record",
            "name": "k101_v102",
            "fields": record_fields,
        },
    }

    with pytest.raises(ValueError) as raised:
        AvroSchemaConversion()._convert_logical_map_type(logical_map_schema)

    message = str(raised.value)
    assert "Invalid key-value pair schema:" in message

0 comments on commit 2d30814

Please sign in to comment.