Skip to content

Commit

Permalink
Python: Add PartitionField (apache#4590)
Browse files Browse the repository at this point in the history
Co-authored-by: Steve Zhang <[email protected]>
  • Loading branch information
dramaticlly and Steve Zhang authored Apr 25, 2022
1 parent 48669d2 commit c1b553d
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
66 changes: 66 additions & 0 deletions python/src/iceberg/table/partitioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from iceberg.transforms import Transform


class PartitionField:
    """A single partition element with a name and a unique id.

    It represents how one partition value is derived from the source column
    via a transformation.

    Attributes:
        source_id(int): The source column id of table's schema
        field_id(int): The partition field id across all the table metadata's partition specs
        transform(Transform): The transform used to produce partition values from source column
        name(str): The name of this partition field
    """

    def __init__(self, source_id: int, field_id: int, transform: Transform, name: str):
        self._source_id = source_id
        self._field_id = field_id
        self._transform = transform
        self._name = name

    @property
    def source_id(self) -> int:
        """The source column id this field was constructed with."""
        return self._source_id

    @property
    def field_id(self) -> int:
        """The partition field id this field was constructed with."""
        return self._field_id

    @property
    def name(self) -> str:
        """The name this field was constructed with."""
        return self._name

    @property
    def transform(self) -> Transform:
        """The transform this field was constructed with."""
        return self._transform

    def __eq__(self, other):
        """Return True when ``other`` is a PartitionField with equal members.

        For any other type ``NotImplemented`` is returned, so ``field == None``
        (or any unrelated object) evaluates to False instead of raising
        AttributeError on the missing attributes.
        """
        if not isinstance(other, PartitionField):
            return NotImplemented
        return (
            self.field_id == other.field_id
            and self.source_id == other.source_id
            and self.name == other.name
            and self.transform == other.transform
        )

    def __hash__(self):
        """Return a hash consistent with ``__eq__``.

        Defining ``__eq__`` alone would leave instances unhashable. The
        transform is deliberately left out: equal fields still hash equally,
        and no assumption is made about whether the transform is hashable.
        """
        return hash((self._field_id, self._source_id, self._name))

    def __str__(self):
        return f"{self.field_id}: {self.name}: {self.transform}({self.source_id})"

    def __repr__(self):
        return f"PartitionField(field_id={self.field_id}, name={self.name}, transform={repr(self.transform)}, source_id={self.source_id})"
16 changes: 16 additions & 0 deletions python/tests/table/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
36 changes: 36 additions & 0 deletions python/tests/table/test_partitioning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from iceberg.table.partitioning import PartitionField
from iceberg.transforms import bucket
from iceberg.types import IntegerType


def test_partition_field_init():
    """Check PartitionField accessors, self-equality and string forms for a bucket transform."""
    transform = bucket(IntegerType(), 100)
    field = PartitionField(3, 1000, transform, "id")
    expected_repr = (
        "PartitionField(field_id=1000, name=id, transform=transforms.bucket(source_type=IntegerType(), num_buckets=100), source_id=3)"
    )

    # Accessors return exactly what was passed to the constructor.
    assert field.source_id == 3
    assert field.field_id == 1000
    assert field.transform == transform
    assert field.name == "id"

    # Equality and the two string representations.
    assert field == field
    assert str(field) == "1000: id: bucket[100](3)"
    assert repr(field) == expected_repr

0 comments on commit c1b553d

Please sign in to comment.