Skip to content

Commit

Permalink
[python] - Expose Hash Partition with Seed Method Signature
Browse files Browse the repository at this point in the history
The Kudu Python client currently does not expose the ability to specify
a seed when adding hash partitions. This patch adds that functionality
and updates an existing test to cover it.

Change-Id: I33a7f411c5a92583e85e445634c2e119f5ddee73
Reviewed-on: http://gerrit.cloudera.org:8080/4882
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <[email protected]>
  • Loading branch information
jtbirdsell authored and wdberkeley committed Oct 30, 2016
1 parent a1a8eef commit c91d04a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 6 deletions.
18 changes: 13 additions & 5 deletions python/kudu/client.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,14 @@ cdef class Client:
vector[string] v
PartialRow py_row
# Apply hash partitioning.
for col_names, num_buckets in part._hash_partitions:
for col_names, num_buckets, seed in part._hash_partitions:
v.clear()
for n in col_names:
v.push_back(tobytes(n))
c.add_hash_partitions(v, num_buckets)
if seed:
c.add_hash_partitions(v, num_buckets, seed)
else:
c.add_hash_partitions(v, num_buckets)
# Apply range partitioning
if part._range_partition_cols is not None:
v.clear()
Expand Down Expand Up @@ -919,28 +922,33 @@ class Partitioning(object):
self._hash_partitions = []
self._range_partition_cols = None

def add_hash_partitions(self, column_names, num_buckets):
def add_hash_partitions(self, column_names, num_buckets, seed=None):
"""
Adds a set of hash partitions to the table.
For each set of hash partitions added to the table, the total number of
table partitions is multiplied by the number of buckets. For example, if a
table is created with 3 split rows, and two hash partitions with 4 and 5
buckets respectively, the total number of table partitions will be 80
(4 range partitions * 4 hash buckets * 5 hash buckets).
(4 range partitions * 4 hash buckets * 5 hash buckets). Optionally, a
seed can be used to randomize the mapping of rows to hash buckets.
Setting the seed may provide some amount of protection against denial
of service attacks when the hashed columns contain user provided values.
Parameters
----------
column_names : list of string column names on which to partition
num_buckets : the number of buckets to create
seed : int - optional
Hash seed for mapping rows to hash buckets.
Returns
-------
self: this object
"""
if isinstance(column_names, str):
column_names = [column_names]
self._hash_partitions.append( (column_names, num_buckets) )
self._hash_partitions.append( (column_names, num_buckets, seed) )
return self

def set_range_partition_columns(self, column_names):
Expand Down
6 changes: 5 additions & 1 deletion python/kudu/libkudu_client.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,11 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
cdef cppclass KuduTableCreator:
KuduTableCreator& table_name(string& name)
KuduTableCreator& schema(KuduSchema* schema)
KuduTableCreator& add_hash_partitions(vector[string]& columns, int num_buckets)
KuduTableCreator& add_hash_partitions(vector[string]& columns,
int num_buckets)
KuduTableCreator& add_hash_partitions(vector[string]& columns,
int num_buckets,
int seed)
KuduTableCreator& set_range_partition_columns(vector[string]& columns)
KuduTableCreator& split_rows(vector[const KuduPartialRow*]& split_rows)
KuduTableCreator& num_replicas(int n_replicas)
Expand Down
7 changes: 7 additions & 0 deletions python/kudu/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ def test_create_partitioned_table(self):
partitioning=Partitioning().set_range_partition_columns([]))
self.client.delete_table(name)

self.client.create_table(
name, self.schema,
partitioning=Partitioning().add_hash_partitions(['key'],
2,
seed=342310))
self.client.delete_table(name)

finally:
try:
self.client.delete_table(name)
Expand Down

0 comments on commit c91d04a

Please sign in to comment.