Skip to content

Commit

Permalink
[Python] Do not sort schema fields by default (apache#12232)
Browse files Browse the repository at this point in the history
### Motivation

In an Avro schema, the order of the fields is used in the validation process, so sorting the fields generates an unexpected schema for a Python producer/consumer and makes it non-interoperable with Java and other clients.
  • Loading branch information
merlimat authored Sep 29, 2021
1 parent 879e93d commit 2f3ad4d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 5 deletions.
19 changes: 14 additions & 5 deletions pulsar-client-cpp/python/pulsar/schema/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ class Record(with_metaclass(RecordMeta, object)):
# This field is used to set namespace for Avro Record schema.
_avro_namespace = None

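# When set to True, generate a schema where fields are sorted alphabetically.
# When False (the default), fields are emitted in the iteration order of cls._fields.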
_sorted_fields = False

def __init__(self, default=None, required_default=False, required=False, *args, **kwargs):
self._required_default = required_default
self._default = default
Expand Down Expand Up @@ -114,20 +117,26 @@ def schema_info(cls, defined_names):

defined_names.add(namespace_name)

schema = {'name': str(cls.__name__)}
schema = {
'type': 'record',
'name': str(cls.__name__)
}
if cls._avro_namespace is not None:
schema['namespace'] = cls._avro_namespace
schema['type'] = 'record'
schema['fields'] = []

for name in sorted(cls._fields.keys()):
if cls._sorted_fields:
fields = sorted(cls._fields.keys())
else:
fields = cls._fields.keys()
for name in fields:
field = cls._fields[name]
field_type = field.schema_info(defined_names) \
if field._required else ['null', field.schema_info(defined_names)]
schema['fields'].append({
'name': name,
'type': field_type,
'default': field.default()
'default': field.default(),
'type': field_type
}) if field.required_default() else schema['fields'].append({
'name': name,
'type': field_type,
Expand Down
38 changes: 38 additions & 0 deletions pulsar-client-cpp/python/schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Color(Enum):
blue = 3

class Example(Record):
_sorted_fields = True
a = String()
b = Integer()
c = Array(String())
Expand Down Expand Up @@ -78,11 +79,13 @@ class Example(Record):

def test_complex(self):
class MySubRecord(Record):
_sorted_fields = True
x = Integer()
y = Long()
z = String()

class Example(Record):
_sorted_fields = True
a = String()
sub = MySubRecord # Test with class
sub2 = MySubRecord() # Test with instance
Expand Down Expand Up @@ -348,6 +351,34 @@ class Example(Record):
self.assertEqual(r2.__class__.__name__, 'Example')
self.assertEqual(r2, r)

# Without _sorted_fields, two records that declare the same fields in a
# different order are expected to produce different schema 'fields' lists.
def test_non_sorted_fields(self):
# T1 declares its fields in the order a, b, c, d.
class T1(Record):
a = Integer()
b = Integer()
c = Double()
d = String()

# T2 declares the same names and types, but in the order b, a, d, c.
class T2(Record):
b = Integer()
a = Integer()
d = String()
c = Double()

# Neither class sets _sorted_fields, so the 'fields' entries must differ.
self.assertNotEqual(T1.schema()['fields'], T2.schema()['fields'])

# With _sorted_fields = True, two records that declare the same fields in a
# different order are expected to produce equal schema 'fields' lists.
def test_sorted_fields(self):
# T1 declares a before b and opts in to sorted fields.
class T1(Record):
_sorted_fields = True
a = Integer()
b = Integer()

# T2 declares b before a and also opts in to sorted fields.
class T2(Record):
_sorted_fields = True
b = Integer()
a = Integer()

# Both classes opt in to sorting, so the 'fields' entries must be equal.
self.assertEqual(T1.schema()['fields'], T2.schema()['fields'])

def test_schema_version(self):
class Example(Record):
a = Integer()
Expand Down Expand Up @@ -691,6 +722,7 @@ class ArrayArray(Record):

def test_avro_required_default(self):
class MySubRecord(Record):
_sorted_fields = True
x = Integer()
y = Long()
z = String()
Expand All @@ -707,7 +739,9 @@ class Example(Record):
i = Map(String())
j = MySubRecord()


class ExampleRequiredDefault(Record):
_sorted_fields = True
a = Integer(required_default=True)
b = Boolean(required=True, required_default=True)
c = Long(required_default=True)
Expand Down Expand Up @@ -879,10 +913,12 @@ class MyRecord(Record):

def test_serialize_schema_complex(self):
class NestedObj1(Record):
_sorted_fields = True
na1 = String()
nb1 = Double()

class NestedObj2(Record):
_sorted_fields = True
na2 = Integer()
nb2 = Boolean()
nc2 = NestedObj1()
Expand All @@ -892,6 +928,7 @@ class NestedObj3(Record):

class NestedObj4(Record):
_avro_namespace = 'xxx4'
_sorted_fields = True
na4 = String()
nb4 = Integer()

Expand All @@ -902,6 +939,7 @@ class Color(Enum):

class ComplexRecord(Record):
_avro_namespace = 'xxx.xxx'
_sorted_fields = True
a = Integer()
b = Integer()
color = Color
Expand Down

0 comments on commit 2f3ad4d

Please sign in to comment.