Skip to content

Commit

Permalink
Add support for custom decimal string formatter (singer-io#125)
Browse files Browse the repository at this point in the history
* Handle possible input types to format as decimals

* Stringify resulting data post-decimal conversion, fix return

* Pylint fixes

* added tests for singer.decimal logic

* modified transform to parse NaN values as NaNs

* changed logic to transform any occurrence of sNaN into NaN

Co-authored-by: Jacob Baca <[email protected]>
Co-authored-by: Jacob Baca <[email protected]>
  • Loading branch information
3 people authored Mar 3, 2021
1 parent 49c9f08 commit 0892140
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 0 deletions.
19 changes: 19 additions & 0 deletions singer/transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import decimal
import logging
import re
from jsonschema import RefResolver
Expand Down Expand Up @@ -271,7 +272,25 @@ def _transform(self, data, typ, schema, path):
return False, None

return True, data
elif schema.get("format") == "singer.decimal":
if data is None:
return False, None

if isinstance(data, (str, float, int)):
try:
return True, str(decimal.Decimal(str(data)).normalize())
except:
return False, None
elif isinstance(data, decimal.Decimal):
try:
if data.is_snan():
return True, 'NaN'
else:
return True, str(data.normalize())
except:
return False, None

return False, None
elif typ == "object":
# Objects do not necessarily specify properties
return self._transform_object(data,
Expand Down
63 changes: 63 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
import decimal
from singer import transform
from singer.transform import *

Expand Down Expand Up @@ -252,6 +253,68 @@ def test_null_object_transform(self):
empty_data = {'addrs': {}}
self.assertDictEqual(empty_data, transform(empty_data, schema))

def test_decimal_types_transform(self):
    """Check the string output of ``transform`` for the "singer.decimal" format.

    Covers str, float, int and ``decimal.Decimal`` inputs, the special
    values Infinity / NaN / sNaN, and the two rejection cases (a
    non-numeric string and ``None``), which are expected to raise
    ``SchemaMismatch``.

    The expected strings are in normalized form: trailing zeros are
    folded into an exponent, which is why '100' is expected as '1E+2'.
    """
    schema = {"type": "object",
              "properties": {"percentage": {"type": ["string"],
                                            "format": "singer.decimal"}}}

    # Special values: infinities pass through, sNaN is expected as 'NaN'.
    inf = {'percentage': 'Infinity'}
    negative_inf = {'percentage': '-Infinity'}
    root2 = {'percentage': 1.4142135623730951}
    nan = {'percentage': decimal.Decimal('NaN')}
    snan = {'percentage': decimal.Decimal('sNaN')}

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and no longer exists in unittest on Python 3.12+.
    self.assertEqual(inf, transform(inf, schema))
    self.assertEqual(negative_inf, transform(negative_inf, schema))
    self.assertEqual({'percentage': '1.4142135623730951'}, transform(root2, schema))
    self.assertEqual({'percentage': 'NaN'}, transform(nan, schema))
    self.assertEqual({'percentage': 'NaN'}, transform(snan, schema))

    # String inputs.
    str1 = {'percentage': '0.1'}
    str2 = {'percentage': '0.0000000000001'}
    str3 = {'percentage': '1E+13'}
    str4 = {'percentage': '100'}
    str5 = {'percentage': '-100'}
    self.assertEqual(str1, transform(str1, schema))
    self.assertEqual({'percentage': '1E-13'}, transform(str2, schema))
    self.assertEqual({'percentage': '1E+13'}, transform(str3, schema))
    self.assertEqual({'percentage': '1E+2'}, transform(str4, schema))
    self.assertEqual({'percentage': '-1E+2'}, transform(str5, schema))

    # Float inputs. The first literal carries more digits than a float can
    # hold, so the value is exactly 12.0 before it ever reaches transform.
    float1 = {'percentage': 12.0000000000000000000000000001234556}
    float2 = {'percentage': 0.0123}
    float3 = {'percentage': 100.0123}
    float4 = {'percentage': -100.0123}
    self.assertEqual({'percentage': '12'}, transform(float1, schema))
    self.assertEqual({'percentage': '0.0123'}, transform(float2, schema))
    self.assertEqual({'percentage': '100.0123'}, transform(float3, schema))
    self.assertEqual({'percentage': '-100.0123'}, transform(float4, schema))

    # Integer inputs.
    int1 = {'percentage': 123}
    int2 = {'percentage': 0}
    int3 = {'percentage': -1000}
    self.assertEqual({'percentage': '123'}, transform(int1, schema))
    self.assertEqual({'percentage': '0'}, transform(int2, schema))
    self.assertEqual({'percentage': '-1E+3'}, transform(int3, schema))

    # decimal.Decimal inputs.
    dec1 = {'percentage': decimal.Decimal('1.1010101')}
    dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')}
    dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')}
    dec4 = {'percentage': decimal.Decimal('100')}
    self.assertEqual({'percentage': '1.1010101'}, transform(dec1, schema))
    self.assertEqual({'percentage': '0.111111111111111111111111'}, transform(dec2, schema))
    self.assertEqual({'percentage': '-0.111111111111111111111111'}, transform(dec3, schema))
    self.assertEqual({'percentage': '1E+2'}, transform(dec4, schema))

    # Rejection cases: a non-numeric string and None must not validate.
    bad1 = {'percentage': 'fsdkjl'}
    with self.assertRaises(SchemaMismatch):
        transform(bad1, schema)

    badnull = {'percentage': None}
    with self.assertRaises(SchemaMismatch):
        # Only the raise matters here; comparing the return value would
        # never be reached when the expected exception fires.
        transform(badnull, schema)

class TestTransformsWithMetadata(unittest.TestCase):

def test_drops_no_data_when_not_dict(self):
Expand Down

0 comments on commit 0892140

Please sign in to comment.