Skip to content

Commit

Permalink
Improve type detection for pandas.Series serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
maciejlach committed Apr 10, 2015
1 parent e66644a commit 7a7c676
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 31 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
------------------------------------------------------------------------------
qPython 1.0.0 [2015.04.08]
qPython 1.0.0 [2015.04.10]
------------------------------------------------------------------------------

- Improve type detection for pandas.Series serialization
- Fix: bug during handshake with blank username/password

------------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions qpython/_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ def _write_pandas_series(self, data, qtype = None):
if qtype is None:
# determine type based on first element of the numpy array
qtype = Q_TYPE.get(type(data[0]), QGENERAL_LIST)

if qtype == QSTRING:
# assume we have a generic list of strings -> force representation as symbol list
qtype = QSYMBOL

if qtype is None:
raise QWriterException('Unable to serialize pandas series %s' % data)
Expand Down
59 changes: 37 additions & 22 deletions tests/pandas_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,17 @@
('("quick"; "brown"; "fox"; "jumps"; "over"; "a lazy"; "dog")',
{'data': pandas.Series(['quick', 'brown', 'fox', 'jumps', 'over', 'a lazy', 'dog']),
'meta': MetaData(qtype = QSTRING_LIST) }),
('`the`quick`brown`fox', ({'data': pandas.Series(numpy.array(['the', 'quick', 'brown', 'fox'])),
'meta': MetaData(qtype = QSYMBOL_LIST) },
{'data': pandas.Series(['the', 'quick', 'brown', 'fox']),
'meta': MetaData(qtype = QSYMBOL_LIST) },
pandas.Series(['the', 'quick', 'brown', 'fox'])
)),
('flip `name`iq!(`Dent`Beeblebrox`Prefect;98 42 126)',
pandas.DataFrame(OrderedDict((('name', pandas.Series(['Dent', 'Beeblebrox', 'Prefect'])),
('iq', pandas.Series(numpy.array([98, 42, 126], dtype = numpy.int64))),
))),
),
))

def arrays_equal(left, right):
Expand Down Expand Up @@ -203,7 +214,7 @@ def compare(left, right):
for c in left:
if not arrays_equal(left[c], right[c]):
return False

return True
elif type(left) == QFunction:
return type(right) == QFunction
Expand All @@ -226,7 +237,7 @@ def init():
break

BINARY[query] = binary


def test_reading_pandas():
print('Deserialization (pandas)')
Expand Down Expand Up @@ -263,9 +274,9 @@ def test_reading_pandas():

def test_writing_pandas():
w = qwriter.QWriter(None, 3)

for query, value in PANDAS_EXPRESSIONS.iteritems():
sys.stdout.write( '%-75s' % query )
sys.stdout.write('%-75s' % query)
if isinstance(value, dict):
data = value['data']
if 'index' in value:
Expand All @@ -275,25 +286,29 @@ def test_writing_pandas():
else:
data = value
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
sys.stdout.write( '.' )
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
sys.stdout.write('.')

print('')

for query, value in PANDAS_EXPRESSIONS_ALT.iteritems():
sys.stdout.write( '%-75s' % query )
if isinstance(value, dict):
data = value['data']
if 'index' in value:
data.reset_index(drop = True)
data = data.set_index(value['index'])
data.meta = value['meta']
else:
data = value
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)
sys.stdout.write( '.' )


for query, variants in PANDAS_EXPRESSIONS_ALT.iteritems():
sys.stdout.write('%-75s' % query)
variants = [variants] if not isinstance(variants, tuple) else variants

for value in variants:
if isinstance(value, dict):
data = value['data']
if 'index' in value:
data.reset_index(drop = True)
data = data.set_index(value['index'])
data.meta = value['meta']
else:
data = value
serialized = binascii.hexlify(w.write(data, 1))[16:].lower()
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (value, BINARY[query].lower(), serialized)

sys.stdout.write('.')

print('')


Expand Down
17 changes: 9 additions & 8 deletions tests/qwriter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@
('(`one;2 3;"456";(7;8 9))', [numpy.string_('one'), qlist(numpy.array([2, 3], dtype=numpy.int64), qtype=QLONG_LIST), '456', [numpy.int64(7), qlist(numpy.array([8, 9], dtype=numpy.int64), qtype=QLONG_LIST)]]),

('`jumps`over`a`lazy`dog', (numpy.array(['jumps', 'over', 'a', 'lazy', 'dog'], dtype=numpy.string_),
numpy.array(['jumps', 'over', 'a', 'lazy', 'dog']),
qlist(numpy.array(['jumps', 'over', 'a', 'lazy', 'dog']), qtype = QSYMBOL_LIST),
qlist(['jumps', 'over', 'a', 'lazy', 'dog'], qtype = QSYMBOL_LIST))),
('`the`quick`brown`fox', numpy.array([numpy.string_('the'), numpy.string_('quick'), numpy.string_('brown'), numpy.string_('fox')], dtype=numpy.object)),
Expand Down Expand Up @@ -307,6 +308,9 @@
(qtable(qlist(numpy.array(['pos', 'dates']), qtype = QSYMBOL_LIST),
[qlist(numpy.array(['d1', 'd2', 'd3']), qtype = QSYMBOL_LIST),
qlist(numpy.array([366, 121, qnull(QDATE)]), qtype=QDATE_LIST)]),
qtable(['pos', 'dates'],
[numpy.array(['d1', 'd2', 'd3']),
numpy.array([numpy.datetime64('2001-01-01'), numpy.datetime64('2000-05-01'), numpy.datetime64('NaT')], dtype='datetime64[D]')]),
qtable(['pos', 'dates'],
[qlist(numpy.array(['d1', 'd2', 'd3']), qtype = QSYMBOL_LIST),
numpy.array([numpy.datetime64('2001-01-01'), numpy.datetime64('2000-05-01'), numpy.datetime64('NaT')], dtype='datetime64[D]')])
Expand Down Expand Up @@ -338,16 +342,13 @@ def init():
def test_writing():
w = qwriter.QWriter(None, 3)

for query, value in EXPRESSIONS.iteritems():
for query, variants in EXPRESSIONS.iteritems():
sys.stdout.write( '%-75s' % query )
if isinstance(value, tuple):
for object in value:
sys.stdout.write( '.' )
serialized = binascii.hexlify(w.write(object, 1))[16:].lower()
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (query, BINARY[query].lower(), serialized)
else:
variants = [variants] if not isinstance(variants, tuple) else variants

for object in variants:
sys.stdout.write( '.' )
serialized = binascii.hexlify(w.write(value, 1))[16:].lower()
serialized = binascii.hexlify(w.write(object, 1))[16:].lower()
assert serialized == BINARY[query].lower(), 'serialization failed: %s, expected: %s actual: %s' % (query, BINARY[query].lower(), serialized)

print('')
Expand Down

0 comments on commit 7a7c676

Please sign in to comment.