Skip to content

Commit

Permalink
Merge pull request pandas-dev#5507 from jreback/msgpack_bug
Browse files (browse the repository at this point in the history)
BUG: bug in to_msgpack for timezone aware datetime index
  • Loading branch information
jreback committed Nov 13, 2013
2 parents 3239b29 + 693a957 commit 6a54af8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/source/release.rst
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,7 +85,7 @@ Experimental Features
(:issue:`4897`).
- Add msgpack support via ``pd.read_msgpack()`` and ``pd.to_msgpack()`` /
``df.to_msgpack()`` for serialization of arbitrary pandas (and python
objects) in a lightweight portable binary format (:issue:`686`)
objects) in a lightweight portable binary format (:issue:`686`, :issue:`5506`)
- Added PySide support for the qtpandas DataFrameModel and DataFrameWidget.
- Added :mod:`pandas.io.gbq` for reading from (and writing to) Google
BigQuery into a DataFrame. (:issue:`4140`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -842,7 +842,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
from pandas.io import pytables
return pytables.to_hdf(path_or_buf, key, self, **kwargs)

def to_msgpack(self, path_or_buf, **kwargs):
def to_msgpack(self, path_or_buf=None, **kwargs):
"""
msgpack (serialize) object to input file path
Expand Down
25 changes: 19 additions & 6 deletions pandas/io/packers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,13 +100,14 @@ def to_msgpack(path_or_buf, *args, **kwargs):
def writer(fh):
for a in args:
fh.write(pack(a, **kwargs))
return fh

if isinstance(path_or_buf, compat.string_types):
with open(path_or_buf, mode) as fh:
writer(fh)
elif path_or_buf is None:
return writer(compat.BytesIO())
buf = compat.BytesIO()
writer(buf)
return buf.getvalue()
else:
writer(path_or_buf)

Expand Down Expand Up @@ -263,17 +264,23 @@ def encode(obj):
return {'typ': 'period_index',
'klass': obj.__class__.__name__,
'name': getattr(obj, 'name', None),
'freq': obj.freqstr,
'freq': getattr(obj,'freqstr',None),
'dtype': obj.dtype.num,
'data': convert(obj.asi8)}
elif isinstance(obj, DatetimeIndex):
tz = getattr(obj,'tz',None)

# store tz info and data as UTC
if tz is not None:
tz = tz.zone
obj = obj.tz_convert('UTC')
return {'typ': 'datetime_index',
'klass': obj.__class__.__name__,
'name': getattr(obj, 'name', None),
'dtype': obj.dtype.num,
'data': convert(obj.asi8),
'freq': obj.freqstr,
'tz': obj.tz}
'freq': getattr(obj,'freqstr',None),
'tz': tz }
elif isinstance(obj, MultiIndex):
return {'typ': 'multi_index',
'klass': obj.__class__.__name__,
Expand Down Expand Up @@ -440,7 +447,13 @@ def decode(obj):
return globals()[obj['klass']](data, name=obj['name'], freq=obj['freq'])
elif typ == 'datetime_index':
data = unconvert(obj['data'], np.int64, obj.get('compress'))
return globals()[obj['klass']](data, freq=obj['freq'], tz=obj['tz'], name=obj['name'])
result = globals()[obj['klass']](data, freq=obj['freq'], name=obj['name'])
tz = obj['tz']

# reverse tz conversion
if tz is not None:
result = result.tz_localize('UTC').tz_convert(tz)
return result
elif typ == 'series':
dtype = dtype_for(obj['dtype'])
index = obj['index']
Expand Down
51 changes: 33 additions & 18 deletions pandas/io/tests/test_packers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,29 +61,33 @@ def test_string_io(self):

df = DataFrame(np.random.randn(10,2))
s = df.to_msgpack(None)
result = read_msgpack(s.getvalue())
result = read_msgpack(s)
tm.assert_frame_equal(result,df)

s = df.to_msgpack()
result = read_msgpack(s)
tm.assert_frame_equal(result,df)

s = df.to_msgpack()
result = read_msgpack(compat.BytesIO(s))
tm.assert_frame_equal(result,df)

s = to_msgpack(None,df)
result = read_msgpack(s.getvalue())
result = read_msgpack(s)
tm.assert_frame_equal(result, df)

with ensure_clean(self.path) as p:

s = df.to_msgpack(None)
s = df.to_msgpack()
fh = open(p,'wb')
fh.write(s.getvalue())
fh.write(s)
fh.close()
result = read_msgpack(p)
tm.assert_frame_equal(result, df)

def test_iterator_with_string_io(self):

dfs = [ DataFrame(np.random.randn(10,2)) for i in range(5) ]
s = to_msgpack(None,*dfs)
for i, result in enumerate(read_msgpack(s.getvalue(),iterator=True)):
tm.assert_frame_equal(result,dfs[i])

s = to_msgpack(None,*dfs)
for i, result in enumerate(read_msgpack(s,iterator=True)):
tm.assert_frame_equal(result,dfs[i])
Expand All @@ -98,7 +102,7 @@ def test_numpy_scalar_float(self):
def test_numpy_scalar_complex(self):
x = np.complex64(np.random.rand() + 1j * np.random.rand())
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(np.allclose(x, x_rec))

def test_scalar_float(self):
x = np.random.rand()
Expand All @@ -108,10 +112,9 @@ def test_scalar_float(self):
def test_scalar_complex(self):
x = np.random.rand() + 1j * np.random.rand()
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(np.allclose(x, x_rec))

def test_list_numpy_float(self):
raise nose.SkipTest('buggy test')
x = [np.float32(np.random.rand()) for i in range(5)]
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
Expand All @@ -120,13 +123,11 @@ def test_list_numpy_float_complex(self):
if not hasattr(np, 'complex128'):
raise nose.SkipTest('numpy cant handle complex128')

# buggy test
raise nose.SkipTest('buggy test')
x = [np.float32(np.random.rand()) for i in range(5)] + \
[np.complex128(np.random.rand() + 1j * np.random.rand())
for i in range(5)]
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(np.allclose(x, x_rec))

def test_list_float(self):
x = [np.random.rand() for i in range(5)]
Expand All @@ -137,7 +138,7 @@ def test_list_float_complex(self):
x = [np.random.rand() for i in range(5)] + \
[(np.random.rand() + 1j * np.random.rand()) for i in range(5)]
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(np.allclose(x, x_rec))

def test_dict_float(self):
x = {'foo': 1.0, 'bar': 2.0}
Expand All @@ -147,7 +148,8 @@ def test_dict_float(self):
def test_dict_complex(self):
x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))

def test_dict_numpy_float(self):
x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)}
Expand All @@ -158,7 +160,9 @@ def test_dict_numpy_complex(self):
x = {'foo': np.complex128(
1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)}
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))


def test_numpy_array_float(self):

Expand All @@ -173,7 +177,8 @@ def test_numpy_array_float(self):
def test_numpy_array_complex(self):
x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128)
x_rec = self.encode_decode(x)
tm.assert_almost_equal(x,x_rec)
self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and
x.dtype == x_rec.dtype)

def test_list_mixed(self):
x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')]
Expand Down Expand Up @@ -235,6 +240,16 @@ def test_basic_index(self):
i_rec = self.encode_decode(i)
self.assert_(i.equals(i_rec))

# datetime with no freq (GH5506)
i = Index([Timestamp('20130101'),Timestamp('20130103')])
i_rec = self.encode_decode(i)
self.assert_(i.equals(i_rec))

# datetime with timezone
i = Index([Timestamp('20130101 9:00:00'),Timestamp('20130103 11:00:00')]).tz_localize('US/Eastern')
i_rec = self.encode_decode(i)
self.assert_(i.equals(i_rec))

def test_multi_index(self):

for s, i in self.mi.items():
Expand Down

0 comments on commit 6a54af8

Please sign in to comment.