Merge pull request pandas-dev#5507 from jreback/msgpack_bug

BUG: bug in to_msgpack for timezone aware datetime index
bingo · Nov 13, 2013 · 6a54af8 · 6a54af8
2 parents 3239b29 + 693a957
commit 6a54af8
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 26 deletions.
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -85,7 +85,7 @@ Experimental Features
     (:issue:`4897`).
   - Add msgpack support via ``pd.read_msgpack()`` and ``pd.to_msgpack()`` /
     ``df.to_msgpack()`` for serialization of arbitrary pandas (and python
-    objects) in a lightweight portable binary format (:issue:`686`)
+    objects) in a lightweight portable binary format (:issue:`686`, :issue:`5506`)
   - Added PySide support for the qtpandas DataFrameModel and DataFrameWidget.
   - Added :mod:`pandas.io.gbq` for reading from (and writing to) Google
     BigQuery into a DataFrame. (:issue:`4140`)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -842,7 +842,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
         from pandas.io import pytables
         return pytables.to_hdf(path_or_buf, key, self, **kwargs)
 
-    def to_msgpack(self, path_or_buf, **kwargs):
+    def to_msgpack(self, path_or_buf=None, **kwargs):
         """
         msgpack (serialize) object to input file path
 

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
@@ -100,13 +100,14 @@ def to_msgpack(path_or_buf, *args, **kwargs):
     def writer(fh):
         for a in args:
             fh.write(pack(a, **kwargs))
-        return fh
 
     if isinstance(path_or_buf, compat.string_types):
         with open(path_or_buf, mode) as fh:
             writer(fh)
     elif path_or_buf is None:
-        return writer(compat.BytesIO())
+        buf = compat.BytesIO()
+        writer(buf)
+        return buf.getvalue()
     else:
         writer(path_or_buf)
 
@@ -263,17 +264,23 @@ def encode(obj):
             return {'typ': 'period_index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
-                    'freq': obj.freqstr,
+                    'freq': getattr(obj,'freqstr',None),
                     'dtype': obj.dtype.num,
                     'data': convert(obj.asi8)}
         elif isinstance(obj, DatetimeIndex):
+            tz = getattr(obj,'tz',None)
+
+            # keep the tz name for the 'tz' field; serialize the data itself as UTC
+            if tz is not None:
+                tz = tz.zone
+                obj = obj.tz_convert('UTC')
             return {'typ': 'datetime_index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
                     'dtype': obj.dtype.num,
                     'data': convert(obj.asi8),
-                    'freq': obj.freqstr,
-                    'tz': obj.tz}
+                    'freq': getattr(obj,'freqstr',None),
+                    'tz': tz }
         elif isinstance(obj, MultiIndex):
             return {'typ': 'multi_index',
                     'klass': obj.__class__.__name__,
@@ -440,7 +447,13 @@ def decode(obj):
         return globals()[obj['klass']](data, name=obj['name'], freq=obj['freq'])
     elif typ == 'datetime_index':
         data = unconvert(obj['data'], np.int64, obj.get('compress'))
-        return globals()[obj['klass']](data, freq=obj['freq'], tz=obj['tz'], name=obj['name'])
+        result = globals()[obj['klass']](data, freq=obj['freq'], name=obj['name'])
+        tz = obj['tz']
+
+        # data was serialized as UTC: localize to UTC, then convert back to the stored tz
+        if tz is not None:
+            result = result.tz_localize('UTC').tz_convert(tz)
+        return result
     elif typ == 'series':
         dtype = dtype_for(obj['dtype'])
         index = obj['index']

diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
@@ -61,29 +61,33 @@ def test_string_io(self):
 
         df = DataFrame(np.random.randn(10,2))
         s = df.to_msgpack(None)
-        result = read_msgpack(s.getvalue())
+        result = read_msgpack(s)
+        tm.assert_frame_equal(result,df)
+
+        s = df.to_msgpack()
+        result = read_msgpack(s)
+        tm.assert_frame_equal(result,df)
+
+        s = df.to_msgpack()
+        result = read_msgpack(compat.BytesIO(s))
         tm.assert_frame_equal(result,df)
 
         s = to_msgpack(None,df)
-        result = read_msgpack(s.getvalue())
+        result = read_msgpack(s)
         tm.assert_frame_equal(result, df)
 
         with ensure_clean(self.path) as p:
 
-            s = df.to_msgpack(None)
+            s = df.to_msgpack()
             fh = open(p,'wb')
-            fh.write(s.getvalue())
+            fh.write(s)
             fh.close()
             result = read_msgpack(p)
             tm.assert_frame_equal(result, df)
 
     def test_iterator_with_string_io(self):
 
         dfs = [ DataFrame(np.random.randn(10,2)) for i in range(5) ]
-        s = to_msgpack(None,*dfs)
-        for i, result in enumerate(read_msgpack(s.getvalue(),iterator=True)):
-            tm.assert_frame_equal(result,dfs[i])
-
         s = to_msgpack(None,*dfs)
         for i, result in enumerate(read_msgpack(s,iterator=True)):
             tm.assert_frame_equal(result,dfs[i])
@@ -98,7 +102,7 @@ def test_numpy_scalar_float(self):
     def test_numpy_scalar_complex(self):
         x = np.complex64(np.random.rand() + 1j * np.random.rand())
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(np.allclose(x, x_rec))
 
     def test_scalar_float(self):
         x = np.random.rand()
@@ -108,10 +112,9 @@ def test_scalar_float(self):
     def test_scalar_complex(self):
         x = np.random.rand() + 1j * np.random.rand()
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(np.allclose(x, x_rec))
 
     def test_list_numpy_float(self):
-        raise nose.SkipTest('buggy test')
         x = [np.float32(np.random.rand()) for i in range(5)]
         x_rec = self.encode_decode(x)
         tm.assert_almost_equal(x,x_rec)
@@ -120,13 +123,11 @@ def test_list_numpy_float_complex(self):
         if not hasattr(np, 'complex128'):
             raise nose.SkipTest('numpy cant handle complex128')
 
-        # buggy test
-        raise nose.SkipTest('buggy test')
         x = [np.float32(np.random.rand()) for i in range(5)] + \
             [np.complex128(np.random.rand() + 1j * np.random.rand())
              for i in range(5)]
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(np.allclose(x, x_rec))
 
     def test_list_float(self):
         x = [np.random.rand() for i in range(5)]
@@ -137,7 +138,7 @@ def test_list_float_complex(self):
         x = [np.random.rand() for i in range(5)] + \
             [(np.random.rand() + 1j * np.random.rand()) for i in range(5)]
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(np.allclose(x, x_rec))
 
     def test_dict_float(self):
         x = {'foo': 1.0, 'bar': 2.0}
@@ -147,7 +148,8 @@ def test_dict_float(self):
     def test_dict_complex(self):
         x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
+                     all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))
 
     def test_dict_numpy_float(self):
         x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)}
@@ -158,7 +160,9 @@ def test_dict_numpy_complex(self):
         x = {'foo': np.complex128(
             1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)}
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
+                     all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))
+
 
     def test_numpy_array_float(self):
 
@@ -173,7 +177,8 @@ def test_numpy_array_float(self):
     def test_numpy_array_complex(self):
         x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128)
         x_rec = self.encode_decode(x)
-        tm.assert_almost_equal(x,x_rec)
+        self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and
+                     x.dtype == x_rec.dtype)
 
     def test_list_mixed(self):
         x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')]
@@ -235,6 +240,16 @@ def test_basic_index(self):
             i_rec = self.encode_decode(i)
             self.assert_(i.equals(i_rec))
 
+        # datetime with no freq (GH5506)
+        i = Index([Timestamp('20130101'),Timestamp('20130103')])
+        i_rec = self.encode_decode(i)
+        self.assert_(i.equals(i_rec))
+
+        # datetime with timezone
+        i = Index([Timestamp('20130101 9:00:00'),Timestamp('20130103 11:00:00')]).tz_localize('US/Eastern')
+        i_rec = self.encode_decode(i)
+        self.assert_(i.equals(i_rec))
+
     def test_multi_index(self):
 
         for s, i in self.mi.items():