Skip to content

Commit

Permalink
Merge pull request scipy#487 from matthew-brett/refactor-mio-fixes
Browse files Browse the repository at this point in the history
RF: refactor matlab endian fixes
  • Loading branch information
rgommers committed Mar 27, 2013
2 parents f031383 + 01bca13 commit dd40f08
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 42 deletions.
21 changes: 8 additions & 13 deletions scipy/io/matlab/mio5.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@

from scipy.lib.six import string_types

from . import byteordercodes as boc
from .byteordercodes import native_code, swapped_code

from .miobase import MatFileReader, docfiller, matdims, \
read_dtype, arr_to_chars, arr_dtype_number, \
Expand Down Expand Up @@ -373,7 +373,7 @@ def varmats_from_mat(file_obj):
rdr = MatFile5Reader(file_obj)
file_obj.seek(0)
# Raw read of top-level file header
hdr_len = MDTYPES[boc.native_code]['dtypes']['file_header'].itemsize
hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
raw_hdr = file_obj.read(hdr_len)
# Initialize variable reading
file_obj.seek(0)
Expand Down Expand Up @@ -490,10 +490,10 @@ def to_writeable(source):


# Native byte ordered dtypes for convenience for writers
NDT_FILE_HDR = MDTYPES[boc.native_code]['dtypes']['file_header']
NDT_TAG_FULL = MDTYPES[boc.native_code]['dtypes']['tag_full']
NDT_TAG_SMALL = MDTYPES[boc.native_code]['dtypes']['tag_smalldata']
NDT_ARRAY_FLAGS = MDTYPES[boc.native_code]['dtypes']['array_flags']
NDT_FILE_HDR = MDTYPES[native_code]['dtypes']['file_header']
NDT_TAG_FULL = MDTYPES[native_code]['dtypes']['tag_full']
NDT_TAG_SMALL = MDTYPES[native_code]['dtypes']['tag_smalldata']
NDT_ARRAY_FLAGS = MDTYPES[native_code]['dtypes']['array_flags']


class VarWriter5(object):
Expand All @@ -520,14 +520,9 @@ def write_element(self, arr, mdtype=None):
''' write tag and data '''
if mdtype is None:
mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]

# We are writing a little-endian Matlab file but our incoming arrays may
# be big-endian. In particular, they might be big-endian because we originally
# *read* them from a big-endian Matlab file
byte_order = arr.dtype.byteorder
if byte_order == '>' or (byte_order == '=' and not boc.sys_is_le):
# Array needs to be in native byte order
if arr.dtype.byteorder == swapped_code:
arr = arr.byteswap().newbyteorder()

byte_count = arr.size*arr.itemsize
if byte_count <= 4:
self.write_smalldata_element(arr, mdtype, byte_count)
Expand Down
9 changes: 2 additions & 7 deletions scipy/io/matlab/mio5_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ cdef class VarReader5:
# pointers to stuff in preader.class_dtypes
cdef PyObject* class_dtypes[_N_MXS]
# cached here for convenience in later array creation
cdef cnp.dtype U1_dtype
cdef cnp.dtype bool_dtype
# element processing options
cdef:
Expand Down Expand Up @@ -203,10 +202,6 @@ cdef class VarReader5:
if isinstance(key, str):
continue
self.class_dtypes[key] = <PyObject*>dt
# Always use U1 rather than <U1 or >U1 for interpreting string
# data, because the strings are created by the Python runtime
# by .decode() and hence use native byte order rather than the mat file's
self.U1_dtype = np.dtype('U1')
bool_dtype = np.dtype('bool')

def set_stream(self, fobj):
Expand Down Expand Up @@ -789,7 +784,7 @@ cdef class VarReader5:
if byte_count == 0:
arr = np.array(' ' * length, dtype='U')
return np.ndarray(shape=header.dims,
dtype=self.U1_dtype,
dtype='U1',
buffer=arr,
order='F')
# Character data can be of apparently numerical types,
Expand Down Expand Up @@ -819,7 +814,7 @@ cdef class VarReader5:
# could take this to numpy C-API level, but probably not worth
# it
return np.ndarray(shape=header.dims,
dtype=self.U1_dtype,
dtype='U1',
buffer=arr,
order='F')

Expand Down
Binary file added scipy/io/matlab/tests/data/little_endian.mat
Binary file not shown.
50 changes: 28 additions & 22 deletions scipy/io/matlab/tests/test_mio.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,31 +818,37 @@ def test_empty_string():
stream.close()


def test_read_big_endian():
# make sure big-endian data is read correctly
estring_fname = pjoin(test_data_path, 'big_endian.mat')
fp = open(estring_fname, 'rb')
rdr = MatFile5Reader_future(fp)
d = rdr.get_variables()
fp.close()
assert_array_equal(d['strings'], np.array([['hello'],
['world']], dtype=np.object))
assert_array_equal(d['floats'], np.array([[ 2., 3.],
[ 3., 4.]], dtype=np.float32))


def test_write_big_endian():
# we don't support writing actual big-endian .mat files, but we need to
# behave correctly if the user supplies a big-endian numpy array to write out
def test_read_both_endian():
# make sure both big- and little-endian data are read correctly
for fname in ('big_endian.mat', 'little_endian.mat'):
fp = open(pjoin(test_data_path, fname), 'rb')
rdr = MatFile5Reader_future(fp)
d = rdr.get_variables()
fp.close()
assert_array_equal(d['strings'],
np.array([['hello'],
['world']], dtype=np.object))
assert_array_equal(d['floats'],
np.array([[ 2., 3.],
[ 3., 4.]], dtype=np.float32))


def test_write_opposite_endian():
# We don't support writing opposite-endian .mat files, but we need to behave
# correctly if the user supplies an opposite-endian numpy array to write out
float_arr = np.array([[ 2., 3.],
[ 3., 4.]])
int_arr = np.arange(6).reshape((2, 3))
uni_arr = np.array(['hello', 'world'], dtype='U')
stream = BytesIO()
savemat_future(stream, {'a': np.array([[ 2., 3.],
[ 3., 4.]], dtype='>f4'),
'b': np.array(['hello', 'world'], dtype='>U')})
savemat_future(stream, {'floats': float_arr.byteswap().newbyteorder(),
'ints': int_arr.byteswap().newbyteorder(),
'uni_arr': uni_arr.byteswap().newbyteorder()})
rdr = MatFile5Reader_future(stream)
d = rdr.get_variables()
assert_array_equal(d['a'], np.array([[ 2., 3.],
[ 3., 4.]], dtype='f4'))
assert_array_equal(d['b'], np.array(['hello', 'world'], dtype='U'))
assert_array_equal(d['floats'], float_arr)
assert_array_equal(d['ints'], int_arr)
assert_array_equal(d['uni_arr'], uni_arr)
stream.close()


Expand Down

0 comments on commit dd40f08

Please sign in to comment.