Skip to content

Commit

Permalink
ARROW-17192: [Python] Pass **kwargs in read_feather to to_pandas() (apache#14492)
Browse files Browse the repository at this point in the history

Pass `**kwargs` in `read_feather` to `to_pandas()` to ensure `timestamp_as_object=True` (together with other `**kwargs`) can be passed and the conversion between Arrow and pandas doesn't fail due to different datetime resolutions.

Authored-by: Alenka Frim <[email protected]>
Signed-off-by: Alenka Frim <[email protected]>
  • Loading branch information
AlenkaF authored Nov 9, 2022
1 parent 94cf74f commit 28a1152
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
7 changes: 5 additions & 2 deletions python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ def write_feather(df, dest, compression=None, compression_level=None,
raise


def read_feather(source, columns=None, use_threads=True, memory_map=False):
def read_feather(source, columns=None, use_threads=True,
memory_map=False, **kwargs):
"""
Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
feather.read_table.
Expand All @@ -222,6 +223,8 @@ def read_feather(source, columns=None, use_threads=True, memory_map=False):
reading from Feather format.
memory_map : boolean, default False
Use memory mapping when opening file on disk, when source is a str.
**kwargs
Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.
Returns
-------
Expand All @@ -230,7 +233,7 @@ def read_feather(source, columns=None, use_threads=True, memory_map=False):
_check_pandas_version()
return (read_table(
source, columns=columns, memory_map=memory_map,
use_threads=use_threads).to_pandas(use_threads=use_threads))
use_threads=use_threads).to_pandas(use_threads=use_threads, **kwargs))


def read_table(source, columns=None, memory_map=False, use_threads=True):
Expand Down
22 changes: 22 additions & 0 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,3 +838,25 @@ def test_preserve_index_pandas(version):
expected = df

_check_pandas_roundtrip(df, expected, version=version)


@pytest.mark.pandas
def test_feather_datetime_resolution_arrow_to_pandas(tempdir):
    """Round-trip datetimes through Feather with ``timestamp_as_object=True``.

    ARROW-17192: ``read_feather`` must accept ``timestamp_as_object=True``
    (along with any other keyword arguments) and hand it on to ``to_pandas``.
    """
    from datetime import datetime

    feather_path = tempdir / "test_resolution.feather"

    # NOTE(review): 1654 presumably lies outside what pandas' default
    # timestamp resolution can represent, which is why the values are read
    # back as plain datetime objects -- confirm against pandas limits.
    expected_dates = [
        datetime.fromisoformat("1654-01-01"),
        datetime.fromisoformat("1920-01-01"),
    ]
    frame = pd.DataFrame({"date": expected_dates})
    write_feather(frame, feather_path)

    result = read_feather(feather_path, timestamp_as_object=True)

    # Each value read back must equal the datetime that was written.
    for position, expected in enumerate(expected_dates):
        assert expected == result['date'][position]

0 comments on commit 28a1152

Please sign in to comment.