Skip to content

Commit

Permalink
ARROW-15370: [Python] Fix regression in empty table to_pandas conversion
Browse files Browse the repository at this point in the history
This is a revert of ARROW-10643 (apache#12081). I think the bug fix from that PR is less important than getting this regression fixed, so I kept the test that it added but marked it as xfail.

Closes apache#12189 from jorisvandenbossche/ARROW-15370

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Krisztián Szűcs <[email protected]>
  • Loading branch information
jorisvandenbossche authored and kszucs committed Jan 19, 2022
1 parent 3f98301 commit 657d21a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
2 changes: 1 addition & 1 deletion python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ def _reconstruct_index(table, index_descriptors, all_columns):
descr['stop'],
step=descr['step'],
name=index_name)
- if len(table) > 0 and len(index_level) != len(table):
+ if len(index_level) != len(table):
# Possibly the result of munged metadata
continue
else:
Expand Down
18 changes: 18 additions & 0 deletions python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def test_chunked_array_to_pandas_preserve_name():
tm.assert_series_equal(result, expected)


@pytest.mark.xfail
@pytest.mark.pandas
def test_table_roundtrip_to_pandas_empty_dataframe():
# https://issues.apache.org/jira/browse/ARROW-10643
Expand All @@ -348,10 +349,27 @@ def test_table_roundtrip_to_pandas_empty_dataframe():
table = pa.table(data)
result = table.to_pandas()

# TODO the conversion results in a table with 0 rows if the original
# DataFrame has a RangeIndex (i.e. no index column in the converted
# Arrow table)
assert table.num_rows == 10
assert data.shape == (10, 0)
assert result.shape == (10, 0)


@pytest.mark.pandas
def test_to_pandas_empty_table():
    """Check that a zero-row table converts to an empty two-column frame.

    Regression test for
    https://issues.apache.org/jira/browse/ARROW-15370
    """
    import pandas as pd
    import pandas.testing as tm

    source_frame = pd.DataFrame({'a': [1, 2], 'b': [0.1, 0.2]})

    # Take the schema of the table built from the two-row frame and
    # create a zero-row table from it before converting back to pandas.
    schema = pa.table(source_frame).schema
    empty_frame = schema.empty_table().to_pandas()

    # No rows, but both columns must still be present.
    assert empty_frame.shape == (0, 2)

    # The result must match an empty slice of the original frame.
    expected = source_frame.iloc[:0]
    tm.assert_frame_equal(empty_frame, expected)


@pytest.mark.pandas
@pytest.mark.nopandas
def test_chunked_array_asarray():
Expand Down

0 comments on commit 657d21a

Please sign in to comment.