Skip to content

Commit

Permalink
ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old
Browse files Browse the repository at this point in the history

Closes apache#6992 from jorisvandenbossche/ARROW-7950-pandas-version

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
jorisvandenbossche authored and wesm committed Apr 23, 2020
1 parent b93528a commit fa60ea6
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 30 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [3.6]
python: [3.8]
env:
PYTHON: ${{ matrix.python }}
steps:
Expand Down Expand Up @@ -126,8 +126,8 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [3.8]
pandas: ["latest"]
python: [3.6]
pandas: ["latest", "0.23"]
env:
PYTHON: ${{ matrix.python }}
PANDAS: ${{ matrix.pandas }}
Expand Down
16 changes: 16 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2026,6 +2026,22 @@ tasks:
run:
- conda-python-pandas

test-conda-python-3.6-pandas-0.23:
ci: circle
platform: linux
template: docker-tests/circle.linux.yml
params:
env:
PYTHON: 3.6
PANDAS: 0.23
build:
- conda-cpp
- conda-python
nocache:
- conda-python-pandas
run:
- conda-python-pandas

test-conda-python-3.7-dask-latest:
ci: circle
platform: linux
Expand Down
46 changes: 24 additions & 22 deletions python/pyarrow/pandas-shim.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

# pandas lazy-loading API shim that reduces API call and import overhead

import warnings


cdef class _PandasAPIShim(object):
"""
Lazy pandas importer that isolates usages of pandas APIs and avoids
Expand Down Expand Up @@ -55,39 +58,38 @@ cdef class _PandasAPIShim(object):
from distutils.version import LooseVersion
self._loose_version = LooseVersion(pd.__version__)

if self._loose_version < LooseVersion('0.23.0'):
self._have_pandas = False
if raise_:
raise ImportError(
"pyarrow requires pandas 0.23.0 or above, pandas {} is "
"installed".format(self._version)
)
else:
warnings.warn(
"pyarrow requires pandas 0.23.0 or above, pandas {} is "
"installed. Therefore, pandas-specific integration is not "
"used.".format(self._version), stacklevel=2)
return

self._compat_module = pdcompat
self._data_frame = pd.DataFrame
self._index = pd.Index
self._categorical_type = pd.Categorical
self._series = pd.Series
if self._loose_version >= LooseVersion('0.23.0'):
self._extension_array = pd.api.extensions.ExtensionArray
self._array_like_types = (
self._series, self._index, self._categorical_type,
self._extension_array)
self._extension_dtype = pd.api.extensions.ExtensionDtype
else:
self._extension_array = None
self._array_like_types = (
self._series, self._index, self._categorical_type)
self._extension_dtype = None
self._extension_array = pd.api.extensions.ExtensionArray
self._array_like_types = (
self._series, self._index, self._categorical_type,
self._extension_array)
self._extension_dtype = pd.api.extensions.ExtensionDtype
if self._loose_version >= LooseVersion('0.24.0'):
self._is_extension_array_dtype = \
pd.api.types.is_extension_array_dtype
else:
self._is_extension_array_dtype = None

if self._loose_version >= LooseVersion('0.20.0'):
from pandas.api.types import DatetimeTZDtype
self._types_api = pd.api.types
elif self._loose_version >= LooseVersion('0.19.0'):
from pandas.types.dtypes import DatetimeTZDtype
self._types_api = pd.api.types
else:
from pandas.types.dtypes import DatetimeTZDtype
self._types_api = pd.core.common

self._datetimetz_type = DatetimeTZDtype
self._types_api = pd.api.types
self._datetimetz_type = pd.api.types.DatetimeTZDtype
self._have_pandas = True

if self._loose_version > LooseVersion('0.25'):
Expand Down
13 changes: 8 additions & 5 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2675,8 +2675,8 @@ class A:

df = pd.DataFrame({'a': [A(), A()]})

expected_msg = 'Conversion failed for column a with type object'
with pytest.raises(ValueError, match=expected_msg):
msg = 'Conversion failed for column a with type object'
with pytest.raises(ValueError, match=msg):
pa.Table.from_pandas(df)

# period unsupported for pandas <= 0.25
Expand All @@ -2685,8 +2685,8 @@ class A:
'a': pd.period_range('2000-01-01', periods=20),
})

expected_msg = 'Conversion failed for column a with type period'
with pytest.raises(TypeError, match=expected_msg):
msg = 'Conversion failed for column a with type (period|object)'
with pytest.raises((TypeError, ValueError), match=msg):
pa.Table.from_pandas(df)


Expand Down Expand Up @@ -3560,7 +3560,7 @@ def test_array_protocol_pandas_extension_types(monkeypatch):
# ARROW-7022 - ensure protocol works for Period / Interval extension dtypes

if LooseVersion(pd.__version__) < '0.24.0':
pytest.skip(reason='Period/IntervalArray only introduced in 0.24')
pytest.skip('Period/IntervalArray only introduced in 0.24')

storage = pa.array([1, 2, 3], type=pa.int64())
expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
Expand Down Expand Up @@ -3654,6 +3654,9 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# converting extension type to linked pandas ExtensionDtype/Array
import pandas.core.internals as _int

if LooseVersion(pd.__version__) < "0.24.0":
pytest.skip("ExtensionDtype introduced in pandas 0.24")

storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
table = pa.table({'a': arr})
Expand Down

0 comments on commit fa60ea6

Please sign in to comment.