forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-16018: [Doc][Python] Run doctests on Python docstring examples …
…(--doctest-modules) A series of 3 PRs adds `doctest` functionality to ensure that docstring examples are actually correct (and keep being correct). - [x] Add `--doctest-module` - [x] Add `--doctest-cython` apache#13204 - [x] Create a CI job apache#13216 This PR can be tested with `pytest --doctest-modules python/pyarrow`. Closes apache#13199 from AlenkaF/ARROW-16018 Lead-authored-by: Alenka Frim <[email protected]> Co-authored-by: Alenka Frim <[email protected]> Signed-off-by: Joris Van den Bossche <[email protected]>
- Loading branch information
1 parent
fe2ce20
commit 3b92f02
Showing
8 changed files
with
365 additions
and
226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
import pytest | ||
from pyarrow import Codec | ||
|
||
# Names of the opt-in test groups (presumably toggled via command-line
# options elsewhere in this conftest -- TODO confirm against the option
# registration code, which is not visible here).
groups = (
    'brotli bz2 cython dataset hypothesis fastparquet gandiva gdb gzip '
    'hdfs large_memory lz4 memory_leak nopandas orc pandas parquet '
    'parquet_encryption plasma s3 snappy substrait tensorflow flight '
    'slow requires_testing_data zstd'
).split()
|
||
# Default enablement of each test group.  Compression-codec groups are
# probed via Codec.is_available(); 'gdb' and 'requires_testing_data'
# default to enabled; everything else starts disabled and is switched on
# by the import probes further down in this file.
_CODEC_GROUPS = {'brotli', 'bz2', 'gzip', 'lz4', 'snappy', 'zstd'}
_ENABLED_BY_DEFAULT = {'gdb', 'requires_testing_data'}

defaults = {}
# Insertion order intentionally mirrors the original literal.
for _name in ('brotli', 'bz2', 'cython', 'dataset', 'fastparquet',
              'flight', 'gandiva', 'gdb', 'gzip', 'hdfs', 'hypothesis',
              'large_memory', 'lz4', 'memory_leak', 'nopandas', 'orc',
              'pandas', 'parquet', 'parquet_encryption', 'plasma',
              'requires_testing_data', 's3', 'slow', 'snappy',
              'substrait', 'tensorflow', 'zstd'):
    if _name in _CODEC_GROUPS:
        defaults[_name] = Codec.is_available(_name)
    elif _name in _ENABLED_BY_DEFAULT:
        defaults[_name] = True
    else:
        defaults[_name] = False
|
||
import importlib


def _try_import(module_name):
    """Return True if *module_name* imports cleanly, False otherwise."""
    try:
        importlib.import_module(module_name)
        return True
    except ImportError:
        return False


# Flip each group's default on when its optional dependency is present.
# The probe order mirrors the original sequence of import attempts.
for _key, _module in [
    ('cython', 'cython'),
    ('fastparquet', 'fastparquet'),
    ('gandiva', 'pyarrow.gandiva'),
    ('dataset', 'pyarrow.dataset'),
    ('orc', 'pyarrow.orc'),
]:
    if _try_import(_module):
        defaults[_key] = True

# pandas is special: its absence enables the 'nopandas' group instead.
if _try_import('pandas'):
    defaults['pandas'] = True
else:
    defaults['nopandas'] = True

for _key, _module in [
    ('parquet', 'pyarrow.parquet'),
    ('parquet_encryption', 'pyarrow.parquet.encryption'),
    ('plasma', 'pyarrow.plasma'),
    ('tensorflow', 'tensorflow'),
    ('flight', 'pyarrow.flight'),
]:
    if _try_import(_module):
        defaults[_key] = True

# S3/HDFS support is signalled by the filesystem class being importable
# from pyarrow.fs.  AttributeError is tolerated alongside ImportError so a
# present-but-stripped pyarrow.fs behaves like the original
# `from pyarrow.fs import <Class>` (which raises ImportError either way).
for _key, _class in [('s3', 'S3FileSystem'), ('hdfs', 'HadoopFileSystem')]:
    try:
        getattr(importlib.import_module('pyarrow.fs'), _class)
        defaults[_key] = True
    except (ImportError, AttributeError):
        pass

if _try_import('pyarrow.substrait'):
    defaults['substrait'] = True
|
||
|
||
# Doctest should ignore files for the modules that are not built
def pytest_ignore_collect(path, config):
    """Skip collecting doctest files for pyarrow components that are not built.

    Only active under ``--doctest-modules``; outside doctest runs it returns
    None (no opinion) so other collection plugins are unaffected.

    Parameters
    ----------
    path : py.path.local or str
        Path of the file pytest is about to collect.
    config : pytest Config
        Used to detect whether ``--doctest-modules`` was passed.

    Returns
    -------
    bool or None
        True to ignore *path*, False to collect it, None when not in
        doctest mode.
    """
    if config.option.doctestmodules:
        # Normalize separators once so the substring checks below also
        # work on Windows, where str(path) uses backslashes.
        path_str = str(path).replace('\\', '/')

        # don't try to run doctests on the /tests directory
        if "/pyarrow/tests/" in path_str:
            return True

        # Optional submodules whose availability is tracked in `defaults`.
        doctest_groups = [
            'dataset',
            'orc',
            'parquet',
            'plasma',
            'flight',
            'substrait',
        ]

        # handle cuda, flight, etc
        for group in doctest_groups:
            if 'pyarrow/{}'.format(group) in path_str:
                if not defaults[group]:
                    return True

        if 'pyarrow/parquet/encryption' in path_str:
            if not defaults['parquet_encryption']:
                return True

        # cuda has no entry in `defaults`; probe the import directly.
        if 'pyarrow/cuda' in path_str:
            try:
                import pyarrow.cuda  # noqa
                return False
            except ImportError:
                return True

        # pyarrow/fs doctests require S3 support to be compiled in.
        if 'pyarrow/fs' in path_str:
            try:
                from pyarrow.fs import S3FileSystem  # noqa
                return False
            except ImportError:
                return True

        return False
|
||
|
||
# Save output files from doctest examples into temp dir
@pytest.fixture(autouse=True)
def _docdir(request):
    """Run each doctest inside a temporary working directory.

    Active only under ``--doctest-modules``: doctest examples that write
    output files then land in pytest's ``tmpdir`` instead of polluting the
    source tree.  Regular tests run unchanged.
    """
    # Not a doctest run: nothing to set up, but this is a yield-fixture,
    # so we still have to yield exactly once.
    if not request.config.option.doctestmodules:
        yield
        return

    # Resolve the tmpdir fixture dynamically and chdir into it for the
    # duration of the test only.
    tmpdir = request.getfixturevalue('tmpdir')
    with tmpdir.as_cwd():
        yield
Oops, something went wrong.