Skip to content

Commit

Permalink
ARROW-16340: [C++][Python] Move all Python related code into PyArrow (a…
Browse files Browse the repository at this point in the history
…pache#13311)

This PR moves the `src/arrow/python` directory into `pyarrow` and arranges for PyArrow to build it. The build on the Python side is done in two steps:

1. `_run_cmake_pyarrow_cpp()`, where the C++ part of pyarrow is built first (the part that was moved in this refactoring)
2. `_run_cmake()`, where pyarrow is built as before

No changes to the build process are needed on the user side to successfully build pyarrow after this refactoring. The tests for the PyArrow C++ code will, however, be moved into Cython; for now they can be run with:

```shell
>>> pushd python/build/dist/temp 
>>> ctest
```

Lead-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
AlenkaF and jorisvandenbossche authored Aug 26, 2022
1 parent 7e7b8e1 commit b832853
Show file tree
Hide file tree
Showing 103 changed files with 862 additions and 650 deletions.
11 changes: 11 additions & 0 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
set -ex

arrow_dir=${1}
test_dir=${1}/python/build/dist

export ARROW_SOURCE_DIR=${arrow_dir}
export ARROW_TEST_DATA=${arrow_dir}/testing/data
Expand Down Expand Up @@ -54,4 +55,14 @@ export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_S3

# Testing PyArrow C++
if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
pushd ${test_dir}
ctest \
--output-on-failure \
--parallel ${n_jobs} \
--timeout 300
popd
fi
# Testing PyArrow
pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_WITH_S3=${ARROW_S3}
export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
export ARROW_HOME=${build_dir}/install
# PyArrow build configuration
export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
# Set PyArrow version explicitly
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION}
export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_WITH_S3=${ARROW_S3}
export ARROW_HOME=/tmp/arrow-dist
# PyArrow build configuration
export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig

Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/FindArrowPython.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ find_package(Arrow ${find_package_arguments})

if(ARROW_FOUND)
arrow_find_package(ARROW_PYTHON
"${ARROW_HOME}"
"${PYARROW_CPP_HOME}"
arrow_python
arrow/python/api.h
ArrowPython
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/FindArrowPythonFlight.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ find_package(ArrowPython ${find_package_arguments})

if(ARROW_PYTHON_FOUND AND ARROW_FLIGHT_FOUND)
arrow_find_package(ARROW_PYTHON_FLIGHT
"${ARROW_HOME}"
"${PYARROW_CPP_HOME}"
arrow_python_flight
arrow/python/flight.h
ArrowPythonFlight
Expand Down
4 changes: 0 additions & 4 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -805,10 +805,6 @@ if(ARROW_ORC)
add_subdirectory(adapters/orc)
endif()

if(ARROW_PYTHON)
add_subdirectory(python)
endif()

if(ARROW_TENSORFLOW)
add_subdirectory(adapters/tensorflow)
endif()
4 changes: 0 additions & 4 deletions cpp/src/arrow/public_api_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@
#include "arrow/json/api.h" // IWYU pragma: keep
#endif

#ifdef ARROW_PYTHON
#include "arrow/python/api.h" // IWYU pragma: keep
#endif

#ifdef DCHECK
#error "DCHECK should not be visible from Arrow public headers."
#endif
Expand Down
208 changes: 0 additions & 208 deletions cpp/src/arrow/python/CMakeLists.txt

This file was deleted.

7 changes: 7 additions & 0 deletions dev/release/01-prepare-test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ def test_version_pre_tag
"+set(MLARROW_VERSION \"#{@release_version}\")"],
],
},
{
path: "python/pyarrow/src/CMakeLists.txt",
hunks: [
["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")",
"+set(ARROW_PYTHON_VERSION \"#{@release_version}\")"],
],
},
{
path: "python/setup.py",
hunks: [
Expand Down
7 changes: 7 additions & 0 deletions dev/release/post-11-bump-versions-test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,13 @@ def test_version_post_tag
"+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"],
],
},
{
path: "python/pyarrow/src/CMakeLists.txt",
hunks: [
["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")",
"+set(ARROW_PYTHON_VERSION \"#{@next_snapshot_version}\")"],
],
},
{
path: "python/setup.py",
hunks: [
Expand Down
4 changes: 0 additions & 4 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links
dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install
dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install
dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install
dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install
Expand Down
8 changes: 8 additions & 0 deletions dev/release/utils-prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ update_versions() {
git add setup.py
popd

pushd "${ARROW_DIR}/python/pyarrow/src"
sed -i.bak -E -e \
"s/^set\(ARROW_PYTHON_VERSION \".+\"\)/set(ARROW_PYTHON_VERSION \"${version}\")/" \
CMakeLists.txt
rm -f CMakeLists.txt.bak
git add CMakeLists.txt
popd

pushd "${ARROW_DIR}/r"
sed -i.bak -E -e \
"s/^Version: .+/Version: ${r_version}/" \
Expand Down
7 changes: 0 additions & 7 deletions dev/release/verify-apt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,6 @@ ruby -r gi -e "p GI.load('ArrowFlightSQL')"
echo "::endgroup::"


if [ "${have_python}" = "yes" ]; then
echo "::group::Test libarrow-python"
${APT_INSTALL} libarrow-python-dev=${package_version}
echo "::endgroup::"
fi


if [ "${have_plasma}" = "yes" ]; then
echo "::group::Test Plasma"
${APT_INSTALL} libplasma-glib-dev=${package_version}
Expand Down
6 changes: 0 additions & 6 deletions dev/release/verify-yum.sh
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,6 @@ if [ "${have_flight}" = "yes" ]; then
echo "::endgroup::"
fi

if [ "${have_python}" = "yes" ]; then
echo "::group::Test libarrow-python"
${install_command} --enablerepo=epel arrow-python-devel-${package_version}
echo "::endgroup::"
fi

echo "::group::Test Plasma"
if [ "${have_glib}" = "yes" ]; then
${install_command} --enablerepo=epel plasma-glib-devel-${package_version}
Expand Down
Loading

0 comments on commit b832853

Please sign in to comment.