From be4beb19c11072a211b9a91bea0ee445e0719dd6 Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Mon, 21 Aug 2017 17:49:35 -0700 Subject: [PATCH] Changes to build to fix creation of wheels. (#840) * Pass -DPYTHON_EXECUTABLE into cmake for arrow and for ray. * Add cython to setup.py install_requires. * Revert custom code for finding python in cmake. * Correctly find arrow on CentOS. * In cmake, don't find PythonLibs, just find PYTHON_INCLUDE_DIRS. * Fix typo. * Do not use boost shared libraries when building arrow. * Add six to the setup.py install_requires because it is needed by pyarrow. * Don't link numbuf against boost_system and boost_filesystem. * Compile boost when we are on Linux. * Make numbuf find the correct boost libraries. * Only use find_package Boost on Linux, suppress output when building boost. * Changes to wheel building scripts, install cython in mac script. * Compile flatbuffers ourselves on Linux and pass it in when compiling Arrow. * Clean up build_flatbuffers.sh and build_boost.sh scripts a little. * Install cython when building linux wheel. 
--- build.sh | 19 +++++----- cmake/Modules/FindNumPy.cmake | 6 ++-- python/build-wheel-macos.sh | 15 ++++---- python/build-wheel-manylinux1.sh | 4 +-- python/setup.py | 3 ++ src/common/cmake/Common.cmake | 25 ++----------- src/numbuf/CMakeLists.txt | 5 ++- src/numbuf/cmake/Modules/FindNumPy.cmake | 6 ++-- src/thirdparty/build_boost.sh | 21 +++++++++++ src/thirdparty/build_flatbuffers.sh | 27 ++++++++++++++ src/thirdparty/build_thirdparty.sh | 46 ++++++++++++++++++------ 11 files changed, 117 insertions(+), 60 deletions(-) create mode 100755 src/thirdparty/build_boost.sh create mode 100644 src/thirdparty/build_flatbuffers.sh diff --git a/build.sh b/build.sh index d9bc39f9d043..b5e5a2b707e8 100755 --- a/build.sh +++ b/build.sh @@ -32,9 +32,6 @@ popd bash "$ROOT_DIR/src/thirdparty/download_thirdparty.sh" bash "$ROOT_DIR/src/thirdparty/build_thirdparty.sh" $PYTHON_EXECUTABLE -# Get the directory of the Python executable. -PYTHON_EXECUTABLE_DIR=$(dirname $PYTHON_EXECUTABLE) - # Now build everything. pushd "$ROOT_DIR/python/ray/core" # We use these variables to set PKG_CONFIG_PATH, which is important so that @@ -42,17 +39,17 @@ pushd "$ROOT_DIR/python/ray/core" TP_DIR=$ROOT_DIR/src/thirdparty ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install if [[ "$VALGRIND" = "1" ]]; then - # Pass a slightly different path into this command so that cmake finds the - # right Python interpreter and libraries. - PATH=$PYTHON_EXECUTABLE_DIR:$PATH \ + BOOST_ROOT=$TP_DIR/boost \ PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \ - cmake -DCMAKE_BUILD_TYPE=Debug ../../.. + cmake -DCMAKE_BUILD_TYPE=Debug \ + -DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \ + ../../.. else - # Pass a slightly different path into this command so that cmake finds the - # right Python interpreter and libraries. - PATH=$PYTHON_EXECUTABLE_DIR:$PATH \ + BOOST_ROOT=$TP_DIR/boost \ PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \ - cmake -DCMAKE_BUILD_TYPE=Release ../../.. 
+ cmake -DCMAKE_BUILD_TYPE=Release \ + -DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \ + ../../.. fi make clean make -j${PARALLEL} diff --git a/cmake/Modules/FindNumPy.cmake b/cmake/Modules/FindNumPy.cmake index 0b9fb3e5ccee..037dbdb5fdbd 100644 --- a/cmake/Modules/FindNumPy.cmake +++ b/cmake/Modules/FindNumPy.cmake @@ -14,8 +14,8 @@ unset(NUMPY_VERSION) unset(NUMPY_INCLUDE_DIR) -if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND") - execute_process(COMMAND "${CUSTOM_PYTHON_EXECUTABLE}" "-c" +if(PYTHONINTERP_FOUND) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import numpy as n; print(n.__version__); print(n.get_include());" RESULT_VARIABLE __result OUTPUT_VARIABLE __output @@ -42,7 +42,7 @@ if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND endif() endif() else() - message(STATUS "To find NumPy Python executable is required to be found.") + message(STATUS "To find NumPy Python interpreter is required to be found.") endif() include(FindPackageHandleStandardArgs) diff --git a/python/build-wheel-macos.sh b/python/build-wheel-macos.sh index 14b1f0c29151..cce1e509f505 100755 --- a/python/build-wheel-macos.sh +++ b/python/build-wheel-macos.sh @@ -44,21 +44,20 @@ for ((i=0; i<${#PY_VERSIONS[@]}; ++i)); do curl $MACPYTHON_URL/$PY_VERSION/$PY_INST > $INST_PATH sudo installer -pkg $INST_PATH -target / - # Create a link from "python" to the actual Python executable so that the - # Python on the path that Ray finds is the correct version. - if [ ! -e $MACPYTHON_PY_PREFIX/$PY_MM/bin/python ]; then - ln -s $MACPYTHON_PY_PREFIX/$PY_MM/bin/python$PY_MM $MACPYTHON_PY_PREFIX/$PY_MM/bin/python - fi - PYTHON_EXE=$MACPYTHON_PY_PREFIX/$PY_MM/bin/python + PYTHON_EXE=$MACPYTHON_PY_PREFIX/$PY_MM/bin/python$PY_MM PIP_CMD="$(dirname $PYTHON_EXE)/pip$PY_MM" pushd python + # Install setuptools_scm because otherwise when building the wheel for + # Python 3.6, we see an error. 
+ $PIP_CMD install setuptools_scm # Fix the numpy version because this will be the oldest numpy version we can # support. - $PIP_CMD install numpy==1.10.4 + $PIP_CMD install numpy==1.10.4 cython # Install wheel to avoid the error "invalid command 'bdist_wheel'". $PIP_CMD install wheel - # Add the correct Python to the path and build the wheel. + # Add the correct Python to the path and build the wheel. This is only + # needed so that the installation finds the cython executable. PATH=$MACPYTHON_PY_PREFIX/$PY_MM/bin:$PATH $PYTHON_EXE setup.py bdist_wheel mv dist/*.whl ../.whl/ popd diff --git a/python/build-wheel-manylinux1.sh b/python/build-wheel-manylinux1.sh index 254916940e8d..e79b8ef4e1ba 100755 --- a/python/build-wheel-manylinux1.sh +++ b/python/build-wheel-manylinux1.sh @@ -2,7 +2,7 @@ cat << EOF > "/usr/bin/nproc" #!/bin/bash -echo 1 +echo 10 EOF chmod +x /usr/bin/nproc @@ -15,7 +15,7 @@ for PYTHON in cp27-cp27mu cp33-cp33m cp34-cp34m cp35-cp35m cp36-cp36m; do pushd python # Fix the numpy version because this will be the oldest numpy version we can # support. - /opt/python/${PYTHON}/bin/pip install numpy==1.10.4 + /opt/python/${PYTHON}/bin/pip install numpy==1.10.4 cython PATH=/opt/python/${PYTHON}/bin:$PATH /opt/python/${PYTHON}/bin/python setup.py bdist_wheel # In the future, run auditwheel here. mv dist/*.whl ../.whl/ diff --git a/python/setup.py b/python/setup.py index 372b2da06ab9..cf0f8cecd785 100644 --- a/python/setup.py +++ b/python/setup.py @@ -81,12 +81,15 @@ def has_ext_modules(self): # The BinaryDistribution argument triggers build_ext. distclass=BinaryDistribution, install_requires=["numpy", + "cython", "funcsigs", "click", "colorama", "psutil", "redis", "cloudpickle >= 0.2.2", + # The six module is required by pyarrow. 
+ "six >= 1.0.0", "flatbuffers"], entry_points={"console_scripts": ["ray=ray.scripts.scripts:main"]}, include_package_data=True, diff --git a/src/common/cmake/Common.cmake b/src/common/cmake/Common.cmake index 17c498eff700..63c03441a94b 100644 --- a/src/common/cmake/Common.cmake +++ b/src/common/cmake/Common.cmake @@ -31,33 +31,14 @@ include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) set(CMAKE_C_FLAGS "-g -Wall -Wextra -Werror=implicit-function-declaration -Wno-sign-compare -Wno-unused-parameter -Wno-type-limits -Wno-missing-field-initializers --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -std=c99") # Code for finding Python +find_package(PythonInterp REQUIRED) -message(STATUS "Trying custom approach for finding Python.") -# Start off by figuring out which Python executable to use. -find_program(CUSTOM_PYTHON_EXECUTABLE python) -message(STATUS "Found Python program: ${CUSTOM_PYTHON_EXECUTABLE}") -execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "import sys; print('python' + sys.version[0:3])" - OUTPUT_VARIABLE PYTHON_LIBRARY_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) -message(STATUS "PYTHON_LIBRARY_NAME: " ${PYTHON_LIBRARY_NAME}) # Now find the Python include directories. -execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_inc())" +execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_inc())" OUTPUT_VARIABLE PYTHON_INCLUDE_DIRS OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS}) -# If we found the Python libraries and the include directories, then continue -# on. If not, then try find_package as a last resort, but it probably won't -# work. -if(PYTHON_INCLUDE_DIRS) - message(STATUS "The custom approach for finding Python succeeded.") - SET(PYTHONLIBS_FOUND TRUE) -else() - message(WARNING "The custom approach for finding Python failed. 
Defaulting to find_package.") - find_package(PythonInterp REQUIRED) - find_package(PythonLibs ${PYTHON_VERSION_STRING} EXACT) - set(CUSTOM_PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) -endif() - -message(STATUS "Using CUSTOM_PYTHON_EXECUTABLE: " ${CUSTOM_PYTHON_EXECUTABLE}) +message(STATUS "Using PYTHON_EXECUTABLE: " ${PYTHON_EXECUTABLE}) message(STATUS "Using PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS}) # Common libraries diff --git a/src/numbuf/CMakeLists.txt b/src/numbuf/CMakeLists.txt index 239e2042c609..2666bf0a207b 100644 --- a/src/numbuf/CMakeLists.txt +++ b/src/numbuf/CMakeLists.txt @@ -57,7 +57,10 @@ add_library(numbuf SHARED if(APPLE) target_link_libraries(numbuf "-undefined dynamic_lookup" ${ARROW_LIB} ${ARROW_PYTHON_LIB} -lpthread) else() - target_link_libraries(numbuf -Wl,--whole-archive ${ARROW_LIB} -Wl,--no-whole-archive ${ARROW_PYTHON_LIB} -lpthread -lboost_system -lboost_filesystem) + set(Boost_USE_STATIC_LIBS ON) + find_package(Boost 1.60.0 COMPONENTS filesystem system) + message(STATUS "Using Boost_LIBRARIES: ${Boost_LIBRARIES}") + target_link_libraries(numbuf -Wl,--whole-archive ${ARROW_LIB} -Wl,--no-whole-archive ${ARROW_PYTHON_LIB} -lpthread ${Boost_LIBRARIES}) endif() if(HAS_PLASMA) diff --git a/src/numbuf/cmake/Modules/FindNumPy.cmake b/src/numbuf/cmake/Modules/FindNumPy.cmake index 0b9fb3e5ccee..037dbdb5fdbd 100644 --- a/src/numbuf/cmake/Modules/FindNumPy.cmake +++ b/src/numbuf/cmake/Modules/FindNumPy.cmake @@ -14,8 +14,8 @@ unset(NUMPY_VERSION) unset(NUMPY_INCLUDE_DIR) -if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND") - execute_process(COMMAND "${CUSTOM_PYTHON_EXECUTABLE}" "-c" +if(PYTHONINTERP_FOUND) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import numpy as n; print(n.__version__); print(n.get_include());" RESULT_VARIABLE __result OUTPUT_VARIABLE __output @@ -42,7 +42,7 @@ if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND endif() endif() else() - message(STATUS "To 
find NumPy Python executable is required to be found.") + message(STATUS "To find NumPy Python interpreter is required to be found.") endif() include(FindPackageHandleStandardArgs) diff --git a/src/thirdparty/build_boost.sh b/src/thirdparty/build_boost.sh new file mode 100755 index 000000000000..595d3cf742e5 --- /dev/null +++ b/src/thirdparty/build_boost.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -x + +# Cause the script to exit if a single command fails. +set -e + +TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) + +# Download and compile boost if it isn't already present. +if [ ! -d $TP_DIR/boost ]; then + wget --no-check-certificate http://downloads.sourceforge.net/project/boost/boost/1.60.0/boost_1_60_0.tar.gz -O $TP_DIR/boost_1_60_0.tar.gz + tar xf $TP_DIR/boost_1_60_0.tar.gz -C $TP_DIR/ + rm -rf $TP_DIR/boost_1_60_0.tar.gz + + # Compile boost. + pushd $TP_DIR/boost_1_60_0 + ./bootstrap.sh + ./bjam cxxflags=-fPIC cflags=-fPIC --prefix=$TP_DIR/boost --with-filesystem --with-system install > /dev/null + popd +fi diff --git a/src/thirdparty/build_flatbuffers.sh b/src/thirdparty/build_flatbuffers.sh new file mode 100644 index 000000000000..121cd9e8d73a --- /dev/null +++ b/src/thirdparty/build_flatbuffers.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -x + +# Cause the script to exit if a single command fails. +set -e + +TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) + +FLATBUFFERS_VERSION=1.7.1 + +# Download and compile flatbuffers if it isn't already present. +if [ ! -d $TP_DIR/flatbuffers ]; then + echo "building flatbuffers" + wget https://github.com/google/flatbuffers/archive/v$FLATBUFFERS_VERSION.tar.gz -O flatbuffers-$FLATBUFFERS_VERSION.tar.gz + tar xf flatbuffers-$FLATBUFFERS_VERSION.tar.gz + rm -rf flatbuffers-$FLATBUFFERS_VERSION.tar.gz + + # Compile flatbuffers. 
+ pushd flatbuffers-$FLATBUFFERS_VERSION + cmake -DCMAKE_CXX_FLAGS=-fPIC \ + -DCMAKE_INSTALL_PREFIX:PATH=$TP_DIR/flatbuffers \ + -DFLATBUFFERS_BUILD_TESTS=OFF + make -j5 + make install + popd +fi diff --git a/src/thirdparty/build_thirdparty.sh b/src/thirdparty/build_thirdparty.sh index b60207b9a043..702825516908 100755 --- a/src/thirdparty/build_thirdparty.sh +++ b/src/thirdparty/build_thirdparty.sh @@ -26,24 +26,38 @@ else exit 1 fi +# If we're on Linux, then compile boost. This installs boost to $TP_DIR/boost. +if [[ "$unamestr" == "Linux" ]]; then + echo "building boost" + bash "$TP_DIR/build_boost.sh" +fi + +# If we're on Linux, then compile flatbuffers. This installs flatbuffers to +# $TP_DIR/flatbuffers. +if [[ "$unamestr" == "Linux" ]]; then + echo "building flatbuffers" + bash "$TP_DIR/build_flatbuffers.sh" + FLATBUFFERS_HOME=$TP_DIR/flatbuffers +else + FLATBUFFERS_HOME="" +fi + echo "building arrow" cd $TP_DIR/arrow/cpp mkdir -p $TP_DIR/arrow/cpp/build cd $TP_DIR/arrow/cpp/build -export ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install - -# Get the directory of the Python executable. -PYTHON_EXECUTABLE_DIR=$(dirname $PYTHON_EXECUTABLE) +ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install -# Pass a slightly different path into this command so that cmake finds the right -# Python interpreter and libraries. -PATH=$PYTHON_EXECUTABLE_DIR:$PATH \ +BOOST_ROOT=$TP_DIR/boost \ +FLATBUFFERS_HOME=$FLATBUFFERS_HOME \ cmake -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_FLAGS="-g -O3" \ -DCMAKE_CXX_FLAGS="-g -O3" \ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DARROW_BUILD_TESTS=off \ -DARROW_HDFS=on \ + -DARROW_BOOST_USE_SHARED=off \ + -DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \ -DARROW_PYTHON=on \ -DARROW_PLASMA=on \ -DPLASMA_PYTHON=on \ @@ -57,13 +71,25 @@ cmake -DCMAKE_BUILD_TYPE=Release \ make VERBOSE=1 -j$PARALLEL make install +if [[ -d $ARROW_HOME/lib64 ]]; then + # On CentOS, Arrow gets installed under lib64 instead of lib, so copy it for + # now. 
TODO(rkn): A preferable solution would be to add both directories to + # the PKG_CONFIG_PATH, but that didn't seem to work. + cp -r $ARROW_HOME/lib64 $ARROW_HOME/lib +fi + echo "installing pyarrow" cd $TP_DIR/arrow/python # We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can # find plasma. -ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install -PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build -PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build_ext +PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \ +PYARROW_WITH_PLASMA=1 \ +PYARROW_BUNDLE_ARROW_CPP=1 \ +$PYTHON_EXECUTABLE setup.py build +PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \ +PYARROW_WITH_PLASMA=1 \ +PYARROW_BUNDLE_ARROW_CPP=1 \ +$PYTHON_EXECUTABLE setup.py build_ext # Find the pyarrow directory that was just built and copy it to ray/python/ray/ # so that pyarrow can be packaged along with ray. TODO(rkn): This doesn't seem # very robust. Fix this.