Skip to content

Commit

Permalink
Changes to build to fix creation of wheels. (ray-project#840)
Browse files Browse the repository at this point in the history
* Pass -DPYTHON_EXECUTABLE into cmake for arrow and for ray.

* Add cython to setup.py install_requires.

* Revert custom code for finding python in cmake.

* Correctly find arrow on CentOS.

* In cmake, don't find PythonLibs, just find PYTHON_INCLUDE_DIRS.

* Fix typo.

* Do not use boost shared libraries when building arrow.

* Add six to the setup.py install_requires because it is needed by pyarrow.

* Don't link numbuf against boost_system and boost_filesystem.

* Compile boost when we are on Linux.

* Make numbuf find the correct boost libraries.

* Only use find_package Boost on Linux, suppress output when building boost.

* Changes to wheel building scripts, install cython in mac script.

* Compile flatbuffers ourselves on Linux and pass it in when compiling Arrow.

* Clean up build_flatbuffers.sh and build_boost.sh scripts a little.

* Install cython when building linux wheel.
  • Loading branch information
robertnishihara authored and pcmoritz committed Aug 22, 2017
1 parent af71f96 commit be4beb1
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 60 deletions.
19 changes: 8 additions & 11 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,24 @@ popd
bash "$ROOT_DIR/src/thirdparty/download_thirdparty.sh"
bash "$ROOT_DIR/src/thirdparty/build_thirdparty.sh" $PYTHON_EXECUTABLE

# Get the directory of the Python executable.
PYTHON_EXECUTABLE_DIR=$(dirname $PYTHON_EXECUTABLE)

# Now build everything.
pushd "$ROOT_DIR/python/ray/core"
# We use these variables to set PKG_CONFIG_PATH, which is important so that
# in cmake, pkg-config can find plasma.
TP_DIR=$ROOT_DIR/src/thirdparty
ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install
if [[ "$VALGRIND" = "1" ]]; then
# Pass a slightly different path into this command so that cmake finds the
# right Python interpreter and libraries.
PATH=$PYTHON_EXECUTABLE_DIR:$PATH \
BOOST_ROOT=$TP_DIR/boost \
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
cmake -DCMAKE_BUILD_TYPE=Debug ../../..
cmake -DCMAKE_BUILD_TYPE=Debug \
-DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \
../../..
else
# Pass a slightly different path into this command so that cmake finds the
# right Python interpreter and libraries.
PATH=$PYTHON_EXECUTABLE_DIR:$PATH \
BOOST_ROOT=$TP_DIR/boost \
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
cmake -DCMAKE_BUILD_TYPE=Release ../../..
cmake -DCMAKE_BUILD_TYPE=Release \
-DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \
../../..
fi
make clean
make -j${PARALLEL}
Expand Down
6 changes: 3 additions & 3 deletions cmake/Modules/FindNumPy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)

if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND")
execute_process(COMMAND "${CUSTOM_PYTHON_EXECUTABLE}" "-c"
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
Expand All @@ -42,7 +42,7 @@ if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND
endif()
endif()
else()
message(STATUS "To find NumPy Python executable is required to be found.")
message(STATUS "To find NumPy Python interpreter is required to be found.")
endif()

include(FindPackageHandleStandardArgs)
Expand Down
15 changes: 7 additions & 8 deletions python/build-wheel-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,20 @@ for ((i=0; i<${#PY_VERSIONS[@]}; ++i)); do
curl $MACPYTHON_URL/$PY_VERSION/$PY_INST > $INST_PATH
sudo installer -pkg $INST_PATH -target /

# Create a link from "python" to the actual Python executable so that the
# Python on the path that Ray finds is the correct version.
if [ ! -e $MACPYTHON_PY_PREFIX/$PY_MM/bin/python ]; then
ln -s $MACPYTHON_PY_PREFIX/$PY_MM/bin/python$PY_MM $MACPYTHON_PY_PREFIX/$PY_MM/bin/python
fi
PYTHON_EXE=$MACPYTHON_PY_PREFIX/$PY_MM/bin/python
PYTHON_EXE=$MACPYTHON_PY_PREFIX/$PY_MM/bin/python$PY_MM
PIP_CMD="$(dirname $PYTHON_EXE)/pip$PY_MM"

pushd python
# Install setuptools_scm because otherwise when building the wheel for
# Python 3.6, we see an error.
$PIP_CMD install setuptools_scm
# Fix the numpy version because this will be the oldest numpy version we can
# support.
$PIP_CMD install numpy==1.10.4
$PIP_CMD install numpy==1.10.4 cython
# Install wheel to avoid the error "invalid command 'bdist_wheel'".
$PIP_CMD install wheel
# Add the correct Python to the path and build the wheel.
# Add the correct Python to the path and build the wheel. This is only
# needed so that the installation finds the cython executable.
PATH=$MACPYTHON_PY_PREFIX/$PY_MM/bin:$PATH $PYTHON_EXE setup.py bdist_wheel
mv dist/*.whl ../.whl/
popd
Expand Down
4 changes: 2 additions & 2 deletions python/build-wheel-manylinux1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

cat << EOF > "/usr/bin/nproc"
#!/bin/bash
echo 1
echo 10
EOF
chmod +x /usr/bin/nproc

Expand All @@ -15,7 +15,7 @@ for PYTHON in cp27-cp27mu cp33-cp33m cp34-cp34m cp35-cp35m cp36-cp36m; do
pushd python
# Fix the numpy version because this will be the oldest numpy version we can
# support.
/opt/python/${PYTHON}/bin/pip install numpy==1.10.4
/opt/python/${PYTHON}/bin/pip install numpy==1.10.4 cython
PATH=/opt/python/${PYTHON}/bin:$PATH /opt/python/${PYTHON}/bin/python setup.py bdist_wheel
# In the future, run auditwheel here.
mv dist/*.whl ../.whl/
Expand Down
3 changes: 3 additions & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,15 @@ def has_ext_modules(self):
# The BinaryDistribution argument triggers build_ext.
distclass=BinaryDistribution,
install_requires=["numpy",
"cython",
"funcsigs",
"click",
"colorama",
"psutil",
"redis",
"cloudpickle >= 0.2.2",
# The six module is required by pyarrow.
"six >= 1.0.0",
"flatbuffers"],
entry_points={"console_scripts": ["ray=ray.scripts.scripts:main"]},
include_package_data=True,
Expand Down
25 changes: 3 additions & 22 deletions src/common/cmake/Common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,14 @@ include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
set(CMAKE_C_FLAGS "-g -Wall -Wextra -Werror=implicit-function-declaration -Wno-sign-compare -Wno-unused-parameter -Wno-type-limits -Wno-missing-field-initializers --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -std=c99")

# Code for finding Python
find_package(PythonInterp REQUIRED)

message(STATUS "Trying custom approach for finding Python.")
# Start off by figuring out which Python executable to use.
find_program(CUSTOM_PYTHON_EXECUTABLE python)
message(STATUS "Found Python program: ${CUSTOM_PYTHON_EXECUTABLE}")
execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "import sys; print('python' + sys.version[0:3])"
OUTPUT_VARIABLE PYTHON_LIBRARY_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "PYTHON_LIBRARY_NAME: " ${PYTHON_LIBRARY_NAME})
# Now find the Python include directories.
execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_inc())"
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_inc())"
OUTPUT_VARIABLE PYTHON_INCLUDE_DIRS OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS})

# If we found the Python libraries and the include directories, then continue
# on. If not, then try find_package as a last resort, but it probably won't
# work.
if(PYTHON_INCLUDE_DIRS)
message(STATUS "The custom approach for finding Python succeeded.")
SET(PYTHONLIBS_FOUND TRUE)
else()
message(WARNING "The custom approach for finding Python failed. Defaulting to find_package.")
find_package(PythonInterp REQUIRED)
find_package(PythonLibs ${PYTHON_VERSION_STRING} EXACT)
set(CUSTOM_PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
endif()

message(STATUS "Using CUSTOM_PYTHON_EXECUTABLE: " ${CUSTOM_PYTHON_EXECUTABLE})
message(STATUS "Using PYTHON_EXECUTABLE: " ${PYTHON_EXECUTABLE})
message(STATUS "Using PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS})

# Common libraries
Expand Down
5 changes: 4 additions & 1 deletion src/numbuf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ add_library(numbuf SHARED
if(APPLE)
target_link_libraries(numbuf "-undefined dynamic_lookup" ${ARROW_LIB} ${ARROW_PYTHON_LIB} -lpthread)
else()
target_link_libraries(numbuf -Wl,--whole-archive ${ARROW_LIB} -Wl,--no-whole-archive ${ARROW_PYTHON_LIB} -lpthread -lboost_system -lboost_filesystem)
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost 1.60.0 COMPONENTS filesystem system)
message(STATUS "Using Boost_LIBRARIES: ${Boost_LIBRARIES}")
target_link_libraries(numbuf -Wl,--whole-archive ${ARROW_LIB} -Wl,--no-whole-archive ${ARROW_PYTHON_LIB} -lpthread ${Boost_LIBRARIES})
endif()

if(HAS_PLASMA)
Expand Down
6 changes: 3 additions & 3 deletions src/numbuf/cmake/Modules/FindNumPy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)

if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND")
execute_process(COMMAND "${CUSTOM_PYTHON_EXECUTABLE}" "-c"
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
Expand All @@ -42,7 +42,7 @@ if(NOT "${CUSTOM_PYTHON_EXECUTABLE}" STREQUAL "CUSTOM_PYTHON_EXECUTABLE-NOTFOUND
endif()
endif()
else()
message(STATUS "To find NumPy Python executable is required to be found.")
message(STATUS "To find NumPy Python interpreter is required to be found.")
endif()

include(FindPackageHandleStandardArgs)
Expand Down
21 changes: 21 additions & 0 deletions src/thirdparty/build_boost.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash

# Downloads the Boost 1.60.0 sources and builds the filesystem and system
# libraries with -fPIC, installing them under "boost" in the directory that
# contains this script. Does nothing if that install directory already exists.

# Print each command before it is executed.
set -x

# Cause the script to exit if a single command fails.
set -e

# Absolute path of the directory containing this script.
TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

# Download and compile boost if it isn't already present. All path expansions
# are quoted so that a checkout location containing whitespace does not break
# the test or the commands below.
if [ ! -d "$TP_DIR/boost" ]; then
  # NOTE(review): --no-check-certificate disables TLS certificate validation
  # for this download; confirm it is still required.
  wget --no-check-certificate http://downloads.sourceforge.net/project/boost/boost/1.60.0/boost_1_60_0.tar.gz -O "$TP_DIR/boost_1_60_0.tar.gz"
  tar xf "$TP_DIR/boost_1_60_0.tar.gz" -C "$TP_DIR/"
  rm -rf "$TP_DIR/boost_1_60_0.tar.gz"

  # Compile boost. The build output is discarded to keep the log short.
  pushd "$TP_DIR/boost_1_60_0"
    ./bootstrap.sh
    ./bjam cxxflags=-fPIC cflags=-fPIC --prefix="$TP_DIR/boost" --with-filesystem --with-system install > /dev/null
  popd
fi
27 changes: 27 additions & 0 deletions src/thirdparty/build_flatbuffers.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

# Downloads the flatbuffers sources for FLATBUFFERS_VERSION and builds them
# with -fPIC, installing to "flatbuffers" in the directory that contains this
# script. Does nothing if that install directory already exists.

# Print each command before it is executed.
set -x

# Cause the script to exit if a single command fails.
set -e

# Absolute path of the directory containing this script.
TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

FLATBUFFERS_VERSION=1.7.1

# Download and compile flatbuffers if it isn't already present.
if [ ! -d "$TP_DIR/flatbuffers" ]; then
  echo "building flatbuffers"
  # Download and unpack inside $TP_DIR rather than the caller's working
  # directory, so the script behaves the same wherever it is invoked from and
  # keeps its sources next to the install prefix.
  wget "https://github.com/google/flatbuffers/archive/v$FLATBUFFERS_VERSION.tar.gz" -O "$TP_DIR/flatbuffers-$FLATBUFFERS_VERSION.tar.gz"
  tar xf "$TP_DIR/flatbuffers-$FLATBUFFERS_VERSION.tar.gz" -C "$TP_DIR/"
  rm -rf "$TP_DIR/flatbuffers-$FLATBUFFERS_VERSION.tar.gz"

  # Compile flatbuffers. The source directory is passed explicitly ("."),
  # because running cmake without a source path is deprecated.
  pushd "$TP_DIR/flatbuffers-$FLATBUFFERS_VERSION"
    cmake -DCMAKE_CXX_FLAGS=-fPIC \
          -DCMAKE_INSTALL_PREFIX:PATH="$TP_DIR/flatbuffers" \
          -DFLATBUFFERS_BUILD_TESTS=OFF \
          .
    make -j5
    make install
  popd
fi
46 changes: 36 additions & 10 deletions src/thirdparty/build_thirdparty.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,38 @@ else
exit 1
fi

# If we're on Linux, then compile boost. This installs boost to $TP_DIR/boost.
if [[ "$unamestr" == "Linux" ]]; then
echo "building boost"
bash "$TP_DIR/build_boost.sh"
fi

# If we're on Linux, then compile flatbuffers. This installs flatbuffers to
# $TP_DIR/flatbuffers.
if [[ "$unamestr" == "Linux" ]]; then
echo "building flatbuffers"
bash "$TP_DIR/build_flatbuffers.sh"
FLATBUFFERS_HOME=$TP_DIR/flatbuffers
else
FLATBUFFERS_HOME=""
fi

echo "building arrow"
cd $TP_DIR/arrow/cpp
mkdir -p $TP_DIR/arrow/cpp/build
cd $TP_DIR/arrow/cpp/build
export ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install

# Get the directory of the Python executable.
PYTHON_EXECUTABLE_DIR=$(dirname $PYTHON_EXECUTABLE)
ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install

# Pass a slightly different path into this command so that cmake finds the right
# Python interpreter and libraries.
PATH=$PYTHON_EXECUTABLE_DIR:$PATH \
BOOST_ROOT=$TP_DIR/boost \
FLATBUFFERS_HOME=$FLATBUFFERS_HOME \
cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS="-g -O3" \
-DCMAKE_CXX_FLAGS="-g -O3" \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DARROW_BUILD_TESTS=off \
-DARROW_HDFS=on \
-DARROW_BOOST_USE_SHARED=off \
-DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \
-DARROW_PYTHON=on \
-DARROW_PLASMA=on \
-DPLASMA_PYTHON=on \
Expand All @@ -57,13 +71,25 @@ cmake -DCMAKE_BUILD_TYPE=Release \
make VERBOSE=1 -j$PARALLEL
make install

if [[ -d $ARROW_HOME/lib64 ]]; then
# On CentOS, Arrow gets installed under lib64 instead of lib, so copy it for
# now. TODO(rkn): A preferable solution would be to add both directories to
# the PKG_CONFIG_PATH, but that didn't seem to work.
cp -r $ARROW_HOME/lib64 $ARROW_HOME/lib
fi

echo "installing pyarrow"
cd $TP_DIR/arrow/python
# We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can
# find plasma.
ARROW_HOME=$TP_DIR/arrow/cpp/build/cpp-install
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig PYARROW_WITH_PLASMA=1 PYARROW_BUNDLE_ARROW_CPP=1 $PYTHON_EXECUTABLE setup.py build_ext
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
PYARROW_WITH_PLASMA=1 \
PYARROW_BUNDLE_ARROW_CPP=1 \
$PYTHON_EXECUTABLE setup.py build
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
PYARROW_WITH_PLASMA=1 \
PYARROW_BUNDLE_ARROW_CPP=1 \
$PYTHON_EXECUTABLE setup.py build_ext
# Find the pyarrow directory that was just built and copy it to ray/python/ray/
# so that pyarrow can be packaged along with ray. TODO(rkn): This doesn't seem
# very robust. Fix this.
Expand Down

0 comments on commit be4beb1

Please sign in to comment.