forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_arrow.sh
executable file
·164 lines (140 loc) · 4.77 KB
/
build_arrow.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/bin/bash
set -x
# Cause the script to exit if a single command fails.
set -e
TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)/../
# setup env
if [[ -z "$1" ]]; then
PYTHON_EXECUTABLE=`which python`
else
PYTHON_EXECUTABLE=$1
fi
echo "Using Python executable $PYTHON_EXECUTABLE."
unamestr="$(uname)"
if [[ "$unamestr" == "Linux" ]]; then
FLATBUFFERS_HOME=$TP_DIR/pkg/flatbuffers
else
FLATBUFFERS_HOME=""
fi
# Determine how many parallel jobs to use for make based on the number of cores
if [[ "$unamestr" == "Linux" ]]; then
PARALLEL=$(nproc)
elif [[ "$unamestr" == "Darwin" ]]; then
PARALLEL=$(sysctl -n hw.ncpu)
echo "Platform is macosx."
else
echo "Unrecognized platform."
exit 1
fi
# The PR for this commit is https://github.com/apache/arrow/pull/2482. We
# include the link here to make it easier to find the right commit because
# Arrow often rewrites git history and invalidates certain commits.
TARGET_COMMIT_ID=927bd34aaad875e82beca2584d5d777839fa8bb0
build_arrow() {
echo "building arrow"
# Make sure arrow will be built again when building ray for java later than python
if [[ "$RAY_BUILD_JAVA" == "YES" ]]; then
rm -rf $TP_DIR/build/arrow/cpp/build/CMakeCache.txt
fi
if [[ ! -d $TP_DIR/build/arrow/.git ]]; then
if [[ -d $TP_DIR/build/arrow ]]; then
rm -rf $TP_DIR/build/arrow
fi
git clone -q https://github.com/apache/arrow.git "$TP_DIR/build/arrow"
fi
if ! [ -x "$(command -v bison)" ]; then
echo 'Error: bison is not installed.' >&2
exit 1
fi
if ! [ -x "$(command -v flex)" ]; then
echo 'Error: flex is not installed.' >&2
exit 1
fi
pushd $TP_DIR/build/arrow
git fetch origin master
git checkout $TARGET_COMMIT_ID
cd cpp
if [ ! -d "build" ]; then
mkdir build
fi
cd build
BUILD_ARROW_PLASMA_JAVA_CLIENT=off
if [[ "$RAY_BUILD_JAVA" == "YES" ]]; then
BUILD_ARROW_PLASMA_JAVA_CLIENT=on
fi
# Clean the build cache for arrow and parquet, in case the error of "Cannot find Parquet" occurs.
rm -rf $TP_DIR/build/arrow/python/build/temp*
ARROW_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install
BOOST_ROOT=$TP_DIR/pkg/boost \
FLATBUFFERS_HOME=$FLATBUFFERS_HOME \
cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS="-g -O3" \
-DCMAKE_CXX_FLAGS="-g -O3" \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DARROW_BUILD_TESTS=off \
-DARROW_HDFS=on \
-DARROW_BOOST_USE_SHARED=off \
-DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \
-DARROW_PYTHON=on \
-DARROW_PLASMA=on \
-DARROW_TENSORFLOW=on \
-DARROW_JEMALLOC=off \
-DARROW_WITH_BROTLI=off \
-DARROW_WITH_LZ4=off \
-DARROW_WITH_ZLIB=off \
-DARROW_WITH_ZSTD=off \
-DARROW_PLASMA_JAVA_CLIENT=$BUILD_ARROW_PLASMA_JAVA_CLIENT \
..
make VERBOSE=1 -j$PARALLEL
make install
if [[ -d $ARROW_HOME/lib64 ]]; then
# On CentOS, Arrow gets installed under lib64 instead of lib, so copy it for
# now. TODO(rkn): A preferable solution would be to add both directories to
# the PKG_CONFIG_PATH, but that didn't seem to work.
cp -r $ARROW_HOME/lib64 $ARROW_HOME/lib
fi
bash "$TP_DIR/scripts/build_parquet.sh"
echo "installing pyarrow"
cd $TP_DIR/build/arrow/python
# We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can
# find plasma.
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
PYARROW_WITH_PLASMA=1 \
PYARROW_WITH_TENSORFLOW=1 \
PYARROW_BUNDLE_ARROW_CPP=1 \
$PYTHON_EXECUTABLE setup.py build
PKG_CONFIG_PATH=$ARROW_HOME/lib/pkgconfig \
PYARROW_WITH_PLASMA=1 \
PYARROW_WITH_TENSORFLOW=1 \
PYARROW_BUNDLE_ARROW_CPP=1 \
PARQUET_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install \
PYARROW_WITH_PARQUET=1 \
$PYTHON_EXECUTABLE setup.py build_ext
# Find the pyarrow directory that was just built and copy it to ray/python/ray/
# so that pyarrow can be packaged along with ray.
pushd $TP_DIR/build/arrow/python/build
PYARROW_BUILD_LIB_DIR="$TP_DIR/build/arrow/python/build/$(find ./ -maxdepth 1 -type d -print | grep -m1 'lib')"
popd
echo "copying pyarrow files from $PYARROW_BUILD_LIB_DIR/pyarrow"
cp -r $PYARROW_BUILD_LIB_DIR/pyarrow $TP_DIR/../python/ray/pyarrow_files/
popd
}
# Download and compile arrow if it isn't already present or the commit-id mismatches.
if [[ "$RAY_BUILD_PYTHON" == "YES" && ! -d $TP_DIR/../python/ray/pyarrow_files/pyarrow ]] || \
[[ "$RAY_BUILD_JAVA" == "YES" && ! -f $TP_DIR/build/arrow/cpp/build/release/libplasma_java.dylib ]]; then
build_arrow
else
REBUILD=off
pushd $TP_DIR/build/arrow
if [[ "$TARGET_COMMIT_ID" != `git rev-parse HEAD` ]]; then
# TARGET_COMMIT_ID may change to later commit.
echo "Commit ID mismatches."
git fetch origin master
git checkout $TARGET_COMMIT_ID
REBUILD=on
fi
popd
if [[ "$REBUILD" == "on" ]]; then
build_arrow
fi
fi