forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_arrow.sh
executable file
·168 lines (142 loc) · 4.73 KB
/
build_arrow.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/bin/bash
#
# Builds Apache Arrow at a pinned commit and installs pyarrow into the ray
# Python package.
#
# Usage: build_arrow.sh [PYTHON_EXECUTABLE] [LANGUAGE]
#   PYTHON_EXECUTABLE  Interpreter used for the pyarrow build. Defaults to the
#                      first `python` (falling back to `python3`) on PATH.
#   LANGUAGE           Build language; defaults to "python". The value "java"
#                      additionally turns on the Plasma Java client.

# Trace every command so that build logs show exactly what was run.
set -x

# Cause the script to exit if a single command fails.
set -e

# Parent of the directory containing this script. `&&` (rather than `;`) makes
# a failed `cd` abort the script instead of silently yielding the caller's
# working directory.
TP_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)/../

# setup env
if [[ -z "${1:-}" ]]; then
  # `command -v` is the portable replacement for `which`. Fall back to
  # `python3` because many systems no longer ship a bare `python`.
  if ! PYTHON_EXECUTABLE=$(command -v python || command -v python3); then
    echo "Error: no python executable found on PATH." >&2
    exit 1
  fi
else
  PYTHON_EXECUTABLE=$1
fi
echo "Using Python executable $PYTHON_EXECUTABLE."

LANGUAGE="python"
if [[ -n "${2:-}" ]]; then
  LANGUAGE=$2
fi
echo "Build language is $LANGUAGE."

# Per-platform settings:
#   FLATBUFFERS_HOME  only pointed at the bundled flatbuffers on Linux.
#   PARALLEL          number of parallel make jobs, one per core.
unamestr="$(uname)"
case "$unamestr" in
  Linux)
    FLATBUFFERS_HOME=$TP_DIR/pkg/flatbuffers
    PARALLEL=$(nproc)
    ;;
  Darwin)
    FLATBUFFERS_HOME=""
    PARALLEL=$(sysctl -n hw.ncpu)
    echo "Platform is macosx."
    ;;
  *)
    echo "Unrecognized platform." >&2
    exit 1
    ;;
esac

# The PR for this commit is https://github.com/apache/arrow/pull/2953. We
# include the link here to make it easier to find the right commit because
# Arrow often rewrites git history and invalidates certain commits.
TARGET_COMMIT_ID=d48dce2cfebdbd044a8260d0a77f5fe3d89a4a2d
#######################################
# Clone Arrow if needed, check out TARGET_COMMIT_ID, build and install the
# C++ libraries, run the parquet build script, then build pyarrow and copy it
# into python/ray/pyarrow_files/.
# Globals:
#   TP_DIR, LANGUAGE, TARGET_COMMIT_ID, FLATBUFFERS_HOME, PARALLEL,
#   PYTHON_EXECUTABLE (read)
#   ARROW_HOME, BUILD_ARROW_PLASMA_JAVA_CLIENT, PYARROW_BUILD_LIB_DIR (written)
# Arguments:
#   None
# Returns:
#   Exits the script with status 1 if bison or flex is missing, or if no
#   pyarrow build output directory can be found.
#######################################
build_arrow() {
  local tool
  local lib_subdir

  echo "building arrow"

  # Make sure arrow will be built again when building ray for java later than python
  if [[ "$LANGUAGE" == "java" ]]; then
    rm -rf "$TP_DIR/build/arrow/cpp/build/CMakeCache.txt"
  fi

  if [[ ! -d "$TP_DIR/build/arrow" ]]; then
    git clone https://github.com/apache/arrow.git "$TP_DIR/build/arrow"
  fi

  for tool in bison flex; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "Error: $tool is not installed." >&2
      exit 1
    fi
  done

  pushd "$TP_DIR/build/arrow"
  git fetch origin master
  git checkout "$TARGET_COMMIT_ID"

  cd cpp
  mkdir -p build
  cd build

  BUILD_ARROW_PLASMA_JAVA_CLIENT=off
  if [[ "$LANGUAGE" == "java" ]]; then
    BUILD_ARROW_PLASMA_JAVA_CLIENT=on
  fi

  # Clean the build cache for arrow and parquet, in case the error of "Cannot find Parquet" occurs.
  # The glob is deliberately left outside the quotes so that it still expands.
  rm -rf "$TP_DIR"/build/arrow/python/build/temp*

  ARROW_HOME=$TP_DIR/pkg/arrow/cpp/build/cpp-install

  BOOST_ROOT="$TP_DIR/pkg/boost" \
  FLATBUFFERS_HOME="$FLATBUFFERS_HOME" \
  cmake -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_C_FLAGS="-g -O3" \
        -DCMAKE_CXX_FLAGS="-g -O3" \
        -DCMAKE_INSTALL_PREFIX="$ARROW_HOME" \
        -DARROW_BUILD_TESTS=off \
        -DARROW_HDFS=on \
        -DARROW_BOOST_USE_SHARED=off \
        -DPYTHON_EXECUTABLE:FILEPATH="$PYTHON_EXECUTABLE" \
        -DARROW_PYTHON=on \
        -DARROW_PLASMA=on \
        -DARROW_TENSORFLOW=on \
        -DARROW_JEMALLOC=off \
        -DARROW_WITH_BROTLI=off \
        -DARROW_WITH_LZ4=off \
        -DARROW_WITH_ZLIB=off \
        -DARROW_WITH_ZSTD=off \
        -DARROW_PLASMA_JAVA_CLIENT="$BUILD_ARROW_PLASMA_JAVA_CLIENT" \
        ..
  make VERBOSE=1 -j"$PARALLEL"
  make install

  if [[ -d "$ARROW_HOME/lib64" ]]; then
    # On CentOS, Arrow gets installed under lib64 instead of lib, so copy it for
    # now. TODO(rkn): A preferable solution would be to add both directories to
    # the PKG_CONFIG_PATH, but that didn't seem to work.
    #
    # Copy the *contents* of lib64 into lib. A plain `cp -r lib64 lib` nests
    # the copy as lib/lib64 whenever lib already exists (e.g. on a rebuild).
    mkdir -p "$ARROW_HOME/lib"
    cp -r "$ARROW_HOME/lib64/." "$ARROW_HOME/lib/"
  fi

  bash "$TP_DIR/scripts/build_parquet.sh"

  echo "installing pyarrow"
  cd "$TP_DIR/build/arrow/python"
  # We set PKG_CONFIG_PATH, which is important so that in cmake, pkg-config can
  # find plasma.
  PKG_CONFIG_PATH="$ARROW_HOME/lib/pkgconfig" \
  PYARROW_WITH_PLASMA=1 \
  PYARROW_WITH_TENSORFLOW=1 \
  PYARROW_BUNDLE_ARROW_CPP=1 \
  "$PYTHON_EXECUTABLE" setup.py build

  PKG_CONFIG_PATH="$ARROW_HOME/lib/pkgconfig" \
  PYARROW_WITH_PLASMA=1 \
  PYARROW_WITH_TENSORFLOW=1 \
  PYARROW_BUNDLE_ARROW_CPP=1 \
  PARQUET_HOME="$TP_DIR/pkg/arrow/cpp/build/cpp-install" \
  PYARROW_WITH_PARQUET=1 \
  "$PYTHON_EXECUTABLE" setup.py build_ext

  # Find the pyarrow directory that was just built and copy it to ray/python/ray/
  # so that pyarrow can be packaged along with ray.
  # The search runs from inside the build directory so that grep only sees the
  # relative entry names, never the components of the absolute path.
  pushd "$TP_DIR/build/arrow/python/build"
  lib_subdir=$(find ./ -maxdepth 1 -type d -print | grep -m1 'lib') || {
    echo "Error: no pyarrow build lib directory found in $TP_DIR/build/arrow/python/build." >&2
    exit 1
  }
  PYARROW_BUILD_LIB_DIR="$TP_DIR/build/arrow/python/build/$lib_subdir"
  popd

  echo "copying pyarrow files from $PYARROW_BUILD_LIB_DIR/pyarrow"
  # Create the destination first so the copy does not depend on it pre-existing.
  mkdir -p "$TP_DIR/../python/ray/pyarrow_files"
  cp -r "$PYARROW_BUILD_LIB_DIR/pyarrow" "$TP_DIR/../python/ray/pyarrow_files/"

  popd
}
# Download and compile arrow if it isn't already present or the commit-id mismatches.
#
# A full build is triggered when any of the following holds:
#   * pyarrow has not been copied into the ray package yet;
#   * the Arrow checkout itself is missing (otherwise the `pushd` below would
#     abort the script under `set -e`);
#   * a java build is requested and the Plasma Java client library has not been
#     built. The shared library is named libplasma_java.dylib on macOS and
#     libplasma_java.so on Linux, so either name counts as present.
if [[ ! -d "$TP_DIR/../python/ray/pyarrow_files/pyarrow" ]] || \
   [[ ! -d "$TP_DIR/build/arrow" ]] || \
   [[ "$LANGUAGE" == "java" \
      && ! -f "$TP_DIR/build/arrow/cpp/build/release/libplasma_java.dylib" \
      && ! -f "$TP_DIR/build/arrow/cpp/build/release/libplasma_java.so" ]]; then
  build_arrow
else
  REBUILD=off
  pushd "$TP_DIR/build/arrow"
  if [[ "$TARGET_COMMIT_ID" != "$(git rev-parse HEAD)" ]]; then
    # TARGET_COMMIT_ID may change to later commit.
    echo "Commit ID mismatches."
    git fetch origin master
    git checkout "$TARGET_COMMIT_ID"
    REBUILD=on
  fi
  popd

  # Rebuild only after leaving the checkout directory, so build_arrow starts
  # from the same working directory as in the first-build branch above.
  if [[ "$REBUILD" == "on" ]]; then
    build_arrow
  fi
fi