Skip to content

Commit

Permalink
ARROW-7717: [CI] Have nightly integration test for Spark's latest release
Browse files Browse the repository at this point in the history

Closes apache#6316 from kszucs/spark-2

Authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Krisztián Szűcs <[email protected]>
  • Loading branch information
kszucs committed Jul 2, 2020
1 parent bcbb3e2 commit 0d789ac
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 8 deletions.
5 changes: 3 additions & 2 deletions ci/docker/conda-python-spark.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ RUN conda install -q \
# installing specific version of spark
ARG spark=master
COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark /arrow/ci/etc
RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark

# build cpp with tests
ENV CC=gcc \
CXX=g++ \
ARROW_PYTHON=ON \
ARROW_HDFS=ON \
ARROW_BUILD_TESTS=OFF
ARROW_BUILD_TESTS=OFF \
SPARK_VERSION=${spark}
9 changes: 4 additions & 5 deletions ci/scripts/install_spark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@

set -e

if [ "$#" -ne 3 ]; then
echo "Usage: $0 <spark version> <target directory> <patches directory>"
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <spark version> <target directory>"
exit 1
fi

spark=$1
target=$2
patches=${3}

mkdir "${target}"
wget -q -O - https://github.com/apache/spark/archive/${spark}.tar.gz | tar -xzf - --strip-components=1 -C "${target}"
git clone https://github.com/apache/spark "${target}"
git -C "${target}" checkout "${spark}"
6 changes: 6 additions & 0 deletions ci/scripts/integration_spark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ set -eu

source_dir=${1}
spark_dir=${2}
spark_version=${SPARK_VERSION:-master}

if [ "${SPARK_VERSION:0:2}" == "2." ]; then
# https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x
export ARROW_PRE_0_15_IPC_FORMAT=1
fi

pushd ${source_dir}/java
arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
Expand Down
12 changes: 11 additions & 1 deletion dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1917,12 +1917,22 @@ tasks:
HDFS: 2.9.2
run: conda-python-hdfs

test-conda-python-3.7-spark-master:
test-conda-python-3.7-spark-branch-3.0:
ci: github
template: docker-tests/github.linux.yml
params:
env:
PYTHON: 3.7
SPARK: "branch-3.0"
# use the branch-3.0 branch of spark, to prevent reusing any layers
run: --no-leaf-cache conda-python-spark

test-conda-python-3.8-spark-master:
ci: github
template: docker-tests/github.linux.yml
params:
env:
PYTHON: 3.8
SPARK: master
# use the master branch of spark, to prevent reusing any layers
run: --no-leaf-cache conda-python-spark
Expand Down

0 comments on commit 0d789ac

Please sign in to comment.