Skip to content

Commit

Permalink
Split CI tests in half and run them in parallel (#7867)
Browse files Browse the repository at this point in the history
* Split and run tests in parallel

* Refactor tests
  • Loading branch information
yf225 authored and ezyang committed May 30, 2018
1 parent 8e6cd43 commit d102f9e
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 95 deletions.
3 changes: 3 additions & 0 deletions .jenkins/pytorch/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ if ([[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7.2*
export MAX_JOBS=`expr $(nproc) - 1`
fi

# Target only our CI GPU machine's CUDA arch to speed up the build
export TORCH_CUDA_ARCH_LIST=5.2

WERROR=1 python setup.py install

# Add the test binaries so that they won't be git clean'ed away
Expand Down
47 changes: 6 additions & 41 deletions .jenkins/pytorch/macos-build-test.sh
Original file line number Diff line number Diff line change
@@ -1,44 +1,9 @@
#!/bin/bash

COMPACT_JOB_NAME=pytorch-macos-10.13-py3-build-test
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-build* ]]; then
source "$(dirname "${BASH_SOURCE[0]}")/macos-build.sh"
fi

# Set up conda environment
curl https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o $PWD/miniconda3.sh
rm -rf $PWD/miniconda3
bash $PWD/miniconda3.sh -b -p $PWD/miniconda3
export PATH="$PWD/miniconda3/bin:$PATH"
source $PWD/miniconda3/bin/activate
conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja

git submodule update --init --recursive
export CMAKE_PREFIX_PATH=$PWD/miniconda3/

# Build and test PyTorch
export MACOSX_DEPLOYMENT_TARGET=10.9
export CXX=clang++
export CC=clang
# If we run too many parallel jobs, we will OOM
export MAX_JOBS=2
python setup.py install
echo "Ninja version: $(ninja --version)"
python test/run_test.py --verbose

# C++ API

# NB: Install outside of source directory (at the same level as the root
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
# But still clean it before we perform our own build.
#
CPP_BUILD="$PWD/../cpp-build"
rm -rf $CPP_BUILD
mkdir -p $CPP_BUILD
WERROR=1 VERBOSE=1 tools/cpp_build/build_all.sh "$CPP_BUILD"

python tools/download_mnist.py --quiet -d test/cpp/api/mnist

# Unfortunately it seems like the test can't load from miniconda3
# without these paths being set
export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib"
"$CPP_BUILD"/libtorch/bin/test_api
if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test* ]]; then
source "$(dirname "${BASH_SOURCE[0]}")/macos-test.sh"
fi
37 changes: 37 additions & 0 deletions .jenkins/pytorch/macos-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash

# Build job for the macOS 10.13 py3 CI: restores (or creates) a miniconda
# environment, builds PyTorch, and uploads the resulting torch binaries to
# S3 so that the test job(s) can download them instead of rebuilding.

COMPACT_JOB_NAME=pytorch-macos-10.13-py3-build-test
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

export PATH="/usr/local/bin:$PATH"

# Set up conda environment (kept under $HOME so it persists across CI runs)
export PYTORCH_ENV_DIR="${HOME}/pytorch-ci-env"
# If a local installation of conda doesn't exist, we download and install conda
if [ ! -d "${PYTORCH_ENV_DIR}/miniconda3" ]; then
  mkdir -p "${PYTORCH_ENV_DIR}"
  curl https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o "${PYTORCH_ENV_DIR}/miniconda3.sh"
  bash "${PYTORCH_ENV_DIR}/miniconda3.sh" -b -p "${PYTORCH_ENV_DIR}/miniconda3"
fi
export PATH="${PYTORCH_ENV_DIR}/miniconda3/bin:$PATH"
source "${PYTORCH_ENV_DIR}/miniconda3/bin/activate"
conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja
# Remove any torch left over from a previous run so the fresh build is the
# only one in site-packages.  '${VAR:?}' aborts if PYTORCH_ENV_DIR were ever
# empty, so this can never expand to 'rm -rf /miniconda3/...'.
rm -rf "${PYTORCH_ENV_DIR:?}/miniconda3/lib/python3.6/site-packages/torch"

git submodule update --init --recursive
export CMAKE_PREFIX_PATH=${PYTORCH_ENV_DIR}/miniconda3/

# Build PyTorch
export MACOSX_DEPLOYMENT_TARGET=10.9
export CXX=clang++
export CC=clang
# If we run too many parallel jobs, we will OOM
export MAX_JOBS=2

# Tag uniquely identifying this build's artifacts in S3.
export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}

python setup.py install

# Upload torch binaries when the build job is finished
7z a "${IMAGE_COMMIT_TAG}.7z" "${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch"
aws s3 cp "${IMAGE_COMMIT_TAG}.7z" "s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z" --acl public-read
73 changes: 73 additions & 0 deletions .jenkins/pytorch/macos-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash

# Test job for the macOS 10.13 py3 CI: restores the conda environment,
# downloads the torch binaries built by the matching build job from S3,
# and runs the test shard selected by ${JOB_BASE_NAME} (see end of file).

COMPACT_JOB_NAME=pytorch-macos-10.13-py3-build-test
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

export PATH="/usr/local/bin:$PATH"

# Set up conda environment (kept under $HOME so it persists across CI runs)
export PYTORCH_ENV_DIR="${HOME}/pytorch-ci-env"
# If a local installation of conda doesn't exist, we download and install conda
if [ ! -d "${PYTORCH_ENV_DIR}/miniconda3" ]; then
  mkdir -p "${PYTORCH_ENV_DIR}"
  curl https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o "${PYTORCH_ENV_DIR}/miniconda3.sh"
  bash "${PYTORCH_ENV_DIR}/miniconda3.sh" -b -p "${PYTORCH_ENV_DIR}/miniconda3"
fi
export PATH="${PYTORCH_ENV_DIR}/miniconda3/bin:$PATH"
source "${PYTORCH_ENV_DIR}/miniconda3/bin/activate"
conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja
# '${VAR:?}' aborts if PYTORCH_ENV_DIR were ever empty, so this can never
# expand to 'rm -rf /miniconda3/...'.
rm -rf "${PYTORCH_ENV_DIR:?}/miniconda3/lib/python3.6/site-packages/torch"

git submodule update --init --recursive
export CMAKE_PREFIX_PATH=${PYTORCH_ENV_DIR}/miniconda3/

# Toolchain setup.  NOTE(review): this job does not build the python package
# itself (binaries come from S3); these exports only affect the C++ build
# performed later in test_cpp_api.
export MACOSX_DEPLOYMENT_TARGET=10.9
export CXX=clang++
export CC=clang
# If we run too many parallel jobs, we will OOM
export MAX_JOBS=2

# Tag identifying the build artifacts produced by the matching build job.
export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID}

# Download torch binaries in the test jobs and unpack them into
# site-packages (clearing any stale install first).
rm -rf "${PYTORCH_ENV_DIR:?}/miniconda3/lib/python3.6/site-packages/torch"
aws s3 cp "s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z" "${IMAGE_COMMIT_TAG}.7z"
7z x "${IMAGE_COMMIT_TAG}.7z" -o"${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages"

# Run the complete python test suite against the unpacked torch install.
# The ninja version is logged first purely as a debugging aid.
test_python_all() {
  printf 'Ninja version: %s\n' "$(ninja --version)"
  python test/run_test.py --verbose
}

# Build and run the C++ API (libtorch) tests, including the MNIST-based
# test_api binary.
test_cpp_api() {
  # C++ API

  # NB: Install outside of source directory (at the same level as the root
  # pytorch folder) so that it doesn't get cleaned away prior to docker push.
  # But still clean it before we perform our own build.
  #
  CPP_BUILD="$PWD/../cpp-build"
  rm -rf "$CPP_BUILD"
  mkdir -p "$CPP_BUILD"
  WERROR=1 VERBOSE=1 tools/cpp_build/build_all.sh "$CPP_BUILD"

  python tools/download_mnist.py --quiet -d test/cpp/api/mnist

  # Unfortunately it seems like the test can't load from miniconda3
  # without these paths being set.
  # FIX: this script installs miniconda under ${PYTORCH_ENV_DIR}, not under
  # $PWD (that was the layout of the old combined build-test script), so
  # point the dynamic loader at the actual lib directory.
  export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:${PYTORCH_ENV_DIR}/miniconda3/lib"
  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${PYTORCH_ENV_DIR}/miniconda3/lib"
  "$CPP_BUILD"/libtorch/bin/test_api
}

# Shard selection by Jenkins job name: a standalone job (no JOB_BASE_NAME,
# or a name ending in "-test") runs everything; "-test1" and "-test2" are
# the two halves run in parallel by CI.
case "${JOB_BASE_NAME:-}" in
  ""|*-test)
    test_python_all
    test_cpp_api
    ;;
  *-test1)
    test_python_all
    ;;
  *-test2)
    test_cpp_api
    ;;
esac
101 changes: 65 additions & 36 deletions .jenkins/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,44 +34,73 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
(cd test && ! python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi

time python test/run_test.py --verbose
# Run only the torch.nn python test suite.  It is split out from the rest of
# the python tests so the two halves can run as parallel CI shards
# (see the JOB_BASE_NAME dispatch at the bottom of this file).
test_python_nn() {
time python test/run_test.py --include nn --verbose
}

# Test ATen
if [[ "$BUILD_ENVIRONMENT" != *asan* ]]; then
echo "Running ATen tests with pytorch lib"
TORCH_LIB_PATH=$(python -c "import site; print(site.getsitepackages()[0])")/torch/lib
# NB: the ATen test binaries don't have RPATH set, so it's necessary to
# put the dynamic libraries somewhere were the dynamic linker can find them.
# This is a bit of a hack.
ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
ls build/bin
aten/tools/run_tests.sh build/bin
fi
# Run every python test suite except torch.nn — the complement of
# test_python_nn, so the two together cover the full python test suite.
test_python_all_except_nn() {
time python test/run_test.py --exclude nn --verbose
}

rm -rf ninja
# Run the ATen C++ test binaries in build/bin against the libs of the
# installed pytorch package.  Skipped entirely under ASAN builds.
test_aten() {
  if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
    return 0
  fi
  echo "Running ATen tests with pytorch lib"
  TORCH_LIB_PATH=$(python -c "import site; print(site.getsitepackages()[0])")/torch/lib
  # NB: the ATen test binaries don't have RPATH set, so it's necessary to
  # put the dynamic libraries somewhere were the dynamic linker can find them.
  # This is a bit of a hack.
  ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
  ls build/bin
  aten/tools/run_tests.sh build/bin
}

echo "Installing torchvision at branch master"
rm -rf vision
# TODO: This git clone is bad, it means pushes to torchvision can break
# PyTorch CI
git clone https://github.com/pytorch/vision --quiet
pushd vision
# python setup.py install with a tqdm dependency is broken in the
# Travis Python nightly (but not in latest Python nightlies, so
# this should be a transient requirement...)
# See https://github.com/pytorch/pytorch/issues/7525
#time python setup.py install
pip install .
popd
# Install torchvision from its current GitHub master against the freshly
# built torch package.
test_torchvision() {
# NOTE(review): presumably clears a stale local 'ninja' artifact from an
# earlier step — confirm what creates it.
rm -rf ninja

echo "Installing torchvision at branch master"
rm -rf vision
# TODO: This git clone is bad, it means pushes to torchvision can break
# PyTorch CI
git clone https://github.com/pytorch/vision --quiet
pushd vision
# python setup.py install with a tqdm dependency is broken in the
# Travis Python nightly (but not in latest Python nightlies, so
# this should be a transient requirement...)
# See https://github.com/pytorch/pytorch/issues/7525
#time python setup.py install
pip install .
popd
}

# Run the libtorch (NO_PYTHON) JIT and C++ API test binaries.  Only active
# when the build job was configured with BUILD_TEST_LIBTORCH=1.
test_libtorch() {
  if [[ "$BUILD_TEST_LIBTORCH" != "1" ]]; then
    return 0
  fi
  echo "Testing libtorch with NO_PYTHON"
  CPP_BUILD="$PWD/../cpp-build"
  if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
    "$CPP_BUILD"/libtorch/bin/test_jit
  else
    # On CPU-only builds, restrict the JIT tests to the "[cpu]" tag.
    "$CPP_BUILD"/libtorch/bin/test_jit "[cpu]"
  fi
  python tools/download_mnist.py --quiet -d test/cpp/api/mnist
  OMP_NUM_THREADS=2 "$CPP_BUILD"/libtorch/bin/test_api
}

if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
echo "Testing libtorch with NO_PYTHON"
CPP_BUILD="$PWD/../cpp-build"
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
"$CPP_BUILD"/libtorch/bin/test_jit
else
"$CPP_BUILD"/libtorch/bin/test_jit "[cpu]"
fi
python tools/download_mnist.py --quiet -d test/cpp/api/mnist
OMP_NUM_THREADS=2 "$CPP_BUILD"/libtorch/bin/test_api
# Shard selection by Jenkins job name: a standalone job (no JOB_BASE_NAME,
# or a name ending in "-test") runs everything; "-test1" runs only the nn
# suite while "-test2" runs all remaining tests, so the two shards can run
# in parallel.
case "${JOB_BASE_NAME:-}" in
  ""|*-test)
    test_python_nn
    test_python_all_except_nn
    test_aten
    test_torchvision
    test_libtorch
    ;;
  *-test1)
    test_python_nn
    ;;
  *-test2)
    test_python_all_except_nn
    test_aten
    test_torchvision
    test_libtorch
    ;;
esac
1 change: 1 addition & 0 deletions .jenkins/pytorch/win-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ set CUDNN_LIB_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\l
set CUDA_TOOLKIT_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
:: Target only our CI GPU machine's CUDA arch to speed up the build
set TORCH_CUDA_ARCH_LIST=5.2
sccache --stop-server || set ERRORLEVEL=0
Expand Down
44 changes: 26 additions & 18 deletions .jenkins/pytorch/win-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,7 @@ except botocore.exceptions.ClientError as e:
EOL

cat >ci_scripts/delete_image.py << EOL
import os
import boto3
IMAGE_COMMIT_TAG = os.getenv('IMAGE_COMMIT_TAG')
session = boto3.session.Session()
s3 = session.resource('s3')
BUCKET_NAME = 'ossci-windows-build'
KEY = 'pytorch/'+IMAGE_COMMIT_TAG+'.7z'
s3.Object(BUCKET_NAME, KEY).delete()
EOL

cat >ci_scripts/test_pytorch.bat <<EOL
cat >ci_scripts/setup_pytorch_env.bat <<EOL
set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\curl-7.57.0-win64-mingw\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH%
Expand Down Expand Up @@ -78,8 +63,31 @@ cd test/
python ..\\ci_scripts\\download_image.py %IMAGE_COMMIT_TAG%.7z
7z x %IMAGE_COMMIT_TAG%.7z
python run_test.py --verbose && python ..\\ci_scripts\\delete_image.py
cd ..
EOL

# Generate the batch script for shard 1: only the torch.nn python tests.
# NB: the heredoc body becomes Windows cmd code — keep shell comments out of
# it; the unquoted EOL delimiter is fine because the body has no $-expansions.
cat >ci_scripts/test_python_nn.bat <<EOL
call ci_scripts/setup_pytorch_env.bat
cd test/ && python run_test.py --include nn --verbose && cd ..
EOL

# Generate the batch script for shard 2: all python tests except torch.nn.
cat >ci_scripts/test_python_all_except_nn.bat <<EOL
call ci_scripts/setup_pytorch_env.bat
cd test/ && python run_test.py --exclude nn --verbose && cd ..
EOL

ci_scripts/test_pytorch.bat && echo "TEST PASSED"
# Select which test shard(s) to run based on the Jenkins job name:
#   (unset/empty) or *-test -> run both halves sequentially (standalone job)
#   *-test1                 -> torch.nn tests only
#   *-test2                 -> everything except torch.nn
# Returns non-zero if any executed shard fails.
run_tests() {
  case "${JOB_BASE_NAME:-}" in
    ""|*-test)
      ci_scripts/test_python_nn.bat && ci_scripts/test_python_all_except_nn.bat
      ;;
    *-test1)
      ci_scripts/test_python_nn.bat
      ;;
    *-test2)
      ci_scripts/test_python_all_except_nn.bat
      ;;
  esac
}

run_tests && echo "TEST PASSED"

0 comments on commit d102f9e

Please sign in to comment.