Skip to content

Commit

Permalink
[build] Make travis logs not as long (ray-project#4213)
Browse files Browse the repository at this point in the history
* clean it up

* Update .travis.yml

* Update .travis.yml

* update

* fix example

* suppress

* timeout

* print periodic progress

* Update suppress_output

* Update run_silent.sh

* Update suppress_output

* Update suppress_output

* manually do timeout

* sleep 300

* fix test

* Update run_silent.sh

* Update suppress_output

* Update .travis.yml
  • Loading branch information
ericl authored Mar 7, 2019
1 parent b9ea821 commit 437459f
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 50 deletions.
48 changes: 24 additions & 24 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ matrix:

# Test Bazel build
- rm -rf build
- ./ci/travis/install-bazel.sh
- ./ci/suppress_output ./ci/travis/install-bazel.sh
- bazel build ...

- os: linux
Expand Down Expand Up @@ -84,10 +84,10 @@ matrix:
- sudo apt-get update -qq
- sudo apt-get install -qq valgrind
install:
- ./ci/travis/install-bazel.sh
- ./ci/travis/install-dependencies.sh
- ./ci/suppress_output ./ci/travis/install-bazel.sh
- ./ci/suppress_output ./ci/travis/install-dependencies.sh
- export PATH="$HOME/miniconda/bin:$PATH"
- ./ci/travis/install-ray.sh
- ./ci/suppress_output ./ci/travis/install-ray.sh

script:
- bash src/ray/test/run_object_manager_valgrind.sh
Expand All @@ -98,11 +98,11 @@ matrix:
# - export RAY_REDIS_SERVER_VALGRIND=1

# # Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
- python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=10 python/ray/experimental/test/async_test.py
- python -m pytest -v --durations=10 python/ray/tests/test_mini.py
- python -m pytest -v --durations=10 python/ray/tests/test_array.py
- python -m pytest -v --durations=10 python/ray/tests/test_multi_node_2.py
- python -m pytest -v --durations=10 python/ray/tests/test_node_manager.py
- python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest --durations=5 python/ray/experimental/test/async_test.py
- python -m pytest --durations=5 python/ray/tests/test_mini.py
- python -m pytest --durations=5 python/ray/tests/test_array.py
- python -m pytest --durations=5 python/ray/tests/test_multi_node_2.py
- python -m pytest --durations=5 python/ray/tests/test_node_manager.py


# Build Linux wheels.
Expand Down Expand Up @@ -138,16 +138,16 @@ matrix:


install:
- ./ci/travis/install-bazel.sh
- ./ci/travis/install-dependencies.sh
- ./ci/suppress_output ./ci/travis/install-bazel.sh
- ./ci/suppress_output ./ci/travis/install-dependencies.sh
- export PATH="$HOME/miniconda/bin:$PATH"
- ./ci/travis/install-ray.sh
- ./ci/travis/install-cython-examples.sh
- ./ci/suppress_output ./ci/travis/install-ray.sh
- ./ci/suppress_output ./ci/travis/install-cython-examples.sh

- bash src/ray/test/run_gcs_tests.sh
- ./ci/suppress_output bash src/ray/test/run_gcs_tests.sh
# Raylet tests.
- bash src/ray/test/run_object_manager_tests.sh
- bazel test --build_tests_only --test_lang_filters=cc ... -c opt
- ./ci/suppress_output bash src/ray/test/run_object_manager_tests.sh
- ./ci/suppress_output bazel test --build_tests_only --test_lang_filters=cc ... -c opt


script:
Expand All @@ -159,20 +159,20 @@ script:
# - export PYTHONPATH="$PYTHONPATH:./ci/"

# ray tune tests
- python python/ray/tune/tests/test_dependency.py
- ./ci/suppress_output python python/ray/tune/tests/test_dependency.py
# `cluster_tests.py` runs on Jenkins, not Travis.
- python -m pytest -v --durations=30 --ignore=python/ray/tune/tests/test_cluster.py python/ray/tune/tests
- python -m pytest --durations=10 --ignore=python/ray/tune/tests/test_cluster.py python/ray/tune/tests

# ray rllib tests
- python python/ray/rllib/tests/test_catalog.py
- python python/ray/rllib/tests/test_filters.py
- python python/ray/rllib/tests/test_optimizers.py
- python python/ray/rllib/tests/test_evaluators.py
- python/ray/rllib/tests/run_silent.sh tests/test_catalog.py
- python/ray/rllib/tests/run_silent.sh tests/test_filters.py
- python/ray/rllib/tests/run_silent.sh tests/test_optimizers.py
- python/ray/rllib/tests/run_silent.sh tests/test_evaluators.py

# ray tests
# Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
- python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=10 python/ray/experimental/test/async_test.py
- python -m pytest -v --durations=30 python/ray/tests
- python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest --durations=5 python/ray/experimental/test/async_test.py
- python -m pytest --durations=10 python/ray/tests
deploy:
- provider: s3
access_key_id: AKIAJ2L7XDUSZVTXI5QA
Expand Down
21 changes: 11 additions & 10 deletions ci/jenkins_tests/run_multi_node_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ SHM_SIZE="20G"
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache)
SUPPRESS_OUTPUT=$ROOT_DIR/../suppress_output
echo "Using Docker image" $DOCKER_SHA

######################## RLLIB TESTS #################################
Expand All @@ -24,39 +25,39 @@ bash $ROOT_DIR/run_tune_tests.sh ${MEMORY_SIZE} ${SHM_SIZE} $DOCKER_SHA

######################## SGD TESTS #################################

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/test_sgd.py --num-iters=2 \
--batch-size=1 --strategy=simple

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/test_sgd.py --num-iters=2 \
--batch-size=1 --strategy=ps

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/test_save_and_restore.py --num-iters=2 \
--batch-size=1 --strategy=simple

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/test_save_and_restore.py --num-iters=2 \
--batch-size=1 --strategy=ps

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/mnist_example.py --num-iters=1 \
--num-workers=1 --devices-per-worker=1 --strategy=ps

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/mnist_example.py --num-iters=1 \
--num-workers=1 --devices-per-worker=1 --strategy=ps --tune

######################## RAY BACKEND TESTS #################################

python3 $ROOT_DIR/multi_node_docker_test.py \
$SUPPRESS_OUTPUT python3 $ROOT_DIR/multi_node_docker_test.py \
--docker-image=$DOCKER_SHA \
--num-nodes=5 \
--num-redis-shards=10 \
--test-script=/ray/ci/jenkins_tests/multi_node_tests/test_0.py

python3 $ROOT_DIR/multi_node_docker_test.py \
$SUPPRESS_OUTPUT python3 $ROOT_DIR/multi_node_docker_test.py \
--docker-image=$DOCKER_SHA \
--num-nodes=5 \
--num-redis-shards=5 \
Expand All @@ -65,15 +66,15 @@ python3 $ROOT_DIR/multi_node_docker_test.py \
--driver-locations=0,1,0,1,2,3,4 \
--test-script=/ray/ci/jenkins_tests/multi_node_tests/remove_driver_test.py

python3 $ROOT_DIR/multi_node_docker_test.py \
$SUPPRESS_OUTPUT python3 $ROOT_DIR/multi_node_docker_test.py \
--docker-image=$DOCKER_SHA \
--num-nodes=5 \
--num-redis-shards=2 \
--num-gpus=0,0,5,6,50 \
--num-drivers=100 \
--test-script=/ray/ci/jenkins_tests/multi_node_tests/many_drivers_test.py

python3 $ROOT_DIR/multi_node_docker_test.py \
$SUPPRESS_OUTPUT python3 $ROOT_DIR/multi_node_docker_test.py \
--docker-image=$DOCKER_SHA \
--num-nodes=1 \
--mem-size=60G \
Expand Down
33 changes: 17 additions & 16 deletions ci/jenkins_tests/run_tune_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SHM_SIZE=$2
DOCKER_SHA=$3

ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
SUPPRESS_OUTPUT=$ROOT_DIR/../suppress_output

if [ "$MEMORY_SIZE" == "" ]; then
MEMORY_SIZE="20G"
Expand All @@ -30,46 +31,46 @@ fi

echo "Using Docker image" $DOCKER_SHA

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
pytest /ray/python/ray/tune/tests/test_cluster.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tune_mnist_ray.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/pbt_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/hyperband_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/async_hyperband_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/logging_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/bayesopt_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/hyperopt_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e SIGOPT_KEY $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e SIGOPT_KEY $DOCKER_SHA \
python /ray/python/ray/tune/examples/sigopt_example.py \
--smoke-test

Expand All @@ -78,21 +79,21 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e SIGOPT_KEY $DO
# python3 /ray/python/ray/tune/examples/nevergrad_example.py \
# --smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tune_mnist_keras.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/mnist_pytorch.py --smoke-test --no-cuda

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/mnist_pytorch_trainable.py \
--smoke-test --no-cuda

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/genetic_example.py \
--smoke-test

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/skopt_example.py \
--smoke-test
34 changes: 34 additions & 0 deletions ci/suppress_output
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Run a command, suppressing output unless it hangs or crashes.
#
# Usage: suppress_output COMMAND [ARG...]
#
# The command's stdout and stderr are captured in a temporary file.
#   * On success nothing is printed, apart from the shell's `time` report,
#     which goes to this script's stderr.
#   * On failure the captured log is printed, followed by "FAILED <code>",
#     and the script exits with the command's exit status.
#   * While the command runs, a background watchdog prints a progress line
#     every 5 minutes and, after 2 hours, dumps the log and kills this script.

#######################################
# Print periodic output to avoid travis killing us, and enforce the 2h limit.
# Must be started in the background (`watchdog LOG &`): it installs a TERM
# trap and calls `exit`, both of which are only safe inside its own subshell.
# Arguments: $1 - path of the file the command's output is captured in
# Outputs:   progress lines; on timeout, the captured log and "TIMED OUT"
#######################################
watchdog() {
  local log=$1
  local minutes
  # Value of $! inherited from the parent shell; it is not ours to kill.
  local inherited_bg_pid=$!

  # Killing this subshell would otherwise leave its `sleep` running for up to
  # five more minutes, holding our stdout open and stalling any caller that
  # reads our output through a pipe. On TERM, take the pending sleep with us.
  trap 'if [[ -n "$!" && "$!" != "$inherited_bg_pid" ]]; then kill "$!" 2>/dev/null; fi; exit 0' TERM

  for ((minutes = 5; minutes <= 120; minutes += 5)); do
    # Sleep in the background and `wait` on it so the TERM trap can fire
    # immediately instead of after the sleep finishes.
    sleep 300 &
    wait "$!"
    echo "This command has been running for more than $minutes minutes..."
  done

  echo "Command timed out after 2h, dumping logs:"
  cat "$log"
  echo "TIMED OUT"
  # SIGKILL gives the main shell no chance to clean up, so remove the log here.
  rm -f -- "$log"
  # $$ is still the PID of the main script inside this subshell.
  # NOTE(review): only the shell is killed; the hung command itself is
  # presumably left running as an orphan -- confirm whether that matters in CI.
  kill -KILL "$$"
}

#######################################
# Run "$@" with its output captured, replaying it only if the command fails.
# Arguments: the command and its arguments, passed through unmodified
# Outputs:   on failure, the captured log followed by "FAILED <code>"
# Returns:   the command's exit status (1 if no log file could be created)
#######################################
run_suppressed() {
  local log watchdog_pid code

  log=$(mktemp) || {
    echo "suppress_output: could not create a temporary log file" >&2
    return 1
  }

  watchdog "$log" &
  watchdog_pid=$!

  # "$@" keeps every argument intact; flattening the arguments into a single
  # string and re-splitting it would break arguments containing whitespace or
  # glob characters.
  time "$@" >"$log" 2>&1
  code=$?

  # Stop the watchdog and reap it. The redirect on `wait` hides the shell's
  # "Terminated" job notice if the watchdog died before installing its trap.
  kill "$watchdog_pid" 2>/dev/null
  wait "$watchdog_pid" 2>/dev/null

  if [[ "$code" -ne 0 ]]; then
    cat "$log"
    echo "FAILED $code"
  fi

  rm -f -- "$log"
  return "$code"
}

# With no command there is nothing to run and the script simply succeeds.
if (($# > 0)); then
  run_suppressed "$@"
  exit "$?"
fi

0 comments on commit 437459f

Please sign in to comment.