From 8fa6cf947d0b00095a0e35b3d4545239e340103e Mon Sep 17 00:00:00 2001 From: Aljoscha Krettek Date: Wed, 8 Nov 2017 18:57:22 +0100 Subject: [PATCH] [hotfix] Make end-to-end test scripts more robust This uses traps to ensure that we properly do cleanups, remove config values and shutdown things. --- test-infra/end-to-end-test/common.sh | 31 ++++++++++---- .../end-to-end-test/test_batch_wordcount.sh | 20 +-------- .../end-to-end-test/test_shaded_hadoop_s3a.sh | 42 +++++++------------ .../end-to-end-test/test_shaded_presto_s3.sh | 36 ++++++---------- .../test_streaming_classloader.sh | 22 +--------- .../test_streaming_kafka010.sh | 34 ++++++--------- tools/travis_mvn_watchdog.sh | 10 ++--- 7 files changed, 75 insertions(+), 120 deletions(-) diff --git a/test-infra/end-to-end-test/common.sh b/test-infra/end-to-end-test/common.sh index cc31c90a15914..35d1d7e7639ed 100644 --- a/test-infra/end-to-end-test/common.sh +++ b/test-infra/end-to-end-test/common.sh @@ -17,16 +17,28 @@ # limitations under the License. ################################################################################ -set -e set -o pipefail -export FLINK_DIR="$1" -export CLUSTER_MODE="$2" +if [[ -z $FLINK_DIR ]]; then + echo "FLINK_DIR needs to point to a Flink distribution directory" + exit 1 +fi +if [[ -z $CLUSTER_MODE ]]; then + echo "CLUSTER_MODE needs to be one of local or cluster." 
+ exit 1 +fi export PASS=1 echo "Flink dist directory: $FLINK_DIR" +TEST_ROOT=`pwd` +TEST_INFRA_DIR="$0" +TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"` +cd $TEST_INFRA_DIR +TEST_INFRA_DIR=`pwd` +cd $TEST_ROOT + # used to randomize created directories export TEST_DATA_DIR=$TEST_INFRA_DIR/temp-test-directory-$(date +%S%N) echo "TEST_DATA_DIR: $TEST_DATA_DIR" @@ -73,6 +85,7 @@ function stop_cluster { | grep -v "AskTimeoutException" \ | grep -v "WARN akka.remote.transport.netty.NettyTransport" \ | grep -v "WARN org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline" \ + | grep -v "jvm-exit-on-fatal-error" \ | grep -iq "error"; then echo "Found error in log files:" cat $FLINK_DIR/log/* @@ -132,10 +145,6 @@ function check_all_pass { echo "All tests PASS" } -function clean_data_dir { - rm -r $TEST_DATA_DIR -} - function s3_put { local_file=$1 bucket=$2 @@ -172,3 +181,11 @@ function s3_delete { -H "Authorization: AWS ${s3Key}:${signature}" \ https://${bucket}.s3.amazonaws.com/${s3_file} } + +# make sure to clean up even in case of failures +function cleanup { + stop_cluster + rm -r $TEST_DATA_DIR + check_all_pass +} +trap cleanup EXIT diff --git a/test-infra/end-to-end-test/test_batch_wordcount.sh b/test-infra/end-to-end-test/test_batch_wordcount.sh index dfde5c6b692cc..2c9a17597ca65 100755 --- a/test-infra/end-to-end-test/test_batch_wordcount.sh +++ b/test-infra/end-to-end-test/test_batch_wordcount.sh @@ -17,25 +17,9 @@ # limitations under the License. ################################################################################ - -set -e -set -o pipefail - -# Convert relative path to absolute path -TEST_ROOT=`pwd` -TEST_INFRA_DIR="$0" -TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"` -cd $TEST_INFRA_DIR -TEST_INFRA_DIR=`pwd` -cd $TEST_ROOT - -. 
"$TEST_INFRA_DIR"/common.sh +source "$(dirname "$0")"/common.sh start_cluster $FLINK_DIR/bin/flink run -p 1 $FLINK_DIR/examples/batch/WordCount.jar --input $TEST_INFRA_DIR/test-data/words --output $TEST_DATA_DIR/out/wc_out -check_result_hash "WordCount" $TEST_DATA_DIR/out/wc_out "72a690412be8928ba239c2da967328a5" - -stop_cluster -clean_data_dir -check_all_pass \ No newline at end of file +check_result_hash "WordCount" $TEST_DATA_DIR/out/wc_out "72a690412be8928ba239c2da967328a5" \ No newline at end of file diff --git a/test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh b/test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh index 90bf73b6ed599..a989488faccaa 100755 --- a/test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh +++ b/test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh @@ -17,6 +17,8 @@ # limitations under the License. ################################################################################ +# Tests for our shaded/bundled Hadoop S3A file system. + if [[ -z "$ARTIFACTS_AWS_BUCKET" ]]; then echo "Did not find AWS environment variables, NOT running Shaded Hadoop S3A e2e tests." exit 0 @@ -24,22 +26,22 @@ else echo "Found AWS bucket $ARTIFACTS_AWS_BUCKET, running Shaded Hadoop S3A e2e tests." fi -# Tests for our shaded/bundled Hadoop S3A file system. +source "$(dirname "$0")"/common.sh -set -e -set -o pipefail - -# Convert relative path to absolute path -TEST_ROOT=`pwd` -TEST_INFRA_DIR="$0" -TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"` -cd $TEST_INFRA_DIR -TEST_INFRA_DIR=`pwd` -cd $TEST_ROOT +s3_put $TEST_INFRA_DIR/test-data/words $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-s3a +# make sure we delete the file at the end +function s3_cleanup { + s3_delete $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-s3a + rm $FLINK_DIR/lib/flink-s3-fs*.jar -. 
"$TEST_INFRA_DIR"/common.sh + # remove any leftover settings + sed -i -e 's/s3.access-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml" + sed -i -e 's/s3.secret-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml" -s3_put $TEST_INFRA_DIR/test-data/words $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-s3a + # make sure to run regular cleanup as well + cleanup +} +trap s3_cleanup EXIT cp $FLINK_DIR/opt/flink-s3-fs-hadoop-*.jar $FLINK_DIR/lib/ echo "s3.access-key: $ARTIFACTS_AWS_ACCESS_KEY" >> "$FLINK_DIR/conf/flink-conf.yaml" @@ -49,16 +51,4 @@ start_cluster $FLINK_DIR/bin/flink run -p 1 $FLINK_DIR/examples/batch/WordCount.jar --input s3:/$resource --output $TEST_DATA_DIR/out/wc_out -check_result_hash "WordCountWithShadedS3A" $TEST_DATA_DIR/out/wc_out "72a690412be8928ba239c2da967328a5" - -# remove any leftover settings -sed -i -e 's/s3.access-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml" -sed -i -e 's/s3.secret-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml" - -rm $FLINK_DIR/lib/flink-s3-fs*.jar - -s3_delete $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-s3a - -stop_cluster -clean_data_dir -check_all_pass +check_result_hash "WordCountWithShadedS3A" $TEST_DATA_DIR/out/wc_out "72a690412be8928ba239c2da967328a5" \ No newline at end of file diff --git a/test-infra/end-to-end-test/test_shaded_presto_s3.sh b/test-infra/end-to-end-test/test_shaded_presto_s3.sh index 6ded115cfcb16..bca7649b07048 100755 --- a/test-infra/end-to-end-test/test_shaded_presto_s3.sh +++ b/test-infra/end-to-end-test/test_shaded_presto_s3.sh @@ -17,6 +17,8 @@ # limitations under the License. ################################################################################ +# Tests for our shaded/bundled Hadoop S3A file system. + if [[ -z "$ARTIFACTS_AWS_BUCKET" ]]; then echo "Did not find AWS environment variables, NOT running Shaded Presto S3 e2e tests." exit 0 @@ -24,22 +26,18 @@ else echo "Found AWS bucket $ARTIFACTS_AWS_BUCKET, running Shaded Presto S3 e2e tests." 
fi
 
-# Tests for our shaded/bundled Hadoop S3A file system.
-
-set -e
-set -o pipefail
-
-# Convert relative path to absolute path
-TEST_ROOT=`pwd`
-TEST_INFRA_DIR="$0"
-TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"`
-cd $TEST_INFRA_DIR
-TEST_INFRA_DIR=`pwd`
-cd $TEST_ROOT
-
-. "$TEST_INFRA_DIR"/common.sh
+source "$(dirname "$0")"/common.sh
 
 s3_put $TEST_INFRA_DIR/test-data/words $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-presto-s3
+# make sure we delete the file at the end
+function s3_cleanup {
+  s3_delete $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-presto-s3
+  rm $FLINK_DIR/lib/flink-s3-fs*.jar
+
+  # make sure to run regular cleanup as well
+  cleanup
+}
+trap s3_cleanup EXIT
 
 cp $FLINK_DIR/opt/flink-s3-fs-presto-*.jar $FLINK_DIR/lib/
 echo "s3.access-key: $ARTIFACTS_AWS_ACCESS_KEY" >> "$FLINK_DIR/conf/flink-conf.yaml"
@@ -53,12 +51,4 @@ check_result_hash "WordCountWithShadedPrestoS3" $TEST_DATA_DIR/out/wc_out "72a69
 
 # remove any leftover settings
 sed -i -e 's/s3.access-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml"
-sed -i -e 's/s3.secret-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml"
-
-rm $FLINK_DIR/lib/flink-s3-fs*.jar
-
-s3_delete $ARTIFACTS_AWS_BUCKET flink-end-to-end-test-shaded-presto-s3
-
-stop_cluster
-clean_data_dir
-check_all_pass
+sed -i -e 's/s3.secret-key: .*//' "$FLINK_DIR/conf/flink-conf.yaml" \ No newline at end of file
diff --git a/test-infra/end-to-end-test/test_streaming_classloader.sh b/test-infra/end-to-end-test/test_streaming_classloader.sh
index 8bc6858517d20..95c58f8a1f584 100755
--- a/test-infra/end-to-end-test/test_streaming_classloader.sh
+++ b/test-infra/end-to-end-test/test_streaming_classloader.sh
@@ -17,25 +17,10 @@
 # limitations under the License. 
################################################################################ - -set -e -set -o pipefail - -# Convert relative path to absolute path -TEST_ROOT=`pwd` -TEST_INFRA_DIR="$0" -TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"` -cd $TEST_INFRA_DIR -TEST_INFRA_DIR=`pwd` -cd $TEST_ROOT - -. "$TEST_INFRA_DIR"/common.sh +source "$(dirname "$0")"/common.sh TEST_PROGRAM_JAR=$TEST_INFRA_DIR/../../flink-end-to-end-tests/target/ClassLoaderTestProgram.jar -# kill any remaining JobManagers/TaskManagers at the end -trap 'pkill -f "JobManager|TaskManager"' EXIT - echo "Testing parent-first class loading" # retrieve git.remote.origin.url from .version.properties @@ -126,7 +111,4 @@ if [[ "$OUTPUT" != "$EXPECTED" ]]; then echo -e "EXPECTED: $EXPECTED" echo -e "ACTUAL: $OUTPUT" PASS="" -fi - -clean_data_dir -check_all_pass +fi \ No newline at end of file diff --git a/test-infra/end-to-end-test/test_streaming_kafka010.sh b/test-infra/end-to-end-test/test_streaming_kafka010.sh index cce8db4399592..1324e5aead3d0 100755 --- a/test-infra/end-to-end-test/test_streaming_kafka010.sh +++ b/test-infra/end-to-end-test/test_streaming_kafka010.sh @@ -17,19 +17,7 @@ # limitations under the License. ################################################################################ -set -e -set -o pipefail - -# Convert relative path to absolute path -TEST_ROOT=`pwd` -TEST_INFRA_DIR="$0" -TEST_INFRA_DIR=`dirname "$TEST_INFRA_DIR"` -cd $TEST_INFRA_DIR -TEST_INFRA_DIR=`pwd` -cd $TEST_ROOT - -. 
"$TEST_INFRA_DIR"/common.sh - +source "$(dirname "$0")"/common.sh start_cluster @@ -54,6 +42,17 @@ sed -i -e "s+^\(log\.dirs\s*=\s*\).*$+\1$TEST_DATA_DIR/kafka+" $KAFKA_DIR/config $KAFKA_DIR/bin/zookeeper-server-start.sh -daemon $KAFKA_DIR/config/zookeeper.properties $KAFKA_DIR/bin/kafka-server-start.sh -daemon $KAFKA_DIR/config/server.properties +# make sure to stop Kafka and ZooKeeper at the end + +function kafka_cleanup { + $KAFKA_DIR/bin/kafka-server-stop.sh + $KAFKA_DIR/bin/zookeeper-server-stop.sh + + # make sure to run regular cleanup as well + cleanup +} +trap kafka_cleanup EXIT + # zookeeper outputs the "Node does not exist" bit to stderr while [[ $($KAFKA_DIR/bin/zookeeper-shell.sh localhost:2181 get /brokers/ids/0 2>&1) =~ .*Node\ does\ not\ exist.* ]]; do echo "Waiting for broker..." @@ -82,11 +81,4 @@ if [[ "$DATA_FROM_KAFKA" != "$EXPECTED" ]]; then echo -e "EXPECTED: --$EXPECTED--" echo -e "ACTUAL: --$DATA_FROM_KAFKA--" PASS="" -fi - -$KAFKA_DIR/bin/kafka-server-stop.sh -$KAFKA_DIR/bin/zookeeper-server-stop.sh - -stop_cluster -clean_data_dir -check_all_pass +fi \ No newline at end of file diff --git a/tools/travis_mvn_watchdog.sh b/tools/travis_mvn_watchdog.sh index 0417cd35e9346..8e315fbf6f341 100755 --- a/tools/travis_mvn_watchdog.sh +++ b/tools/travis_mvn_watchdog.sh @@ -530,31 +530,31 @@ case $TEST in printf "\n==============================================================================\n" printf "Running Wordcount end-to-end test\n" printf "==============================================================================\n" - test-infra/end-to-end-test/test_batch_wordcount.sh build-target cluster + FLINK_DIR=build-target CLUSTER_MODE=cluster test-infra/end-to-end-test/test_batch_wordcount.sh EXIT_CODE=$(($EXIT_CODE+$?)) printf "\n==============================================================================\n" printf "Running Kafka end-to-end test\n" printf "==============================================================================\n" - 
test-infra/end-to-end-test/test_streaming_kafka010.sh build-target cluster + FLINK_DIR=build-target CLUSTER_MODE=cluster test-infra/end-to-end-test/test_streaming_kafka010.sh EXIT_CODE=$(($EXIT_CODE+$?)) printf "\n==============================================================================\n" printf "Running class loading end-to-end test\n" printf "==============================================================================\n" - test-infra/end-to-end-test/test_streaming_classloader.sh build-target cluster + FLINK_DIR=build-target CLUSTER_MODE=cluster test-infra/end-to-end-test/test_streaming_classloader.sh EXIT_CODE=$(($EXIT_CODE+$?)) printf "\n==============================================================================\n" printf "Running Shaded Hadoop S3A end-to-end test\n" printf "==============================================================================\n" - test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh build-target cluster + FLINK_DIR=build-target CLUSTER_MODE=cluster test-infra/end-to-end-test/test_shaded_hadoop_s3a.sh EXIT_CODE=$(($EXIT_CODE+$?)) printf "\n==============================================================================\n" printf "Running Shaded Presto S3 end-to-end test\n" printf "==============================================================================\n" - test-infra/end-to-end-test/test_shaded_presto_s3.sh build-target cluster + FLINK_DIR=build-target CLUSTER_MODE=cluster test-infra/end-to-end-test/test_shaded_presto_s3.sh EXIT_CODE=$(($EXIT_CODE+$?)) else printf "\n==============================================================================\n"