diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6227dc256a1d8..6297bcbcd0b14 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,7 +12,7 @@ - Fill out the template below to describe the changes contributed by the pull request. That will give reviewers the context they need to do the review. - - Make sure that the change passes the automated tests, i.e., `mvn clean verify` passes. You can set up Travis CI to do that following [this guide](https://flink.apache.org/contributing/contribute-code.html#open-a-pull-request). + - Make sure that the change passes the automated tests, i.e., `mvn clean verify` passes. You can set up Azure Pipelines CI to do that following [this guide](https://cwiki.apache.org/confluence/display/FLINK/Azure+Pipelines#AzurePipelines-Tutorial:SettingupAzurePipelinesforaforkoftheFlinkrepository). - Each pull request should address only one issue, not mix up code from multiple issues. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 82c0831a32720..fba75edd6deda 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -48,10 +48,11 @@ resources: # to understand why the secrets are handled like this variables: MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository + E2E_CACHE_FOLDER: $(Pipeline.Workspace)/e2e_cache MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)' CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/** CACHE_FALLBACK_KEY: maven | $(Agent.OS) - CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache + FLINK_ARTIFACT_DIR: $(Pipeline.Workspace)/flink_artifact SECRET_S3_BUCKET: $[variables.IT_CASE_S3_BUCKET] SECRET_S3_ACCESS_KEY: $[variables.IT_CASE_S3_ACCESS_KEY] SECRET_S3_SECRET_KEY: $[variables.IT_CASE_S3_SECRET_KEY] diff --git a/flink-end-to-end-tests/run-nightly-tests.sh b/flink-end-to-end-tests/run-nightly-tests.sh index f7af3b3305547..69a2ec38a6793 100755 --- a/flink-end-to-end-tests/run-nightly-tests.sh +++ b/flink-end-to-end-tests/run-nightly-tests.sh @@ -45,7 +45,7 @@ if [ ! -z "$TF_BUILD" ] ; then echo "COMPRESSING build artifacts." COMPRESSED_ARCHIVE=${BUILD_BUILDNUMBER}.tgz mkdir compressed-archive-dir - tar -zcvf compressed-archive-dir/${COMPRESSED_ARCHIVE} $ARTIFACTS_DIR + tar -zcvf compressed-archive-dir/${COMPRESSED_ARCHIVE} -C $ARTIFACTS_DIR . 
echo "##vso[task.setvariable variable=ARTIFACT_DIR]$(pwd)/compressed-archive-dir" } on_exit compress_logs @@ -235,7 +235,7 @@ printf "Running Java end-to-end tests\n" printf "==============================================================================\n" -LOG4J_PROPERTIES=${END_TO_END_DIR}/../tools/ci/log4j-ci.properties +LOG4J_PROPERTIES=${END_TO_END_DIR}/../tools/ci/log4j.properties MVN_LOGGING_OPTIONS="-Dlog.dir=${ARTIFACTS_DIR} -DlogBackupDir=${ARTIFACTS_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES" MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build" @@ -243,7 +243,7 @@ e2e_modules=$(find flink-end-to-end-tests -mindepth 2 -maxdepth 5 -name 'pom.xml e2e_modules="${e2e_modules},$(find flink-walkthroughs -mindepth 2 -maxdepth 2 -name 'pom.xml' -printf '%h\n' | sort -u | tr '\n' ',')" PROFILE="$PROFILE -Pe2e-travis1 -Pe2e-travis2 -Pe2e-travis3 -Pe2e-travis4 -Pe2e-travis5 -Pe2e-travis6" -run_mvn ${MVN_COMMON_OPTIONS} ${MVN_LOGGING_OPTIONS} ${PROFILE} verify -pl ${e2e_modules} -DdistDir=$(readlink -e build-target) +run_mvn ${MVN_COMMON_OPTIONS} ${MVN_LOGGING_OPTIONS} ${PROFILE} verify -pl ${e2e_modules} -DdistDir=$(readlink -e build-target) -Dcache-dir=$E2E_CACHE_FOLDER -Dcache-ttl=P1M EXIT_CODE=$? diff --git a/flink-end-to-end-tests/run-pre-commit-tests.sh b/flink-end-to-end-tests/run-pre-commit-tests.sh deleted file mode 100755 index 94c9b9ef1ea3d..0000000000000 --- a/flink-end-to-end-tests/run-pre-commit-tests.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env bash -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -END_TO_END_DIR="`dirname \"$0\"`" # relative -END_TO_END_DIR="`( cd \"$END_TO_END_DIR\" && pwd -P )`" # absolutized and normalized -if [ -z "$END_TO_END_DIR" ] ; then - # error; for some reason, the path is not accessible - # to the script (e.g. permissions re-evaled after suid) - exit 1 # fail -fi - -export END_TO_END_DIR - -if [ -z "$FLINK_DIR" ] ; then - echo "You have to export the Flink distribution directory as FLINK_DIR" - exit 1 -fi - -source ${END_TO_END_DIR}/test-scripts/test-runner-common.sh - -FLINK_DIR="`( cd \"$FLINK_DIR\" && pwd -P)`" # absolutized and normalized - -echo "flink-end-to-end-test directory: $END_TO_END_DIR" -echo "Flink distribution directory: $FLINK_DIR" - -# Template for adding a test: -# run_test "" "$END_TO_END_DIR/test-scripts/" ["skip_check_exceptions"] - -# IMPORTANT: -# With the "skip_check_exceptions" flag one can disable default exceptions and errors checking in log files. This should be done -# carefully though. 
A valid reasons for doing so could be e.g killing TMs randomly as we cannot predict what exception could be thrown. Whenever -# those checks are disabled, one should take care that a proper checks are performed in the tests itself that ensure that the test finished -# in an expected state. - -run_test "State Migration end-to-end test from 1.6" "$END_TO_END_DIR/test-scripts/test_state_migration.sh" -run_test "State Evolution end-to-end test" "$END_TO_END_DIR/test-scripts/test_state_evolution.sh" -run_test "Wordcount end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh file" -run_test "Shaded Hadoop S3A end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh hadoop" -run_test "Shaded Hadoop S3A end-to-end test (minio)" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh hadoop_minio" -run_test "Shaded Presto S3 end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh presto" -run_test "Shaded Presto S3 end-to-end test (minio)" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh presto_minio" -run_test "Custom FS plugin end-to-end test" "$END_TO_END_DIR/test-scripts/test_batch_wordcount.sh dummy-fs" - -run_test "Kinesis end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_kinesis.sh" -run_test "class loading end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_classloader.sh" -run_test "Distributed cache end-to-end test" "$END_TO_END_DIR/test-scripts/test_streaming_distributed_cache_via_blob.sh" -printf "\n[PASS] All tests passed\n" -exit 0 diff --git a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java index 4fbf89aeed06b..fca47e0fd8102 100644 --- a/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java +++ b/flink-yarn-tests/src/test/java/org/apache/flink/yarn/YarnTestBase.java @@ -449,7 +449,7 @@ public boolean accept(File dir, String name) { } if (!whitelistedFound) { - // logging in FATAL to see the actual message in TRAVIS tests. + // logging in FATAL to see the actual message in CI tests. Marker fatal = MarkerFactory.getMarker("FATAL"); LOG.error(fatal, "Prohibited String '{}' in '{}:{}'", aProhibited, f.getAbsolutePath(), lineFromFile); @@ -1048,10 +1048,10 @@ public static void teardown() throws Exception { hdfsSiteXML.delete(); } - // When we are on travis, we copy the temp files of JUnit (containing the MiniYARNCluster log files) + // When we are on CI, we copy the temp files of JUnit (containing the MiniYARNCluster log files) // to /target/flink-yarn-tests-*. // The files from there are picked up by the tools/ci/* scripts to upload them. 
- if (isOnTravis()) { + if (isOnCI()) { File target = new File("../target" + YARN_CONFIGURATION.get(TEST_CLUSTER_NAME_KEY)); if (!target.mkdirs()) { LOG.warn("Error creating dirs to {}", target); @@ -1067,8 +1067,8 @@ public static void teardown() throws Exception { } - public static boolean isOnTravis() { - return System.getenv("TRAVIS") != null && System.getenv("TRAVIS").equals("true"); + public static boolean isOnCI() { + return System.getenv("IS_CI") != null && System.getenv("IS_CI").equals("true"); } protected void waitApplicationFinishedElseKillIt( diff --git a/tools/azure-pipelines/azure_controller.sh b/tools/azure-pipelines/azure_controller.sh deleted file mode 100755 index bbe2faa553325..0000000000000 --- a/tools/azure-pipelines/azure_controller.sh +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env bash -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -HERE="`dirname \"$0\"`" # relative -HERE="`( cd \"$HERE\" && pwd )`" # absolutized and normalized -if [ -z "$HERE" ] ; then - exit 1 # fail -fi -CI_DIR="$HERE/../ci" - -# source required ci scripts -source "${CI_DIR}/stage.sh" -source "${CI_DIR}/shade.sh" -source "${CI_DIR}/maven-utils.sh" - -echo $M2_HOME -echo $PATH -echo $MAVEN_OPTS - -run_mvn -version -echo "Commit: $(git rev-parse HEAD)" - -print_system_info() { - echo "CPU information" - lscpu - - echo "Memory information" - cat /proc/meminfo - - echo "Disk information" - df -hH - - echo "Running build as" - whoami -} - -print_system_info - - -STAGE=$1 -echo "Current stage: \"$STAGE\"" - -EXIT_CODE=0 - -# Set up a custom Maven settings file, configuring an Google-hosted maven central -# mirror. We use a different mirror because the official maven central mirrors -# often lead to connection timeouts (probably due to rate-limiting) - -MVN="run_mvn clean install $MAVEN_OPTS -Dflink.convergence.phase=install -Pcheck-convergence -Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dmaven.javadoc.skip=true -U -DskipTests" - -# Run actual compile&test steps -if [ $STAGE == "$STAGE_COMPILE" ]; then - # run mvn clean install: - $MVN - EXIT_CODE=$? - - if [ $EXIT_CODE == 0 ]; then - echo "\n\n==============================================================================\n" - echo "Checking scala suffixes\n" - echo "==============================================================================\n" - - ./tools/ci/verify_scala_suffixes.sh "${PROFILE}" - EXIT_CODE=$? 
- else - echo "\n==============================================================================\n" - echo "Previous build failure detected, skipping scala-suffixes check.\n" - echo "==============================================================================\n" - fi - - if [ $EXIT_CODE == 0 ]; then - check_shaded_artifacts - EXIT_CODE=$(($EXIT_CODE+$?)) - check_shaded_artifacts_s3_fs hadoop - EXIT_CODE=$(($EXIT_CODE+$?)) - check_shaded_artifacts_s3_fs presto - EXIT_CODE=$(($EXIT_CODE+$?)) - check_shaded_artifacts_connector_elasticsearch 2 - EXIT_CODE=$(($EXIT_CODE+$?)) - check_shaded_artifacts_connector_elasticsearch 5 - EXIT_CODE=$(($EXIT_CODE+$?)) - check_shaded_artifacts_connector_elasticsearch 6 - EXIT_CODE=$(($EXIT_CODE+$?)) - else - echo "==============================================================================" - echo "Previous build failure detected, skipping shaded dependency check." - echo "==============================================================================" - fi - - if [ $EXIT_CODE == 0 ]; then - echo "Creating cache build directory $CACHE_FLINK_DIR" - - cp -r . "$CACHE_FLINK_DIR" - - function minimizeCachedFiles() { - # reduces the size of the cached directory to speed up - # the packing&upload / download&unpacking process - # by removing files not required for subsequent stages - - # jars are re-built in subsequent stages, so no need to cache them (cannot be avoided) - find "$CACHE_FLINK_DIR" -maxdepth 8 -type f -name '*.jar' \ - ! -path "$CACHE_FLINK_DIR/flink-formats/flink-csv/target/flink-csv*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-formats/flink-json/target/flink-json*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-formats/flink-avro/target/flink-avro*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-runtime/target/flink-runtime*tests.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-streaming-java/target/flink-streaming-java*tests.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-dist*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-table_*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/lib/flink-table-blink*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/opt/flink-python*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-dist/target/flink-*-bin/flink-*/opt/flink-sql-client_*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-connectors/flink-connector-elasticsearch-base/target/flink-*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-connectors/flink-connector-kafka-base/target/flink-*.jar" \ - ! -path "$CACHE_FLINK_DIR/flink-table/flink-table-planner/target/flink-table-planner*tests.jar" | xargs rm -rf - - # .git directory - # not deleting this can cause build stability issues - # merging the cached version sometimes fails - rm -rf "$CACHE_FLINK_DIR/.git" - - # AZ Pipelines has a problem with links. - rm "$CACHE_FLINK_DIR/build-target" - } - - echo "Minimizing cache" - minimizeCachedFiles - else - echo "==============================================================================" - echo "Previous build failure detected, skipping cache setup." - echo "==============================================================================" - fi -elif [ $STAGE != "$STAGE_CLEANUP" ]; then - if ! [ -e $CACHE_FLINK_DIR ]; then - echo "Cached flink dir $CACHE_FLINK_DIR does not exist. Exiting build." - exit 1 - fi - # merged compiled flink into local clone - # this prevents the cache from being re-uploaded - echo "Merging cache" - cp -RT "$CACHE_FLINK_DIR" "." 
- - echo "Adjusting timestamps" - # adjust timestamps to prevent recompilation - find . -type f -name '*.java' | xargs touch - find . -type f -name '*.scala' | xargs touch - # wait a bit for better odds of different timestamps - sleep 5 - find . -type f -name '*.class' | xargs touch - find . -type f -name '*.timestamp' | xargs touch - - if [ $STAGE == $STAGE_PYTHON ]; then - echo "==============================================================================" - echo "Python stage found. Re-compiling (this is required on Azure for the python tests to pass)" - echo "==============================================================================" - # run mvn install (w/o "clean"): - PY_MVN="${MVN// clean/}" - PY_MVN="$PY_MVN -Drat.skip=true" - ${PY_MVN} - EXIT_CODE=$? - - if [ $EXIT_CODE != 0 ]; then - echo "==============================================================================" - echo "Compile error for python stage preparation. Exit code: $EXIT_CODE. Failing build" - echo "==============================================================================" - exit $EXIT_CODE - fi - - echo "Done compiling ... " - fi - - - TEST="$STAGE" "./tools/ci/ci_controller.sh" 900 - EXIT_CODE=$? -elif [ $STAGE == "$STAGE_CLEANUP" ]; then - echo "Cleaning up $CACHE_BUILD_DIR" - rm -rf "$CACHE_BUILD_DIR" -else - echo "Invalid Stage specified: $STAGE" - exit 1 -fi - -# Exit code for Azure build success/failure -exit $EXIT_CODE diff --git a/tools/azure-pipelines/build-apache-repo.yml b/tools/azure-pipelines/build-apache-repo.yml index dab6d956f84f4..fc0f80d296111 100644 --- a/tools/azure-pipelines/build-apache-repo.yml +++ b/tools/azure-pipelines/build-apache-repo.yml @@ -42,10 +42,11 @@ resources: variables: MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository + E2E_CACHE_FOLDER: $(Pipeline.Workspace)/e2e_cache MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)' CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/** CACHE_FALLBACK_KEY: maven | $(Agent.OS) - CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache + FLINK_ARTIFACT_DIR: $(Pipeline.Workspace)/flink_artifact SECRET_S3_BUCKET: $[variables.IT_CASE_S3_BUCKET] SECRET_S3_ACCESS_KEY: $[variables.IT_CASE_S3_ACCESS_KEY] SECRET_S3_SECRET_KEY: $[variables.IT_CASE_S3_SECRET_KEY] diff --git a/tools/azure-pipelines/build-python-wheels.yml b/tools/azure-pipelines/build-python-wheels.yml index f4bc620875fcf..b2bdfa8088967 100644 --- a/tools/azure-pipelines/build-python-wheels.yml +++ b/tools/azure-pipelines/build-python-wheels.yml @@ -22,8 +22,10 @@ jobs: clean: all steps: # Compile - - script: STAGE=compile ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh compile - displayName: Build + - script: | + ${{parameters.environment}} ./tools/ci/compile.sh + ./tools/azure-pipelines/create_build_artifact.sh + displayName: Compile - script: | VERSION=$(mvn --file pom.xml org.apache.maven.plugins:maven-help-plugin:3.1.0:evaluate -Dexpression=project.version -q -DforceStdout) @@ -38,8 +40,8 @@ jobs: # upload artifacts for building wheels - task: PublishPipelineArtifact@1 inputs: - targetPath: $(Pipeline.Workspace)/flink.tar.gz - artifactName: FlinkCompileCacheDir-${{parameters.stage_name}} + path: $(FLINK_ARTIFACT_DIR) + artifact: FlinkCompileArtifact-${{parameters.stage_name}} - job: build_wheels dependsOn: compile_${{parameters.stage_name}} @@ -58,7 +60,7 @@ jobs: - task: DownloadPipelineArtifact@2 inputs: path: $(Pipeline.Workspace) - artifact: FlinkCompileCacheDir-${{parameters.stage_name}} + artifact: 
FlinkCompileArtifact-${{parameters.stage_name}} - script: | tar zxf $(Pipeline.Workspace)/flink.tar.gz -C $(Pipeline.Workspace) mkdir -p flink-dist/target/flink-$(VERSION)-bin diff --git a/tools/azure-pipelines/create_build_artifact.sh b/tools/azure-pipelines/create_build_artifact.sh new file mode 100755 index 0000000000000..6de32e9a553fa --- /dev/null +++ b/tools/azure-pipelines/create_build_artifact.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +echo "Creating build artifact dir $FLINK_ARTIFACT_DIR" + +cp -r . "$FLINK_ARTIFACT_DIR" + +echo "Minimizing artifact files" + +# reduces the size of the artifact directory to speed up +# the packing&upload / download&unpacking process +# by removing files not required for subsequent stages + +# jars are re-built in subsequent stages, so no need to cache them (cannot be avoided) +find "$FLINK_ARTIFACT_DIR" -maxdepth 8 -type f -name '*.jar' | xargs rm -rf + +# .git directory +# not deleting this can cause build stability issues +# merging the cached version sometimes fails +rm -rf "$FLINK_ARTIFACT_DIR/.git" + +# AZ Pipelines has a problem with links. +rm "$FLINK_ARTIFACT_DIR/build-target" + diff --git a/tools/azure-pipelines/debug_files_utils.sh b/tools/azure-pipelines/debug_files_utils.sh new file mode 100755 index 0000000000000..6c8719253874c --- /dev/null +++ b/tools/azure-pipelines/debug_files_utils.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +function prepare_debug_files { + MODULE=$1 + export DEBUG_FILES_OUTPUT_DIR="$AGENT_TEMPDIRECTORY/debug_files/" + export DEBUG_FILES_NAME="$(echo $MODULE | tr -dc '[:alnum:]\n\r')-$(date +%s)" + echo "##vso[task.setvariable variable=DEBUG_FILES_OUTPUT_DIR]$DEBUG_FILES_OUTPUT_DIR" + echo "##vso[task.setvariable variable=DEBUG_FILES_NAME]$DEBUG_FILES_NAME" + mkdir -p $DEBUG_FILES_OUTPUT_DIR || { echo "FAILURE: cannot create log directory '${DEBUG_FILES_OUTPUT_DIR}'." ; exit 1; } +} + +function compress_debug_files { + echo "Compressing debug files" + tar -zcvf /tmp/$DEBUG_FILES_NAME.tgz -C $DEBUG_FILES_OUTPUT_DIR . + # clean directory + rm -rf $DEBUG_FILES_OUTPUT_DIR ; mkdir -p $DEBUG_FILES_OUTPUT_DIR + mv /tmp/$DEBUG_FILES_NAME.tgz $DEBUG_FILES_OUTPUT_DIR +} diff --git a/tools/azure-pipelines/jobs-template.yml b/tools/azure-pipelines/jobs-template.yml index fbe5bd8cd5707..d8dc2d864322b 100644 --- a/tools/azure-pipelines/jobs-template.yml +++ b/tools/azure-pipelines/jobs-template.yml @@ -24,7 +24,8 @@ parameters: jobs: - job: compile_${{parameters.stage_name}} - condition: not(eq(variables['MODE'], 'e2e')) + # succeeded() is needed to allow job cancellation + condition: and(succeeded(), not(eq(variables['MODE'], 'e2e'))) pool: ${{parameters.test_pool_definition}} container: ${{parameters.container}} timeoutInMinutes: 240 @@ -64,14 +65,16 @@ jobs: displayName: "Set to jdk11" condition: eq('${{parameters.jdk}}', 'jdk11') # Compile - - script: STAGE=compile ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh compile - displayName: Build + - script: | + ${{parameters.environment}} ./tools/ci/compile.sh || exit $? + ./tools/azure-pipelines/create_build_artifact.sh + displayName: Compile # upload artifacts for next stage - task: PublishPipelineArtifact@1 inputs: - path: $(CACHE_FLINK_DIR) - artifact: FlinkCompileCacheDir-${{parameters.stage_name}} + targetPath: $(FLINK_ARTIFACT_DIR) + artifact: FlinkCompileArtifact-${{parameters.stage_name}} - job: test_${{parameters.stage_name}} dependsOn: compile_${{parameters.stage_name}} @@ -107,11 +110,14 @@ jobs: condition: not(eq('${{parameters.test_pool_definition.name}}', 'Default')) displayName: Free up disk space - # download artifacts + # download artifact from compile stage - task: DownloadPipelineArtifact@2 inputs: - path: $(CACHE_FLINK_DIR) - artifact: FlinkCompileCacheDir-${{parameters.stage_name}} + path: $(FLINK_ARTIFACT_DIR) + artifact: FlinkCompileArtifact-${{parameters.stage_name}} + + - script: ./tools/azure-pipelines/unpack_build_artifact.sh + displayName: "Unpack Build artifact" - task: Cache@2 inputs: @@ -121,15 +127,25 @@ jobs: continueOnError: true # continue the build even if the cache fails. condition: not(eq('${{parameters.test_pool_definition.name}}', 'Default')) displayName: Cache Maven local repo + - script: | echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_11_X64" echo "##vso[task.setvariable variable=PATH]$JAVA_HOME_11_X64/bin:$PATH" displayName: "Set to jdk11" condition: eq('${{parameters.jdk}}', 'jdk11') + - script: sudo sysctl -w kernel.core_pattern=core.%p displayName: Set coredump pattern + # Test - - script: STAGE=test ${{parameters.environment}} ./tools/azure-pipelines/azure_controller.sh $(module) + - script: | + source ./tools/azure-pipelines/debug_files_utils.sh + prepare_debug_files $(module) + + ${{parameters.environment}} ./tools/ci/test_controller.sh $(module) ; TEST_EXIT_CODE=$? 
+ + compress_debug_files + exit $TEST_EXIT_CODE displayName: Test - $(module) env: IT_CASE_S3_BUCKET: $(SECRET_S3_BUCKET) @@ -139,13 +155,14 @@ jobs: - task: PublishTestResults@2 inputs: testResultsFormat: 'JUnit' + # upload debug artifacts - task: PublishPipelineArtifact@1 - condition: and(succeededOrFailed(), not(eq('$(ARTIFACT_DIR)', ''))) + condition: and(succeededOrFailed(), not(eq('$(DEBUG_FILES_OUTPUT_DIR)', ''))) displayName: Upload Logs inputs: - path: $(ARTIFACT_DIR) - artifact: logs-${{parameters.stage_name}}-$(ARTIFACT_NAME) + targetPath: $(DEBUG_FILES_OUTPUT_DIR) + artifact: logs-${{parameters.stage_name}}-$(DEBUG_FILES_NAME) - job: e2e_${{parameters.stage_name}} # uncomment below condition to run the e2e tests only on request. @@ -164,6 +181,12 @@ jobs: path: $(MAVEN_CACHE_FOLDER) displayName: Cache Maven local repo continueOnError: true + - task: Cache@2 + inputs: + key: e2e-cache | flink-end-to-end-tests/**/*.java, !**/avro/** + path: $(E2E_CACHE_FOLDER) + displayName: Cache E2E files + continueOnError: true - script: | echo "##vso[task.setvariable variable=JAVA_HOME]$JAVA_HOME_11_X64" echo "##vso[task.setvariable variable=PATH]$JAVA_HOME_11_X64/bin:$PATH" @@ -178,11 +201,8 @@ jobs: - script: ./tools/azure-pipelines/free_disk_space.sh displayName: Free up disk space - script: sudo apt-get install -y bc - - script: ${{parameters.environment}} STAGE=compile ./tools/azure-pipelines/azure_controller.sh compile + - script: ${{parameters.environment}} ./tools/ci/compile.sh displayName: Build Flink - # TODO remove pre-commit tests script by adding the tests to the nightly script -# - script: FLINK_DIR=build-target ./flink-end-to-end-tests/run-pre-commit-tests.sh -# displayName: Test - precommit - script: ${{parameters.environment}} FLINK_DIR=`pwd`/build-target flink-end-to-end-tests/run-nightly-tests.sh displayName: Run e2e tests env: @@ -194,7 +214,7 @@ jobs: condition: and(succeededOrFailed(), not(eq(variables['ARTIFACT_DIR'], ''))) displayName: Upload Logs inputs: - path: $(ARTIFACT_DIR) + targetPath: $(ARTIFACT_DIR) artifact: logs-${{parameters.stage_name}}-e2e diff --git a/tools/azure-pipelines/prepare_precommit.sh b/tools/azure-pipelines/unpack_build_artifact.sh similarity index 68% rename from tools/azure-pipelines/prepare_precommit.sh rename to tools/azure-pipelines/unpack_build_artifact.sh index cac545bde4984..1f2b7f096e242 100755 --- a/tools/azure-pipelines/prepare_precommit.sh +++ b/tools/azure-pipelines/unpack_build_artifact.sh @@ -15,10 +15,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +################################################################################ + +if ! [ -e $FLINK_ARTIFACT_DIR ]; then + echo "Cached flink dir $FLINK_ARTIFACT_DIR does not exist. Exiting build." + exit 1 +fi echo "Merging cache" -cp -RT "$CACHE_FLINK_DIR" "." +cp -RT "$FLINK_ARTIFACT_DIR" "." + echo "Adjusting timestamps" # adjust timestamps to prevent recompilation find . -type f -name '*.java' | xargs touch @@ -28,20 +35,3 @@ sleep 5 find . -type f -name '*.class' | xargs touch find . -type f -name '*.timestamp' | xargs touch - -export M2_HOME=/home/vsts/maven_cache/apache-maven-3.2.5/ -export PATH=/home/vsts/maven_cache/apache-maven-3.2.5/bin:$PATH -run_mvn -version -MVN_CALL="run_mvn install -DskipTests -Drat.skip" -$MVN_CALL -EXIT_CODE=$? 
- -if [ $EXIT_CODE != 0 ]; then - echo "==============================================================================" - echo "Build error. Exit code: $EXIT_CODE. Failing build" - echo "==============================================================================" - exit $EXIT_CODE -fi - -chmod -R +x build-target -chmod -R +x flink-end-to-end-tests diff --git a/tools/ci/ci_controller.sh b/tools/ci/ci_controller.sh deleted file mode 100755 index 90ee551e6d081..0000000000000 --- a/tools/ci/ci_controller.sh +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env bash -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -HERE="`dirname \"$0\"`" # relative -HERE="`( cd \"$HERE\" && pwd )`" # absolutized and normalized -if [ -z "$HERE" ] ; then - # error; for some reason, the path is not accessible - # to the script (e.g. permissions re-evaled after suid) - exit 1 # fail -fi - -source "${HERE}/stage.sh" -source "${HERE}/maven-utils.sh" - -ARTIFACTS_DIR="${HERE}/artifacts" - -mkdir -p $ARTIFACTS_DIR || { echo "FAILURE: cannot create log directory '${ARTIFACTS_DIR}'." ; exit 1; } - -echo "Build for commit ${TRAVIS_COMMIT} of ${TRAVIS_REPO_SLUG} [build ID: ${TRAVIS_BUILD_ID}, job number: $TRAVIS_JOB_NUMBER]." | tee "${ARTIFACTS_DIR}/build_info" - -# ============================================================================= -# CONFIG -# ============================================================================= - -# Number of seconds w/o output before printing a stack trace and killing $MVN -MAX_NO_OUTPUT=${1:-900} - -# Number of seconds to sleep before checking the output again -SLEEP_TIME=20 - -# Maximum times to retry uploading artifacts file to transfer.sh -TRANSFER_UPLOAD_MAX_RETRIES=2 - -# The delay between two retries to upload artifacts file to transfer.sh. The default exponential -# backoff algorithm should be too long for the last several retries. -TRANSFER_UPLOAD_RETRY_DELAY=5 - -LOG4J_PROPERTIES=${HERE}/log4j-ci.properties - -PYTHON_TEST="./flink-python/dev/lint-python.sh" -PYTHON_PID="${ARTIFACTS_DIR}/watchdog.python.pid" -PYTHON_EXIT="${ARTIFACTS_DIR}/watchdog.python.exit" -PYTHON_OUT="${ARTIFACTS_DIR}/python.out" - -MVN_COMPILE_MODULES=$(get_compile_modules_for_stage ${TEST}) -MVN_TEST_MODULES=$(get_test_modules_for_stage ${TEST}) - -# Maven command to run. We set the forkCount manually, because otherwise Maven sees too many cores -# on the Travis VMs. Set forkCountTestPackage to 1 for container-based environment (4 GiB memory) -# and 2 for sudo-enabled environment (7.5 GiB memory). 
-MVN_LOGGING_OPTIONS="-Dlog.dir=${ARTIFACTS_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES" -MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build $MVN_LOGGING_OPTIONS" -MVN_COMPILE_OPTIONS="-DskipTests" -MVN_TEST_OPTIONS="-Dflink.tests.with-openssl" - -e2e_modules=$(find flink-end-to-end-tests -mindepth 2 -maxdepth 5 -name 'pom.xml' -printf '%h\n' | sort -u | tr '\n' ',') - -MVN_COMPILE="run_mvn $MVN_COMMON_OPTIONS $MVN_COMPILE_OPTIONS $PROFILE $MVN_COMPILE_MODULES install" -MVN_TEST="run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS $PROFILE $MVN_TEST_MODULES verify" -# don't move the e2e-pre-commit profile activation into the misc entry in .travis.yml, since it breaks caching -MVN_E2E="run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS -Pe2e-pre-commit -pl ${e2e_modules},flink-dist verify" - -MVN_PID="${ARTIFACTS_DIR}/watchdog.mvn.pid" -MVN_EXIT="${ARTIFACTS_DIR}/watchdog.mvn.exit" -MVN_OUT="${ARTIFACTS_DIR}/mvn.out" - -TRACE_OUT="${ARTIFACTS_DIR}/jps-traces.out" - -# E.g. travis-artifacts/apache/flink/1595/1595.1 -UPLOAD_TARGET_PATH="travis-artifacts/${TRAVIS_REPO_SLUG}/${TRAVIS_BUILD_NUMBER}/" -# These variables are stored as secure variables in '.travis.yml', which are generated per repo via -# the travis command line tool. -UPLOAD_BUCKET=$ARTIFACTS_AWS_BUCKET -UPLOAD_ACCESS_KEY=$ARTIFACTS_AWS_ACCESS_KEY -UPLOAD_SECRET_KEY=$ARTIFACTS_AWS_SECRET_KEY - -ARTIFACTS_FILE=${TRAVIS_JOB_NUMBER}.tar.gz - -if [ ! -z "$TF_BUILD" ] ; then - # set proper artifacts file name on Azure Pipelines - ARTIFACTS_FILE=${BUILD_BUILDNUMBER}.tar.gz -fi - -# enable coredumps -ulimit -c unlimited -export JAVA_TOOL_OPTIONS="-XX:+HeapDumpOnOutOfMemoryError" - -if [ $TEST == $STAGE_PYTHON ]; then - CMD=$PYTHON_TEST - CMD_PID=$PYTHON_PID - CMD_OUT=$PYTHON_OUT - CMD_EXIT=$PYTHON_EXIT - CMD_TYPE="PYTHON" -else - CMD=$MVN_COMPILE - CMD_PID=$MVN_PID - CMD_OUT=$MVN_OUT - CMD_EXIT=$MVN_EXIT - CMD_TYPE="MVN" -fi - -# ============================================================================= -# FUNCTIONS -# ============================================================================= - -upload_artifacts_s3() { - echo "PRODUCED build artifacts." - - ls $ARTIFACTS_DIR - - echo "COMPRESSING build artifacts." - - cd $ARTIFACTS_DIR - dmesg > container.log - tar -zcvf $ARTIFACTS_FILE * - - # Upload to secured S3 - if [ -n "$UPLOAD_BUCKET" ] && [ -n "$UPLOAD_ACCESS_KEY" ] && [ -n "$UPLOAD_SECRET_KEY" ]; then - - # Install artifacts tool - curl -sL https://raw.githubusercontent.com/travis-ci/artifacts/master/install | bash - - PATH=$HOME/bin/artifacts:$HOME/bin:$PATH - - echo "UPLOADING build artifacts." - - # Upload everything in $ARTIFACTS_DIR. Use relative path, otherwise the upload tool - # re-creates the whole directory structure from root. - artifacts upload --bucket $UPLOAD_BUCKET --key $UPLOAD_ACCESS_KEY --secret $UPLOAD_SECRET_KEY --target-paths $UPLOAD_TARGET_PATH $ARTIFACTS_FILE - fi - - # On Azure, publish ARTIFACTS_FILE as a build artifact - if [ ! 
-z "$TF_BUILD" ] ; then - TIMESTAMP=`date +%s` # append timestamp to name to allow multiple uploads for the same module - ARTIFACT_DIR="$(pwd)/artifact-dir" - mkdir $ARTIFACT_DIR - cp $ARTIFACTS_FILE $ARTIFACT_DIR/ - - echo "##vso[task.setvariable variable=ARTIFACT_DIR]$ARTIFACT_DIR" - echo "##vso[task.setvariable variable=ARTIFACT_NAME]$(echo $MODULE | tr -dc '[:alnum:]\n\r')-$TIMESTAMP" - fi - - # upload to https://transfer.sh - echo "Uploading to transfer.sh" - curl --retry ${TRANSFER_UPLOAD_MAX_RETRIES} --retry-delay ${TRANSFER_UPLOAD_RETRY_DELAY} --upload-file $ARTIFACTS_FILE --max-time 60 https://transfer.sh -} - -print_stacktraces () { - echo "==============================================================================" - echo "The following Java processes are running (JPS)" - echo "==============================================================================" - - jps - - local pids=( $(jps | awk '{print $1}') ) - - for pid in "${pids[@]}"; do - echo "==============================================================================" - echo "Printing stack trace of Java process ${pid}" - echo "==============================================================================" - - jstack $pid - done -} - -# locate YARN logs and put them into artifacts directory -put_yarn_logs_to_artifacts() { - # Make sure to be in project root - cd $HERE/../ - for file in `find ./flink-yarn-tests/target -type f -name '*.log'`; do - TARGET_FILE=`echo "$file" | grep -Eo "container_[0-9_]+/(.*).log"` - TARGET_DIR=`dirname "$TARGET_FILE"` - mkdir -p "$ARTIFACTS_DIR/yarn-tests/$TARGET_DIR" - cp $file "$ARTIFACTS_DIR/yarn-tests/$TARGET_FILE" - done -} - -mod_time () { - if [[ `uname` == 'Darwin' ]]; then - eval $(stat -s $CMD_OUT) - echo $st_mtime - else - echo `stat -c "%Y" $CMD_OUT` - fi -} - -the_time() { - echo `date +%s` -} - -# ============================================================================= -# WATCHDOG -# ============================================================================= - -watchdog () { - touch $CMD_OUT - - while true; do - sleep $SLEEP_TIME - - time_diff=$((`the_time` - `mod_time`)) - - if [ $time_diff -ge $MAX_NO_OUTPUT ]; then - echo "==============================================================================" - echo "Maven produced no output for ${MAX_NO_OUTPUT} seconds." - echo "==============================================================================" - - print_stacktraces | tee $TRACE_OUT - - # Kill $CMD and all descendants - pkill -P $(<$CMD_PID) - - exit 1 - fi - done -} - -run_with_watchdog() { - local cmd="$1" - - watchdog & - WD_PID=$! - echo "STARTED watchdog (${WD_PID})." - - # Make sure to be in project root - cd "$HERE/../" - - echo "RUNNING '${cmd}'." - - # Run $CMD and pipe output to $CMD_OUT for the watchdog. The PID is written to $CMD_PID to - # allow the watchdog to kill $CMD if it is not producing any output anymore. $CMD_EXIT contains - # the exit code. This is important for Travis' build life-cycle (success/failure). - ( $cmd & PID=$! ; echo $PID >&3 ; wait $PID ; echo $? >&4 ) 3>$CMD_PID 4>$CMD_EXIT | tee $CMD_OUT - - EXIT_CODE=$(<$CMD_EXIT) - - echo "${CMD_TYPE} exited with EXIT CODE: ${EXIT_CODE}." - - # Make sure to kill the watchdog in any case after $CMD has completed - echo "Trying to KILL watchdog (${WD_PID})." 
- ( kill $WD_PID 2>&1 ) > /dev/null - - rm $CMD_PID - rm $CMD_EXIT -} - -run_with_watchdog "$CMD" - -# Run tests if compilation was successful -if [ $CMD_TYPE == "MVN" ]; then - if [ $EXIT_CODE == 0 ]; then - run_with_watchdog "$MVN_TEST" - else - echo "==============================================================================" - echo "Compilation failure detected, skipping test execution." - echo "==============================================================================" - fi -fi - -# Post - -# only misc builds flink-dist and flink-yarn-tests -case $TEST in - (misc) - put_yarn_logs_to_artifacts - ;; -esac - -collect_coredumps `pwd` $ARTIFACTS_DIR - -upload_artifacts_s3 - -# since we are in flink/tools/artifacts -# we are going back to -cd ../../ - -# only run end-to-end tests in misc because we only have flink-dist here -case $TEST in - (misc) - # If we are not on Azure (we are on Travis) run precommit tests in misc stage. - # On Azure, we run them in a separate job - if [ -z "$TF_BUILD" ] ; then - if [ $EXIT_CODE == 0 ]; then - echo "\n\n==============================================================================\n" - echo "Running bash end-to-end tests\n" - echo "==============================================================================\n" - - FLINK_DIR=build-target flink-end-to-end-tests/run-pre-commit-tests.sh - - EXIT_CODE=$? - else - echo "\n==============================================================================\n" - echo "Previous build failure detected, skipping bash end-to-end tests.\n" - echo "==============================================================================\n" - fi - if [ $EXIT_CODE == 0 ]; then - echo "\n\n==============================================================================\n" - echo "Running java end-to-end tests\n" - echo "==============================================================================\n" - - run_with_watchdog "$MVN_E2E -DdistDir=$(readlink -e build-target)" - else - echo "\n==============================================================================\n" - echo "Previous build failure detected, skipping java end-to-end tests.\n" - fi - fi - ;; -esac - -# Exit code for Travis build success/failure -exit $EXIT_CODE diff --git a/tools/ci/compile.sh b/tools/ci/compile.sh new file mode 100755 index 0000000000000..b0b4803f2a985 --- /dev/null +++ b/tools/ci/compile.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +# +# This file contains tooling for compiling Flink +# + +HERE="`dirname \"$0\"`" # relative +HERE="`( cd \"$HERE\" && pwd )`" # absolutized and normalized +if [ -z "$HERE" ] ; then + exit 1 # fail +fi +CI_DIR="$HERE/../ci" + +# source required ci scripts +source "${CI_DIR}/stage.sh" +source "${CI_DIR}/shade.sh" +source "${CI_DIR}/maven-utils.sh" + +echo "Maven version:" +run_mvn -version + +echo "==============================================================================" +echo "Compiling Flink" +echo "==============================================================================" + +EXIT_CODE=0 + +run_mvn clean install $MAVEN_OPTS -Dflink.convergence.phase=install -Pcheck-convergence -Dflink.forkCount=2 \ + -Dflink.forkCountTestPackage=2 -Dmaven.javadoc.skip=true -U -DskipTests + +EXIT_CODE=$? + +if [ $EXIT_CODE == 0 ]; then + echo "==============================================================================" + echo "Checking scala suffixes" + echo "==============================================================================" + + ${CI_DIR}/verify_scala_suffixes.sh "${PROFILE}" + EXIT_CODE=$? +else + echo "==============================================================================" + echo "Previous build failure detected, skipping scala-suffixes check." + echo "==============================================================================" +fi + +if [ $EXIT_CODE == 0 ]; then + check_shaded_artifacts + EXIT_CODE=$(($EXIT_CODE+$?)) + check_shaded_artifacts_s3_fs hadoop + EXIT_CODE=$(($EXIT_CODE+$?)) + check_shaded_artifacts_s3_fs presto + EXIT_CODE=$(($EXIT_CODE+$?)) + check_shaded_artifacts_connector_elasticsearch 5 + EXIT_CODE=$(($EXIT_CODE+$?)) + check_shaded_artifacts_connector_elasticsearch 6 + EXIT_CODE=$(($EXIT_CODE+$?)) +else + echo "==============================================================================" + echo "Previous build failure detected, skipping shaded dependency check." + echo "==============================================================================" +fi + +exit $EXIT_CODE + diff --git a/tools/ci/controller_utils.sh b/tools/ci/controller_utils.sh new file mode 100644 index 0000000000000..892d5fae8331b --- /dev/null +++ b/tools/ci/controller_utils.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +print_system_info() { + echo "CPU information" + lscpu + + echo "Memory information" + cat /proc/meminfo + + echo "Disk information" + df -hH + + echo "Running build as" + whoami +} + +# locate YARN logs and put them into artifacts directory +put_yarn_logs_to_artifacts() { + for file in `find ./flink-yarn-tests/target -type f -name '*.log'`; do + TARGET_FILE=`echo "$file" | grep -Eo "container_[0-9_]+/(.*).log"` + TARGET_DIR=`dirname "$TARGET_FILE"` + mkdir -p "$DEBUG_FILES_OUTPUT_DIR/yarn-tests/$TARGET_DIR" + cp $file "$DEBUG_FILES_OUTPUT_DIR/yarn-tests/$TARGET_FILE" + done +} + +print_stacktraces () { + echo "==============================================================================" + echo "The following Java processes are running (JPS)" + echo "==============================================================================" + + JAVA_PROCESSES=`jps` + echo $JAVA_PROCESSES + + local pids=( $(echo $JAVA_PROCESSES | awk '{print $1}') ) + + for pid in "${pids[@]}"; do + echo "==============================================================================" + echo "Printing stack trace of Java process ${pid}" + echo "==============================================================================" + + jstack $pid + done +} + diff --git a/tools/ci/log4j-ci.properties b/tools/ci/log4j.properties similarity index 100% rename from tools/ci/log4j-ci.properties rename to tools/ci/log4j.properties diff --git a/tools/ci/maven-utils.sh b/tools/ci/maven-utils.sh index c85057211756d..f3be22a53b9e6 100755 --- a/tools/ci/maven-utils.sh +++ b/tools/ci/maven-utils.sh @@ -73,7 +73,7 @@ function collect_coredumps { echo "Searching for .dump, .dumpstream and related files in '$SEARCHDIR'" for file in `find $SEARCHDIR -type f -regextype posix-extended -iregex '.*\.hprof|.*\.dump|.*\.dumpstream|.*hs.*\.log|.*/core(.[0-9]+)?$'`; do echo "Moving '$file' to target directory ('$TARGET_DIR')" - mv $file $TARGET_DIR/ + mv $file $TARGET_DIR/$(echo $file | tr "/" "-") done } diff --git a/tools/ci/shade.sh b/tools/ci/shade.sh old mode 100644 new mode 100755 diff --git a/tools/ci/stage.sh b/tools/ci/stage.sh old mode 100644 new mode 100755 index e431ebe1c7d7b..401f39f2d59c5 --- a/tools/ci/stage.sh +++ b/tools/ci/stage.sh @@ -158,6 +158,10 @@ function get_compile_modules_for_stage() { # the negation takes precedence, thus not all required modules would be built echo "" ;; + (${STAGE_PYTHON}) + # compile everything for PyFlink. + echo "" + ;; esac } diff --git a/tools/ci/test_controller.sh b/tools/ci/test_controller.sh new file mode 100755 index 0000000000000..8025e757dac21 --- /dev/null +++ b/tools/ci/test_controller.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#
+# This file contains generic control over the test execution.
+#
+
+HERE="`dirname \"$0\"`" # relative
+HERE="`( cd \"$HERE\" && pwd )`" # absolutized and normalized
+if [ -z "$HERE" ] ; then
+ exit 1
+fi
+
+source "${HERE}/stage.sh"
+source "${HERE}/maven-utils.sh"
+source "${HERE}/controller_utils.sh"
+source "${HERE}/watchdog.sh"
+STAGE=$1
+
+# =============================================================================
+# Step 0: Check & print environment information & configure env
+# =============================================================================
+
+# check preconditions
+if [ -z "$DEBUG_FILES_OUTPUT_DIR" ] ; then
+ echo "ERROR: Environment variable 'DEBUG_FILES_OUTPUT_DIR' is not set but expected by test_controller.sh. Tests may use this location to store debugging files."
+ exit 1
+fi
+
+if [ ! -d "$DEBUG_FILES_OUTPUT_DIR" ] ; then
+ echo "ERROR: Environment variable DEBUG_FILES_OUTPUT_DIR=$DEBUG_FILES_OUTPUT_DIR points to a directory that does not exist"
+ exit 1
+fi
+
+if [ -z "$STAGE" ] ; then
+ echo "ERROR: Environment variable 'STAGE' is not set but expected by test_controller.sh. The variable refers to the stage being executed."
+ exit 1
+fi
+
+echo "Printing environment information"
+
+echo "PATH=$PATH"
+run_mvn -version
+echo "Commit: $(git rev-parse HEAD)"
+print_system_info
+
+# enable coredumps for this process
+ulimit -c unlimited
+
+# configure JVMs to produce heap dumps
+export JAVA_TOOL_OPTIONS="-XX:+HeapDumpOnOutOfMemoryError"
+
+# some tests provide additional logs if they find this variable
+export IS_CI=true
+
+# =============================================================================
+# Step 1: Rebuild jars and install Flink to local maven repository
+# =============================================================================
+
+LOG4J_PROPERTIES=${HERE}/log4j.properties
+MVN_LOGGING_OPTIONS="-Dlog.dir=${DEBUG_FILES_OUTPUT_DIR} -Dlog4j.configurationFile=file://$LOG4J_PROPERTIES"
+
+MVN_COMMON_OPTIONS="-Dflink.forkCount=2 -Dflink.forkCountTestPackage=2 -Dfast -Pskip-webui-build $MVN_LOGGING_OPTIONS"
+MVN_COMPILE_OPTIONS="-DskipTests"
+MVN_COMPILE_MODULES=$(get_compile_modules_for_stage ${STAGE})
+
+CALLBACK_ON_TIMEOUT="print_stacktraces | tee ${DEBUG_FILES_OUTPUT_DIR}/jps-traces.out"
+run_with_watchdog "run_mvn $MVN_COMMON_OPTIONS $MVN_COMPILE_OPTIONS $PROFILE $MVN_COMPILE_MODULES install" $CALLBACK_ON_TIMEOUT
+EXIT_CODE=$?
+
+if [ $EXIT_CODE != 0 ]; then
+ echo "=============================================================================="
+ echo "Compilation failure detected, skipping test execution."
+ echo "=============================================================================="
+ exit $EXIT_CODE
+fi
+
+
+# =============================================================================
+# Step 2: Run tests
+# =============================================================================
+
+if [ $STAGE == $STAGE_PYTHON ]; then
+ run_with_watchdog "./flink-python/dev/lint-python.sh" $CALLBACK_ON_TIMEOUT
+ EXIT_CODE=$?
+else
+ MVN_TEST_OPTIONS="-Dflink.tests.with-openssl"
+ MVN_TEST_MODULES=$(get_test_modules_for_stage ${STAGE})
+
+ run_with_watchdog "run_mvn $MVN_COMMON_OPTIONS $MVN_TEST_OPTIONS $PROFILE $MVN_TEST_MODULES verify" $CALLBACK_ON_TIMEOUT
+ EXIT_CODE=$?
+fi
+
+# =============================================================================
+# Step 3: Put extra logs into $DEBUG_FILES_OUTPUT_DIR
+# =============================================================================
+
+# only misc builds flink-yarn-tests
+case $STAGE in
+ (misc)
+ put_yarn_logs_to_artifacts
+ ;;
+esac
+
+collect_coredumps $(pwd) $DEBUG_FILES_OUTPUT_DIR
+
+# Exit code for CI build success/failure
+exit $EXIT_CODE
diff --git a/tools/ci/watchdog.sh b/tools/ci/watchdog.sh
new file mode 100755
index 0000000000000..1e03430f84e56
--- /dev/null
+++ b/tools/ci/watchdog.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#
+# This file contains a watchdog tool to monitor a task and potentially kill it after
+# not producing any output for $MAX_NO_OUTPUT seconds.
+#
+
+# Number of seconds w/o output before printing a stack trace and killing the watched process
+MAX_NO_OUTPUT=${MAX_NO_OUTPUT:-900}
+
+# Number of seconds to sleep before checking the output again
+SLEEP_TIME=${SLEEP_TIME:-20}
+
+# Internal fields
+CMD_OUT="/tmp/watchdog.out"
+CMD_PID="/tmp/watchdog.pid"
+CMD_EXIT="/tmp/watchdog.exit"
+
+
+# =============================================
+# Utility functions
+# =============================================
+
+mod_time () {
+ echo `stat -c "%Y" $CMD_OUT`
+}
+
+the_time() {
+ echo `date +%s`
+}
+
+# watchdog process
+
+watchdog () {
+ touch $CMD_OUT
+
+ while true; do
+ sleep $SLEEP_TIME
+
+ time_diff=$((`the_time` - `mod_time`))
+
+ if [ $time_diff -ge $MAX_NO_OUTPUT ]; then
+ echo "=============================================================================="
+ echo "Process produced no output for ${MAX_NO_OUTPUT} seconds."
+ echo "=============================================================================="
+
+ # run timeout callback
+ $CALLBACK_ON_TIMEOUT
+
+ echo "Killing process with pid=$(<$CMD_PID) and all descendants"
+ pkill -P $(<$CMD_PID) # kill descendants
+ kill $(<$CMD_PID) # kill process itself
+
+ exit 1
+ fi
+ done
+}
+
+
+# =============================================
+# main function
+# =============================================
+
+# entrypoint
+function run_with_watchdog() {
+ local cmd="$1"
+ local CALLBACK_ON_TIMEOUT="$2"
+
+ watchdog &
+ WD_PID=$!
+ echo "STARTED watchdog (${WD_PID})."
+
+ echo "RUNNING '${cmd}'."
+
+ # Run $CMD and pipe output to $CMD_OUT for the watchdog. The PID is written to $CMD_PID to
+ # allow the watchdog to kill $CMD if it is not producing any output anymore. $CMD_EXIT contains
+ # the exit code. This is important for CI build life-cycle (success/failure).
+ ( $cmd & PID=$! ; echo $PID >&3 ; wait $PID ; echo $? >&4 ) 3>$CMD_PID 4>$CMD_EXIT | tee $CMD_OUT
+
+ EXIT_CODE=$(<$CMD_EXIT)
+
+ echo "Process exited with EXIT CODE: ${EXIT_CODE}."
+
+ # Make sure to kill the watchdog in any case after $CMD has completed
+ echo "Trying to KILL watchdog (${WD_PID})."
+ ( kill $WD_PID 2>&1 ) > /dev/null
+
+ rm $CMD_PID
+ rm $CMD_EXIT
+
+ return $EXIT_CODE
+}
+
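For reference, a minimal sketch of how the new watchdog helper is meant to be driven, mirroring the calls in tools/ci/test_controller.sh above. The Maven goal, the 600-second timeout, and the jps callback below are illustrative placeholders, not values taken from this patch.

#!/usr/bin/env bash
# Illustrative sketch (assumptions: run from the repository root, Maven on PATH).
# Override the watchdog defaults (900s timeout, 20s poll interval) before sourcing.
export MAX_NO_OUTPUT=600
export SLEEP_TIME=20

source "tools/ci/watchdog.sh"   # provides run_with_watchdog

# Callback executed once the watched command has been silent for MAX_NO_OUTPUT seconds.
CALLBACK_ON_TIMEOUT="jps"

# Runs the command, kills it (and its descendants) on timeout, and returns its exit code.
run_with_watchdog "mvn -B -DskipTests install" "$CALLBACK_ON_TIMEOUT"
echo "Watched command exited with code $?"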