diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6eafe797b24b..a88b929fb856 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -274,16 +274,28 @@ jobs: test_gpu: needs: [setup, build_all] if: fromJson(needs.setup.outputs.should-run) + env: + BUILD_DIR: ${{ needs.build_all.outputs.build-dir }} + BUILD_DIR_ARCHIVE: ${{ needs.build_all.outputs.build-dir-archive }} + BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_all.outputs.build-dir-gcs-artifact }} + strategy: + matrix: + target: + - runner-type: gpu + iree-ctest-label-regex: ^requires-gpu|^driver=vulkan$|^driver=cuda$ + iree-nvidia-sm80-tests-disable: 1 + - runner-type: a100 + iree-ctest-label-regex: ^requires-gpu-sm80$ + iree-nvidia-sm80-tests-disable: 0 + # Run other jobs even if one fails. + fail-fast: false + name: test_${{ matrix.target.runner-type }} runs-on: - self-hosted # must come first - runner-group=${{ needs.setup.outputs.runner-group }} - environment=${{ needs.setup.outputs.runner-env }} - - gpu + - ${{ matrix.target.runner-type }} - os-family=Linux - env: - BUILD_DIR: ${{ needs.build_all.outputs.build-dir }} - BUILD_DIR_ARCHIVE: ${{ needs.build_all.outputs.build-dir-archive }} - BUILD_DIR_GCS_ARTIFACT: ${{ needs.build_all.outputs.build-dir-gcs-artifact }} steps: - name: "Checking out repository" uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 @@ -297,20 +309,25 @@ jobs: run: gcloud storage cp "${BUILD_DIR_GCS_ARTIFACT}" "${BUILD_DIR_ARCHIVE}" - name: "Extracting build dir archive" run: tar -xf "${BUILD_DIR_ARCHIVE}" - - name: "Testing with GPU" + - name: "Testing all" + env: + IREE_NVIDIA_SM80_TESTS_DISABLE: ${{ matrix.target.iree-nvidia-sm80-tests-disable }} + IREE_CTEST_LABEL_REGEX: ${{ matrix.target.iree-ctest-label-regex }} run: | ./build_tools/github_actions/docker_run.sh \ - --env IREE_VULKAN_F16_DISABLE=0 \ - --env IREE_CUDA_DISABLE=0 \ - --env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \ - --env CTEST_PARALLEL_LEVEL=2 \ - --gpus all \ - --env NVIDIA_DRIVER_CAPABILITIES=all \ - gcr.io/iree-oss/nvidia@sha256:de6e4453614aa48059fd611d7e7255f4d6ac27ac29a47aabdc04191ec1758533 \ - bash -euo pipefail -c \ - "./build_tools/scripts/check_cuda.sh - ./build_tools/scripts/check_vulkan.sh - ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}" + --env IREE_NVIDIA_SM80_TESTS_DISABLE \ + --env IREE_CTEST_LABEL_REGEX \ + --env IREE_VULKAN_F16_DISABLE=0 \ + --env IREE_CUDA_DISABLE=0 \ + --env IREE_NVIDIA_GPU_TESTS_DISABLE=0 \ + --env CTEST_PARALLEL_LEVEL=2 \ + --env NVIDIA_DRIVER_CAPABILITIES=all \ + --gpus all \ + gcr.io/iree-oss/nvidia@sha256:1717431fd46b8b1e96d95fa72508e3e3eacb5c95f1245b9b7dbeec23ae823d02 \ + bash -euo pipefail -c \ + "./build_tools/scripts/check_cuda.sh + ./build_tools/scripts/check_vulkan.sh + ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}" ################################## Subsets ################################### # Jobs that build some subset of IREE @@ -1003,11 +1020,15 @@ jobs: # Basic - build_all + - test_all + - build_test_all_bazel + + # Platforms - build_test_all_windows - build_test_all_macos_arm64 - build_test_all_macos_x86_64 - - build_test_all_bazel - - test_all + + # Accelerators - test_gpu # Subsets diff --git a/build_tools/bazel/build_core.sh b/build_tools/bazel/build_core.sh index ccad1ba5111c..723b70830da6 100755 --- a/build_tools/bazel/build_core.sh +++ b/build_tools/bazel/build_core.sh @@ -63,10 +63,10 @@ default_test_tag_filters+=("-vulkan_uses_vk_khr_shader_float16_int8") # CUDA CI testing disabled until we setup a target for it. default_test_tag_filters+=("-driver=cuda") -if [[ "${IREE_VULKAN_DISABLE?}" == 1 ]]; then +if (( IREE_VULKAN_DISABLE == 1 )); then default_test_tag_filters+=("-driver=vulkan") fi -if [[ "${IREE_NVIDIA_GPU_TESTS_DISABLE?}" == 1 ]]; then +if (( IREE_NVIDIA_GPU_TESTS_DISABLE == 1 )); then default_test_tag_filters+=("-requires-gpu-nvidia" "-requires-gpu-sm80") fi diff --git a/build_tools/cmake/build_and_test_asan.sh b/build_tools/cmake/build_and_test_asan.sh index 30e54180962f..7ddd159c5300 100755 --- a/build_tools/cmake/build_and_test_asan.sh +++ b/build_tools/cmake/build_and_test_asan.sh @@ -110,13 +110,13 @@ for asan_in_bytecode_modules_ON_OFF in OFF ON; do # IREE_VULKAN_DISABLE is handled separately as we run Vulkan and non-Vulkan # tests in separate ctest commands anyway. - if [[ "${IREE_CUDA_DISABLE?}" == 1 ]]; then + if (( IREE_CUDA_DISABLE == 1 )); then label_exclude_args+=("^driver=cuda$") fi - if [[ "${IREE_VULKAN_F16_DISABLE?}" == 1 ]]; then + if (( IREE_VULKAN_F16_DISABLE == 1 )); then label_exclude_args+=("^vulkan_uses_vk_khr_shader_float16_int8$") fi - if [[ "${IREE_NVIDIA_GPU_TESTS_DISABLE}" == 1 ]]; then + if (( IREE_NVIDIA_GPU_TESTS_DISABLE == 1 )); then label_exclude_args+=("^requires-gpu") fi @@ -138,7 +138,7 @@ for asan_in_bytecode_modules_ON_OFF in OFF ON; do echo "------------------" cmake --build . --target check-iree-dialects -- -k 0 - if [[ "${IREE_VULKAN_DISABLE?}" == 0 ]]; then + if (( IREE_VULKAN_DISABLE == 0 )); then echo "*** Running ctests that use the Vulkan driver, with LSAN disabled (IREE_BYTECODE_MODULE_ENABLE_ASAN=${asan_in_bytecode_modules_ON_OFF}) ***" echo "------------------" # Disable LeakSanitizer (LSAN) because of a history of issues with Swiftshader diff --git a/build_tools/cmake/ctest_all.sh b/build_tools/cmake/ctest_all.sh index 802cb0ba99ba..471bf11458b5 100755 --- a/build_tools/cmake/ctest_all.sh +++ b/build_tools/cmake/ctest_all.sh @@ -24,21 +24,25 @@ get_default_parallel_level() { } # Respect the user setting, but default to as many jobs as we have cores. -export CTEST_PARALLEL_LEVEL=${CTEST_PARALLEL_LEVEL:-$(get_default_parallel_level)} +export CTEST_PARALLEL_LEVEL="${CTEST_PARALLEL_LEVEL:-$(get_default_parallel_level)}" # Respect the user setting, but default to turning on Vulkan. -export IREE_VULKAN_DISABLE=${IREE_VULKAN_DISABLE:-0} +export IREE_VULKAN_DISABLE="${IREE_VULKAN_DISABLE:-0}" # Respect the user setting, but default to turning off CUDA. -export IREE_CUDA_DISABLE=${IREE_CUDA_DISABLE:-1} +export IREE_CUDA_DISABLE="${IREE_CUDA_DISABLE:-1}" # The VK_KHR_shader_float16_int8 extension is optional prior to Vulkan 1.2. # We test on SwiftShader as a baseline, which does not support this extension. -export IREE_VULKAN_F16_DISABLE=${IREE_VULKAN_F16_DISABLE:-1} +export IREE_VULKAN_F16_DISABLE="${IREE_VULKAN_F16_DISABLE:-1}" # Respect the user setting, but default to skipping tests that require Nvidia GPU. -export IREE_NVIDIA_GPU_TESTS_DISABLE=${IREE_NVIDIA_GPU_TESTS_DISABLE:-1} +export IREE_NVIDIA_GPU_TESTS_DISABLE="${IREE_NVIDIA_GPU_TESTS_DISABLE:-1}" +# Respect the user setting, but default to skipping tests that require SM80 Nvidia GPU. +export IREE_NVIDIA_SM80_TESTS_DISABLE="${IREE_NVIDIA_SM80_TESTS_DISABLE:-1}" # Respect the user setting, default to no --repeat-until-fail. -export IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT=${IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT:-} +export IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT="${IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT:-}" # Respect the user setting, default to no --tests-regex. -export IREE_CTEST_TESTS_REGEX=${IREE_CTEST_TESTS_REGEX:-} +export IREE_CTEST_TESTS_REGEX="${IREE_CTEST_TESTS_REGEX:-}" +# Respect the user setting, default to no --label-regex +export IREE_CTEST_LABEL_REGEX="${IREE_CTEST_LABEL_REGEX:-}" # Tests to exclude by label. In addition to any custom labels (which are carried # over from Bazel tags), every test should be labeled with its directory. @@ -62,19 +66,24 @@ declare -a label_exclude_args=( # ^bindings/ ) -if [[ "${IREE_VULKAN_DISABLE}" == 1 ]]; then + +if (( IREE_VULKAN_DISABLE == 1 )); then label_exclude_args+=("^driver=vulkan$") fi -if [[ "${IREE_CUDA_DISABLE}" == 1 ]]; then +if (( IREE_CUDA_DISABLE == 1 )); then label_exclude_args+=("^driver=cuda$") fi -if [[ "${IREE_VULKAN_F16_DISABLE}" == 1 ]]; then +if (( IREE_VULKAN_F16_DISABLE == 1 )); then label_exclude_args+=("^vulkan_uses_vk_khr_shader_float16_int8$") fi -if [[ "${IREE_NVIDIA_GPU_TESTS_DISABLE}" == 1 ]]; then - label_exclude_args+=("^requires-gpu$") +if (( IREE_NVIDIA_GPU_TESTS_DISABLE == 1 )); then + label_exclude_args+=("^requires-gpu") +fi +if (( IREE_NVIDIA_SM80_TESTS_DISABLE == 1 )); then + label_exclude_args+=("^requires-gpu-sm80$") fi + IFS=',' read -ra extra_label_exclude_args <<< "${IREE_EXTRA_COMMA_SEPARATED_CTEST_LABELS_TO_EXCLUDE:-}" label_exclude_args+=(${extra_label_exclude_args[@]}) @@ -83,7 +92,7 @@ label_exclude_args+=(${extra_label_exclude_args[@]}) # platforms it doesn't support, but that would require editing through layers # of CMake functions. Hopefully this list stays very short. declare -a excluded_tests=() -if [[ "$OSTYPE" =~ ^msys ]]; then +if [[ "${OSTYPE}" =~ ^msys ]]; then # These tests are failing on Windows. excluded_tests+=( # TODO(#11077): INVALID_ARGUMENT: argument/result signature mismatch @@ -99,7 +108,7 @@ if [[ "$OSTYPE" =~ ^msys ]]; then # TODO(#11070): Fix argument/result signature mismatch "iree/tests/e2e/tosa_ops/check_vmvx_local-sync_microkernels_fully_connected.mlir" ) -elif [[ "$OSTYPE" =~ ^darwin ]]; then +elif [[ "${OSTYPE}" =~ ^darwin ]]; then excluded_tests+=( #TODO(#12496): Remove after fixing the test on macOS "iree/compiler/bindings/c/loader_test" @@ -129,6 +138,10 @@ if [[ -n "${IREE_CTEST_TESTS_REGEX}" ]]; then ctest_args+=("--tests-regex ${IREE_CTEST_TESTS_REGEX}") fi +if [[ -n "${IREE_CTEST_LABEL_REGEX}" ]]; then + ctest_args+=("--label-regex ${IREE_CTEST_LABEL_REGEX}") +fi + if [[ -n "${IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT}" ]]; then ctest_args+=("--repeat-until-fail ${IREE_CTEST_REPEAT_UNTIL_FAIL_COUNT}") fi diff --git a/build_tools/cmake/test.sh b/build_tools/cmake/test.sh index e3607f2e9a26..511bb21aba95 100755 --- a/build_tools/cmake/test.sh +++ b/build_tools/cmake/test.sh @@ -47,13 +47,13 @@ declare -a label_exclude_args=( # ^bindings/ ) -if [[ "${IREE_VULKAN_DISABLE?}" == 1 ]]; then +if (( IREE_VULKAN_DISABLE == 1 )); then label_exclude_args+=("^driver=vulkan$") fi -if [[ "${IREE_CUDA_DISABLE?}" == 1 ]]; then +if (( IREE_CUDA_DISABLE == 1 )); then label_exclude_args+=("^driver=cuda$") fi -if [[ "${IREE_VULKAN_F16_DISABLE?}" == 1 ]]; then +if (( IREE_VULKAN_F16_DISABLE == 1 )); then label_exclude_args+=("^vulkan_uses_vk_khr_shader_float16_int8$") fi