diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index aa541079b93e2..bd6f3a87c5ab4 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -13,7 +13,7 @@ from typing import Dict, List, Tuple, Optional -CUDA_ARCHES = ["10.2", "11.3", "11.5", "11.6"] +CUDA_ARCHES = ["10.2", "11.3", "11.6"] ROCM_ARCHES = ["4.5.2", "5.0"] diff --git a/.github/workflows/generated-linux-binary-conda-nightly.yml b/.github/workflows/generated-linux-binary-conda-nightly.yml index 63861bbe87c13..2a057f2a3fe8b 100644 --- a/.github/workflows/generated-linux-binary-conda-nightly.yml +++ b/.github/workflows/generated-linux-binary-conda-nightly.yml @@ -1076,7 +1076,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_5-build: + conda-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1084,10 +1084,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1180,7 +1180,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_7-cuda11_5 + name: conda-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -1203,19 +1203,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_5-test: # Testing + conda-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_5-build + needs: conda-py3_7-cuda11_6-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1242,7 +1242,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_5 + name: conda-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1335,18 +1335,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_5-upload: # Uploading + conda-py3_7-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_5-test + needs: conda-py3_7-cuda11_6-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1375,7 +1375,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_5 + name: conda-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -1428,7 +1428,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_6-build: + conda-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1436,12 +1436,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -1486,9 +1485,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -1532,7 +1528,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_7-cuda11_6 + name: conda-py3_8-cpu retention-days: 14 if-no-files-found: error path: @@ -1555,21 +1551,20 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_6-test: # Testing + conda-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-build - runs-on: linux.4xlarge.nvidia.gpu + needs: conda-py3_8-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -1594,7 +1589,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_6 + name: conda-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1619,17 +1614,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - name: Pull Docker image run: | retry () { @@ -1687,20 +1671,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_6-upload: # Uploading + conda-py3_8-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-test + needs: conda-py3_8-cpu-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -1727,7 +1710,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_7-cuda11_6 + name: conda-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -1780,7 +1763,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cpu-build: + conda-py3_8-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1788,9 +1771,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1880,7 +1864,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_8-cpu + name: conda-py3_8-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -1903,18 +1887,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cpu-test: # Testing + conda-py3_8-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-build - runs-on: linux.4xlarge + needs: conda-py3_8-cuda10_2-build + runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -1941,7 +1926,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cpu + name: conda-py3_8-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1966,6 +1951,17 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a + name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + with: + timeout_minutes: 10 + max_attempts: 3 + command: | + set -ex + pushd pytorch + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + popd - name: Pull Docker image run: | retry () { @@ -2023,17 +2019,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cpu-upload: # Uploading + conda-py3_8-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-test + needs: conda-py3_8-cuda10_2-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2062,7 +2059,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cpu + name: conda-py3_8-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2115,7 +2112,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda10_2-build: + conda-py3_8-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2123,10 +2120,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2173,6 +2170,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -2216,7 +2216,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_8-cuda10_2 + name: conda-py3_8-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -2239,19 +2239,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda10_2-test: # Testing + conda-py3_8-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda10_2-build + needs: conda-py3_8-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2278,7 +2278,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda10_2 + name: conda-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2371,18 +2371,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda10_2-upload: # Uploading + conda-py3_8-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda10_2-test + needs: conda-py3_8-cuda11_3-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2411,7 +2411,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda10_2 + name: conda-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2464,7 +2464,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda11_3-build: + conda-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2472,10 +2472,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2568,7 +2568,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_8-cuda11_3 + name: conda-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -2591,19 +2591,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda11_3-test: # Testing + conda-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-build + needs: conda-py3_8-cuda11_6-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2630,7 +2630,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_3 + name: conda-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2723,18 +2723,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda11_3-upload: # Uploading + conda-py3_8-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-test + needs: conda-py3_8-cuda11_6-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2763,7 +2763,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_3 + name: conda-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2816,7 +2816,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda11_5-build: + conda-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2824,12 +2824,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2874,9 +2873,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -2920,1395 +2916,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_8-cuda11_5 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_5-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_5-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_5 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_5-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_5-test - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_5 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_6-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: conda-py3_8-cuda11_6 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_6-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_6-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-test - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: conda-py3_9-cpu - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-build - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cpu-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-test - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cuda10_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: conda-py3_9-cuda10_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cuda10_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda10_2-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cuda10_2 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cuda10_2-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda10_2-test - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_9-cuda10_2 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_9-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/conda/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: conda-py3_9-cuda11_3 + name: conda-py3_9-cpu retention-days: 14 if-no-files-found: error path: @@ -4331,19 +2939,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_3-test: # Testing + conda-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-build - runs-on: linux.4xlarge.nvidia.gpu + needs: conda-py3_9-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -4370,7 +2977,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_3 + name: conda-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4395,17 +3002,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - name: Pull Docker image run: | retry () { @@ -4463,18 +3059,17 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_3-upload: # Uploading + conda-py3_9-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-test + needs: conda-py3_9-cpu-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -4503,7 +3098,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_3 + name: conda-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -4556,7 +3151,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_5-build: + conda-py3_9-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -4564,10 +3159,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -4614,9 +3209,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -4660,7 +3252,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -4683,19 +3275,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_5-test: # Testing + conda-py3_9-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_5-build + needs: conda-py3_9-cuda10_2-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -4722,7 +3314,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4815,18 +3407,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_5-upload: # Uploading + conda-py3_9-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_5-test + needs: conda-py3_9-cuda10_2-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -4855,7 +3447,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -4908,7 +3500,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_6-build: + conda-py3_9-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -4916,10 +3508,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5012,7 +3604,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -5035,19 +3627,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_6-test: # Testing + conda-py3_9-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-build + needs: conda-py3_9-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5074,7 +3666,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -5167,18 +3759,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_6-upload: # Uploading + conda-py3_9-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-test + needs: conda-py3_9-cuda11_3-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5207,7 +3799,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -5260,7 +3852,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cpu-build: + conda-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -5268,11 +3860,12 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -5317,6 +3910,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -5360,7 +3956,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -5383,20 +3979,21 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cpu-test: # Testing + conda-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-build - runs-on: linux.4xlarge + needs: conda-py3_9-cuda11_6-build + runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -5421,7 +4018,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -5446,6 +4043,17 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a + name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + with: + timeout_minutes: 10 + max_attempts: 3 + command: | + set -ex + pushd pytorch + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + popd - name: Pull Docker image run: | retry () { @@ -5503,19 +4111,20 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cpu-upload: # Uploading + conda-py3_9-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-test + needs: conda-py3_9-cuda11_6-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/conda-builder:cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/conda-builder:cuda11.6 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -5542,7 +4151,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -5595,7 +4204,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda10_2-build: + conda-py3_10-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -5603,10 +4212,9 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -5696,7 +4304,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_10-cuda10_2 + name: conda-py3_10-cpu retention-days: 14 if-no-files-found: error path: @@ -5719,19 +4327,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda10_2-test: # Testing + conda-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda10_2-build - runs-on: linux.4xlarge.nvidia.gpu + needs: conda-py3_10-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -5758,7 +4365,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda10_2 + name: conda-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -5783,17 +4390,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - name: Pull Docker image run: | retry () { @@ -5851,18 +4447,17 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda10_2-upload: # Uploading + conda-py3_10-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda10_2-test + needs: conda-py3_10-cpu-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/conda-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -5891,7 +4486,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda10_2 + name: conda-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -5944,7 +4539,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_3-build: + conda-py3_10-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -5952,10 +4547,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6002,9 +4597,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -6048,7 +4640,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -6071,19 +4663,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_3-test: # Testing + conda-py3_10-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-build + needs: conda-py3_10-cuda10_2-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6110,7 +4702,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -6203,18 +4795,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_3-upload: # Uploading + conda-py3_10-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-test + needs: conda-py3_10-cuda10_2-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 + DOCKER_IMAGE: pytorch/conda-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6243,7 +4835,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -6296,7 +4888,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_5-build: + conda-py3_10-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -6304,10 +4896,10 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6400,7 +4992,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -6423,19 +5015,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_5-test: # Testing + conda-py3_10-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_5-build + needs: conda-py3_10-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6462,7 +5054,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -6555,18 +5147,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_5-upload: # Uploading + conda-py3_10-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_5-test + needs: conda-py3_10-cuda11_3-test env: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/conda-builder:cuda11.5 + DOCKER_IMAGE: pytorch/conda-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -6595,7 +5187,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} diff --git a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml index 515f48964c7b1..04336fe662a31 100644 --- a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml @@ -4220,1426 +4220,6 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - libtorch-cuda11_5-shared-with-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-shared-with-deps-cxx11-abi - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-with-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-cxx11-abi-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-with-deps-cxx11-abi-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-cxx11-abi-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-shared-without-deps-cxx11-abi - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-cxx11-abi-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-cxx11-abi-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-cxx11-abi-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-static-with-deps-cxx11-abi - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-cxx11-abi-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-cxx11-abi-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-cxx11-abi-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-cxx11-abi-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-static-without-deps-cxx11-abi - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-cxx11-abi-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-cxx11-abi-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-cxx11-abi-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-cxx11-abi-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: cxx11-abi - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-cxx11-abi - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af libtorch-cuda11_6-shared-with-deps-cxx11-abi-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge diff --git a/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml index c833e65a7572f..d45a00057b014 100644 --- a/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml +++ b/.github/workflows/generated-linux-binary-libtorch-pre-cxx11-nightly.yml @@ -4220,1426 +4220,6 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - libtorch-cuda11_5-shared-with-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-shared-with-deps-pre-cxx11 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-with-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-pre-cxx11-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-with-deps-pre-cxx11-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-pre-cxx11-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-shared-without-deps-pre-cxx11 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-pre-cxx11-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-pre-cxx11-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-pre-cxx11-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: shared-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-static-with-deps-pre-cxx11 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-pre-cxx11-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-pre-cxx11-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-pre-cxx11-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-with-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-pre-cxx11-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/libtorch/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: libtorch-cuda11_5-static-without-deps-pre-cxx11 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-pre-cxx11-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-pre-cxx11-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-pre-cxx11-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-pre-cxx11-test - env: - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 - SKIP_ALL_TESTS: 1 - LIBTORCH_VARIANT: static-without-deps - DESIRED_DEVTOOLSET: pre-cxx11 - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-pre-cxx11 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af libtorch-cuda11_6-shared-with-deps-pre-cxx11-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 56719d264e2dc..8d2ae2c7dd256 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -1076,7 +1076,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_5-build: + manywheel-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1084,10 +1084,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1180,7 +1180,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_7-cuda11_5 + name: manywheel-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -1203,19 +1203,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_5-test: # Testing + manywheel-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_5-build + needs: manywheel-py3_7-cuda11_6-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1242,7 +1242,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-cuda11_5 + name: manywheel-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1335,18 +1335,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_5-upload: # Uploading + manywheel-py3_7-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_5-test + needs: manywheel-py3_7-cuda11_6-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1375,7 +1375,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-cuda11_5 + name: manywheel-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -1428,7 +1428,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_6-build: + manywheel-py3_7-rocm4_5_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1436,10 +1436,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1486,9 +1486,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -1532,7 +1529,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_7-cuda11_6 + name: manywheel-py3_7-rocm4_5_2 retention-days: 14 if-no-files-found: error path: @@ -1555,46 +1552,62 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_6-test: # Testing + manywheel-py3_7-rocm4_5_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_6-build - runs-on: linux.4xlarge.nvidia.gpu + needs: manywheel-py3_7-rocm4_5_2-build + runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Set DOCKER_HOST + run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" + - name: Runner health check system info + if: always() + run: | + cat /etc/os-release || true + cat /etc/apt/sources.list.d/rocm.list || true + cat /opt/rocm/.info/version || true + whoami + - name: Runner health check rocm-smi + if: always() + run: | + rocm-smi + - name: Runner health check rocminfo + if: always() + run: | + rocminfo + - name: Runner health check GPU count + if: always() + run: | + ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') + if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then + echo "Failed to detect GPUs on the runner" + exit 1 + fi + - name: Runner health check disconnect on failure + if: ${{ failure() }} + run: | + killall runsvc.sh + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-cuda11_6 + name: manywheel-py3_7-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -1619,17 +1632,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - name: Pull Docker image run: | retry () { @@ -1669,16 +1674,6 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Kill containers, clean up images if: always() run: | @@ -1687,18 +1682,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-cuda11_6-upload: # Uploading + manywheel-py3_7-rocm4_5_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-cuda11_6-test + needs: manywheel-py3_7-rocm4_5_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1727,7 +1722,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-cuda11_6 + name: manywheel-py3_7-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -1780,7 +1775,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm4_5_2-build: + manywheel-py3_7-rocm5_0-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -1788,10 +1783,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1881,7 +1876,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_7-rocm4_5_2 + name: manywheel-py3_7-rocm5_0 retention-days: 14 if-no-files-found: error path: @@ -1904,19 +1899,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm4_5_2-test: # Testing + manywheel-py3_7-rocm5_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm4_5_2-build + needs: manywheel-py3_7-rocm5_0-build runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -1959,7 +1954,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-rocm4_5_2 + name: manywheel-py3_7-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2034,18 +2029,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm4_5_2-upload: # Uploading + manywheel-py3_7-rocm5_0-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm4_5_2-test + needs: manywheel-py3_7-rocm5_0-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.7" steps: @@ -2074,7 +2069,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-rocm4_5_2 + name: manywheel-py3_7-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2127,7 +2122,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm5_0-build: + manywheel-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2135,12 +2130,11 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2228,7 +2222,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_7-rocm5_0 + name: manywheel-py3_8-cpu retention-days: 14 if-no-files-found: error path: @@ -2251,62 +2245,45 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm5_0-test: # Testing + manywheel-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm5_0-build - runs-on: linux.rocm.gpu + needs: manywheel-py3_8-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@master + - name: Setup Linux + uses: ./.github/actions/setup-linux + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: Set DOCKER_HOST - run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" - - name: Runner health check system info - if: always() - run: | - cat /etc/os-release || true - cat /etc/apt/sources.list.d/rocm.list || true - cat /opt/rocm/.info/version || true - whoami - - name: Runner health check rocm-smi - if: always() - run: | - rocm-smi - - name: Runner health check rocminfo - if: always() - run: | - rocminfo - - name: Runner health check GPU count - if: always() - run: | - ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') - if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then - echo "Failed to detect GPUs on the runner" - exit 1 - fi - - name: Runner health check disconnect on failure - if: ${{ failure() }} - run: | - killall runsvc.sh - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-rocm5_0 + name: manywheel-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2331,9 +2308,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: ROCm set GPU_FLAG - run: | - echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - name: Pull Docker image run: | retry () { @@ -2373,7 +2347,17 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Kill containers, clean up images + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . + - name: Kill containers, clean up images if: always() run: | # ignore expansion of "docker ps -q" since it could be empty @@ -2381,20 +2365,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_7-rocm5_0-upload: # Uploading + manywheel-py3_8-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_7-rocm5_0-test + needs: manywheel-py3_8-cpu-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2421,7 +2404,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_7-rocm5_0 + name: manywheel-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2474,7 +2457,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cpu-build: + manywheel-py3_8-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2482,9 +2465,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2574,7 +2558,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_8-cpu + name: manywheel-py3_8-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -2597,18 +2581,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cpu-test: # Testing + manywheel-py3_8-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cpu-build - runs-on: linux.4xlarge + needs: manywheel-py3_8-cuda10_2-build + runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2635,7 +2620,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cpu + name: manywheel-py3_8-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2660,6 +2645,17 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a + name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + with: + timeout_minutes: 10 + max_attempts: 3 + command: | + set -ex + pushd pytorch + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + popd - name: Pull Docker image run: | retry () { @@ -2717,17 +2713,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cpu-upload: # Uploading + manywheel-py3_8-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cpu-test + needs: manywheel-py3_8-cuda10_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2756,7 +2753,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cpu + name: manywheel-py3_8-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2809,7 +2806,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda10_2-build: + manywheel-py3_8-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -2817,10 +2814,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2867,6 +2864,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -2910,7 +2910,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_8-cuda10_2 + name: manywheel-py3_8-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -2933,19 +2933,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda10_2-test: # Testing + manywheel-py3_8-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda10_2-build + needs: manywheel-py3_8-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2972,7 +2972,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda10_2 + name: manywheel-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3065,18 +3065,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda10_2-upload: # Uploading + manywheel-py3_8-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda10_2-test + needs: manywheel-py3_8-cuda11_3-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3105,7 +3105,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda10_2 + name: manywheel-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3158,7 +3158,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_3-build: + manywheel-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -3166,10 +3166,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3262,7 +3262,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_8-cuda11_3 + name: manywheel-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -3285,19 +3285,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_3-test: # Testing + manywheel-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_3-build + needs: manywheel-py3_8-cuda11_6-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3324,7 +3324,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda11_3 + name: manywheel-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3417,18 +3417,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_3-upload: # Uploading + manywheel-py3_8-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_3-test + needs: manywheel-py3_8-cuda11_6-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3457,7 +3457,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda11_3 + name: manywheel-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3510,7 +3510,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_5-build: + manywheel-py3_8-rocm4_5_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -3518,10 +3518,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3568,9 +3568,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -3614,7 +3611,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_8-cuda11_5 + name: manywheel-py3_8-rocm4_5_2 retention-days: 14 if-no-files-found: error path: @@ -3637,46 +3634,62 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_5-test: # Testing + manywheel-py3_8-rocm4_5_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_5-build - runs-on: linux.4xlarge.nvidia.gpu + needs: manywheel-py3_8-rocm4_5_2-build + runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Set DOCKER_HOST + run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" + - name: Runner health check system info + if: always() + run: | + cat /etc/os-release || true + cat /etc/apt/sources.list.d/rocm.list || true + cat /opt/rocm/.info/version || true + whoami + - name: Runner health check rocm-smi + if: always() + run: | + rocm-smi + - name: Runner health check rocminfo + if: always() + run: | + rocminfo + - name: Runner health check GPU count + if: always() + run: | + ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') + if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then + echo "Failed to detect GPUs on the runner" + exit 1 + fi + - name: Runner health check disconnect on failure + if: ${{ failure() }} + run: | + killall runsvc.sh + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda11_5 + name: manywheel-py3_8-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3701,17 +3714,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - name: Pull Docker image run: | retry () { @@ -3751,16 +3756,6 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Kill containers, clean up images if: always() run: | @@ -3769,18 +3764,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_5-upload: # Uploading + manywheel-py3_8-rocm4_5_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_5-test + needs: manywheel-py3_8-rocm4_5_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3809,7 +3804,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_8-cuda11_5 + name: manywheel-py3_8-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3862,7 +3857,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_6-build: + manywheel-py3_8-rocm5_0-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -3870,10 +3865,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -3920,9 +3915,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -3966,7 +3958,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_8-cuda11_6 + name: manywheel-py3_8-rocm5_0 retention-days: 14 if-no-files-found: error path: @@ -3989,1424 +3981,62 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_8-cuda11_6-test: # Testing + manywheel-py3_8-rocm5_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_6-build - runs-on: linux.4xlarge.nvidia.gpu + needs: manywheel-py3_8-rocm5_0-build + runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace + - name: Set DOCKER_HOST + run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" + - name: Runner health check system info if: always() run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images + cat /etc/os-release || true + cat /etc/apt/sources.list.d/rocm.list || true + cat /opt/rocm/.info/version || true + whoami + - name: Runner health check rocm-smi if: always() run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-cuda11_6-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-cuda11_6-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace + rocm-smi + - name: Runner health check rocminfo if: always() run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images + rocminfo + - name: Runner health check GPU count if: always() run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm4_5_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: manywheel-py3_8-rocm4_5_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm4_5_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm4_5_2-build - runs-on: linux.rocm.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: Set DOCKER_HOST - run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" - - name: Runner health check system info - if: always() - run: | - cat /etc/os-release || true - cat /etc/apt/sources.list.d/rocm.list || true - cat /opt/rocm/.info/version || true - whoami - - name: Runner health check rocm-smi - if: always() - run: | - rocm-smi - - name: Runner health check rocminfo - if: always() - run: | - rocminfo - - name: Runner health check GPU count - if: always() - run: | - ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') - if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then - echo "Failed to detect GPUs on the runner" - exit 1 - fi - - name: Runner health check disconnect on failure - if: ${{ failure() }} + ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') + if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then + echo "Failed to detect GPUs on the runner" + exit 1 + fi + - name: Runner health check disconnect on failure + if: ${{ failure() }} run: | killall runsvc.sh - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm4_5_2 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: ROCm set GPU_FLAG - run: | - echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm4_5_2-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm4_5_2-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm4_5_2 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm5_0-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: manywheel-py3_8-rocm5_0 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm5_0-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm5_0-build - runs-on: linux.rocm.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: Set DOCKER_HOST - run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" - - name: Runner health check system info - if: always() - run: | - cat /etc/os-release || true - cat /etc/apt/sources.list.d/rocm.list || true - cat /opt/rocm/.info/version || true - whoami - - name: Runner health check rocm-smi - if: always() - run: | - rocm-smi - - name: Runner health check rocminfo - if: always() - run: | - rocminfo - - name: Runner health check GPU count - if: always() - run: | - ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') - if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then - echo "Failed to detect GPUs on the runner" - exit 1 - fi - - name: Runner health check disconnect on failure - if: ${{ failure() }} - run: | - killall runsvc.sh - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm5_0 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: ROCm set GPU_FLAG - run: | - echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_8-rocm5_0-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_8-rocm5_0-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_8-rocm5_0 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: manywheel-py3_9-cpu - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cpu-build - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_9-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Test PyTorch binary - run: | - set -x - # shellcheck disable=SC2086,SC2090 - container_name=$(docker run \ - ${GPU_FLAG:-} \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - # Generate test script - docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-cpu-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cpu-test - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: manywheel-py3_9-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-cuda10_2-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: linux.4xlarge - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Build PyTorch binary - run: | - set -x - mkdir -p artifacts/ - container_name=$(docker run \ - -e BINARY_ENV_FILE \ - -e BUILDER_ROOT \ - -e BUILD_ENVIRONMENT \ - -e BUILD_SPLIT_CUDA \ - -e DESIRED_CUDA \ - -e DESIRED_DEVTOOLSET \ - -e DESIRED_PYTHON \ - -e GPU_ARCH_TYPE \ - -e GPU_ARCH_VERSION \ - -e IS_GHA \ - -e LIBTORCH_VARIANT \ - -e PACKAGE_TYPE \ - -e PYTORCH_FINAL_PACKAGE_DIR \ - -e PYTORCH_ROOT \ - -e SKIP_ALL_TESTS \ - --tty \ - --detach \ - -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \ - -v "${GITHUB_WORKSPACE}/builder:/builder" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w / \ - "${DOCKER_IMAGE}" - ) - docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh" - - name: Chown artifacts - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - uses: seemethere/upload-artifact-s3@v4 - with: - name: manywheel-py3_9-cuda10_2 - retention-days: 14 - if-no-files-found: error - path: - ${{ runner.temp }}/artifacts/* - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - manywheel-py3_9-cuda10_2-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda10_2-build - runs-on: linux.4xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda10_2 + name: manywheel-py3_8-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -5431,17 +4061,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - name: Pull Docker image run: | retry () { @@ -5481,16 +4103,6 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Kill containers, clean up images if: always() run: | @@ -5499,20 +4111,20 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda10_2-upload: # Uploading + manywheel-py3_8-rocm5_0-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda10_2-test + needs: manywheel-py3_8-rocm5_0-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -5539,7 +4151,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda10_2 + name: manywheel-py3_8-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -5592,7 +4204,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_3-build: + manywheel-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -5600,10 +4212,9 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5650,9 +4261,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -5696,7 +4304,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_9-cuda11_3 + name: manywheel-py3_9-cpu retention-days: 14 if-no-files-found: error path: @@ -5719,19 +4327,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_3-test: # Testing + manywheel-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_3-build - runs-on: linux.4xlarge.nvidia.gpu + needs: manywheel-py3_9-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5758,7 +4365,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_3 + name: manywheel-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -5783,17 +4390,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - name: Pull Docker image run: | retry () { @@ -5851,18 +4447,17 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_3-upload: # Uploading + manywheel-py3_9-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_3-test + needs: manywheel-py3_9-cpu-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -5891,7 +4486,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_3 + name: manywheel-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -5944,7 +4539,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_5-build: + manywheel-py3_9-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -5952,10 +4547,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6002,9 +4597,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -6048,7 +4640,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_9-cuda11_5 + name: manywheel-py3_9-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -6071,19 +4663,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_5-test: # Testing + manywheel-py3_9-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_5-build + needs: manywheel-py3_9-cuda10_2-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6110,7 +4702,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_5 + name: manywheel-py3_9-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -6203,18 +4795,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_5-upload: # Uploading + manywheel-py3_9-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_5-test + needs: manywheel-py3_9-cuda10_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6243,7 +4835,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_5 + name: manywheel-py3_9-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -6296,7 +4888,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_6-build: + manywheel-py3_9-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -6304,10 +4896,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6400,7 +4992,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_9-cuda11_6 + name: manywheel-py3_9-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -6423,19 +5015,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_6-test: # Testing + manywheel-py3_9-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_6-build + needs: manywheel-py3_9-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6462,7 +5054,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_6 + name: manywheel-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -6555,18 +5147,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-cuda11_6-upload: # Uploading + manywheel-py3_9-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-cuda11_6-test + needs: manywheel-py3_9-cuda11_3-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6595,7 +5187,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-cuda11_6 + name: manywheel-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -6648,7 +5240,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm4_5_2-build: + manywheel-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -6656,10 +5248,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6706,6 +5298,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - name: Set BUILD_SPLIT_CUDA + run: | + echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -6749,7 +5344,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_9-rocm4_5_2 + name: manywheel-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: @@ -6772,62 +5367,46 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm4_5_2-test: # Testing + manywheel-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm4_5_2-build - runs-on: linux.rocm.gpu + needs: manywheel-py3_9-cuda11_6-build + runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@master + - name: Setup Linux + uses: ./.github/actions/setup-linux + - name: Chown workspace + run: | + retry () { + "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") + } + retry docker pull "${ALPINE_IMAGE}" + # Ensure the working directory gets chowned back to the current user + docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: Set DOCKER_HOST - run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" - - name: Runner health check system info - if: always() - run: | - cat /etc/os-release || true - cat /etc/apt/sources.list.d/rocm.list || true - cat /opt/rocm/.info/version || true - whoami - - name: Runner health check rocm-smi - if: always() - run: | - rocm-smi - - name: Runner health check rocminfo - if: always() - run: | - rocminfo - - name: Runner health check GPU count - if: always() - run: | - ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') - if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then - echo "Failed to detect GPUs on the runner" - exit 1 - fi - - name: Runner health check disconnect on failure - if: ${{ failure() }} - run: | - killall runsvc.sh - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: seemethere/add-github-ssh-key@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-rocm4_5_2 + name: manywheel-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -6852,9 +5431,17 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: ROCm set GPU_FLAG - run: | - echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" + - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a + name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + with: + timeout_minutes: 10 + max_attempts: 3 + command: | + set -ex + pushd pytorch + bash .github/scripts/install_nvidia_utils_linux.sh + echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" + popd - name: Pull Docker image run: | retry () { @@ -6894,6 +5481,16 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" + - name: Hold runner for 2 hours or until ssh sessions have drained + working-directory: pytorch/ + # Always hold for active ssh sessions + if: always() + run: .github/scripts/wait_for_ssh_to_drain.sh + - name: Chown workspace + if: always() + run: | + # Ensure the working directory gets chowned back to the current user + docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Kill containers, clean up images if: always() run: | @@ -6902,18 +5499,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm4_5_2-upload: # Uploading + manywheel-py3_9-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm4_5_2-test + needs: manywheel-py3_9-cuda11_6-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm4.5.2 - GPU_ARCH_VERSION: 4.5.2 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.6 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -6942,7 +5539,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-rocm4_5_2 + name: manywheel-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -6995,7 +5592,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm5_0-build: + manywheel-py3_9-rocm4_5_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -7003,10 +5600,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -7096,7 +5693,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_9-rocm5_0 + name: manywheel-py3_9-rocm4_5_2 retention-days: 14 if-no-files-found: error path: @@ -7119,19 +5716,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm5_0-test: # Testing + manywheel-py3_9-rocm4_5_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm5_0-build + needs: manywheel-py3_9-rocm4_5_2-build runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -7174,7 +5771,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-rocm5_0 + name: manywheel-py3_9-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -7249,18 +5846,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_9-rocm5_0-upload: # Uploading + manywheel-py3_9-rocm4_5_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_9-rocm5_0-test + needs: manywheel-py3_9-rocm4_5_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm5.0 - GPU_ARCH_VERSION: 5.0 + DESIRED_CUDA: rocm4.5.2 + GPU_ARCH_VERSION: 4.5.2 GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 + DOCKER_IMAGE: pytorch/manylinux-builder:rocm4.5.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -7289,7 +5886,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_9-rocm5_0 + name: manywheel-py3_9-rocm4_5_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -7342,7 +5939,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cpu-build: + manywheel-py3_9-rocm5_0-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -7350,11 +5947,12 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -7442,7 +6040,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_10-cpu + name: manywheel-py3_9-rocm5_0 retention-days: 14 if-no-files-found: error path: @@ -7465,45 +6063,62 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cpu-test: # Testing + manywheel-py3_9-rocm5_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cpu-build - runs-on: linux.4xlarge + needs: manywheel-py3_9-rocm5_0-build + runs-on: linux.rocm.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Clean workspace run: | rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Set DOCKER_HOST + run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}" + - name: Runner health check system info + if: always() + run: | + cat /etc/os-release || true + cat /etc/apt/sources.list.d/rocm.list || true + cat /opt/rocm/.info/version || true + whoami + - name: Runner health check rocm-smi + if: always() + run: | + rocm-smi + - name: Runner health check rocminfo + if: always() + run: | + rocminfo + - name: Runner health check GPU count + if: always() + run: | + ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx') + if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then + echo "Failed to detect GPUs on the runner" + exit 1 + fi + - name: Runner health check disconnect on failure + if: ${{ failure() }} + run: | + killall runsvc.sh + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cpu + name: manywheel-py3_9-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -7528,6 +6143,9 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - name: Pull Docker image run: | retry () { @@ -7567,16 +6185,6 @@ jobs: # Generate test script docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh" docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh" - - name: Hold runner for 2 hours or until ssh sessions have drained - working-directory: pytorch/ - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - name: Kill containers, clean up images if: always() run: | @@ -7585,19 +6193,20 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cpu-upload: # Uploading + manywheel-py3_9-rocm5_0-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cpu-test + needs: manywheel-py3_9-rocm5_0-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu + DESIRED_CUDA: rocm5.0 + GPU_ARCH_VERSION: 5.0 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: pytorch/manylinux-builder:rocm5.0 SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -7624,7 +6233,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cpu + name: manywheel-py3_9-rocm5_0 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -7677,7 +6286,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda10_2-build: + manywheel-py3_10-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -7685,10 +6294,9 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -7778,7 +6386,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_10-cuda10_2 + name: manywheel-py3_10-cpu retention-days: 14 if-no-files-found: error path: @@ -7801,19 +6409,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda10_2-test: # Testing + manywheel-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda10_2-build - runs-on: linux.4xlarge.nvidia.gpu + needs: manywheel-py3_10-cpu-build + runs-on: linux.4xlarge timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -7840,7 +6447,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda10_2 + name: manywheel-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -7865,17 +6472,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG - with: - timeout_minutes: 10 - max_attempts: 3 - command: | - set -ex - pushd pytorch - bash .github/scripts/install_nvidia_utils_linux.sh - echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}" - popd - name: Pull Docker image run: | retry () { @@ -7933,18 +6529,17 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda10_2-upload: # Uploading + manywheel-py3_10-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda10_2-test + needs: manywheel-py3_10-cpu-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu102 - GPU_ARCH_VERSION: 10.2 - GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: pytorch/manylinux-builder:cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -7973,7 +6568,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda10_2 + name: manywheel-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -8026,7 +6621,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_3-build: + manywheel-py3_10-cuda10_2-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -8034,10 +6629,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8084,9 +6679,6 @@ jobs: # Remove any artifacts from the previous checkouts git clean -fxd working-directory: builder - - name: Set BUILD_SPLIT_CUDA - run: | - echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV" - name: Pull Docker image run: | retry () { @@ -8130,7 +6722,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_10-cuda11_3 + name: manywheel-py3_10-cuda10_2 retention-days: 14 if-no-files-found: error path: @@ -8153,19 +6745,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_3-test: # Testing + manywheel-py3_10-cuda10_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_3-build + needs: manywheel-py3_10-cuda10_2-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8192,7 +6784,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda11_3 + name: manywheel-py3_10-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -8285,18 +6877,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_3-upload: # Uploading + manywheel-py3_10-cuda10_2-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_3-test + needs: manywheel-py3_10-cuda10_2-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu102 + GPU_ARCH_VERSION: 10.2 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8325,7 +6917,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda11_3 + name: manywheel-py3_10-cuda10_2 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -8378,7 +6970,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_5-build: + manywheel-py3_10-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: linux.4xlarge timeout-minutes: 240 @@ -8386,10 +6978,10 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8482,7 +7074,7 @@ jobs: docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - uses: seemethere/upload-artifact-s3@v4 with: - name: manywheel-py3_10-cuda11_5 + name: manywheel-py3_10-cuda11_3 retention-days: 14 if-no-files-found: error path: @@ -8505,19 +7097,19 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_5-test: # Testing + manywheel-py3_10-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_5-build + needs: manywheel-py3_10-cuda11_3-build runs-on: linux.4xlarge.nvidia.gpu timeout-minutes: 240 env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8544,7 +7136,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda11_5 + name: manywheel-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -8637,18 +7229,18 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - manywheel-py3_10-cuda11_5-upload: # Uploading + manywheel-py3_10-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: manywheel-py3_10-cuda11_5-test + needs: manywheel-py3_10-cuda11_3-test env: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.5 + DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.3 SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -8677,7 +7269,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: manywheel-py3_10-cuda11_5 + name: manywheel-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} diff --git a/.github/workflows/generated-windows-binary-conda-nightly.yml b/.github/workflows/generated-windows-binary-conda-nightly.yml index 9ecb827023a27..32dc4f4eb9452 100644 --- a/.github/workflows/generated-windows-binary-conda-nightly.yml +++ b/.github/workflows/generated-windows-binary-conda-nightly.yml @@ -635,1208 +635,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_7-cuda11_5-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: conda-py3_7-cuda11_5 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_5-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_5-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_7-cuda11_5 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_5-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_5-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_7-cuda11_5 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_7-cuda11_6-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: conda-py3_7-cuda11_6 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_6-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_7-cuda11_6 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_7-cuda11_6-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_7-cuda11_6-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_7-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: conda-py3_8-cpu - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-build - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cpu - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cpu-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cpu-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: conda-py3_8-cuda11_3 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_3 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_3-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_3-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: conda - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: conda-py3_8-cuda11_3 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - conda-py3_8-cuda11_5-build: + conda-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1846,11 +645,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1923,7 +722,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_8-cuda11_5 + name: conda-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1940,9 +739,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_5-test: # Testing + conda-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_5-build + needs: conda-py3_7-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1951,11 +750,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1997,7 +796,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_5 + name: conda-py3_7-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2043,21 +842,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_5-upload: # Uploading + conda-py3_7-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_5-test + needs: conda-py3_7-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2084,7 +883,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_5 + name: conda-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2137,7 +936,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_8-cuda11_6-build: + conda-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2147,9 +946,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2224,7 +1022,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_8-cuda11_6 + name: conda-py3_8-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2241,10 +1039,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_6-test: # Testing + conda-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_8-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2252,9 +1050,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2298,7 +1095,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_6 + name: conda-py3_8-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2344,19 +1141,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_8-cuda11_6-upload: # Uploading + conda-py3_8-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_8-cuda11_6-test + needs: conda-py3_8-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2385,7 +1181,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_8-cuda11_6 + name: conda-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2438,7 +1234,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cpu-build: + conda-py3_8-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2448,10 +1244,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2524,7 +1321,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_9-cpu + name: conda-py3_8-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2541,10 +1338,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cpu-test: # Testing + conda-py3_8-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-build - runs-on: windows.4xlarge + needs: conda-py3_8-cuda11_3-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2552,10 +1349,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2597,7 +1395,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cpu + name: conda-py3_8-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2643,20 +1441,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cpu-upload: # Uploading + conda-py3_8-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cpu-test + needs: conda-py3_8-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2683,7 +1482,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cpu + name: conda-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2736,7 +1535,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_3-build: + conda-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2746,11 +1545,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2823,7 +1622,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_9-cuda11_3 + name: conda-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2840,9 +1639,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_3-test: # Testing + conda-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-build + needs: conda-py3_8-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -2851,11 +1650,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2897,7 +1696,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_3 + name: conda-py3_8-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2943,21 +1742,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_3-upload: # Uploading + conda-py3_8-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_3-test + needs: conda-py3_8-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2984,7 +1783,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_3 + name: conda-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3037,7 +1836,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_5-build: + conda-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3047,9 +1846,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3124,7 +1922,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3141,10 +1939,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_5-test: # Testing + conda-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_5-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_9-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -3152,9 +1950,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3198,7 +1995,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3244,19 +2041,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_5-upload: # Uploading + conda-py3_9-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_5-test + needs: conda-py3_9-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3285,7 +2081,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_5 + name: conda-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3338,7 +2134,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_9-cuda11_6-build: + conda-py3_9-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3348,8 +2144,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3425,7 +2221,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3442,9 +2238,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_6-test: # Testing + conda-py3_9-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-build + needs: conda-py3_9-cuda11_3-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -3453,8 +2249,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3499,7 +2295,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3545,18 +2341,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_9-cuda11_6-upload: # Uploading + conda-py3_9-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_9-cuda11_6-test + needs: conda-py3_9-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3586,7 +2382,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_9-cuda11_6 + name: conda-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3639,7 +2435,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cpu-build: + conda-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3649,10 +2445,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -3725,7 +2522,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3742,10 +2539,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cpu-test: # Testing + conda-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-build - runs-on: windows.4xlarge + needs: conda-py3_9-cuda11_6-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -3753,10 +2550,11 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -3798,7 +2596,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3844,20 +2642,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cpu-upload: # Uploading + conda-py3_9-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cpu-test + needs: conda-py3_9-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -3884,7 +2683,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cpu + name: conda-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3937,7 +2736,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_3-build: + conda-py3_10-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3947,9 +2746,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4024,7 +2822,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4041,10 +2839,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_3-test: # Testing + conda-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu + needs: conda-py3_10-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -4052,9 +2850,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4098,7 +2895,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4144,19 +2941,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_3-upload: # Uploading + conda-py3_10-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_3-test + needs: conda-py3_10-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4185,7 +2981,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_3 + name: conda-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -4238,7 +3034,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - conda-py3_10-cuda11_5-build: + conda-py3_10-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -4248,8 +3044,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4325,7 +3121,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4342,9 +3138,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_5-test: # Testing + conda-py3_10-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_5-build + needs: conda-py3_10-cuda11_3-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -4353,8 +3149,8 @@ jobs: PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4399,7 +3195,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4445,18 +3241,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - conda-py3_10-cuda11_5-upload: # Uploading + conda-py3_10-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: conda-py3_10-cuda11_5-test + needs: conda-py3_10-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: conda # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4486,7 +3282,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: conda-py3_10-cuda11_5 + name: conda-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} diff --git a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml index 7b0cb7c904867..5983d1b4212e7 100644 --- a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml +++ b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml @@ -2528,1258 +2528,6 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - libtorch-cuda11_5-shared-with-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-shared-with-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-with-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-with-deps-debug-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-debug-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-debug - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-shared-without-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-without-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-without-deps-debug-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-debug-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-debug - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-static-with-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-with-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-with-deps-debug-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-debug-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-debug - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-debug-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-static-without-deps-debug - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-without-deps-debug-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-debug-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-debug - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-without-deps-debug-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-debug-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: debug - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-debug - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af libtorch-cuda11_6-shared-with-deps-debug-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge diff --git a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml index 268c823703604..2ecfafae499f3 100644 --- a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml +++ b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml @@ -2528,1258 +2528,6 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - libtorch-cuda11_5-shared-with-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-shared-with-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-with-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-with-deps-release-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-with-deps-release-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-with-deps-release - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-shared-without-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-shared-without-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-without-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-shared-without-deps-release-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-shared-without-deps-release-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: shared-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-shared-without-deps-release - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-with-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-static-with-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-with-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-with-deps-release-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-with-deps-release-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-with-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-with-deps-release - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - libtorch-cuda11_5-static-without-deps-release-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: libtorch-cuda11_5-static-without-deps-release - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-without-deps-release-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-release-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-release - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - libtorch-cuda11_5-static-without-deps-release-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: libtorch-cuda11_5-static-without-deps-release-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: libtorch - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - LIBTORCH_CONFIG: release - LIBTORCH_VARIANT: static-without-deps - # This is a dummy value for libtorch to work correctly with our batch scripts - # without this value pip does not get installed for some reason - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: libtorch-cuda11_5-static-without-deps-release - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af libtorch-cuda11_6-shared-with-deps-release-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge diff --git a/.github/workflows/generated-windows-binary-wheel-nightly.yml b/.github/workflows/generated-windows-binary-wheel-nightly.yml index 7eea8b6a489ac..12a8b5661f4ec 100644 --- a/.github/workflows/generated-windows-binary-wheel-nightly.yml +++ b/.github/workflows/generated-windows-binary-wheel-nightly.yml @@ -635,1208 +635,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_7-cuda11_5-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: wheel-py3_7-cuda11_5 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_5-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_5-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_7-cuda11_5 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_5-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_5-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_7-cuda11_5 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - wheel-py3_7-cuda11_6-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: wheel-py3_7-cuda11_6 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_6-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_7-cuda11_6 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_7-cuda11_6-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_7-cuda11_6-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.7" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_7-cuda11_6 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - wheel-py3_8-cpu-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: wheel-py3_8-cpu - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cpu-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cpu-build - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_8-cpu - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cpu-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cpu-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_8-cpu - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - wheel-py3_8-cuda11_3-build: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: windows.4xlarge - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Build PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - - uses: seemethere/upload-artifact-s3@v4 - if: always() - with: - name: wheel-py3_8-cuda11_3 - retention-days: 14 - if-no-files-found: error - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_3-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu - timeout-minutes: 240 - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - echo "system info $(uname -a)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 - - name: Enable long paths on Windows - shell: powershell - run: | - Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 - # Since it's just a defensive command, the workflow should continue even the command fails - - name: Disables Windows Defender scheduled and real-time scanning for files in pytorch directory. - shell: powershell - run: | - Add-MpPreference -ExclusionPath $(Get-Location).tostring() -ErrorAction Ignore - # NOTE: These environment variables are put here so that they can be applied on every job equally - # They are also here because setting them at a workflow level doesn't give us access to the - # runner.temp variable, which we need. - - name: Populate binary env - shell: bash - run: | - echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" - echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" - echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_8-cuda11_3 - path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: Checkout pytorch/builder - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: main - submodules: recursive - repository: pytorch/builder - path: builder - - name: Clean pytorch/builder checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: builder - - name: Populate binary env - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - - name: Test PyTorch binary - shell: bash - run: | - "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - - name: Wait until all sessions have drained - shell: powershell - working-directory: pytorch - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - working-directory: pytorch - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_3-upload: # Uploading - runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts - if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_3-test - env: - PYTORCH_ROOT: ${{ github.workspace }}/pytorch - BUILDER_ROOT: ${{ github.workspace }}/builder - PACKAGE_TYPE: wheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda - SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" - steps: - - name: Checkout PyTorch - uses: pytorch/pytorch/.github/actions/checkout-pytorch@master - - name: Setup Linux - uses: ./.github/actions/setup-linux - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Clone pytorch/pytorch - uses: actions/checkout@v2 - - uses: seemethere/download-artifact-s3@v3 - name: Download Build Artifacts - with: - name: wheel-py3_8-cuda11_3 - path: "${{ runner.temp }}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} - run: | - echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) - if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/') }} - run: | - # reference ends with an RC suffix - if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then - echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV" - fi - - name: Upload binaries - env: - PKG_DIR: "${{ runner.temp }}/artifacts" - UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}" - # When running these on pull_request events these should be blank - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }} - ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} - run: | - docker run --rm -i \ - -e ANACONDA_API_TOKEN \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY \ - -e DRY_RUN \ - -e PACKAGE_TYPE \ - -e PKG_DIR=/artifacts \ - -e UPLOAD_CHANNEL \ - -e UPLOAD_SUBFOLDER \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -v "${GITHUB_WORKSPACE}:/v" \ - -w /v \ - 308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \ - bash -c '.circleci/scripts/binary_upload.sh' - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - wheel-py3_8-cuda11_5-build: + wheel-py3_7-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -1846,11 +645,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1923,7 +722,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_8-cuda11_5 + name: wheel-py3_7-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1940,9 +739,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_5-test: # Testing + wheel-py3_7-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_5-build + needs: wheel-py3_7-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -1951,11 +750,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Display EC2 information shell: bash @@ -1997,7 +796,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_5 + name: wheel-py3_7-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2043,21 +842,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_5-upload: # Uploading + wheel-py3_7-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_5-test + needs: wheel-py3_7-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.8" + DESIRED_PYTHON: "3.7" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2084,7 +883,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_5 + name: wheel-py3_7-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2137,7 +936,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_8-cuda11_6-build: + wheel-py3_8-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2147,9 +946,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2224,7 +1022,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_8-cuda11_6 + name: wheel-py3_8-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2241,10 +1039,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_6-test: # Testing + wheel-py3_8-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_6-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_8-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2252,9 +1050,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2298,7 +1095,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_6 + name: wheel-py3_8-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2344,19 +1141,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_8-cuda11_6-upload: # Uploading + wheel-py3_8-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_8-cuda11_6-test + needs: wheel-py3_8-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.8" steps: @@ -2385,7 +1181,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_8-cuda11_6 + name: wheel-py3_8-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2438,7 +1234,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_9-cpu-build: + wheel-py3_8-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2448,10 +1244,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2524,7 +1321,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_9-cpu + name: wheel-py3_8-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2541,10 +1338,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cpu-test: # Testing + wheel-py3_8-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cpu-build - runs-on: windows.4xlarge + needs: wheel-py3_8-cuda11_3-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -2552,10 +1349,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2597,7 +1395,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cpu + name: wheel-py3_8-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2643,20 +1441,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cpu-upload: # Uploading + wheel-py3_8-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cpu-test + needs: wheel-py3_8-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2683,7 +1482,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cpu + name: wheel-py3_8-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -2736,7 +1535,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_9-cuda11_3-build: + wheel-py3_8-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -2746,11 +1545,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2823,7 +1622,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_9-cuda11_3 + name: wheel-py3_8-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2840,9 +1639,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_3-test: # Testing + wheel-py3_8-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_3-build + needs: wheel-py3_8-cuda11_6-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -2851,11 +1650,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Display EC2 information shell: bash @@ -2897,7 +1696,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_3 + name: wheel-py3_8-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -2943,21 +1742,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_3-upload: # Uploading + wheel-py3_8-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_3-test + needs: wheel-py3_8-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.9" + DESIRED_PYTHON: "3.8" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -2984,7 +1783,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_3 + name: wheel-py3_8-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3037,7 +1836,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_9-cuda11_5-build: + wheel-py3_9-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3047,9 +1846,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3124,7 +1922,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_9-cuda11_5 + name: wheel-py3_9-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3141,10 +1939,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_5-test: # Testing + wheel-py3_9-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_5-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_9-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -3152,9 +1950,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3198,7 +1995,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_5 + name: wheel-py3_9-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3244,19 +2041,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_5-upload: # Uploading + wheel-py3_9-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_5-test + needs: wheel-py3_9-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" steps: @@ -3285,7 +2081,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_5 + name: wheel-py3_9-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3338,7 +2134,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_9-cuda11_6-build: + wheel-py3_9-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3348,8 +2144,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3425,7 +2221,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_9-cuda11_6 + name: wheel-py3_9-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3442,9 +2238,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_6-test: # Testing + wheel-py3_9-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_6-build + needs: wheel-py3_9-cuda11_3-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -3453,8 +2249,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3499,7 +2295,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_6 + name: wheel-py3_9-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3545,18 +2341,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_9-cuda11_6-upload: # Uploading + wheel-py3_9-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_9-cuda11_6-test + needs: wheel-py3_9-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu116 - GPU_ARCH_VERSION: 11.6 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.9" @@ -3586,7 +2382,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_9-cuda11_6 + name: wheel-py3_9-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3639,7 +2435,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_10-cpu-build: + wheel-py3_9-cuda11_6-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3649,10 +2445,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -3725,7 +2522,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_10-cpu + name: wheel-py3_9-cuda11_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3742,10 +2539,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cpu-test: # Testing + wheel-py3_9-cuda11_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cpu-build - runs-on: windows.4xlarge + needs: wheel-py3_9-cuda11_6-build + runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -3753,10 +2550,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Display EC2 information shell: bash @@ -3798,7 +2596,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cpu + name: wheel-py3_9-cuda11_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -3844,20 +2642,21 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cpu-upload: # Uploading + wheel-py3_9-cuda11_6-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cpu-test + needs: wheel-py3_9-cuda11_6-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu116 + GPU_ARCH_VERSION: 11.6 + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.10" + DESIRED_PYTHON: "3.9" steps: - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@master @@ -3884,7 +2683,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cpu + name: wheel-py3_9-cuda11_6 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -3937,7 +2736,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_10-cuda11_3-build: + wheel-py3_10-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -3947,9 +2746,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4024,7 +2822,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_10-cuda11_3 + name: wheel-py3_10-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4041,10 +2839,10 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_3-test: # Testing + wheel-py3_10-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_3-build - runs-on: windows.8xlarge.nvidia.gpu + needs: wheel-py3_10-cpu-build + runs-on: windows.4xlarge timeout-minutes: 240 env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch @@ -4052,9 +2850,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4098,7 +2895,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cuda11_3 + name: wheel-py3_10-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4144,19 +2941,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_3-upload: # Uploading + wheel-py3_10-cpu-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_3-test + needs: wheel-py3_10-cpu-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu113 - GPU_ARCH_VERSION: 11.3 - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -4185,7 +2981,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cuda11_3 + name: wheel-py3_10-cpu path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }} @@ -4238,7 +3034,7 @@ jobs: docker stop $(docker ps -q) || true # Prune all of the docker images docker system prune -af - wheel-py3_10-cuda11_5-build: + wheel-py3_10-cuda11_3-build: if: ${{ github.repository_owner == 'pytorch' }} runs-on: windows.4xlarge timeout-minutes: 240 @@ -4248,8 +3044,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4325,7 +3121,7 @@ jobs: - uses: seemethere/upload-artifact-s3@v4 if: always() with: - name: wheel-py3_10-cuda11_5 + name: wheel-py3_10-cuda11_3 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4342,9 +3138,9 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_5-test: # Testing + wheel-py3_10-cuda11_3-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_5-build + needs: wheel-py3_10-cuda11_3-build runs-on: windows.8xlarge.nvidia.gpu timeout-minutes: 240 env: @@ -4353,8 +3149,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4399,7 +3195,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cuda11_5 + name: wheel-py3_10-cuda11_3 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Checkout PyTorch uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 @@ -4445,18 +3241,18 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda11_5-upload: # Uploading + wheel-py3_10-cuda11_3-upload: # Uploading runs-on: linux.2xlarge # self hosted runner to download ec2 artifacts if: ${{ github.repository_owner == 'pytorch' }} - needs: wheel-py3_10-cuda11_5-test + needs: wheel-py3_10-cuda11_3-test env: PYTORCH_ROOT: ${{ github.workspace }}/pytorch BUILDER_ROOT: ${{ github.workspace }}/builder PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu115 - GPU_ARCH_VERSION: 11.5 + DESIRED_CUDA: cu113 + GPU_ARCH_VERSION: 11.3 GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -4486,7 +3282,7 @@ jobs: - uses: seemethere/download-artifact-s3@v3 name: Download Build Artifacts with: - name: wheel-py3_10-cuda11_5 + name: wheel-py3_10-cuda11_3 path: "${{ runner.temp }}/artifacts/" - name: Set DRY_RUN (only for tagged pushes) if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}