.azure/gpu-tests.yml

# Create and test a Python package on multiple PyTorch versions.

trigger:
  tags:
    include:
      - "*"
  branches:
    include:
      - main
      - release/*
      - refs/tags/*
pr:
  - main
  - release/*

jobs:
  - job: serve_GPU
    # how long to run the job before automatically cancelling
    timeoutInMinutes: "20"
    # how much time to give 'run always even if cancelled tasks' before stopping them
    cancelTimeoutInMinutes: "2"

    pool: "lit-rtx-3090"

    variables:
      DEVICES: $( python -c 'name = "$(Agent.Name)" ; gpus = name.split("_")[-1] if "_" in name else "0,1"; print(gpus)' )
      # these two caches assume to run repetitively on the same set of machines
      #  see: https://github.com/microsoft/azure-pipelines-agent/issues/4113#issuecomment-1439241481
      TORCH_HOME: "/var/tmp/torch"
      PIP_CACHE_DIR: "/var/tmp/pip"

    container:
      image: "pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime"
      options: "--gpus=all --shm-size=8g -v /var/tmp:/var/tmp"

    workspace:
      clean: all

    steps:
      - bash: |
          echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
          CUDA_version=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p')
          CUDA_version_mm="${CUDA_version//'.'/''}"
          echo "##vso[task.setvariable variable=CUDA_VERSION_MM]$CUDA_version_mm"
          echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${CUDA_version_mm}/torch_stable.html"
        displayName: "set Env. vars"

      - bash: |
          whoami && id
          lspci | egrep 'VGA|3D'
          whereis nvidia
          nvidia-smi
          echo $CUDA_VISIBLE_DEVICES
          echo $TORCH_URL
          python --version
          pip --version
          pip cache dir
          pip list
        displayName: "Image info & NVIDIA"

      - bash: |
          pip install . -U --prefer-binary \
            -r ./_requirements/test.txt --find-links=${TORCH_URL}
        displayName: "Install environment"

      - bash: |
          pip list
          python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
        displayName: "Sanity check"

      - bash: |
          pip install -q py-tree
          py-tree /var/tmp/torch
        displayName: "Show caches"

      - bash: |
          coverage run --source litserve -m pytest src tests -v
        displayName: "Testing"

      - bash: |
          python -m coverage report
          python -m coverage xml
          python -m codecov --token=$(CODECOV_TOKEN) --name="GPU-coverage" \
            --commit=$(Build.SourceVersion) --flags=gpu,unittest --env=linux,azure
          ls -l
        displayName: "Statistics"

      - bash: |
          pip install jsonargparse torch torchvision -U -q --find-links=${TORCH_URL}
          export PYTHONPATH=$PWD && python tests/parity_fastapi/main.py

        displayName: "Run parity tests"