# .github/azure-gpu-test.yml

# CI trigger: pushes to the branches listed here start this pipeline.
trigger:
  branches:
    include:
      - main
      - wip

# PR trigger: pull requests targeting these branches start this pipeline.
pr:
  branches:
    include:
      - main
      - wip
      # Glob entry; kept quoted so the "*" is always read as literal text.
      - 'carmocca/*'

jobs:
  # Single GPU test job. The steps listed under `steps:` run inside the
  # container declared below, on an agent taken from the named pool.
  - job: testing
    # Upper bound for the whole job, and the grace period granted after a
    # cancel request before the job is terminated (both in minutes).
    timeoutInMinutes: "20"
    cancelTimeoutInMinutes: "2"
    pool: "lit-rtx-3090"
    variables:
      # The value is a shell command substitution, not a literal: $(Agent.Name)
      # is replaced by the pipeline, and the surrounding `$( python ... )` is
      # expanded by bash wherever a step references $(DEVICES). The Python
      # snippet prints the text after the last "_" of the agent name.
      # NOTE(review): presumably that suffix is the GPU index list assigned to
      # the agent (e.g. "0,1") — confirm against the agent naming scheme.
      DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
    container:
      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
      # Container runtime flags: expose all GPUs and set shared memory to 8 GB.
      options: "--gpus=all --shm-size=8gb"
    workspace:
      # Clean the agent workspace before the job runs.
      clean: all
    steps:

    # Publish the resolved DEVICES value as the CUDA_VISIBLE_DEVICES pipeline
    # variable through the `##vso[task.setvariable]` logging command, so the
    # following steps see it in their environment.
    - displayName: 'set env. vars'
      bash: |
        echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"

    # Diagnostics only: print the GPU selection, the NVIDIA tooling that is
    # visible in the container, and the Python/pip toolchain with its packages.
    - displayName: 'Image info & NVIDIA'
      bash: |
        echo $(DEVICES)
        echo $CUDA_VISIBLE_DEVICES
        whereis nvidia
        nvidia-smi
        which python && which pip
        python --version
        pip --version
        pip list

    # Upgrade pip, then install the checked-out project together with its
    # "all" and "test" extras.
    - script: |
        # Abort on the first failing command; without this only the exit
        # status of the last command decides whether the step fails.
        set -e
        pip install --upgrade pip
        pip install '.[all,test]'
      displayName: 'Install dependencies'

    # Remove torchvision/torchaudio, then install a pre-release
    # `nvfuser-cu121` with its `torch` extra, using NVIDIA's package index as
    # an additional source.
    # NOTE(review): the display name says "PyTorch nightly"; presumably the
    # `torch` extra is what pulls in the nightly build — confirm.
    - script: |
        # Abort on the first failing command so a failed uninstall is not
        # hidden by a successful install afterwards.
        set -e
        pip uninstall -y torchvision torchaudio
        pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
      displayName: 'Install PyTorch nightly'

    # Sanity check after installation: list the final package set and fail the
    # step unless torch reports exactly two CUDA devices.
    - displayName: 'Env details'
      bash: |
        set -e
        pip list
        python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"

    # Run the regular pytest suite with a 5-minute cap on this step.
    # NOTE(review): PL_RUN_CUDA_TESTS=1 presumably opts in to the CUDA-marked
    # tests — confirm where the test suite reads it.
    - displayName: 'Ordinary tests'
      timeoutInMinutes: "5"
      env:
        PL_RUN_CUDA_TESTS: "1"
      bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes

    # Run the standalone test script from the `tests` directory, with a
    # 5-minute cap on this step and the same environment flag as the
    # ordinary test run.
    - displayName: 'Standalone tests'
      timeoutInMinutes: "5"
      workingDirectory: tests
      env:
        PL_RUN_CUDA_TESTS: "1"
      bash: bash run_standalone_tests.sh