Implement test filtering (Lightning-AI#742)
carmocca authored Nov 17, 2023
1 parent f475bd5 commit 0a202f6
Showing 13 changed files with 205 additions and 83 deletions.
12 changes: 11 additions & 1 deletion .github/azure-gpu-test.yml
@@ -53,4 +53,14 @@ jobs:
displayName: "Env details"
- bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes
displayName: 'Testing'
displayName: 'Ordinary tests'
env:
PL_RUN_CUDA_TESTS: "1"
timeoutInMinutes: "5"

- bash: bash run_standalone_tests.sh
workingDirectory: tests
env:
PL_RUN_CUDA_TESTS: "1"
displayName: "Standalone tests"
timeoutInMinutes: "5"
10 changes: 6 additions & 4 deletions .github/workflows/cpu-tests.yml
@@ -56,15 +56,17 @@ jobs:
echo "$modules"
python -c "$modules"
- name: Run tests without the package installed
- name: Install all dependencies
run: |
pip install -r requirements-all.txt pytest pytest-rerunfailures transformers einops protobuf
pip install -r requirements-all.txt pytest pytest-rerunfailures pytest-timeout transformers einops protobuf
pip list
pytest -v --disable-pytest-warnings --strict-markers --color=yes
- name: Run tests without the package installed
run: |
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 60
- name: Run tests
run: |
pip install . --no-deps
pytest -v --disable-pytest-warnings --strict-markers --color=yes
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 60
15 changes: 7 additions & 8 deletions pretrain/tinyllama.py
@@ -161,14 +161,13 @@ def train(fabric, state, train_dataloader, val_dataloader, resume):
if curr_iter < initial_iter:
curr_iter += 1
continue
else:
resume = False
curr_iter = -1
fabric.barrier()
fabric.print(
"Resuming data loader finished."
f"Took {time.perf_counter() - total_t0:.1f} seconds to reach iteration {initial_iter}."
)
resume = False
curr_iter = -1
fabric.barrier()
fabric.print(
"Resuming data loader finished."
f"Took {time.perf_counter() - total_t0:.1f} seconds to reach iteration {initial_iter}."
)

if state["iter_num"] >= max_iters:
break
52 changes: 52 additions & 0 deletions tests/conftest.py
@@ -1,9 +1,11 @@
import os
import sys
from pathlib import Path
from typing import List

import pytest
import torch
from lightning.fabric.utilities.testing import _runif_reasons

wd = Path(__file__).parent.parent.absolute()

@@ -40,3 +42,53 @@ def tensor_like():
def restore_default_dtype():
# just in case
torch.set_default_dtype(torch.float32)


def RunIf(**kwargs):
reasons, marker_kwargs = _runif_reasons(**kwargs)
return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs)


# https://github.com/Lightning-AI/lightning/blob/6e517bd55b50166138ce6ab915abd4547702994b/tests/tests_fabric/conftest.py#L140
def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.Config) -> None:
initial_size = len(items)
conditions = []
filtered, skipped = 0, 0

options = {"standalone": "PL_RUN_STANDALONE_TESTS", "min_cuda_gpus": "PL_RUN_CUDA_TESTS"}
if os.getenv(options["standalone"], "0") == "1" and os.getenv(options["min_cuda_gpus"], "0") == "1":
# special case: we don't have a CPU job for standalone tests, so we shouldn't run only cuda tests.
# by deleting the key, we avoid filtering out the CPU tests
del options["min_cuda_gpus"]

for kwarg, env_var in options.items():
# this will compute the intersection of all tests selected per environment variable
if os.getenv(env_var, "0") == "1":
conditions.append(env_var)
for i, test in reversed(list(enumerate(items))): # loop in reverse, since we are going to pop items
already_skipped = any(marker.name == "skip" for marker in test.own_markers)
if already_skipped:
# the test was going to be skipped anyway, filter it out
items.pop(i)
skipped += 1
continue
has_runif_with_kwarg = any(
marker.name == "skipif" and marker.kwargs.get(kwarg) for marker in test.own_markers
)
if not has_runif_with_kwarg:
# the test doesn't have `@RunIf(kwarg=True)`, filter it out
items.pop(i)
filtered += 1

if config.option.verbose >= 0 and (filtered or skipped):
writer = config.get_terminal_writer()
writer.write(
(
f"\nThe number of tests has been filtered from {initial_size} to {initial_size - filtered} after the"
f" filters {conditions}.\n{skipped} tests are marked as unconditional skips.\nIn total,"
f" {len(items)} tests will run.\n"
),
flush=True,
bold=True,
purple=True, # oh yeah, branded pytest messages
)
78 changes: 78 additions & 0 deletions tests/run_standalone_tests.sh
@@ -0,0 +1,78 @@
#!/bin/bash
set -e

# Batch size for testing: Determines how many standalone test invocations run in parallel
# It can be set through the env variable PL_STANDALONE_TESTS_BATCH_SIZE
test_batch_size="${PL_STANDALONE_TESTS_BATCH_SIZE:-1}"

# this environment variable allows special tests to run
export PL_RUN_STANDALONE_TESTS=1
# python arguments
defaults="-m pytest --no-header -v --disable-pytest-warnings --strict-markers --color=yes -s --timeout 120"
echo "Using defaults: ${defaults}"

# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')

# file paths, remove duplicates
files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq)

# get the list of parametrizations. we need to call them separately. the last two lines are removed.
# note: if there's a syntax error, this will fail with some garbled output
if [[ "$OSTYPE" == "darwin"* ]]; then
parametrizations=$(python3 -m pytest $files --collect-only --quiet --disable-pytest-warnings "$@" | tail -r | sed -e '1,3d' | tail -r)
else
parametrizations=$(python3 -m pytest $files --collect-only --quiet --disable-pytest-warnings "$@" | head -n -2)
fi
# remove the "tests/" path suffix
path_suffix=$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345
parametrizations=${parametrizations//$path_suffix/}
parametrizations_arr=($parametrizations)

report=''

rm -f standalone_test_output.txt # in case it exists, remove it
function show_batched_output {
if [ -f standalone_test_output.txt ]; then # if exists
cat standalone_test_output.txt
# heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail
if grep -iE 'error|exception|traceback|failed' standalone_test_output.txt | grep -qvE 'on_exception|xfailed'; then
echo "Potential error! Stopping."
rm standalone_test_output.txt
exit 1
fi
rm standalone_test_output.txt
fi
}
trap show_batched_output EXIT # show the output on exit

for i in "${!parametrizations_arr[@]}"; do
parametrization=${parametrizations_arr[$i]}
prefix="$((i+1))/${#parametrizations_arr[@]}"

echo "$prefix: Running $parametrization"
# execute the test in the background
# redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
# output to std{out,err} because the outputs would be garbled together
python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
# save the PID in an array
pids[${i}]=$!
# add row to the final report
report+="Ran\t$parametrization\n"

if ((($i + 1) % $test_batch_size == 0)); then
# wait for running tests
for pid in ${pids[*]}; do wait $pid; done
unset pids # empty the array
show_batched_output
fi
done
# wait for leftover tests
for pid in ${pids[*]}; do wait $pid; done
show_batched_output

# echo test report
printf '=%.s' {1..80}
printf "\n$report"
printf '=%.s' {1..80}
printf '\n'
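
The "Standalone tests" step in azure-gpu-test.yml runs this script from the tests directory with PL_RUN_CUDA_TESTS set; a rough local equivalent (the batch size of 2 is an illustrative assumption, not taken from the commit):

cd tests
# PL_STANDALONE_TESTS_BATCH_SIZE controls how many standalone pytest invocations run in parallel (default 1)
PL_RUN_CUDA_TESTS=1 PL_STANDALONE_TESTS_BATCH_SIZE=2 bash run_standalone_tests.sh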
5 changes: 2 additions & 3 deletions tests/test_adapter.py
@@ -1,11 +1,10 @@
import sys
from contextlib import redirect_stdout
from dataclasses import asdict
from io import StringIO
from unittest.mock import Mock

import pytest
import torch
from conftest import RunIf
from lightning import Fabric


@@ -106,7 +105,7 @@ def test_adapter_gpt_init_weights():
assert (param == 0).all()


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_adapter_compile():
from lit_gpt.adapter import GPT
5 changes: 2 additions & 3 deletions tests/test_adapter_v2.py
@@ -1,10 +1,9 @@
import sys
from contextlib import redirect_stdout
from io import StringIO
from unittest.mock import Mock

import pytest
import torch
from conftest import RunIf
from lightning import Fabric


@@ -135,7 +134,7 @@ def test_base_model_can_be_adapter_v2_loaded():
assert adapter_filter(k, None)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_adapter_v2_compile():
from lit_gpt.adapter_v2 import GPT
17 changes: 17 additions & 0 deletions tests/test_ci.py
@@ -0,0 +1,17 @@
# this file is just to validate on the CI logs that these tests were run
from conftest import RunIf


@RunIf(min_cuda_gpus=1)
def test_runif_min_cuda_gpus():
assert True


@RunIf(min_cuda_gpus=1, standalone=True)
def test_runif_min_cuda_gpus_standalone():
assert True


@RunIf(standalone=True)
def test_runif_standalone():
assert True
18 changes: 3 additions & 15 deletions tests/test_convert_lit_checkpoint.py
@@ -1,5 +1,4 @@
import json
import operator
import os
from dataclasses import asdict
from pathlib import Path
@@ -8,7 +7,6 @@

import pytest
import torch
from lightning_utilities import compare_version

wd = Path(__file__).parent.parent.absolute()

@@ -115,18 +113,7 @@ def test_against_original_gpt_neox():

@torch.inference_mode()
@pytest.mark.parametrize(
"ours_kwargs",
[
{"name": "Llama-2-7b-hf"},
pytest.param(
{"name": "CodeLlama-7b-hf"},
marks=pytest.mark.skipif(
compare_version("transformers", operator.lt, "4.33.0", use_base_version=True),
reason="requires rope_theta",
),
),
{"name": "Llama-2-70b-chat-hf"},
],
"ours_kwargs", [{"name": "Llama-2-7b-hf"}, {"name": "CodeLlama-7b-hf"}, {"name": "Llama-2-70b-chat-hf"}]
)
def test_against_hf_llama2(ours_kwargs):
from transformers.models.llama.configuration_llama import LlamaConfig
@@ -207,9 +194,10 @@ def test_against_hf_phi():
if not file_path.is_file():
urlretrieve(url=url, filename=file_path)

from original_phi_1_5 import MixFormerSequentialConfig, MixFormerSequentialForCausalLM

from lit_gpt import GPT, Config
from scripts.convert_lit_checkpoint import copy_weights_phi
from tests.original_phi_1_5 import MixFormerSequentialConfig, MixFormerSequentialForCausalLM

ours_config = Config.from_name(
"phi-1_5", padded_vocab_size=10000, n_layer=2, n_head=4, n_embd=256, rotary_percentage=0.5
4 changes: 2 additions & 2 deletions tests/test_gptq.py
@@ -1,10 +1,10 @@
import lightning as L
import pytest
import torch
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_2
from conftest import RunIf


@pytest.mark.skipif(_TORCH_GREATER_EQUAL_2_2, reason="Core dumped")
@RunIf(max_torch="2.2") # TODO: core dumped
def test_gptq_blockwise_quantization():
from quantize.gptq import _TRITON_AVAILABLE

6 changes: 3 additions & 3 deletions tests/test_lora.py
@@ -1,11 +1,11 @@
import sys
from contextlib import redirect_stdout
from io import StringIO
from itertools import product
from unittest.mock import Mock

import pytest
import torch
from conftest import RunIf
from lightning import Fabric


@@ -351,7 +351,7 @@ def test_lora_qkv_linear_weights_merged_status(rank, enable_lora, expected_merged):
assert layer.merged == expected_merged


@pytest.mark.skipif(not torch.cuda.is_available(), reason="8bit requires CUDA")
@RunIf(min_cuda_gpus=1)
# platform dependent cuda issue: libbitsandbytes_cpu.so: undefined symbol: cquantize_blockwise_fp16_nf4
@pytest.mark.xfail(raises=AttributeError, strict=False)
# https://github.com/Lightning-AI/lit-gpt/issues/513
@@ -456,7 +456,7 @@ def test_base_model_can_be_lora_loaded():
assert lora_filter(k, None)


@pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform")
@RunIf(dynamo=True)
@torch.inference_mode()
def test_lora_compile():
from lit_gpt.lora import GPT