Skip to content

Commit

Permalink
Reduce pytest prints (pytorch#117069)
Browse files Browse the repository at this point in the history
* custom pytest-shard so I can control the verbosity (also index by 1 since it's confusing)
* normal runs (not keep-going) always rerun each failed test 9 times (3 per process, 3 processes).  Previously it would only run the entire test file 3 times, so if a test before you segfaulted, you only got 2 tries

Example of quieter log https://github.com/pytorch/pytorch/actions/runs/7481334046/job/20363147497
"items in shard" only gets printed once at the beginning, and the reruns just say how many got skipped.
Pull Request resolved: pytorch#117069
Approved by: https://github.com/huydhn
  • Loading branch information
clee2000 authored and pytorchmergebot committed Jan 18, 2024
1 parent e432b2e commit 2f89ef2
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 46 deletions.
5 changes: 0 additions & 5 deletions .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,6 @@ pytest-xdist==3.3.1
#Pinned versions:
#test that import:

pytest-shard==0.1.2
#Description: plugin splitting up tests in pytest
#Pinned versions:
#test that import:

pytest-flakefinder==1.1.0
#Description: plugin for rerunning tests a fixed number of times in pytest
#Pinned versions: 1.1.0
Expand Down
1 change: 0 additions & 1 deletion .github/requirements/pip-requirements-macOS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ pytest==7.3.2
pytest-xdist==3.3.1
pytest-rerunfailures==10.3
pytest-flakefinder==1.1.0
pytest-shard==0.1.2
scipy==1.10.1
sympy==1.11.1
unittest-xml-reporting<=3.2.0,>=2.0.0
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ jobs:
cache: pip
- name: Install dependencies
run: |
pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)
run: |
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ addopts =
--capture=sys
# don't suppress warnings, but don't shove them all to the end either
-p no:warnings
# Use custom pytest shard located in test/pytest_shard_custom.py instead
-p no:pytest-shard
# don't rewrite assertions (usually not a problem in CI due to differences in imports, see #95844)
--assert=plain
testpaths =
Expand Down
4 changes: 4 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import re
from collections import defaultdict
from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions

# a lot of this file is copied from _pytest.junitxml and modified to get rerun info

Expand Down Expand Up @@ -84,6 +85,7 @@ def pytest_addoption(parser: Parser) -> None:
"Emit XML for schema: one of legacy|xunit1|xunit2",
default="xunit2",
)
shard_addoptions(parser)


def pytest_configure(config: Config) -> None:
Expand All @@ -105,6 +107,8 @@ def pytest_configure(config: Config) -> None:
config.option.stepcurrent = config.getoption("stepcurrent_skip")
if config.getoption("stepcurrent"):
config.pluginmanager.register(StepcurrentPlugin(config), "stepcurrentplugin")
if config.getoption("num_shards"):
config.pluginmanager.register(PytestShardPlugin(config), "pytestshardplugin")


def pytest_unconfigure(config: Config) -> None:
Expand Down
67 changes: 67 additions & 0 deletions test/pytest_shard_custom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Custom pytest shard plugin
https://github.com/AdamGleave/pytest-shard/blob/64610a08dac6b0511b6d51cf895d0e1040d162ad/pytest_shard/pytest_shard.py#L1
Modifications:
* shards are now 1 indexed instead of 0 indexed
* option for printing items in shard
"""
import hashlib

from _pytest.config.argparsing import Parser


def pytest_addoptions(parser: Parser):
    """Register the custom sharding command-line options.

    Adds ``--shard-id``, ``--num-shards`` and ``--print-items`` to a
    dedicated "shard" option group on *parser*. Called explicitly from
    conftest rather than discovered as a pytest hook (note the plural name).
    """
    group = parser.getgroup("shard")
    # (flag, option kwargs) specs, registered in order.
    option_specs = (
        (
            "--shard-id",
            dict(dest="shard_id", type=int, default=1, help="Number of this shard."),
        ),
        (
            "--num-shards",
            dict(dest="num_shards", type=int, default=1, help="Total number of shards."),
        ),
        (
            "--print-items",
            dict(
                dest="print_items",
                action="store_true",
                default=False,
                help="Print out the items being tested in this shard.",
            ),
        ),
    )
    for flag, kwargs in option_specs:
        group.addoption(flag, **kwargs)


class PytestShardPlugin:
    """Deterministically split collected test items into disjoint shards.

    Each item is assigned to exactly one of ``num_shards`` shards by hashing
    its node id with SHA-256, so shard membership is stable across processes
    and machines. Shard ids are 1-indexed.
    """

    def __init__(self, config):
        self.config = config

    def pytest_report_collectionfinish(self, config, items) -> str:
        """Report how many (and, with --print-items, which) items run here."""
        summary = f"Running {len(items)} items in this shard"
        if not config.getoption("print_items"):
            return summary
        return summary + ": " + ", ".join(item.nodeid for item in items)

    def sha256hash(self, x: str) -> int:
        """Return a process-stable integer hash of *x* (unlike builtin hash)."""
        digest = hashlib.sha256(x.encode()).digest()
        return int.from_bytes(digest, "little")

    def filter_items_by_shard(self, items, shard_id: int, num_shards: int):
        """Return the subset of *items* that belongs to 1-indexed *shard_id*."""
        bucket = shard_id - 1
        selected = []
        for item in items:
            if self.sha256hash(item.nodeid) % num_shards == bucket:
                selected.append(item)
        return selected

    def pytest_collection_modifyitems(self, config, items):
        """Prune the collected *items* in place to this shard's share."""
        shard_id = config.getoption("shard_id")
        shard_total = config.getoption("num_shards")
        if not (1 <= shard_id <= shard_total):
            raise ValueError(
                f"{shard_id} is not a valid shard ID out of {shard_total} total shards"
            )

        items[:] = self.filter_items_by_shard(items, shard_id, shard_total)
87 changes: 48 additions & 39 deletions test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,11 +517,11 @@ def run_test(
else:
unittest_args.extend(
[
f"--shard-id={test_module.shard - 1}",
f"--shard-id={test_module.shard}",
f"--num-shards={test_module.num_shards}",
]
)
stepcurrent_key = f"{test_file}_{test_module.shard - 1}_{os.urandom(8).hex()}"
stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"

if options.verbose:
unittest_args.append(f'-{"v"*options.verbose}') # in case of pytest
Expand All @@ -540,7 +540,6 @@ def run_test(
unittest_args.extend(
get_pytest_args(
options,
stepcurrent_key,
is_cpp_test=is_cpp_test,
is_distributed_test=is_distributed_test,
)
Expand Down Expand Up @@ -603,15 +602,15 @@ def run_test(
os.close(log_fd)

command = (launcher_cmd or []) + executable + argv
num_retries = 0 if "--subprocess" in command or RERUN_DISABLED_TESTS else 2
should_retry = "--subprocess" not in command and not RERUN_DISABLED_TESTS
is_slow = "slow" in os.environ.get("TEST_CONFIG", "") or "slow" in os.environ.get(
"BUILD_ENVRIONMENT", ""
)
timeout = (
THRESHOLD * 6
if is_slow
else THRESHOLD * 3
if num_retries > 0
if should_retry
and isinstance(test_module, ShardedTest)
and test_module.time is not None
else None
Expand All @@ -623,20 +622,25 @@ def run_test(
if IS_CI:
output = stack.enter_context(open(log_path, "w"))

if options.continue_through_error and "--subprocess" not in command:
# I think subprocess is better handled by common_utils? but it's not working atm
ret_code, was_rerun = run_test_continue_through_error(
command, test_directory, env, timeout, stepcurrent_key, output
if should_retry:
ret_code, was_rerun = run_test_retries(
command,
test_directory,
env,
timeout,
stepcurrent_key,
output,
options.continue_through_error,
)
else:
command.append(f"--sc={stepcurrent_key}")
ret_code, was_rerun = retry_shell(
command,
test_directory,
stdout=output,
stderr=output,
env=env,
timeout=timeout,
retries=num_retries,
)

# Pytest return code 5 means no test is collected. This is needed
Expand All @@ -655,23 +659,37 @@ def run_test(
return ret_code


def run_test_continue_through_error(
command, test_directory, env, timeout, stepcurrent_key, output
def run_test_retries(
command,
test_directory,
env,
timeout,
stepcurrent_key,
output,
continue_through_error,
):
# Run the test with -x to stop at first failure. Try again, skipping the
# previously run tests, repeating this until there is a test that fails 3
# times (same number of rVetries we typically give). Then we skip that
# test, and keep going. Basically if the same test fails 3 times in a row,
# skip the test on the next run, but still fail in the end. I take advantage
# of the value saved in stepcurrent to keep track of the most recently run
# test (which is the one that failed if there was a failure).
# times (same number of retries we typically give).
#
# If continue through error is not set, then we fail fast.
#
# If continue through error is set, then we skip that test, and keep going.
# Basically if the same test fails 3 times in a row, skip the test on the
# next run, but still fail in the end. I take advantage of the value saved
# in stepcurrent to keep track of the most recently run test (which is the
# one that failed if there was a failure).

def print_to_file(s):
print(s, file=output, flush=True)

num_failures = defaultdict(int)

print_items = ["--print-items"]
sc_command = f"--sc={stepcurrent_key}"
while True:
ret_code = shell(
command + [sc_command],
command + [sc_command] + print_items,
test_directory,
stdout=output,
stderr=output,
Expand All @@ -682,11 +700,7 @@ def run_test_continue_through_error(
if ret_code == 0:
break # Got to the end of the test suite successfully
signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
print(
f"Got exit code {ret_code}{signal_name}, retrying...",
file=output,
flush=True,
)
print_to_file(f"Got exit code {ret_code}{signal_name}")

# Read what just failed
with open(
Expand All @@ -696,25 +710,24 @@ def run_test_continue_through_error(

num_failures[current_failure] += 1
if num_failures[current_failure] >= 3:
if not continue_through_error:
print_to_file("Stopping at first consistent failure")
break
sc_command = f"--scs={stepcurrent_key}"
else:
sc_command = f"--sc={stepcurrent_key}"
print_to_file("Retrying...")
print_items = [] # do not continue printing them, massive waste of space

consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
if len(flaky_failures) > 0:
print(
"The following tests failed flakily (had to be rerun in a new process,"
+ f" doesn't include reruns froms same process): {flaky_failures}",
file=output,
flush=True,
print_to_file(
"The following tests failed and then succeeded when run in a new process"
+ f"{flaky_failures}",
)
if len(consistent_failures) > 0:
print(
f"The following tests failed consistently: {consistent_failures}",
file=output,
flush=True,
)
print_to_file(f"The following tests failed consistently: {consistent_failures}")
return 1, True
return 0, any(x > 0 for x in num_failures.values())

Expand Down Expand Up @@ -1040,9 +1053,7 @@ def handle_log_file(
os.remove(file_path)


def get_pytest_args(
options, stepcurrent_key, is_cpp_test=False, is_distributed_test=False
):
def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
if RERUN_DISABLED_TESTS:
# Distributed tests are too slow, so running them x50 will cause the jobs to timeout after
# 3+ hours. So, let's opt for less number of reruns. We need at least 150 instances of the
Expand All @@ -1063,9 +1074,8 @@ def get_pytest_args(
]
if not is_cpp_test:
# C++ tests need to be run with pytest directly, not via python
# We have a custom pytest shard that conflicts with the normal plugin
pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
if not options.continue_through_error and IS_CI:
pytest_args.append(f"--sc={stepcurrent_key}")
else:
# Use pytext-dist to run C++ tests in parallel as running them sequentially using run_test
# is much slower than running them directly
Expand Down Expand Up @@ -1638,7 +1648,6 @@ def parallel_test_completion_callback(failure):
def check_pip_packages() -> None:
packages = [
"pytest-rerunfailures",
"pytest-shard",
"pytest-flakefinder",
"pytest-xdist",
]
Expand Down

0 comments on commit 2f89ef2

Please sign in to comment.