[TD] Upload names of failures to s3 for pytest cache (pytorch#126315)
Some tests don't get run through pytest, and pytest crashes when a test segfaults, so in both cases the pytest cache won't have an entry (similar to pytorch/test-infra#5205).

Instead, manually upload/download an extra file that lists the failing test files.

Technically, this would be more general than the pytest cache.
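For context, the extra file is just a JSON list of failing test file names, written next to the pytest cache and read back by the TD heuristic. A minimal sketch of that round trip, using a temporary path and placeholder test names (test_foo, test_bar) rather than the real .pytest_cache location:

import json
from pathlib import Path

# Stand-in for .pytest_cache/previous_failures_additional.json
failures_file = Path("/tmp/previous_failures_additional.json")

# Write side: run_test.py dumps the list of failing test files after a run
failing_test_files = ["test_foo", "test_bar"]  # placeholder names
failures_file.write_text(json.dumps(failing_test_files, indent=2))

# Read side: the heuristic loads the list back as a set of candidate tests
previous_failures = set(json.loads(failures_file.read_text()))
print(previous_failures)  # {'test_foo', 'test_bar'}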
Pull Request resolved: pytorch#126315
Approved by: https://github.com/ZainRizvi
clee2000 authored and pytorchmergebot committed May 20, 2024
1 parent 8c38d0c commit 6550386
Showing 5 changed files with 80 additions and 1 deletion.
29 changes: 29 additions & 0 deletions .github/scripts/pytest_caching_utils.py
@@ -18,6 +18,7 @@
PYTEST_CACHE_DIR_NAME = ".pytest_cache"
BUCKET = "gha-artifacts"
LASTFAILED_FILE_PATH = Path("v/cache/lastfailed")
TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL = "previous_failures_additional.json"

# Temp folders
ZIP_UPLOAD = "zip-upload"
@@ -191,6 +192,10 @@ def _merge_pytest_caches(
pytest_cache_dir_to_merge_from, pytest_cache_dir_to_merge_into
)

_merge_additional_failures_files(
pytest_cache_dir_to_merge_from, pytest_cache_dir_to_merge_into
)


def _merge_lastfailed_files(source_pytest_cache: Path, dest_pytest_cache: Path) -> None:
# Simple cases where one of the files doesn't exist
@@ -232,3 +237,27 @@ def _merged_lastfailed_content(
del to_lastfailed[""]

return to_lastfailed


def _merge_additional_failures_files(
source_pytest_cache: Path, dest_pytest_cache: Path
) -> None:
# Simple cases where one of the files doesn't exist
source_lastfailed_file = (
source_pytest_cache / TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL
)
dest_lastfailed_file = dest_pytest_cache / TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL

if not source_lastfailed_file.exists():
return
if not dest_lastfailed_file.exists():
copy_file(source_lastfailed_file, dest_lastfailed_file)
return

# Both files exist, so we need to merge them
from_lastfailed = load_json_file(source_lastfailed_file)
to_lastfailed = load_json_file(dest_lastfailed_file)
merged_content = list(set(from_lastfailed + to_lastfailed))

# Save the results
write_json_file(dest_lastfailed_file, merged_content)
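To illustrate the merge semantics above (a set union of the two JSON lists, written back to the destination file), here is a small standalone sketch with hypothetical temporary files in place of the real cache paths:

import json
from pathlib import Path

# Hypothetical source/destination copies of previous_failures_additional.json
src = Path("/tmp/src_previous_failures_additional.json")
dst = Path("/tmp/dst_previous_failures_additional.json")
src.write_text(json.dumps(["test_foo", "test_bar"]))
dst.write_text(json.dumps(["test_bar", "test_baz"]))

# Same idea as _merge_additional_failures_files: de-duplicate via a set union
merged = list(set(json.loads(src.read_text()) + json.loads(dst.read_text())))
dst.write_text(json.dumps(merged, indent=2))
print(sorted(merged))  # ['test_bar', 'test_baz', 'test_foo']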
6 changes: 6 additions & 0 deletions test/run_test.py
@@ -59,6 +59,9 @@
)
from tools.testing.do_target_determination_for_s3 import import_results
from tools.testing.target_determination.gen_artifact import gen_ci_artifact
from tools.testing.target_determination.heuristics.previously_failed_in_pr import (
gen_additional_test_failures_file,
)
from tools.testing.target_determination.heuristics.utils import get_pr_number

from tools.testing.test_run import TestRun
@@ -1795,6 +1798,9 @@ def __str__(self):
**test_stats,
},
)
gen_additional_test_failures_file(
[test.test_file for test, _ in all_failures]
)

if len(all_failures):
for _, err in all_failures:
15 changes: 15 additions & 0 deletions tools/stats/import_test_stats.py
@@ -28,6 +28,7 @@ def get_disabled_issues() -> List[str]:
TD_HEURISTIC_PROFILING_FILE = "td_heuristic_profiling.json"
TD_HEURISTIC_HISTORICAL_EDITED_FILES = "td_heuristic_historical_edited_files.json"
TD_HEURISTIC_PREVIOUSLY_FAILED = "previous_failures.json"
TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL = "previous_failures_additional.json"

FILE_CACHE_LIFESPAN_SECONDS = datetime.timedelta(hours=3).seconds

@@ -165,6 +166,20 @@ def copy_pytest_cache() -> None:
)


def copy_additional_previous_failures() -> None:
original_path = (
REPO_ROOT / ".pytest_cache" / TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL
)
if not original_path.exists():
return
shutil.copyfile(
original_path,
REPO_ROOT
/ ADDITIONAL_CI_FILES_FOLDER
/ TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL,
)


def get_from_test_infra_generated_stats(
from_file: str, to_file: str, failure_explanation: str
) -> Dict[str, Any]:
2 changes: 2 additions & 0 deletions tools/testing/do_target_determination_for_s3.py
@@ -8,6 +8,7 @@
sys.path.insert(0, str(REPO_ROOT))

from tools.stats.import_test_stats import (
copy_additional_previous_failures,
copy_pytest_cache,
get_td_heuristic_historial_edited_files_json,
get_td_heuristic_profiling_json,
@@ -51,6 +52,7 @@ def main() -> None:
get_td_heuristic_historial_edited_files_json()
get_td_heuristic_profiling_json()
copy_pytest_cache()
copy_additional_previous_failures()

aggregated_heuristics = get_test_prioritizations(selected_tests)

29 changes: 28 additions & 1 deletion tools/testing/target_determination/heuristics/previously_failed_in_pr.py
@@ -6,6 +6,7 @@
from tools.stats.import_test_stats import (
ADDITIONAL_CI_FILES_FOLDER,
TD_HEURISTIC_PREVIOUSLY_FAILED,
TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL,
)

from tools.testing.target_determination.heuristics.interface import (
@@ -25,7 +26,7 @@ def __init__(self, **kwargs: Dict[str, Any]):
super().__init__(**kwargs)

def get_prediction_confidence(self, tests: List[str]) -> TestPrioritizations:
critical_tests = get_previous_failures()
critical_tests = get_previous_failures() | read_additional_test_failures_file()
return TestPrioritizations(
tests, {TestRun(test): 1 for test in critical_tests if test in tests}
)
@@ -54,3 +55,29 @@ def _parse_prev_failing_test_files(last_failed_tests: Dict[str, bool]) -> Set[str]:
prioritized_tests.add(test_file)

return prioritized_tests


def gen_additional_test_failures_file(tests: List[str]) -> None:
# Segfaults usually result in no xml and some tests don't run through pytest
# (ex doctests). In these cases, there will be no entry in the pytest
# cache, so we should generate a separate file for them and upload it to s3
# along with the pytest cache
with open(
REPO_ROOT / ".pytest_cache" / TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL, "w"
) as f:
json.dump(tests, f, indent=2)


def read_additional_test_failures_file() -> Set[str]:
path = (
REPO_ROOT
/ ADDITIONAL_CI_FILES_FOLDER
/ TD_HEURISTIC_PREVIOUSLY_FAILED_ADDITIONAL
)
if not os.path.exists(path):
print(f"could not find path {path}")
return set()
with open(path) as f:
s = set(json.load(f))
print(f"additional failures: {s}")
return s
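Taken together, the heuristic now unions two sources of previously failing tests: the pytest lastfailed cache and the additional failures file. A hedged sketch of that combination, with placeholder helpers standing in for get_previous_failures() and read_additional_test_failures_file():

from typing import Dict, List, Set

# Placeholder sources; the real helpers parse JSON files from the cache dirs.
def failures_from_pytest_cache() -> Set[str]:
    return {"test_foo"}  # e.g. recovered from v/cache/lastfailed

def failures_from_additional_file() -> Set[str]:
    return {"test_bar"}  # e.g. segfaulting or non-pytest (doctest) tests

# Mirror of the union in get_prediction_confidence, restricted to the tests
# actually selected for this run.
tests_to_run: List[str] = ["test_foo", "test_bar", "test_baz"]
critical = failures_from_pytest_cache() | failures_from_additional_file()
priorities: Dict[str, int] = {t: 1 for t in critical if t in tests_to_run}
print(priorities)  # {'test_foo': 1, 'test_bar': 1} (order may vary)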
