First step to refactoring S3 reading logic (pytorch#53755)
Summary:
This is an initial attempt in refactoring and consolidating our S3 read logic for print_test_stats.py, test_history.py, and run_test.py. This way, boto3 and botocore do not need to be imported in various places throughout the code base, and duplicated logic (such as the many type definitions) can exist in one place: `tools/stat_utils/s3_stat_parser.py`. walterddr contributed to this PR by moving print_test_stats.py to the tools folder and the corresponding tests a subfolder within tools.

**NOTE: this removes those tests from CI, as the new `tools/test/test_stats.py` is not in the test/ directory like the other tests listed in TESTS in run_test.py.**

Pull Request resolved: pytorch#53755

Test Plan:
This is a pure refactoring change, so running the affected scripts should work exactly as it did previously.
To make sure that print_test_stats.py still functions: run `python tools/test/test_stats.py` and make sure all tests pass.
To make sure that test_history.py works, run the example commands from `tools/test_history.py --help` and check that their output matches that shown. Note that the script will continue printing for a while, so don't be alarmed.

Some next steps:
- Actually coming up with similarities among the three current use cases and further refactoring/consolidating of functions (e.g., combining simplify and get_cases)
- Moving more parsing logic to s3_stat_parser.py to have better abstraction between our files
- Adding tests for s3_stat_parser.py when there is more functionality in it
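The consolidated helper referenced in the diff below, `get_S3_bucket_readonly` in `tools/stats_utils/s3_stat_parser.py`, presumably wraps the anonymous-client setup that previously lived inline in run_test.py. A minimal sketch of such a helper, assuming boto3/botocore are available at call time (the lazy import keeps the module importable without them):

```python
def get_S3_bucket_readonly(bucket_name: str):
    """Return a read-only (anonymous) handle to a public S3 bucket.

    boto3/botocore are imported lazily so that merely importing this
    module does not require them to be installed.
    """
    import boto3  # type: ignore[import]
    import botocore.config  # type: ignore[import]

    # Unsigned requests allow anonymous, read-only access to public buckets
    # such as ossci-metrics, without AWS credentials.
    s3 = boto3.resource(
        "s3",
        config=botocore.config.Config(signature_version=botocore.UNSIGNED),
    )
    return s3.Bucket(name=bucket_name)
```

Callers like run_test.py can then do `bucket = get_S3_bucket_readonly('ossci-metrics')` without touching boto3 directly.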

Reviewed By: agolynski, samestep

Differential Revision: D27030285

Pulled By: janeyx99

fbshipit-source-id: e664781324ef7c0c30943bfd7f17c895075ef7a7
janeyx99 authored and facebook-github-bot committed Mar 17, 2021
1 parent ccdcfba commit 2e7311e
Showing 11 changed files with 808 additions and 886 deletions.
6 changes: 4 additions & 2 deletions .circleci/config.yml
@@ -636,7 +636,8 @@ jobs:
export CIRCLE_JOB="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -800,8 +801,9 @@ jobs:
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
6 changes: 4 additions & 2 deletions .circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@@ -198,7 +198,8 @@ jobs:
export CIRCLE_JOB="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@@ -362,8 +363,9 @@ jobs:
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
6 changes: 4 additions & 2 deletions mypy-strict.ini
@@ -34,13 +34,15 @@ warn_return_any = True
implicit_reexport = False
strict_equality = True

files = tools/codegen/gen.py,
files =
tools/autograd/*.py,
tools/codegen/gen.py,
tools/print_test_stats.py,
tools/pyi/*.py,
tools/stats_utils/*.py,
tools/test_history.py,
torch/testing/_internal/framework_utils.py,
torch/testing/_internal/mypy_wrapper.py,
torch/testing/_internal/print_test_stats.py,
torch/utils/benchmark/utils/common.py,
torch/utils/benchmark/utils/timer.py,
torch/utils/benchmark/utils/valgrind_wrapper/*.py,
3 changes: 2 additions & 1 deletion mypy.ini
@@ -34,8 +34,9 @@ files =
test/test_type_hints.py,
test/test_type_info.py,
test/test_utils.py,
tools/clang_format_utils.py,
tools/generate_torch_version.py,
tools/clang_format_utils.py
tools/stats_utils/*.py


# Minimum version supported - variable annotations were introduced
42 changes: 18 additions & 24 deletions test/run_test.py
@@ -22,11 +22,10 @@
from typing_extensions import TypedDict

try:
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
import botocore.exceptions # type: ignore[import]
HAVE_BOTO3 = True
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
from tools.stats_utils.s3_stat_parser import (get_S3_bucket_readonly, HAVE_BOTO3)
except ImportError:
print("Unable to import s3_stat_parser from tools. Running without S3 stats...")
HAVE_BOTO3 = False
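The hunk above replaces direct boto3/botocore imports with a guarded import of the new helper module, falling back to `HAVE_BOTO3 = False` when it (or boto3 underneath it) is unavailable. A self-contained sketch of the same optional-import pattern, generalized into a hypothetical helper function for illustration:

```python
import importlib
from types import ModuleType
from typing import Optional, Tuple


def try_import_s3_helpers(
    module_name: str = "tools.stats_utils.s3_stat_parser",
) -> Tuple[Optional[ModuleType], bool]:
    """Attempt an optional import and report availability instead of crashing.

    Mirrors the guarded import in run_test.py: if the helper module (or
    boto3 beneath it) is missing, S3-backed stats are simply disabled.
    """
    try:
        module = importlib.import_module(module_name)
        return module, True
    except ImportError:
        print(f"Unable to import {module_name}. Running without S3 stats...")
        return None, False


s3_stat_parser, HAVE_S3_HELPERS = try_import_s3_helpers()
```

Downstream code then branches on the availability flag rather than re-trying the import at each use site.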


@@ -378,25 +377,19 @@ def get_test_time_reports_from_S3() -> List[Dict[str, Any]]:
job = os.environ.get("CIRCLE_JOB", "")
job_minus_shard_number = job.rstrip('0123456789')

try:
s3 = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
bucket = s3.Bucket(name="ossci-metrics")

reports = []
commit_index = 0
while len(reports) == 0 and commit_index < len(nightly_commits):
nightly_commit = nightly_commits[commit_index]
print(f'Grabbing reports from nightly commit: {nightly_commit}')
summaries = bucket.objects.filter(Prefix=f"test_time/{nightly_commit}/{job_minus_shard_number}")
for summary in summaries:
binary = summary.get()["Body"].read()
string = bz2.decompress(binary).decode("utf-8")
reports.append(json.loads(string))
commit_index += 1
return reports
except botocore.exceptions.ClientError as err:
print('Error Message: {}'.format(err.response['Error']['Message']))
return []
bucket = get_S3_bucket_readonly('ossci-metrics')
reports = []
commit_index = 0
while len(reports) == 0 and commit_index < len(nightly_commits):
nightly_commit = nightly_commits[commit_index]
print(f'Grabbing reports from nightly commit: {nightly_commit}')
summaries = bucket.objects.filter(Prefix=f"test_time/{nightly_commit}/{job_minus_shard_number}")
for summary in summaries:
binary = summary.get()["Body"].read()
string = bz2.decompress(binary).decode("utf-8")
reports.append(json.loads(string))
commit_index += 1
return reports
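The loop above reads each matching S3 object, decompresses its body with bz2, and parses the result as UTF-8 JSON. That decode step uses only the standard library, and can be sketched (with a synthetic payload standing in for a real report) as:

```python
import bz2
import json
from typing import Any, Dict


def decode_report(blob: bytes) -> Dict[str, Any]:
    """Decode one S3 report object: a bz2-compressed, UTF-8 JSON document."""
    return json.loads(bz2.decompress(blob).decode("utf-8"))


# Round-trip check with a synthetic report payload (field names are
# illustrative, not the actual report schema).
sample = {"commit": "abc123", "total_seconds": 12.5}
blob = bz2.compress(json.dumps(sample).encode("utf-8"))
assert decode_report(blob) == sample
```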


def calculate_job_times(reports: List[Dict[str, Any]]) -> Dict[str, Tuple[float, int]]:
@@ -431,7 +424,8 @@ def pull_job_times_from_S3() -> Dict[str, Tuple[float, int]]:
if HAVE_BOTO3:
s3_reports = get_test_time_reports_from_S3()
else:
print('Please install boto3 to enable using S3 test times for automatic sharding and test categorization.')
print('Uh oh, boto3 is not found. Either it is not installed or we failed to import s3_stat_parser.')
print('If not installed, please install boto3 for automatic sharding and test categorization.')
s3_reports = []

if len(s3_reports) == 0: