Set --no-pre-install-wheels and PEX_MAX_INSTALL_JOBS for faster builds of internal pexes (pantsbuild#20670)

This builds all internal PEXes with settings that improve
performance:

- with `--no-pre-install-wheels`, to package the `.whl` files directly rather
than unpacking and installing them. (NB. this requires Pex 2.3.0 to pick up
pex-tool/pex#2392)
- with `PEX_MAX_INSTALL_JOBS`, to use more concurrency for the install, when
available (both settings are sketched just below)
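
As a rough, standalone illustration of the two settings outside of Pants (the requirement and file names here are made up; only the flag and the environment variable come from this change), the same idea looks like this when driving the Pex CLI from Python:

```python
import os
import subprocess

# Build: package the .whl files directly instead of installing each one into
# the PEX at build time. This flag needs Pex >= 2.3.0.
subprocess.run(
    ["pex", "tensorflow", "--no-pre-install-wheels", "-o", "example.pex"],
    check=True,
)

# Run: on the machine that actually executes the PEX, spread the one-off wheel
# install across the CPUs that are available there.
subprocess.run(
    ["./example.pex", "-c", "import tensorflow"],
    env={**os.environ, "PEX_MAX_INSTALL_JOBS": str(os.cpu_count() or 1)},
    check=True,
)
```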

This is designed to be a performance improvement for any processing
where Pants synthesises a PEX internally, like `pants run
path/to/script.py` or `pants test ...`.
pex-tool/pex#2292 has benchmarks for the PEX
tool itself.

For benchmarks, I did some more purposeful ones with TensorFlow (PyTorch
seems a bit awkward to set up, and TensorFlow is still huge), using
https://gist.github.com/huonw/0560f5aaa34630b68bfb7e0995e99285 . I did 3
runs each of two goals, once with 2.21.0.dev4 and once with `PANTS_SOURCE`
pointing to this PR, and pulled the numbers out by finding the relevant
log lines:

- `pants --no-local-cache --no-pantsd --named-caches-dir=$(mktemp -d)
test example_test.py`. This involves building 4 separate PEXes partially
in parallel, partially sequentially: `requirements.pex`,
`local_dists.pex`, `pytest.pex`, and then `pytest_runner.pex`. The first
and last are the interesting ones for this test.
- `pants --no-local-cache --no-pantsd --named-caches-dir=$(mktemp -d)
run script.py`. This just builds the requirements into `script.pex`.

(NB. these are potentially unrealistic in that they're running with all
caching turned off or cleared, so are truly a worst case. This means
they're downloading tensorflow wheels and all the others, each time,
which takes about 30s on my 100Mbit/s connection. Faster connections
will thus see a higher ratio of benefit.)
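
A rough sketch (not the actual harness) of repeating each goal three times with caching disabled, matching the commands above; it only captures end-to-end wall time, whereas the per-phase numbers in the tables were read from the relevant Pants log lines:

```python
import subprocess
import tempfile
import time

GOALS = [
    ["test", "example_test.py"],
    ["run", "script.py"],
]

for goal in GOALS:
    for attempt in range(3):
        caches = tempfile.mkdtemp()  # a fresh named-caches dir, like $(mktemp -d)
        cmd = [
            "pants",
            "--no-local-cache",
            "--no-pantsd",
            f"--named-caches-dir={caches}",
            *goal,
        ]
        start = time.monotonic()
        subprocess.run(cmd, check=True)
        print(" ".join(goal), f"run {attempt + 1}:", f"{time.monotonic() - start:.1f}s")
```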

| goal                | period                       | before (s) | after (s) |
|---------------------|------------------------------|-----------:|----------:|
| `run script.py`     | building requirements        |      74-82 |     49-52 |
| `test example_test.py` | building requirements     |      67-71 |     30-36 |
|                     | building pytest runner       |        8-9 |     17-18 |
|                     | total to start running tests |      76-80 |     53-58 |
 
I also did some more ad hoc ones on a real-world work repo of mine, which
doesn't use any of the big ML libraries, just running some basic goals
once.

| goal                                              | period                                  | before (s) | after (s) |
|---------------------------------------------------|-----------------------------------------|-----------:|----------:|
| `pants export` on largest resolve                 | building requirements                   |         66 |        35 |
|                                                   | total                                   |         82 |        54 |
| "random" `pants test path/to/file.py` (1 attempt) | building requirements and pytest runner |          1 |        49 | 38 |

Fixes pantsbuild#15062
huonw authored Apr 15, 2024
1 parent 970e2cd commit 00f5d69
Showing 3 changed files with 27 additions and 6 deletions.
10 changes: 8 additions & 2 deletions src/python/pants/backend/python/goals/publish_test.py
@@ -136,7 +136,13 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoareyou", "TWINE_PASSWORD": "secret"}),
env=FrozenDict(
{
"TWINE_USERNAME": "whoareyou",
"TWINE_PASSWORD": "secret",
"PEX_MAX_INSTALL_JOBS": "0",
}
),
),
)
assert_package(
@@ -156,7 +162,7 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoami"}),
env=FrozenDict({"TWINE_USERNAME": "whoami", "PEX_MAX_INSTALL_JOBS": "0"}),
),
)

21 changes: 18 additions & 3 deletions src/python/pants/backend/python/util_rules/pex.py
@@ -705,6 +705,12 @@ async def build_pex(
if request.pex_path:
argv.extend(["--pex-path", ":".join(pex.name for pex in request.pex_path)])

if request.internal_only:
# An internal-only PEX runs on a single machine, and pre-installing wheels is wasted work in
# that case (see https://github.com/pex-tool/pex/issues/2292#issuecomment-1854582647 for
# analysis).
argv.append("--no-pre-install-wheels")

argv.append(f"--sources-directory={source_dir_name}")
sources_digest_as_subdir = await Get(
Digest, AddPrefix(request.sources or EMPTY_DIGEST, source_dir_name)
@@ -1157,6 +1163,9 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment
complete_pex_env = pex_environment.in_sandbox(working_directory=request.working_directory)
argv = complete_pex_env.create_argv(pex.name, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
**complete_pex_env.environment_dict(python=pex.python),
**request.extra_env,
}
@@ -1196,7 +1205,7 @@ class VenvPexProcess:
level: LogLevel
input_digest: Digest | None
working_directory: str | None
extra_env: FrozenDict[str, str] | None
extra_env: FrozenDict[str, str]
output_files: tuple[str, ...] | None
output_directories: tuple[str, ...] | None
timeout_seconds: int | None
@@ -1229,7 +1238,7 @@ def __init__(
object.__setattr__(self, "level", level)
object.__setattr__(self, "input_digest", input_digest)
object.__setattr__(self, "working_directory", working_directory)
object.__setattr__(self, "extra_env", FrozenDict(extra_env) if extra_env else None)
object.__setattr__(self, "extra_env", FrozenDict(extra_env or {}))
object.__setattr__(self, "output_files", tuple(output_files) if output_files else None)
object.__setattr__(
self, "output_directories", tuple(output_directories) if output_directories else None
@@ -1252,6 +1261,12 @@ async def setup_venv_pex_process(
else venv_pex.pex.argv0
)
argv = (pex_bin, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
**request.extra_env,
}
input_digest = (
await Get(Digest, MergeDigests((venv_pex.digest, request.input_digest)))
if request.input_digest
@@ -1270,7 +1285,7 @@ async def setup_venv_pex_process(
level=request.level,
input_digest=input_digest,
working_directory=request.working_directory,
env=request.extra_env,
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=append_only_caches,
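
As a minimal, standalone illustration (with made-up values) of the environment merge used in `setup_pex_process` and `setup_venv_pex_process` above: `PEX_MAX_INSTALL_JOBS` is listed first, so any caller-supplied `extra_env` entry with the same key still wins, and a process that reserves no extra concurrency ends up with `"0"`, matching the updated expectations in `publish_test.py`:

```python
# Assumed default: a process that reserves no extra cores reports 0.
concurrency_available = 0
extra_env = {"TWINE_USERNAME": "whoami"}

env = {
    "PEX_MAX_INSTALL_JOBS": str(concurrency_available),
    **extra_env,  # later entries override, so callers can still pin the value
}
assert env == {"PEX_MAX_INSTALL_JOBS": "0", "TWINE_USERNAME": "whoami"}
```
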
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/util_rules/pex_cli.py
@@ -40,7 +40,7 @@ class PexCli(TemplatedExternalTool):

default_version = "v2.3.1"
default_url_template = "https://github.com/pex-tool/pex/releases/download/{version}/pex"
version_constraints = ">=2.1.161,<3.0"
version_constraints = ">=2.3.0,<3.0"

@classproperty
def default_known_versions(cls):
