Set --no-pre-install-wheels and PEX_MAX_INSTALL_JOBS for faster builds of internal pexes (pantsbuild#20670)

This builds all internal PEXes with settings that improve
performance:

- with `--no-pre-install-wheels`, to package the `.whl` files directly rather
than unpacking and installing them. (NB. this requires Pex 2.3.0 to pick up
pex-tool/pex#2392)
- with `PEX_MAX_INSTALL_JOBS`, to use more concurrency for the install, when
available (both settings are sketched just below)
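
As a rough, standalone illustration of the two settings outside of Pants (the requirement and file names here are made up; only the flag and the environment variable come from this change), the same idea looks like this when driving the Pex CLI from Python:

```python
import os
import subprocess

# Build: package the .whl files directly instead of installing each one into
# the PEX at build time. This flag needs Pex >= 2.3.0.
subprocess.run(
    ["pex", "tensorflow", "--no-pre-install-wheels", "-o", "example.pex"],
    check=True,
)

# Run: on the machine that actually executes the PEX, spread the one-off wheel
# install across the CPUs that are available there.
subprocess.run(
    ["./example.pex", "-c", "import tensorflow"],
    env={**os.environ, "PEX_MAX_INSTALL_JOBS": str(os.cpu_count() or 1)},
    check=True,
)
```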

This is designed to be a performance improvement for any processing
where Pants synthesises a PEX internally, like `pants run
path/to/script.py` or `pants test ...`.
pex-tool/pex#2292 has benchmarks for the PEX
tool itself.

For benchmarks, I did some more purposeful ones with TensorFlow (PyTorch
seems a bit awkward to set up, and TensorFlow is still huge), using
https://gist.github.com/huonw/0560f5aaa34630b68bfb7e0995e99285 . I did 3
runs each of two goals, once with 2.21.0.dev4 and once with `PANTS_SOURCE`
pointing to this PR, and pulled the numbers out by finding the relevant
log lines:

- `pants --no-local-cache --no-pantsd --named-caches-dir=$(mktemp -d)
test example_test.py`. This involves building 4 separate PEXes partially
in parallel, partially sequentially: `requirements.pex`,
`local_dists.pex`, `pytest.pex`, and then `pytest_runner.pex`. The first
and last are the interesting ones for this test.
- `pants --no-local-cache --no-pantsd --named-caches-dir=$(mktemp -d)
run script.py`. This just builds the requirements into `script.pex`.

(NB. these are potentially unrealistic in that they're running with all
caching turned off or cleared, so are truly a worst case. This means
they're downloading tensorflow wheels and all the others, each time,
which takes about 30s on my 100Mbit/s connection. Faster connections
will thus see a higher ratio of benefit.)
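
A rough sketch (not the actual harness) of repeating each goal three times with caching disabled, matching the commands above; it only captures end-to-end wall time, whereas the per-phase numbers in the tables were read from the relevant Pants log lines:

```python
import subprocess
import tempfile
import time

GOALS = [
    ["test", "example_test.py"],
    ["run", "script.py"],
]

for goal in GOALS:
    for attempt in range(3):
        caches = tempfile.mkdtemp()  # a fresh named-caches dir, like $(mktemp -d)
        cmd = [
            "pants",
            "--no-local-cache",
            "--no-pantsd",
            f"--named-caches-dir={caches}",
            *goal,
        ]
        start = time.monotonic()
        subprocess.run(cmd, check=True)
        print(" ".join(goal), f"run {attempt + 1}:", f"{time.monotonic() - start:.1f}s")
```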

| goal                | period                       | before (s) | after (s) |
|---------------------|------------------------------|-----------:|----------:|
| `run script.py`     | building requirements        |      74-82 |     49-52 |
| `test example_test.py` | building requirements     |      67-71 |     30-36 |
|                     | building pytest runner       |        8-9 |     17-18 |
|                     | total to start running tests |      76-80 |     53-58 |
 
I also did some more ad hoc ones on a real-world work repo of mine, which
doesn't use any of the big ML libraries, just running some basic goals
once.

| goal                                              | period                                  | before (s) | after (s) |
|---------------------------------------------------|-----------------------------------------|-----------:|----------:|
| `pants export` on largest resolve                 | building requirements                   |         66 |        35 |
|                                                   | total                                   |         82 |        54 |
| "random" `pants test path/to/file.py` (1 attempt) | building requirements and pytest runner |          1 |        49 | 38 |

Fixes pantsbuild#15062
huonw authored Apr 15, 2024
1 parent 970e2cd commit 00f5d69
Showing 3 changed files with 27 additions and 6 deletions.
10 changes: 8 additions & 2 deletions src/python/pants/backend/python/goals/publish_test.py
@@ -136,7 +136,13 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoareyou", "TWINE_PASSWORD": "secret"}),
env=FrozenDict(
{
"TWINE_USERNAME": "whoareyou",
"TWINE_PASSWORD": "secret",
"PEX_MAX_INSTALL_JOBS": "0",
}
),
),
)
assert_package(
@@ -156,7 +162,7 @@ def test_twine_upload(rule_runner, packages) -> None:
"my-package-0.1.0.tar.gz",
"my_package-0.1.0-py3-none-any.whl",
),
env=FrozenDict({"TWINE_USERNAME": "whoami"}),
env=FrozenDict({"TWINE_USERNAME": "whoami", "PEX_MAX_INSTALL_JOBS": "0"}),
),
)

21 changes: 18 additions & 3 deletions src/python/pants/backend/python/util_rules/pex.py
@@ -705,6 +705,12 @@ async def build_pex(
if request.pex_path:
argv.extend(["--pex-path", ":".join(pex.name for pex in request.pex_path)])

if request.internal_only:
# An internal-only PEX runs on a single machine, and pre-installing wheels is wasted work in
# that case (see https://github.com/pex-tool/pex/issues/2292#issuecomment-1854582647 for
# analysis).
argv.append("--no-pre-install-wheels")

argv.append(f"--sources-directory={source_dir_name}")
sources_digest_as_subdir = await Get(
Digest, AddPrefix(request.sources or EMPTY_DIGEST, source_dir_name)
@@ -1157,6 +1163,9 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment
complete_pex_env = pex_environment.in_sandbox(working_directory=request.working_directory)
argv = complete_pex_env.create_argv(pex.name, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
**complete_pex_env.environment_dict(python=pex.python),
**request.extra_env,
}
@@ -1196,7 +1205,7 @@ class VenvPexProcess:
level: LogLevel
input_digest: Digest | None
working_directory: str | None
extra_env: FrozenDict[str, str] | None
extra_env: FrozenDict[str, str]
output_files: tuple[str, ...] | None
output_directories: tuple[str, ...] | None
timeout_seconds: int | None
@@ -1229,7 +1238,7 @@ def __init__(
object.__setattr__(self, "level", level)
object.__setattr__(self, "input_digest", input_digest)
object.__setattr__(self, "working_directory", working_directory)
object.__setattr__(self, "extra_env", FrozenDict(extra_env) if extra_env else None)
object.__setattr__(self, "extra_env", FrozenDict(extra_env or {}))
object.__setattr__(self, "output_files", tuple(output_files) if output_files else None)
object.__setattr__(
self, "output_directories", tuple(output_directories) if output_directories else None
@@ -1252,6 +1261,12 @@ async def setup_venv_pex_process(
else venv_pex.pex.argv0
)
argv = (pex_bin, *request.argv)
env = {
# Set this in case this PEX was built with --no-pre-install-wheels, and thus parallelising
# the install on cold boot is handy.
"PEX_MAX_INSTALL_JOBS": str(request.concurrency_available),
**request.extra_env,
}
input_digest = (
await Get(Digest, MergeDigests((venv_pex.digest, request.input_digest)))
if request.input_digest
@@ -1270,7 +1285,7 @@ async def setup_venv_pex_process(
level=request.level,
input_digest=input_digest,
working_directory=request.working_directory,
env=request.extra_env,
env=env,
output_files=request.output_files,
output_directories=request.output_directories,
append_only_caches=append_only_caches,
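
As a minimal, standalone illustration (with made-up values) of the environment merge used in `setup_pex_process` and `setup_venv_pex_process` above: `PEX_MAX_INSTALL_JOBS` is listed first, so any caller-supplied `extra_env` entry with the same key still wins, and a process that reserves no extra concurrency ends up with `"0"`, matching the updated expectations in `publish_test.py`:

```python
# Assumed default: a process that reserves no extra cores reports 0.
concurrency_available = 0
extra_env = {"TWINE_USERNAME": "whoami"}

env = {
    "PEX_MAX_INSTALL_JOBS": str(concurrency_available),
    **extra_env,  # later entries override, so callers can still pin the value
}
assert env == {"PEX_MAX_INSTALL_JOBS": "0", "TWINE_USERNAME": "whoami"}
```
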
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/util_rules/pex_cli.py
@@ -40,7 +40,7 @@ class PexCli(TemplatedExternalTool):

default_version = "v2.3.1"
default_url_template = "https://github.com/pex-tool/pex/releases/download/{version}/pex"
version_constraints = ">=2.1.161,<3.0"
version_constraints = ">=2.3.0,<3.0"

@classproperty
def default_known_versions(cls):
