From dc436bd562eab1fd013ea76cbc05073d0135c957 Mon Sep 17 00:00:00 2001 From: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> Date: Fri, 14 Oct 2022 17:47:46 -0500 Subject: [PATCH] Add `cache_binary_discovery` field to `remote_environment` target (#17227) Closes https://github.com/pantsbuild/pants/issues/17180. [ci skip-rust] [ci skip-build-wheels] --- .../pants/core/util_rules/environments.py | 49 +++++++++++++++++-- .../core/util_rules/environments_test.py | 29 +++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/python/pants/core/util_rules/environments.py b/src/python/pants/core/util_rules/environments.py index 40d02331d4c..9561daa6697 100644 --- a/src/python/pants/core/util_rules/environments.py +++ b/src/python/pants/core/util_rules/environments.py @@ -24,6 +24,7 @@ from pants.engine.rules import Get, MultiGet, QueryRule, collect_rules, rule, rule_helper from pants.engine.target import ( COMMON_TARGET_FIELDS, + BoolField, Field, FieldDefaultFactoryRequest, FieldDefaultFactoryResult, @@ -313,6 +314,28 @@ class RemoteFallbackEnvironmentField(FallbackEnvironmentField): ) +class RemoteEnvironmentCacheBinaryDiscovery(BoolField): + alias = "cache_binary_discovery" + default = False + help = softwrap( + f""" + If true, will cache system binary discovery, e.g. finding Python interpreters. + + When safe to do, it is preferable to set this option to `True` for faster performance by + avoiding wasted work. Otherwise, Pants will search for system binaries whenever the + Pants daemon is restarted. + + However, it is only safe to set this to `True` if the remote execution environment has a + stable environment, e.g. the server will not change versions of installed system binaries. + Otherwise, you risk caching results that become stale when the server changes its + environment, which may break your builds. With some remote execution servers, you can + specify a Docker image to run with via the field + `{RemoteExtraPlatformPropertiesField.alias}`; if you are able to specify what Docker image + to use, and also use a pinned tag of the image, it is likely safe to set this field to true. + """ + ) + + class RemoteEnvironmentTarget(Target): alias = "remote_environment" core_fields = ( @@ -320,6 +343,7 @@ class RemoteEnvironmentTarget(Target): RemotePlatformField, RemoteExtraPlatformPropertiesField, RemoteFallbackEnvironmentField, + RemoteEnvironmentCacheBinaryDiscovery, ) help = softwrap( """ @@ -381,7 +405,7 @@ class EnvironmentTarget: def executable_search_path_cache_scope( self, *, cache_failures: bool = False ) -> ProcessCacheScope: - """Whether it's safe to cache exectuable search path discovery or not. + """Whether it's safe to cache executable search path discovery or not. If the environment may change on us, e.g. the user upgrades a binary, then it's not safe to cache the discovery to disk. Technically, in that case, we should recheck the binary every @@ -390,11 +414,28 @@ def executable_search_path_cache_scope( Meanwhile, when running with Docker, we already invalidate whenever the image changes thanks to https://github.com/pantsbuild/pants/pull/17101. + + Remote execution often is safe to cache, but depends on the remote execution server. + So, we rely on the user telling us what is safe. """ - docker = self.val and self.val.has_field(DockerImageField) + caching_allowed = self.val and ( + self.val.has_field(DockerImageField) + or ( + self.val.has_field(RemoteEnvironmentCacheBinaryDiscovery) + and self.val[RemoteEnvironmentCacheBinaryDiscovery].value + ) + ) if cache_failures: - return ProcessCacheScope.ALWAYS if docker else ProcessCacheScope.PER_RESTART_ALWAYS - return ProcessCacheScope.SUCCESSFUL if docker else ProcessCacheScope.PER_RESTART_SUCCESSFUL + return ( + ProcessCacheScope.ALWAYS + if caching_allowed + else ProcessCacheScope.PER_RESTART_ALWAYS + ) + return ( + ProcessCacheScope.SUCCESSFUL + if caching_allowed + else ProcessCacheScope.PER_RESTART_SUCCESSFUL + ) @dataclass(frozen=True) diff --git a/src/python/pants/core/util_rules/environments_test.py b/src/python/pants/core/util_rules/environments_test.py index c2617a702ea..5643e71deba 100644 --- a/src/python/pants/core/util_rules/environments_test.py +++ b/src/python/pants/core/util_rules/environments_test.py @@ -27,6 +27,7 @@ FallbackEnvironmentField, LocalEnvironmentTarget, NoFallbackEnvironmentError, + RemoteEnvironmentCacheBinaryDiscovery, RemoteEnvironmentTarget, RemoteExtraPlatformPropertiesField, UnrecognizedEnvironmentError, @@ -35,6 +36,7 @@ ) from pants.engine.internals.docker import DockerResolveImageRequest, DockerResolveImageResult from pants.engine.platform import Platform +from pants.engine.process import ProcessCacheScope from pants.engine.target import FieldSet, OptionalSingleSourceField, Target from pants.option.global_options import GlobalOptions from pants.testutil.option_util import create_subsystem @@ -444,3 +446,30 @@ class EnvFS(FieldSet): assert EnvironmentNameRequest.from_field_set(EnvFS.create(tgt)) == EnvironmentNameRequest( "my_env", description_of_origin="the `the_env_field` field from the target dir:dir" ) + + +def test_executable_search_path_cache_scope() -> None: + def assert_cache( + tgt: Target | None, *, cache_failures: bool, expected: ProcessCacheScope + ) -> None: + assert ( + EnvironmentTarget(tgt).executable_search_path_cache_scope(cache_failures=cache_failures) + == expected + ) + + addr = Address("envs") + + for tgt in ( + None, + LocalEnvironmentTarget({}, addr), + RemoteEnvironmentTarget({RemoteEnvironmentCacheBinaryDiscovery.alias: False}, addr), + ): + assert_cache(tgt, cache_failures=False, expected=ProcessCacheScope.PER_RESTART_SUCCESSFUL) + assert_cache(tgt, cache_failures=True, expected=ProcessCacheScope.PER_RESTART_ALWAYS) + + for tgt in ( # type: ignore[assignment] + DockerEnvironmentTarget({DockerImageField.alias: "img"}, addr), + RemoteEnvironmentTarget({RemoteEnvironmentCacheBinaryDiscovery.alias: True}, addr), + ): + assert_cache(tgt, cache_failures=False, expected=ProcessCacheScope.SUCCESSFUL) + assert_cache(tgt, cache_failures=True, expected=ProcessCacheScope.ALWAYS)