From bb7d83381076d40c7f495ea8fdef0d2d76697eae Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Sat, 4 Mar 2023 10:53:13 +1100 Subject: [PATCH] Add `sources_fingerprint` to `peek` on source-creating targets (#18383) This has the `peek` output include the fingerprint of the sources referenced in a target. This is a step towards #8445, by putting more information into `peek`. For instance, with this, one way to get a crude "hash" of a target would be something like: ```shell { pants dependencies --transitive --closed path/to:target | xargs pants peek # these might change behaviour and so need to be included cat pants.toml cat 3rdparty/python/default.lock # or whatever other lock files are relevant } | openssl sha256 ``` This is conservative: the hash can be different without the behaviour of the target changing at all. For instance: - irrelevant changes in `pants.toml`: adjusting comments, unrelated subsystem config (e.g. settings in `[golang]` when `path/to:target` is a Python-only `pex_binary`) - upgrading 3rd party dependencies in the resolve that aren't (transitively) used by `path/to:target`. This relates to #12733: if all transitive 3rd party deps appeared in `pants dependencies --transitive`, and `pants peek` included the right info for them (e.g. version and fingerprints), the `cat 3rdparty/...` could be removed because the `peek` pipe would handle it. - target fields that don't impact execution behaviour, e.g. changing the `skip_black` setting on a `python_source` target, without changing the file contents (this might be _most_ fields on the (transitive) dependencies of a packageable target?) This is also only the hash of the input configuration, rather than a hash of a built artefact. If there's processes that aren't deterministic (e.g. `shell_command(command="date > output.txt", output_files=["output.txt"])` somewhere in the chain), the exact output artefact might be different if built twice, even if the hash hasn't changed. This PR is, in some sense, a partial revival of #8450, although is much simpler, because the JSON-outputting `peek` target already exists, and this doesn't try to solve the full problem. --- src/python/pants/backend/project_info/peek.py | 8 ++- .../pants/backend/project_info/peek_test.py | 62 +++++++++++++++---- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/python/pants/backend/project_info/peek.py b/src/python/pants/backend/project_info/peek.py index a1f2f040b7b..d5c819b1aea 100644 --- a/src/python/pants/backend/project_info/peek.py +++ b/src/python/pants/backend/project_info/peek.py @@ -13,6 +13,7 @@ from pants.engine.addresses import Addresses from pants.engine.collection import Collection from pants.engine.console import Console +from pants.engine.fs import Snapshot from pants.engine.goal import Goal, GoalSubsystem, Outputting from pants.engine.internals.build_files import _get_target_family_and_adaptor_for_dep_rules from pants.engine.internals.dep_rules import DependencyRuleApplication, DependencyRuleSet @@ -79,7 +80,7 @@ def _normalize_value(val: Any) -> Any: class TargetData: target: Target # Sources may not be registered on the target, so we'll have nothing to expand. - expanded_sources: tuple[str, ...] | None + expanded_sources: Snapshot | None expanded_dependencies: tuple[str, ...] dependencies_rules: tuple[str, ...] | None = None @@ -98,7 +99,8 @@ def to_dict(self, exclude_defaults: bool = False, include_dep_rules: bool = Fals fields["dependencies"] = self.expanded_dependencies if self.expanded_sources is not None: - fields["sources"] = self.expanded_sources + fields["sources"] = self.expanded_sources.files + fields["sources_fingerprint"] = self.expanded_sources.digest.fingerprint if include_dep_rules: fields["_dependencies_rules"] = self.dependencies_rules @@ -189,7 +191,7 @@ async def get_target_data( for tgt, deps in zip(sorted_targets, dependencies_per_target) ] expanded_sources_map = { - tgt.address: hs.snapshot.files + tgt.address: hs.snapshot for tgt, hs in zip(targets_with_sources, hydrated_sources_per_target) } diff --git a/src/python/pants/backend/project_info/peek_test.py b/src/python/pants/backend/project_info/peek_test.py index f9b483e437f..60443a5b346 100644 --- a/src/python/pants/backend/project_info/peek_test.py +++ b/src/python/pants/backend/project_info/peek_test.py @@ -1,7 +1,11 @@ # Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). +from __future__ import annotations + +import dataclasses from textwrap import dedent +from typing import Sequence import pytest @@ -11,11 +15,16 @@ from pants.base.specs import RawSpecs, RecursiveGlobSpec from pants.core.target_types import ArchiveTarget, FilesGeneratorTarget, FileTarget, GenericTarget from pants.engine.addresses import Address +from pants.engine.fs import Digest, Snapshot from pants.engine.internals.dep_rules import DependencyRuleAction, DependencyRuleApplication from pants.engine.rules import QueryRule from pants.testutil.rule_runner import RuleRunner +def _snapshot(fingerprint: str, files: tuple[str, ...]) -> Snapshot: + return Snapshot._unsafe_create(Digest(fingerprint.ljust(64, "0"), 1), files, ()) + + @pytest.mark.parametrize( "expanded_target_infos, exclude_defaults, include_dep_rules, expected_output", [ @@ -38,7 +47,10 @@ }, Address("example", target_name="files_target"), ), - ("foo.txt", "bar.txt"), + _snapshot( + "2", + ("foo.txt", "bar.txt"), + ), tuple(), ) ], @@ -59,9 +71,10 @@ } }, "sources": [ - "foo.txt", - "bar.txt" + "bar.txt", + "foo.txt" ], + "sources_fingerprint": "2000000000000000000000000000000000000000000000000000000000000000", "sources_raw": [ "*.txt" ] @@ -77,7 +90,10 @@ FilesGeneratorTarget( {"sources": ["foo.txt"]}, Address("example", target_name="files_target") ), - ("foo.txt",), + _snapshot( + "1", + ("foo.txt",), + ), tuple(), ) ], @@ -95,6 +111,7 @@ "sources": [ "foo.txt" ], + "sources_fingerprint": "1000000000000000000000000000000000000000000000000000000000000000", "sources_raw": [ "foo.txt" ], @@ -112,7 +129,10 @@ {"sources": ["*.txt"], "tags": ["zippable"]}, Address("example", target_name="files_target"), ), - tuple(), + _snapshot( + "0", + (), + ), tuple(), ), TargetData( @@ -138,6 +158,7 @@ "target_type": "files", "dependencies": [], "sources": [], + "sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000", "sources_raw": [ "*.txt" ], @@ -167,7 +188,7 @@ [ TargetData( FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")), - ("foo/a.txt",), + _snapshot("", ("foo/a.txt",)), ("foo/a.txt:baz",), dependencies_rules=("does", "apply", "*"), dependents_rules=("fall-through", "*"), @@ -218,6 +239,7 @@ "sources": [ "foo/a.txt" ], + "sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000", "sources_raw": [ "*.txt" ] @@ -254,6 +276,19 @@ def test_non_matching_build_target(rule_runner: RuleRunner) -> None: assert result.stdout == "[]\n" +def _normalize_fingerprints(tds: Sequence[TargetData]) -> list[TargetData]: + """We're not here to test the computation of fingerprints.""" + return [ + dataclasses.replace( + td, + expanded_sources=None + if td.expanded_sources is None + else _snapshot("", td.expanded_sources.files), + ) + for td in tds + ] + + def test_get_target_data(rule_runner: RuleRunner) -> None: rule_runner.write_files( { @@ -272,7 +307,8 @@ def test_get_target_data(rule_runner: RuleRunner) -> None: TargetDatas, [RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")], ) - assert list(tds) == [ + + assert _normalize_fingerprints(tds) == [ TargetData( GenericTarget({"dependencies": [":baz"]}, Address("foo", target_name="bar")), None, @@ -280,21 +316,21 @@ def test_get_target_data(rule_runner: RuleRunner) -> None: ), TargetData( FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")), - ("foo/a.txt", "foo/b.txt"), + _snapshot("", ("foo/a.txt", "foo/b.txt")), ("foo/a.txt:baz", "foo/b.txt:baz"), ), TargetData( FileTarget( {"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz") ), - ("foo/a.txt",), + _snapshot("", ("foo/a.txt",)), (), ), TargetData( FileTarget( {"source": "b.txt"}, Address("foo", relative_file_path="b.txt", target_name="baz") ), - ("foo/b.txt",), + _snapshot("", ("foo/b.txt",)), (), ), ] @@ -325,10 +361,10 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None: TargetDatas, [RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")], ) - assert list(tds) == [ + assert _normalize_fingerprints(tds) == [ TargetData( FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")), - ("foo/a.txt",), + _snapshot("", ("foo/a.txt",)), ("foo/a.txt:baz",), dependencies_rules=("does", "apply", "*"), dependents_rules=("fall-through", "*"), @@ -349,7 +385,7 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None: FileTarget( {"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz") ), - ("foo/a.txt",), + _snapshot("", ("foo/a.txt",)), (), dependencies_rules=("does", "apply", "*"), dependents_rules=("fall-through", "*"),