From bb7d83381076d40c7f495ea8fdef0d2d76697eae Mon Sep 17 00:00:00 2001
From: Huon Wilson <huon@exoflare.io>
Date: Sat, 4 Mar 2023 10:53:13 +1100
Subject: [PATCH] Add `sources_fingerprint` to `peek` on source-creating
 targets (#18383)

This has the `peek` output include the fingerprint of the sources
referenced in a target. This is a step towards #8445, by putting more
information into `peek`.

For instance, with this, one way to get a crude "hash" of a target would
be something like:

```shell
{
   pants dependencies --transitive --closed path/to:target | xargs pants peek
   # these might change behaviour and so need to be included
   cat pants.toml
   cat 3rdparty/python/default.lock # or whatever other lock files are relevant
} | openssl sha256
```

This is conservative: the hash can be different without the behaviour of
the target changing at all. For instance:

- irrelevant changes in `pants.toml`: adjusting comments, unrelated
subsystem config (e.g. settings in `[golang]` when `path/to:target` is a
Python-only `pex_binary`)
- upgrading 3rd party dependencies in the resolve that aren't
(transitively) used by `path/to:target`. This relates to #12733: if all
transitive 3rd party deps appeared in `pants dependencies --transitive`,
and `pants peek` included the right info for them (e.g. version and
fingerprints), the `cat 3rdparty/...` could be removed because the
`peek` pipe would handle it.
- target fields that don't impact execution behaviour, e.g. changing the
`skip_black` setting on a `python_source` target, without changing the
file contents (this might be _most_ fields on the (transitive)
dependencies of a packageable target?)

This is also only the hash of the input configuration, rather than a
hash of a built artefact. If there's processes that aren't deterministic
(e.g. `shell_command(command="date > output.txt",
output_files=["output.txt"])` somewhere in the chain), the exact output
artefact might be different if built twice, even if the hash hasn't
changed.

This PR is, in some sense, a partial revival of #8450, although is much
simpler, because the JSON-outputting `peek` target already exists, and
this doesn't try to solve the full problem.
---
 src/python/pants/backend/project_info/peek.py |  8 ++-
 .../pants/backend/project_info/peek_test.py   | 62 +++++++++++++++----
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/src/python/pants/backend/project_info/peek.py b/src/python/pants/backend/project_info/peek.py
index a1f2f040b7b..d5c819b1aea 100644
--- a/src/python/pants/backend/project_info/peek.py
+++ b/src/python/pants/backend/project_info/peek.py
@@ -13,6 +13,7 @@
 from pants.engine.addresses import Addresses
 from pants.engine.collection import Collection
 from pants.engine.console import Console
+from pants.engine.fs import Snapshot
 from pants.engine.goal import Goal, GoalSubsystem, Outputting
 from pants.engine.internals.build_files import _get_target_family_and_adaptor_for_dep_rules
 from pants.engine.internals.dep_rules import DependencyRuleApplication, DependencyRuleSet
@@ -79,7 +80,7 @@ def _normalize_value(val: Any) -> Any:
 class TargetData:
     target: Target
     # Sources may not be registered on the target, so we'll have nothing to expand.
-    expanded_sources: tuple[str, ...] | None
+    expanded_sources: Snapshot | None
     expanded_dependencies: tuple[str, ...]
 
     dependencies_rules: tuple[str, ...] | None = None
@@ -98,7 +99,8 @@ def to_dict(self, exclude_defaults: bool = False, include_dep_rules: bool = Fals
 
         fields["dependencies"] = self.expanded_dependencies
         if self.expanded_sources is not None:
-            fields["sources"] = self.expanded_sources
+            fields["sources"] = self.expanded_sources.files
+            fields["sources_fingerprint"] = self.expanded_sources.digest.fingerprint
 
         if include_dep_rules:
             fields["_dependencies_rules"] = self.dependencies_rules
@@ -189,7 +191,7 @@ async def get_target_data(
         for tgt, deps in zip(sorted_targets, dependencies_per_target)
     ]
     expanded_sources_map = {
-        tgt.address: hs.snapshot.files
+        tgt.address: hs.snapshot
         for tgt, hs in zip(targets_with_sources, hydrated_sources_per_target)
     }
 
diff --git a/src/python/pants/backend/project_info/peek_test.py b/src/python/pants/backend/project_info/peek_test.py
index f9b483e437f..60443a5b346 100644
--- a/src/python/pants/backend/project_info/peek_test.py
+++ b/src/python/pants/backend/project_info/peek_test.py
@@ -1,7 +1,11 @@
 # Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
+from __future__ import annotations
+
+import dataclasses
 from textwrap import dedent
+from typing import Sequence
 
 import pytest
 
@@ -11,11 +15,16 @@
 from pants.base.specs import RawSpecs, RecursiveGlobSpec
 from pants.core.target_types import ArchiveTarget, FilesGeneratorTarget, FileTarget, GenericTarget
 from pants.engine.addresses import Address
+from pants.engine.fs import Digest, Snapshot
 from pants.engine.internals.dep_rules import DependencyRuleAction, DependencyRuleApplication
 from pants.engine.rules import QueryRule
 from pants.testutil.rule_runner import RuleRunner
 
 
+def _snapshot(fingerprint: str, files: tuple[str, ...]) -> Snapshot:
+    return Snapshot._unsafe_create(Digest(fingerprint.ljust(64, "0"), 1), files, ())
+
+
 @pytest.mark.parametrize(
     "expanded_target_infos, exclude_defaults, include_dep_rules, expected_output",
     [
@@ -38,7 +47,10 @@
                         },
                         Address("example", target_name="files_target"),
                     ),
-                    ("foo.txt", "bar.txt"),
+                    _snapshot(
+                        "2",
+                        ("foo.txt", "bar.txt"),
+                    ),
                     tuple(),
                 )
             ],
@@ -59,9 +71,10 @@
                       }
                     },
                     "sources": [
-                      "foo.txt",
-                      "bar.txt"
+                      "bar.txt",
+                      "foo.txt"
                     ],
+                    "sources_fingerprint": "2000000000000000000000000000000000000000000000000000000000000000",
                     "sources_raw": [
                       "*.txt"
                     ]
@@ -77,7 +90,10 @@
                     FilesGeneratorTarget(
                         {"sources": ["foo.txt"]}, Address("example", target_name="files_target")
                     ),
-                    ("foo.txt",),
+                    _snapshot(
+                        "1",
+                        ("foo.txt",),
+                    ),
                     tuple(),
                 )
             ],
@@ -95,6 +111,7 @@
                     "sources": [
                       "foo.txt"
                     ],
+                    "sources_fingerprint": "1000000000000000000000000000000000000000000000000000000000000000",
                     "sources_raw": [
                       "foo.txt"
                     ],
@@ -112,7 +129,10 @@
                         {"sources": ["*.txt"], "tags": ["zippable"]},
                         Address("example", target_name="files_target"),
                     ),
-                    tuple(),
+                    _snapshot(
+                        "0",
+                        (),
+                    ),
                     tuple(),
                 ),
                 TargetData(
@@ -138,6 +158,7 @@
                     "target_type": "files",
                     "dependencies": [],
                     "sources": [],
+                    "sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000",
                     "sources_raw": [
                       "*.txt"
                     ],
@@ -167,7 +188,7 @@
             [
                 TargetData(
                     FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
-                    ("foo/a.txt",),
+                    _snapshot("", ("foo/a.txt",)),
                     ("foo/a.txt:baz",),
                     dependencies_rules=("does", "apply", "*"),
                     dependents_rules=("fall-through", "*"),
@@ -218,6 +239,7 @@
                     "sources": [
                       "foo/a.txt"
                     ],
+                    "sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000",
                     "sources_raw": [
                       "*.txt"
                     ]
@@ -254,6 +276,19 @@ def test_non_matching_build_target(rule_runner: RuleRunner) -> None:
     assert result.stdout == "[]\n"
 
 
+def _normalize_fingerprints(tds: Sequence[TargetData]) -> list[TargetData]:
+    """We're not here to test the computation of fingerprints."""
+    return [
+        dataclasses.replace(
+            td,
+            expanded_sources=None
+            if td.expanded_sources is None
+            else _snapshot("", td.expanded_sources.files),
+        )
+        for td in tds
+    ]
+
+
 def test_get_target_data(rule_runner: RuleRunner) -> None:
     rule_runner.write_files(
         {
@@ -272,7 +307,8 @@ def test_get_target_data(rule_runner: RuleRunner) -> None:
         TargetDatas,
         [RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")],
     )
-    assert list(tds) == [
+
+    assert _normalize_fingerprints(tds) == [
         TargetData(
             GenericTarget({"dependencies": [":baz"]}, Address("foo", target_name="bar")),
             None,
@@ -280,21 +316,21 @@ def test_get_target_data(rule_runner: RuleRunner) -> None:
         ),
         TargetData(
             FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
-            ("foo/a.txt", "foo/b.txt"),
+            _snapshot("", ("foo/a.txt", "foo/b.txt")),
             ("foo/a.txt:baz", "foo/b.txt:baz"),
         ),
         TargetData(
             FileTarget(
                 {"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz")
             ),
-            ("foo/a.txt",),
+            _snapshot("", ("foo/a.txt",)),
             (),
         ),
         TargetData(
             FileTarget(
                 {"source": "b.txt"}, Address("foo", relative_file_path="b.txt", target_name="baz")
             ),
-            ("foo/b.txt",),
+            _snapshot("", ("foo/b.txt",)),
             (),
         ),
     ]
@@ -325,10 +361,10 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None:
         TargetDatas,
         [RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")],
     )
-    assert list(tds) == [
+    assert _normalize_fingerprints(tds) == [
         TargetData(
             FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
-            ("foo/a.txt",),
+            _snapshot("", ("foo/a.txt",)),
             ("foo/a.txt:baz",),
             dependencies_rules=("does", "apply", "*"),
             dependents_rules=("fall-through", "*"),
@@ -349,7 +385,7 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None:
             FileTarget(
                 {"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz")
             ),
-            ("foo/a.txt",),
+            _snapshot("", ("foo/a.txt",)),
             (),
             dependencies_rules=("does", "apply", "*"),
             dependents_rules=("fall-through", "*"),