Skip to content

Commit

Permalink
Add sources_fingerprint to peek on source-creating targets (pants…
Browse files Browse the repository at this point in the history
…build#18383)

This has the `peek` output include the fingerprint of the sources
referenced in a target. This is a step towards pantsbuild#8445, by putting more
information into `peek`.

For instance, with this, one way to get a crude "hash" of a target would
be something like:

```shell
{ 
   pants dependencies --transitive --closed path/to:target | xargs pants peek
   # these might change behaviour and so need to be included
   cat pants.toml
   cat 3rdparty/python/default.lock # or whatever other lock files are relevant
} | openssl sha256
```

This is conservative: the hash can be different without the behaviour of
the target changing at all. For instance:

- irrelevant changes in `pants.toml`: adjusting comments, unrelated
subsystem config (e.g. settings in `[golang]` when `path/to:target` is a
Python-only `pex_binary`)
- upgrading 3rd party dependencies in the resolve that aren't
(transitively) used by `path/to:target`. This relates to pantsbuild#12733: if all
transitive 3rd party deps appeared in `pants dependencies --transitive`,
and `pants peek` included the right info for them (e.g. version and
fingerprints), the `cat 3rdparty/...` could be removed because the
`peek` pipe would handle it.
- target fields that don't impact execution behaviour, e.g. changing the
`skip_black` setting on a `python_source` target, without changing the
file contents (this might be _most_ fields on the (transitive)
dependencies of a packageable target?)

This is also only the hash of the input configuration, rather than a
hash of a built artefact. If there's processes that aren't deterministic
(e.g. `shell_command(command="date > output.txt",
output_files=["output.txt"])` somewhere in the chain), the exact output
artefact might be different if built twice, even if the hash hasn't
changed.

This PR is, in some sense, a partial revival of pantsbuild#8450, although is much
simpler, because the JSON-outputting `peek` target already exists, and
this doesn't try to solve the full problem.
  • Loading branch information
huonw authored Mar 3, 2023
1 parent dd2eb85 commit bb7d833
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 16 deletions.
8 changes: 5 additions & 3 deletions src/python/pants/backend/project_info/peek.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pants.engine.addresses import Addresses
from pants.engine.collection import Collection
from pants.engine.console import Console
from pants.engine.fs import Snapshot
from pants.engine.goal import Goal, GoalSubsystem, Outputting
from pants.engine.internals.build_files import _get_target_family_and_adaptor_for_dep_rules
from pants.engine.internals.dep_rules import DependencyRuleApplication, DependencyRuleSet
Expand Down Expand Up @@ -79,7 +80,7 @@ def _normalize_value(val: Any) -> Any:
class TargetData:
target: Target
# Sources may not be registered on the target, so we'll have nothing to expand.
expanded_sources: tuple[str, ...] | None
expanded_sources: Snapshot | None
expanded_dependencies: tuple[str, ...]

dependencies_rules: tuple[str, ...] | None = None
Expand All @@ -98,7 +99,8 @@ def to_dict(self, exclude_defaults: bool = False, include_dep_rules: bool = Fals

fields["dependencies"] = self.expanded_dependencies
if self.expanded_sources is not None:
fields["sources"] = self.expanded_sources
fields["sources"] = self.expanded_sources.files
fields["sources_fingerprint"] = self.expanded_sources.digest.fingerprint

if include_dep_rules:
fields["_dependencies_rules"] = self.dependencies_rules
Expand Down Expand Up @@ -189,7 +191,7 @@ async def get_target_data(
for tgt, deps in zip(sorted_targets, dependencies_per_target)
]
expanded_sources_map = {
tgt.address: hs.snapshot.files
tgt.address: hs.snapshot
for tgt, hs in zip(targets_with_sources, hydrated_sources_per_target)
}

Expand Down
62 changes: 49 additions & 13 deletions src/python/pants/backend/project_info/peek_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import dataclasses
from textwrap import dedent
from typing import Sequence

import pytest

Expand All @@ -11,11 +15,16 @@
from pants.base.specs import RawSpecs, RecursiveGlobSpec
from pants.core.target_types import ArchiveTarget, FilesGeneratorTarget, FileTarget, GenericTarget
from pants.engine.addresses import Address
from pants.engine.fs import Digest, Snapshot
from pants.engine.internals.dep_rules import DependencyRuleAction, DependencyRuleApplication
from pants.engine.rules import QueryRule
from pants.testutil.rule_runner import RuleRunner


def _snapshot(fingerprint: str, files: tuple[str, ...]) -> Snapshot:
return Snapshot._unsafe_create(Digest(fingerprint.ljust(64, "0"), 1), files, ())


@pytest.mark.parametrize(
"expanded_target_infos, exclude_defaults, include_dep_rules, expected_output",
[
Expand All @@ -38,7 +47,10 @@
},
Address("example", target_name="files_target"),
),
("foo.txt", "bar.txt"),
_snapshot(
"2",
("foo.txt", "bar.txt"),
),
tuple(),
)
],
Expand All @@ -59,9 +71,10 @@
}
},
"sources": [
"foo.txt",
"bar.txt"
"bar.txt",
"foo.txt"
],
"sources_fingerprint": "2000000000000000000000000000000000000000000000000000000000000000",
"sources_raw": [
"*.txt"
]
Expand All @@ -77,7 +90,10 @@
FilesGeneratorTarget(
{"sources": ["foo.txt"]}, Address("example", target_name="files_target")
),
("foo.txt",),
_snapshot(
"1",
("foo.txt",),
),
tuple(),
)
],
Expand All @@ -95,6 +111,7 @@
"sources": [
"foo.txt"
],
"sources_fingerprint": "1000000000000000000000000000000000000000000000000000000000000000",
"sources_raw": [
"foo.txt"
],
Expand All @@ -112,7 +129,10 @@
{"sources": ["*.txt"], "tags": ["zippable"]},
Address("example", target_name="files_target"),
),
tuple(),
_snapshot(
"0",
(),
),
tuple(),
),
TargetData(
Expand All @@ -138,6 +158,7 @@
"target_type": "files",
"dependencies": [],
"sources": [],
"sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000",
"sources_raw": [
"*.txt"
],
Expand Down Expand Up @@ -167,7 +188,7 @@
[
TargetData(
FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
("foo/a.txt",),
_snapshot("", ("foo/a.txt",)),
("foo/a.txt:baz",),
dependencies_rules=("does", "apply", "*"),
dependents_rules=("fall-through", "*"),
Expand Down Expand Up @@ -218,6 +239,7 @@
"sources": [
"foo/a.txt"
],
"sources_fingerprint": "0000000000000000000000000000000000000000000000000000000000000000",
"sources_raw": [
"*.txt"
]
Expand Down Expand Up @@ -254,6 +276,19 @@ def test_non_matching_build_target(rule_runner: RuleRunner) -> None:
assert result.stdout == "[]\n"


def _normalize_fingerprints(tds: Sequence[TargetData]) -> list[TargetData]:
"""We're not here to test the computation of fingerprints."""
return [
dataclasses.replace(
td,
expanded_sources=None
if td.expanded_sources is None
else _snapshot("", td.expanded_sources.files),
)
for td in tds
]


def test_get_target_data(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
{
Expand All @@ -272,29 +307,30 @@ def test_get_target_data(rule_runner: RuleRunner) -> None:
TargetDatas,
[RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")],
)
assert list(tds) == [

assert _normalize_fingerprints(tds) == [
TargetData(
GenericTarget({"dependencies": [":baz"]}, Address("foo", target_name="bar")),
None,
("foo/a.txt:baz", "foo/b.txt:baz"),
),
TargetData(
FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
("foo/a.txt", "foo/b.txt"),
_snapshot("", ("foo/a.txt", "foo/b.txt")),
("foo/a.txt:baz", "foo/b.txt:baz"),
),
TargetData(
FileTarget(
{"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz")
),
("foo/a.txt",),
_snapshot("", ("foo/a.txt",)),
(),
),
TargetData(
FileTarget(
{"source": "b.txt"}, Address("foo", relative_file_path="b.txt", target_name="baz")
),
("foo/b.txt",),
_snapshot("", ("foo/b.txt",)),
(),
),
]
Expand Down Expand Up @@ -325,10 +361,10 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None:
TargetDatas,
[RawSpecs(recursive_globs=(RecursiveGlobSpec("foo"),), description_of_origin="tests")],
)
assert list(tds) == [
assert _normalize_fingerprints(tds) == [
TargetData(
FilesGeneratorTarget({"sources": ["*.txt"]}, Address("foo", target_name="baz")),
("foo/a.txt",),
_snapshot("", ("foo/a.txt",)),
("foo/a.txt:baz",),
dependencies_rules=("does", "apply", "*"),
dependents_rules=("fall-through", "*"),
Expand All @@ -349,7 +385,7 @@ def test_get_target_data_with_dep_rules(rule_runner: RuleRunner) -> None:
FileTarget(
{"source": "a.txt"}, Address("foo", relative_file_path="a.txt", target_name="baz")
),
("foo/a.txt",),
_snapshot("", ("foo/a.txt",)),
(),
dependencies_rules=("does", "apply", "*"),
dependents_rules=("fall-through", "*"),
Expand Down

0 comments on commit bb7d833

Please sign in to comment.