Skip to content

Commit

Permalink
Ensure packaged artefacts are fully replaced in dist/ (pantsbuild#18930)
Browse files Browse the repository at this point in the history
This patch has the package goal clear out the artefact(s) it is about to
write to `dist/`, if they already exist.

For instance, if running ``pants package path/to:target`` on a
`pex_binary` (outputting `path.to/target.pex`), pants will now first
remove anything that's already at `dist/path.to/target.pex` before
writing the new output.

This resolves two problems:

- if there's existing contents of a different kind (e.g. a directory in
`dist/` and writing a file), the package call would explode. For
instance, switching a target like `pex_binary(..., format="zipapp")`
(file) to `pex_binary(..., format="packed")` (directory).

- if the package output is a directory, stale files already in that
location in `dist/` would remain. For instance, a `pex_binary(...,
format="packed")` where a file was removed.

This fixes pantsbuild#17758 and fixes pantsbuild#18849, respectively.

This only fixes `package`, not any other goals that also write to fixed
paths (like `export` and `export-codegen`). In pantsbuild#18871, I start on
`export-codegen`, but it's a bit fiddlier (requires propagating "this is
the artefact" paths around) and it's best to land the infrastructure in
this PR first. I'll file follow-up issues covering them specifically.
  • Loading branch information
huonw authored May 8, 2023
1 parent ad4fcde commit a24a59e
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 29 deletions.
8 changes: 7 additions & 1 deletion src/python/pants/core/goals/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,13 @@ async def package_asset(workspace: Workspace, dist_dir: DistDir) -> Package:
)

merged_digest = await Get(Digest, MergeDigests(pkg.digest for pkg in packages))
workspace.write_digest(merged_digest, path_prefix=str(dist_dir.relpath))
all_relpaths = [
artifact.relpath for pkg in packages for artifact in pkg.artifacts if artifact.relpath
]

workspace.write_digest(
merged_digest, path_prefix=str(dist_dir.relpath), clear_paths=all_relpaths
)
for pkg in packages:
for artifact in pkg.artifacts:
msg = []
Expand Down
187 changes: 187 additions & 0 deletions src/python/pants/core/goals/package_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from textwrap import dedent

import pytest

from pants.core.goals import package
from pants.core.goals.package import BuiltPackage, BuiltPackageArtifact, Package, PackageFieldSet
from pants.engine.fs import CreateDigest, Digest, FileContent
from pants.engine.internals.selectors import Get
from pants.engine.rules import rule
from pants.engine.target import StringField, Target
from pants.engine.unions import UnionRule
from pants.testutil.rule_runner import RuleRunner


class MockTypeField(StringField):
    """Mock field whose value selects which artefact layout `synth` produces."""

    alias = "type"

    def synth(self, base_path: Path) -> tuple[CreateDigest, tuple[Path, ...]]:
        """Return a digest to create under `base_path` plus the artefact relpaths.

        The relpaths are what a packaging rule reports as its artefacts: each
        individual file for the file-based layouts, or `base_path` itself for
        the directory layout.
        """
        kind = self.value
        if kind == "single_file":
            return CreateDigest([FileContent(str(base_path), b"single")]), (base_path,)

        if kind in ("multiple_files", "directory"):
            child_a = base_path / "a"
            child_b = base_path / "b"
            if kind == "multiple_files":
                contents = [
                    FileContent(str(child_a), b"multiple: a"),
                    FileContent(str(child_b), b"multiple: b"),
                ]
                # Each file is reported as its own artefact.
                relpaths = (child_a, child_b)
            else:
                contents = [
                    FileContent(str(child_a), b"directory: a"),
                    FileContent(str(child_b), b"directory: b"),
                ]
                # The whole directory is reported as a single artefact.
                relpaths = (base_path,)
            return CreateDigest(contents), relpaths

        raise ValueError(f"don't understand {self.value}")


class MockTarget(Target):
    # Minimal target type for these tests: its only declared field is the mock
    # `type` field that drives which artefact shape gets packaged.
    alias = "mock"
    core_fields = (MockTypeField,)


@dataclass(frozen=True)
class MockPackageFieldSet(PackageFieldSet):
    """Field set that makes `mock` targets packageable by the `package` goal."""

    required_fields = (MockTypeField,)

    # Captured `type` field; `package_mock_target` uses it to synthesise output.
    type: MockTypeField


@rule
async def package_mock_target(field_set: MockPackageFieldSet) -> BuiltPackage:
    """Package a `mock` target by synthesising the digest its `type` field describes."""
    output_base = Path(f"base/{field_set.address.target_name}")
    digest_request, artifact_relpaths = field_set.type.synth(output_base)
    digest = await Get(Digest, CreateDigest, digest_request)
    artifacts = tuple(BuiltPackageArtifact(relpath=str(p)) for p in artifact_relpaths)
    return BuiltPackage(digest, artifacts)


@pytest.fixture
def rule_runner() -> RuleRunner:
    """A RuleRunner wired with the `package` goal rules plus the mock packaging rule."""
    all_rules = [
        *package.rules(),
        package_mock_target,
        UnionRule(PackageFieldSet, MockPackageFieldSet),
    ]
    return RuleRunner(rules=all_rules, target_types=[MockTarget])


@pytest.fixture
def dist_base(rule_runner) -> Path:
    """Absolute path of `dist/base`, where `package_mock_target` writes artefacts."""
    return Path(rule_runner.build_root) / "dist/base"


def test_package_single_file_artifact(rule_runner: RuleRunner, dist_base: Path) -> None:
    """Packaging a target that emits a single file writes that file under dist/."""
    rule_runner.write_files({"src/BUILD": "mock(name='x', type='single_file')"})

    goal_result = rule_runner.run_goal_rule(
        Package, args=("src:x",), env_inherit={"HOME", "PATH", "PYENV_ROOT"}
    )
    assert goal_result.exit_code == 0

    assert (dist_base / "x").read_text() == "single"


def test_package_directory_artifact(rule_runner: RuleRunner, dist_base: Path) -> None:
    """Packaging a directory-shaped artefact writes its contents under dist/."""
    rule_runner.write_files({"src/BUILD": "mock(name='x', type='directory')"})

    goal_result = rule_runner.run_goal_rule(
        Package, args=("src:x",), env_inherit={"HOME", "PATH", "PYENV_ROOT"}
    )
    assert goal_result.exit_code == 0

    assert (dist_base / "x/a").read_text() == "directory: a"
    assert (dist_base / "x/b").read_text() == "directory: b"


def test_package_multiple_artifacts(rule_runner: RuleRunner, dist_base: Path) -> None:
    """A single target reporting multiple file artefacts writes all of them."""
    rule_runner.write_files({"src/BUILD": "mock(name='x', type='multiple_files')"})

    goal_result = rule_runner.run_goal_rule(
        Package, args=("src:x",), env_inherit={"HOME", "PATH", "PYENV_ROOT"}
    )
    assert goal_result.exit_code == 0

    assert (dist_base / "x/a").read_text() == "multiple: a"
    assert (dist_base / "x/b").read_text() == "multiple: b"


def test_package_multiple_targets(rule_runner: RuleRunner, dist_base: Path) -> None:
    """Packaging several targets in one invocation writes each target's artefact."""
    # Adjacent string literals, byte-identical to the dedent'd original.
    build_contents = (
        "mock(name='x', type='single_file')\n"
        "mock(name='y', type='single_file')\n"
    )
    rule_runner.write_files({"src/BUILD": build_contents})

    goal_result = rule_runner.run_goal_rule(
        Package, args=("src:x", "src:y"), env_inherit={"HOME", "PATH", "PYENV_ROOT"}
    )
    assert goal_result.exit_code == 0

    assert (dist_base / "x").read_text() == "single"
    assert (dist_base / "y").read_text() == "single"


@pytest.mark.parametrize("existing", ["file", "directory"])
@pytest.mark.parametrize("type", ["single_file", "directory"])
def test_package_replace_existing(
    existing: str, type: str, rule_runner: RuleRunner, dist_base: Path
) -> None:
    """Existing dist/ contents (file or directory) must be fully replaced.

    For every combination of what is already at the output path and what the
    package produces, the end state should match running `package` into an
    empty dist/:

    - the goal succeeds (no file-vs-directory clash)
    - no stale files linger inside a directory artefact
    """
    if existing == "file":
        pre_existing = {"dist/base/x": "existing"}
    else:
        pre_existing = {"dist/base/x/a": "existing: a", "dist/base/x/c": "existing: c"}
    rule_runner.write_files({**pre_existing, "src/BUILD": f"mock(name='x', type='{type}')"})

    goal_result = rule_runner.run_goal_rule(
        Package, args=("src:x",), env_inherit={"HOME", "PATH", "PYENV_ROOT"}
    )
    assert goal_result.exit_code == 0

    if type == "single_file":
        assert (dist_base / "x").read_text() == "single"
    else:
        artefact_dir = dist_base / "x"
        expected_a = artefact_dir / "a"
        expected_b = artefact_dir / "b"
        # Exactly a and b — anything pre-existing (e.g. `c`) must be gone.
        assert set(artefact_dir.iterdir()) == {expected_a, expected_b}
        assert expected_a.read_text() == "directory: a"
        assert expected_b.read_text() == "directory: b"
8 changes: 3 additions & 5 deletions src/python/pants/engine/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Iterable, Mapping, Optional, Tuple, Union
from typing import TYPE_CHECKING, Iterable, Mapping, Optional, Sequence, Tuple, Union

# Note: several of these types are re-exported as the public API of `engine/fs.py`.
from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior as GlobMatchErrorBehavior
Expand Down Expand Up @@ -286,7 +286,7 @@ def write_digest(
digest: Digest,
*,
path_prefix: Optional[str] = None,
clear_destination: bool = False,
clear_paths: Sequence[str] = (),
side_effecting: bool = True,
) -> None:
"""Write a digest to disk, relative to the build root.
Expand All @@ -299,9 +299,7 @@ def write_digest(
"""
if side_effecting:
self.side_effected()
self._scheduler.write_digest(
digest, path_prefix=path_prefix, clear_destination=clear_destination
)
self._scheduler.write_digest(digest, path_prefix=path_prefix, clear_paths=clear_paths)


@dataclass(frozen=True)
Expand Down
59 changes: 45 additions & 14 deletions src/python/pants/engine/fs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ def test_write_digest_workspace(rule_runner: RuleRunner) -> None:
assert path2.read_text() == "goodbye"


def test_write_digest_workspace_clear_destination(rule_runner: RuleRunner) -> None:
def test_write_digest_workspace_clear_paths(rule_runner: RuleRunner) -> None:
workspace = Workspace(rule_runner.scheduler, _enforce_effects=False)
digest_a = rule_runner.request(
Digest,
Expand All @@ -1199,19 +1199,50 @@ def test_write_digest_workspace_clear_destination(rule_runner: RuleRunner) -> No
Digest,
[CreateDigest([FileContent("newdir/c.txt", b"hello again")])],
)
path1 = Path(rule_runner.build_root, "newdir/a.txt")
path2 = Path(rule_runner.build_root, "newdir/b.txt")
path3 = Path(rule_runner.build_root, "newdir/c.txt")

workspace.write_digest(digest_a, clear_destination=False)
workspace.write_digest(digest_b, clear_destination=False)
assert path1.exists()
assert path2.exists()

workspace.write_digest(digest_c, clear_destination=True)
assert not path1.exists()
assert not path2.exists()
assert path3.read_text() == "hello again"
digest_c_root = rule_runner.request(
Digest, [CreateDigest([FileContent("c.txt", b"hello again")])]
)
digest_d = rule_runner.request(
Digest, [CreateDigest([SymlinkEntry("newdir/d.txt", "newdir/a.txt")])]
)
all_paths = {name: Path(rule_runner.build_root, f"newdir/{name}.txt") for name in "abcd"}

def check(expected_names: set[str]) -> None:
for name, path in all_paths.items():
expected = name in expected_names
assert path.exists() == expected

workspace.write_digest(digest_a, clear_paths=())
workspace.write_digest(digest_b, clear_paths=())
check({"a", "b"})

# clear a file
workspace.write_digest(digest_d, clear_paths=("newdir/b.txt",))
check({"a", "d"})

# clear a symlink (doesn't remove target file)
workspace.write_digest(digest_b, clear_paths=("newdir/d.txt",))
check({"a", "b"})

# clear a directory
workspace.write_digest(digest_c, clear_paths=("newdir",))
check({"c"})

# path prefix, and clearing the 'current' directory
workspace.write_digest(digest_c_root, path_prefix="newdir", clear_paths=("",))
check({"c"})

# clear multiple paths
workspace.write_digest(digest_b, clear_paths=())
check({"b", "c"})
workspace.write_digest(digest_a, clear_paths=("newdir/b.txt", "newdir/c.txt"))
check({"a"})

# clearing non-existent paths is fine
workspace.write_digest(
digest_b, clear_paths=("not-here", "newdir/not-here", "not-here/also-not-here")
)
check({"a", "b"})


@dataclass(frozen=True)
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def write_digest(
session: PySession,
digest: Digest,
path_prefix: str,
clear_destination: bool,
clear_paths: Sequence[str],
) -> None: ...
def write_log(msg: str, level: int, target: str) -> None: ...
def flush_log() -> None: ...
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ def ensure_directory_digest_persisted(self, digest: Digest) -> None:
native_engine.ensure_directory_digest_persisted(self.py_scheduler, digest)

def write_digest(
self, digest: Digest, *, path_prefix: str | None = None, clear_destination: bool = False
self, digest: Digest, *, path_prefix: str | None = None, clear_paths: Sequence[str] = ()
) -> None:
"""Write a digest to disk, relative to the build root."""
if path_prefix and PurePath(path_prefix).is_absolute():
Expand All @@ -616,7 +616,7 @@ def write_digest(
"the digest relative to the build root."
)
native_engine.write_digest(
self.py_scheduler, self.py_session, digest, path_prefix or "", clear_destination
self.py_scheduler, self.py_session, digest, path_prefix or "", clear_paths
)

def lease_files_in_graph(self) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/testutil/rule_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ def get_target(self, address: Address) -> Target:
).target

def write_digest(
self, digest: Digest, *, path_prefix: str | None = None, clear_destination: bool = False
self, digest: Digest, *, path_prefix: str | None = None, clear_paths: Sequence[str] = ()
) -> None:
"""Write a digest to disk, relative to the test's build root.
Expand All @@ -601,7 +601,7 @@ def write_digest(
self.scheduler.py_session,
digest,
path_prefix or "",
clear_destination,
clear_paths,
)

def run_interactive_process(self, request: InteractiveProcess) -> InteractiveProcessResult:
Expand Down
25 changes: 21 additions & 4 deletions src/rust/engine/src/externs/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1625,14 +1625,27 @@ fn single_file_digests_to_bytes<'py>(
})
}

/// Remove whatever currently exists at `path` — regular file, symlink, or
/// directory tree — succeeding trivially when nothing is there.
fn ensure_path_doesnt_exist(path: &Path) -> io::Result<()> {
  // Attempt the cheap file/symlink removal first.
  match std::fs::remove_file(path) {
    Err(e) if e.kind() != io::ErrorKind::NotFound => {
      // Any failure other than NotFound may mean "it's a directory", but we
      // can't match that directly because std::io::ErrorKind::IsADirectory is
      // unstable (https://github.com/rust-lang/rust/issues/86442) — so always
      // fall through to removing a directory tree.
      //
      // NB. we don't need to special-case remove_dir_all returning NotFound:
      // remove_file already identified that case above (except under a
      // concurrent removal, which is out of scope).
      std::fs::remove_dir_all(path)
    }
    // Removed successfully, or nothing existed to begin with.
    _ => Ok(()),
  }
}

#[pyfunction]
fn write_digest(
py: Python,
py_scheduler: &PyScheduler,
py_session: &PySession,
digest: &PyAny,
path_prefix: String,
clear_destination: bool,
clear_paths: Vec<String>,
) -> PyO3Result<()> {
let core = &py_scheduler.0.core;
core.executor.enter(|| {
Expand All @@ -1646,9 +1659,13 @@ fn write_digest(
destination.push(core.build_root.clone());
destination.push(path_prefix);

if clear_destination {
std::fs::remove_dir_all(&destination).map_err(|e| {
PyException::new_err(format!("Failed to clear {}: {e}", destination.display()))
for subpath in &clear_paths {
let resolved = destination.join(subpath);
ensure_path_doesnt_exist(&resolved).map_err(|e| {
PyIOError::new_err(format!(
"Failed to clear {} when writing digest: {e}",
resolved.display()
))
})?;
}

Expand Down

0 comments on commit a24a59e

Please sign in to comment.