Skip to content

Commit

Permalink
Add a FileDigest type to differentiate file APIs from directory APIs (p…
Browse files Browse the repository at this point in the history
…antsbuild#10900)

### Problem

Two codepaths in our `@rule` API use a `Digest` for a file rather than a directory, and this can cause confusion, since using a `Digest` of the wrong type will trigger a failure to lookup a digest in storage. More generally, our Python API has so far been _more_ typesafe than our Rust API with regard to `Digest`s: fairly consistently using them to represent `Directory`s.

### Solution

Introduce `FileDigest`, and use it in the two relevant locations:
1. `DownloadFile` takes the `FileDigest` for the file being downloaded.
2. `single_file_digests_to_bytes` takes `FileDigests` returned in `workunits.artifacts` (see pantsbuild#10698).

### Result

Improved type safety and clarity. Fixes pantsbuild#10898.

[ci skip-build-wheels]
  • Loading branch information
stuhood authored Oct 3, 2020
1 parent 4129e06 commit 787611d
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 89 deletions.
4 changes: 2 additions & 2 deletions src/python/pants/core/util_rules/external_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pants.core.util_rules import archive
from pants.core.util_rules.archive import ExtractedArchive
from pants.engine.fs import Digest, DownloadFile
from pants.engine.fs import Digest, DownloadFile, FileDigest
from pants.engine.platform import Platform
from pants.engine.rules import Get, collect_rules, rule
from pants.option.subsystem import Subsystem
Expand Down Expand Up @@ -154,7 +154,7 @@ def get_request(self, plat: Platform) -> ExternalToolRequest:
f"help-advanced {self.options_scope}): {known_version}"
)
if plat_val == plat.value and ver == self.version:
digest = Digest(fingerprint=sha256, serialized_bytes_length=int(length))
digest = FileDigest(fingerprint=sha256, serialized_bytes_length=int(length))
try:
url = self.generate_url(plat)
exe = self.generate_exe(plat)
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/core/util_rules/external_tool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
ExternalToolRequest,
UnknownVersion,
)
from pants.engine.fs import Digest, DownloadFile
from pants.engine.fs import DownloadFile, FileDigest
from pants.engine.platform import Platform
from pants.testutil.option_util import create_subsystem

Expand Down Expand Up @@ -49,7 +49,7 @@ def do_test(
assert (
ExternalToolRequest(
DownloadFile(
url=expected_url, expected_digest=Digest(expected_sha256, expected_length)
url=expected_url, expected_digest=FileDigest(expected_sha256, expected_length)
),
f"foobar-{version}/bin/foobar",
)
Expand Down
11 changes: 10 additions & 1 deletion src/python/pants/engine/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ class Paths:
dirs: Tuple[str, ...]


@dataclass(frozen=True)
class FileDigest:
"""A FileDigest is a digest that refers to a file's content, without its name."""

fingerprint: str
serialized_bytes_length: int


@dataclass(frozen=True)
class FileContent:
"""The content of a file.
Expand Down Expand Up @@ -251,7 +259,7 @@ class DownloadFile:
"""

url: str
expected_digest: Digest
expected_digest: FileDigest


@side_effecting
Expand All @@ -272,6 +280,7 @@ def write_digest(self, digest: Digest, *, path_prefix: Optional[str] = None) ->

_EMPTY_FINGERPRINT = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
EMPTY_DIGEST = Digest(fingerprint=_EMPTY_FINGERPRINT, serialized_bytes_length=0)
EMPTY_FILE_DIGEST = FileDigest(fingerprint=_EMPTY_FINGERPRINT, serialized_bytes_length=0)
EMPTY_SNAPSHOT = Snapshot(EMPTY_DIGEST, files=(), dirs=())


Expand Down
9 changes: 6 additions & 3 deletions src/python/pants/engine/fs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Directory,
DownloadFile,
FileContent,
FileDigest,
GlobMatchErrorBehavior,
MergeDigests,
PathGlobs,
Expand Down Expand Up @@ -797,7 +798,7 @@ def mutation_function(project_tree, dir_path):


class DownloadsTest(FSTestBase):
file_digest = Digest("8fcbc50cda241aee7238c71e87c27804e7abc60675974eaf6567aa16366bc105", 14)
file_digest = FileDigest("8fcbc50cda241aee7238c71e87c27804e7abc60675974eaf6567aa16366bc105", 14)

expected_snapshot_digest = Digest(
"4c9cf91fcd7ba1abbf7f9a0a1c8175556a82bee6a398e34db3284525ac24a3ad", 84
Expand Down Expand Up @@ -842,7 +843,7 @@ def test_download_wrong_digest(self) -> None:
[
DownloadFile(
f"http://localhost:{port}/file.txt",
Digest(
FileDigest(
self.file_digest.fingerprint,
self.file_digest.serialized_bytes_length + 1,
),
Expand All @@ -861,7 +862,9 @@ def test_caches_downloads(self) -> None:
# so we shouldn't see an error...
url = DownloadFile(
f"http://localhost:{port}/roland",
Digest("693d8db7b05e99c6b7a7c0616456039d89c555029026936248085193559a0b5d", 16),
FileDigest(
"693d8db7b05e99c6b7a7c0616456039d89c555029026936248085193559a0b5d", 16
),
)
snapshot = self.request(Snapshot, [url])
self.assert_snapshot_equals(
Expand Down
6 changes: 3 additions & 3 deletions src/python/pants/engine/internals/engine_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from pants.engine.engine_aware import EngineAwareReturnType
from pants.engine.fs import (
EMPTY_DIGEST,
EMPTY_FILE_DIGEST,
EMPTY_SNAPSHOT,
CreateDigest,
Digest,
Expand Down Expand Up @@ -692,7 +692,7 @@ def test_process_digests_on_workunits(self):
stderr_digest = process_workunit["artifacts"]["stderr_digest"]

assert result.stdout == b"stdout output\n"
assert stderr_digest == EMPTY_DIGEST
assert stderr_digest == EMPTY_FILE_DIGEST
assert stdout_digest.serialized_bytes_length == len(result.stdout)

tracker = WorkunitTracker()
Expand Down Expand Up @@ -721,7 +721,7 @@ def test_process_digests_on_workunits(self):
stderr_digest = process_workunit["artifacts"]["stderr_digest"]

assert result.stderr == b"stderr output\n"
assert stdout_digest == EMPTY_DIGEST
assert stdout_digest == EMPTY_FILE_DIGEST
assert stderr_digest.serialized_bytes_length == len(result.stderr)

try:
Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
DigestSubset,
DownloadFile,
FileContent,
FileDigest,
MergeDigests,
PathGlobs,
PathGlobsAndRoot,
Expand Down Expand Up @@ -130,6 +131,7 @@ def __init__(

types = PyTypes(
directory_digest=Digest,
file_digest=FileDigest,
snapshot=Snapshot,
paths=Paths,
file_content=FileContent,
Expand Down
6 changes: 6 additions & 0 deletions src/rust/engine/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,12 @@ impl fmt::Debug for Value {
}
}

impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", externs::val_to_str(&self))
}
}

impl FromPyObject<'_> for Value {
fn extract(py: Python, obj: &PyObject) -> PyResult<Self> {
Ok(obj.clone_ref(py).into())
Expand Down
20 changes: 9 additions & 11 deletions src/rust/engine/src/externs/engine_aware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

use crate::core::Value;
use crate::externs;
use crate::nodes::lift_digest;
use crate::nodes::lift_directory_digest;
use crate::Types;

use cpython::{PyDict, PyObject, PyString, Python};
use hashing::Digest;
use workunit_store::Level;
Expand All @@ -13,42 +15,39 @@ use workunit_store::Level;

pub trait EngineAwareInformation {
type MaybeOutput;
fn retrieve(value: &Value) -> Option<Self::MaybeOutput>;
fn retrieve(types: &Types, value: &Value) -> Option<Self::MaybeOutput>;
}

#[derive(Clone, Debug)]
pub struct EngineAwareLevel {}

impl EngineAwareInformation for EngineAwareLevel {
type MaybeOutput = Level;

fn retrieve(value: &Value) -> Option<Level> {
fn retrieve(_types: &Types, value: &Value) -> Option<Level> {
let new_level_val: Value = externs::call_method(&value, "level", &[]).ok()?;
let new_level_val = externs::check_for_python_none(new_level_val)?;
externs::val_to_log_level(&new_level_val).ok()
}
}

#[derive(Clone, Debug)]
pub struct Message {}

impl EngineAwareInformation for Message {
type MaybeOutput = String;

fn retrieve(value: &Value) -> Option<String> {
fn retrieve(_types: &Types, value: &Value) -> Option<String> {
let msg_val: Value = externs::call_method(&value, "message", &[]).ok()?;
let msg_val = externs::check_for_python_none(msg_val)?;
Some(externs::val_to_str(&msg_val))
}
}

#[derive(Clone, Debug)]
pub struct Artifacts {}

impl EngineAwareInformation for Artifacts {
type MaybeOutput = Vec<(String, Digest)>;

fn retrieve(value: &Value) -> Option<Self::MaybeOutput> {
fn retrieve(types: &Types, value: &Value) -> Option<Self::MaybeOutput> {
let artifacts_val: Value = match externs::call_method(&value, "artifacts", &[]) {
Ok(value) => value,
Err(e) => {
Expand Down Expand Up @@ -78,7 +77,7 @@ impl EngineAwareInformation for Artifacts {
log::error!("Error in EngineAware.artifacts() - no `digest` attr: {}", e);
})
.ok()?;
let digest = match lift_digest(&Value::new(digest_value)) {
let digest = match lift_directory_digest(types, &Value::new(digest_value)) {
Ok(digest) => digest,
Err(e) => {
log::error!("Error in EngineAware.artifacts() implementation: {}", e);
Expand All @@ -92,13 +91,12 @@ impl EngineAwareInformation for Artifacts {
}
}

#[derive(Clone, Debug)]
pub struct DebugHint {}

impl EngineAwareInformation for DebugHint {
type MaybeOutput = String;

fn retrieve(value: &Value) -> Option<String> {
fn retrieve(_types: &Types, value: &Value) -> Option<String> {
externs::call_method(&value, "debug_hint", &[])
.ok()
.and_then(externs::check_for_python_none)
Expand Down
70 changes: 39 additions & 31 deletions src/rust/engine/src/externs/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ py_class!(class PyTypes |py| {
def __new__(
_cls,
directory_digest: PyType,
file_digest: PyType,
snapshot: PyType,
paths: PyType,
file_content: PyType,
Expand All @@ -420,6 +421,7 @@ py_class!(class PyTypes |py| {
py,
RefCell::new(Some(Types {
directory_digest: externs::type_for(directory_digest),
file_digest: externs::type_for(file_digest),
snapshot: externs::type_for(snapshot),
paths: externs::type_for(paths),
file_content: externs::type_for(file_content),
Expand Down Expand Up @@ -878,14 +880,14 @@ async fn workunit_to_py_value(workunit: &Workunit, core: &Arc<Core>) -> CPyResul
if let Some(stdout_digest) = &workunit.metadata.stdout.as_ref() {
artifact_entries.push((
externs::store_utf8("stdout_digest"),
crate::nodes::Snapshot::store_directory_digest(core, stdout_digest),
crate::nodes::Snapshot::store_file_digest(core, stdout_digest),
));
}

if let Some(stderr_digest) = &workunit.metadata.stderr.as_ref() {
artifact_entries.push((
externs::store_utf8("stderr_digest"),
crate::nodes::Snapshot::store_directory_digest(core, stderr_digest),
crate::nodes::Snapshot::store_file_digest(core, stderr_digest),
));
}

Expand Down Expand Up @@ -1358,33 +1360,33 @@ fn capture_snapshots(
session_ptr: PySession,
path_globs_and_root_tuple_wrapper: PyObject,
) -> CPyResult<PyObject> {
let values = externs::project_iterable(&path_globs_and_root_tuple_wrapper.into());
let path_globs_and_roots = values
.iter()
.map(|value| {
let root = PathBuf::from(externs::project_str(&value, "root"));
let path_globs = nodes::Snapshot::lift_prepared_path_globs(&externs::project_ignoring_type(
&value,
"path_globs",
));
let digest_hint = {
let maybe_digest = externs::project_ignoring_type(&value, "digest_hint");
if maybe_digest == Value::from(externs::none()) {
None
} else {
Some(nodes::lift_digest(&maybe_digest)?)
}
};
path_globs.map(|path_globs| (path_globs, root, digest_hint))
})
.collect::<Result<Vec<_>, _>>()
.map_err(|e| PyErr::new::<exc::ValueError, _>(py, (e,)))?;

with_scheduler(py, scheduler_ptr, |scheduler| {
with_session(py, session_ptr, |session| {
// TODO: A parent_id should be an explicit argument.
session.workunit_store().init_thread_state(None);
let core = scheduler.core.clone();

let values = externs::project_iterable(&path_globs_and_root_tuple_wrapper.into());
let path_globs_and_roots = values
.iter()
.map(|value| {
let root = PathBuf::from(externs::project_str(&value, "root"));
let path_globs = nodes::Snapshot::lift_prepared_path_globs(
&externs::project_ignoring_type(&value, "path_globs"),
);
let digest_hint = {
let maybe_digest = externs::project_ignoring_type(&value, "digest_hint");
if maybe_digest == Value::from(externs::none()) {
None
} else {
Some(nodes::lift_directory_digest(&core.types, &maybe_digest)?)
}
};
path_globs.map(|path_globs| (path_globs, root, digest_hint))
})
.collect::<Result<Vec<_>, _>>()
.map_err(|e| PyErr::new::<exc::ValueError, _>(py, (e,)))?;

let snapshot_futures = path_globs_and_roots
.into_iter()
.map(|(path_globs, root, digest_hint)| {
Expand Down Expand Up @@ -1422,9 +1424,14 @@ fn ensure_remote_has_recursive(
let core = scheduler.core.clone();
let store = core.store();

// NB: Supports either a FileDigest or Digest as input.
let digests: Vec<Digest> = py_digests
.iter(py)
.map(|item| crate::nodes::lift_digest(&item.into()))
.map(|item| {
let value = item.into();
crate::nodes::lift_directory_digest(&core.types, &value)
.or_else(|_| crate::nodes::lift_file_digest(&core.types, &value))
})
.collect::<Result<Vec<Digest>, _>>()
.map_err(|e| PyErr::new::<exc::Exception, _>(py, (e,)))?;

Expand All @@ -1444,14 +1451,14 @@ fn ensure_remote_has_recursive(
fn single_file_digests_to_bytes(
py: Python,
scheduler_ptr: PyScheduler,
py_digests: PyList,
py_file_digests: PyList,
) -> CPyResult<PyList> {
with_scheduler(py, scheduler_ptr, |scheduler| {
let core = scheduler.core.clone();

let digests: Vec<Digest> = py_digests
let digests: Vec<Digest> = py_file_digests
.iter(py)
.map(|item| crate::nodes::lift_digest(&item.into()))
.map(|item| crate::nodes::lift_file_digest(&core.types, &item.into()))
.collect::<Result<Vec<Digest>, _>>()
.map_err(|e| PyErr::new::<exc::Exception, _>(py, (e,)))?;

Expand Down Expand Up @@ -1513,7 +1520,7 @@ fn run_local_interactive_process(
};

let input_digest_value = externs::project_ignoring_type(&value, "input_digest");
let digest: Digest = nodes::lift_digest(&input_digest_value)?;
let digest: Digest = nodes::lift_directory_digest(types, &input_digest_value)?;
if digest != EMPTY_DIGEST {
if run_in_workspace {
warn!("Local interactive process should not attempt to materialize files when run in workspace");
Expand Down Expand Up @@ -1581,13 +1588,14 @@ fn write_digest(
digest: PyObject,
path_prefix: String,
) -> PyUnitResult {
let lifted_digest =
nodes::lift_digest(&digest.into()).map_err(|e| PyErr::new::<exc::ValueError, _>(py, (e,)))?;
with_scheduler(py, scheduler_ptr, |scheduler| {
with_session(py, session_ptr, |session| {
// TODO: A parent_id should be an explicit argument.
session.workunit_store().init_thread_state(None);

let lifted_digest = nodes::lift_directory_digest(&scheduler.core.types, &digest.into())
.map_err(|e| PyErr::new::<exc::ValueError, _>(py, (e,)))?;

// Python will have already validated that path_prefix is a relative path.
let mut destination = PathBuf::new();
destination.push(scheduler.core.build_root.clone());
Expand Down
Loading

0 comments on commit 787611d

Please sign in to comment.