diff --git a/src/python/pants/backend/project_info/peek.py b/src/python/pants/backend/project_info/peek.py index d5c819b1aea..5ea1bfc107d 100644 --- a/src/python/pants/backend/project_info/peek.py +++ b/src/python/pants/backend/project_info/peek.py @@ -5,7 +5,7 @@ import collections import json -from dataclasses import asdict, dataclass, is_dataclass +from dataclasses import dataclass, fields, is_dataclass from typing import Any, Iterable, Mapping from typing_extensions import Protocol, runtime_checkable @@ -134,7 +134,9 @@ def default(self, o): if o is None: return o if is_dataclass(o): - return asdict(o) + # NB: `dataclasses.asdict` creates a deep copy by default, which is unnecessary for + # this case. + return {field.name: getattr(o, field.name) for field in fields(o)} if isinstance(o, collections.abc.Mapping): return dict(o) if ( diff --git a/src/python/pants/base/specs.py b/src/python/pants/base/specs.py index ed96797de75..62a12708c35 100644 --- a/src/python/pants/base/specs.py +++ b/src/python/pants/base/specs.py @@ -69,7 +69,7 @@ def to_address(self) -> Address: self.path_component, target_name=self.target_component, generated_name=self.generated_component, - parameters=self.parameters, + parameters=dict(self.parameters), ) diff --git a/src/python/pants/build_graph/address.py b/src/python/pants/build_graph/address.py index 4aeee2afc07..d2554d9463f 100644 --- a/src/python/pants/build_graph/address.py +++ b/src/python/pants/build_graph/address.py @@ -4,610 +4,41 @@ from __future__ import annotations import dataclasses -import os from dataclasses import dataclass -from pathlib import PurePath -from typing import Any, Iterable, Mapping, Sequence +from typing import Iterable from pants.base.exceptions import MappingError from pants.engine.engine_aware import EngineAwareParameter -from pants.engine.internals import native_engine +from pants.engine.internals.native_engine import ( # noqa: F401 + BANNED_CHARS_IN_GENERATED_NAME as BANNED_CHARS_IN_GENERATED_NAME, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + BANNED_CHARS_IN_PARAMETERS as BANNED_CHARS_IN_PARAMETERS, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + BANNED_CHARS_IN_TARGET_NAME as BANNED_CHARS_IN_TARGET_NAME, +) +from pants.engine.internals.native_engine import Address as Address # noqa: F401 +from pants.engine.internals.native_engine import AddressInput as AddressInput # noqa: F401 from pants.engine.internals.native_engine import ( # noqa: F401 AddressParseException as AddressParseException, ) -from pants.util.dirutil import fast_relpath, longest_dir_prefix -from pants.util.frozendict import FrozenDict -from pants.util.strutil import bullet_list, softwrap, strip_prefix - -# `:`, `#`, `@` are used as delimiters already. Others are reserved for possible future needs. -BANNED_CHARS_IN_TARGET_NAME = frozenset(r":#!@?/\=") -BANNED_CHARS_IN_GENERATED_NAME = frozenset(r":#!@?=") -BANNED_CHARS_IN_PARAMETERS = frozenset(r":#!@?=, ") - - -class InvalidAddressError(AddressParseException): - pass - - -class InvalidSpecPathError(InvalidAddressError): - """Indicate an invalid spec path for `Address`.""" - - -class InvalidTargetNameError(InvalidAddressError): - """Indicate an invalid target name for `Address`.""" - - -class InvalidParametersError(InvalidAddressError): - """Indicate invalid parameter values for `Address`.""" - - -class UnsupportedWildcardError(InvalidAddressError): - """Indicate that an address wildcard was used.""" - - -@dataclass(frozen=True) -class AddressInput: - """A string that has been parsed and normalized using the Address syntax. - - An AddressInput must be resolved into an Address using the engine (which involves inspecting - disk to determine the types of its path component). - """ - - path_component: str - target_component: str | None - generated_component: str | None - parameters: FrozenDict[str, str] - description_of_origin: str - - def __init__( - self, - path_component: str, - target_component: str | None = None, - *, - generated_component: str | None = None, - parameters: Mapping[str, str] = FrozenDict(), - description_of_origin: str, - ) -> None: - object.__setattr__(self, "path_component", path_component) - object.__setattr__(self, "target_component", target_component) - object.__setattr__(self, "generated_component", generated_component) - object.__setattr__(self, "parameters", FrozenDict(parameters)) - object.__setattr__(self, "description_of_origin", description_of_origin) - - self.__post_init__() - - def __post_init__(self): - if not self.target_component: - if self.target_component is not None: - raise InvalidTargetNameError( - softwrap( - f""" - Address `{self.spec}` from {self.description_of_origin} sets - the name component to the empty string, which is not legal. - """ - ) - ) - if self.path_component == "": - raise InvalidTargetNameError( - softwrap( - f""" - Address `{self.spec}` from {self.description_of_origin} has no name part, - but it's necessary because the path is the build root. - """ - ) - ) - - if self.path_component != "": - if os.path.isabs(self.path_component): - raise InvalidSpecPathError( - softwrap( - f""" - Invalid address {self.spec} from {self.description_of_origin}. Cannot use - absolute paths. - """ - ) - ) - - invalid_component = next( - ( - component - for component in self.path_component.split(os.sep) - if component in (".", "..", "") - ), - None, - ) - if invalid_component is not None: - raise InvalidSpecPathError( - softwrap( - f""" - Invalid address `{self.spec}` from {self.description_of_origin}. It has an - un-normalized path part: '{os.sep}{invalid_component}'. - """ - ) - ) - - for k, v in self.parameters.items(): - key_banned = set(BANNED_CHARS_IN_PARAMETERS & set(k)) - if key_banned: - raise InvalidParametersError( - softwrap( - f""" - Invalid address `{self.spec}` from {self.description_of_origin}. It has - illegal characters in parameter keys: `{key_banned}` in `{k}={v}`. - """ - ) - ) - val_banned = set(BANNED_CHARS_IN_PARAMETERS & set(v)) - if val_banned: - raise InvalidParametersError( - softwrap( - f""" - Invalid address `{self.spec}` from {self.description_of_origin}. It has - illegal characters in parameter values: `{val_banned}` in `{k}={v}`. - """ - ) - ) - - @classmethod - def parse( - cls, - spec: str, - *, - relative_to: str | None = None, - subproject_roots: Sequence[str] | None = None, - description_of_origin: str, - ) -> AddressInput: - """Parse a string into an AddressInput. - - :param spec: Target address spec. - :param relative_to: path to use for sibling specs, ie: ':another_in_same_build_family', - interprets the missing spec_path part as `relative_to`. - :param subproject_roots: Paths that correspond with embedded build roots under - the current build root. - :param description_of_origin: where the AddressInput comes from, e.g. "CLI arguments" or - "the option `--paths-from`". This is used for better error messages. - - For example: - - some_target( - name='mytarget', - dependencies=['path/to/buildfile:targetname'], - ) - - Where `path/to/buildfile:targetname` is the dependent target address spec. - - In there is no target name component, it defaults the default target in the resulting - Address's spec_path. - - Optionally, specs can be prefixed with '//' to denote an absolute spec path. This is - normally not significant except when a spec referring to a root level target is needed - from deeper in the tree. For example, in `path/to/buildfile/BUILD`: - - some_target( - name='mytarget', - dependencies=[':targetname'], - ) - - The `targetname` spec refers to a target defined in `path/to/buildfile/BUILD*`. If instead - you want to reference `targetname` in a root level BUILD file, use the absolute form. - For example: - - some_target( - name='mytarget', - dependencies=['//:targetname'], - ) - - The spec may be for a generated target: `dir:generator#generated`. - - The spec may be a file, such as `a/b/c.txt`. It may include a relative address spec at the - end, such as `a/b/c.txt:original` or `a/b/c.txt:../original`, to disambiguate which target - the file comes from; otherwise, it will be assumed to come from the default target in the - directory, i.e. a target which leaves off `name`. - """ - subproject = ( - longest_dir_prefix(relative_to, subproject_roots) - if relative_to and subproject_roots - else None - ) - - def prefix_subproject(spec_path: str) -> str: - if not subproject: - return spec_path - if spec_path: - return os.path.join(subproject, spec_path) - return os.path.normpath(subproject) - - ( - ( - path_component, - target_component, - generated_component, - parameters, - ), - wildcard, - ) = native_engine.address_spec_parse(spec) - - if wildcard: - raise UnsupportedWildcardError( - softwrap( - f""" - The address `{spec}` from {description_of_origin} ended in a wildcard - (`{wildcard}`), which is not supported. - """ - ) - ) - - normalized_relative_to = None - if relative_to: - normalized_relative_to = ( - fast_relpath(relative_to, subproject) if subproject else relative_to - ) - if path_component.startswith("./") and normalized_relative_to: - path_component = os.path.join(normalized_relative_to, path_component[2:]) - if not path_component and normalized_relative_to: - path_component = normalized_relative_to - - path_component = prefix_subproject(strip_prefix(path_component, "//")) - - return cls( - path_component, - target_component, - generated_component=generated_component, - parameters=FrozenDict(sorted(parameters)), - description_of_origin=description_of_origin, - ) - - def file_to_address(self) -> Address: - """Converts to an Address by assuming that the path_component is a file on disk.""" - if self.target_component is None: - # Use the default target in the same directory as the file. - spec_path, relative_file_path = os.path.split(self.path_component) - # We validate that this is not a top-level file. We couldn't do this earlier in the - # AddressSpec constructor because we weren't sure if the path_spec referred to a file - # vs. a directory. - if not spec_path: - raise InvalidTargetNameError( - softwrap( - f""" - Addresses for generated first-party targets in the build root must include - which target generator they come from, such as - `{self.path_component}:original_target`. However, `{self.spec}` - from {self.description_of_origin} did not have a target name. - """ - ) - ) - return Address( - spec_path=spec_path, - relative_file_path=relative_file_path, - parameters=self.parameters, - ) - - # The target component may be "above" (but not below) the file in the filesystem. - # Determine how many levels above the file it is, and validate that the path is relative. - parent_count = self.target_component.count(os.path.sep) - if parent_count == 0: - spec_path, relative_file_path = os.path.split(self.path_component) - return Address( - spec_path=spec_path, - relative_file_path=relative_file_path, - target_name=self.target_component, - parameters=self.parameters, - ) - - expected_prefix = f"..{os.path.sep}" * parent_count - if self.target_component[: self.target_component.rfind(os.path.sep) + 1] != expected_prefix: - raise InvalidTargetNameError( - softwrap( - f""" - Invalid address `{self.spec}` from {self.description_of_origin}. The target - name portion of the address must refer to a target defined in the same - directory or a parent directory of the file path `{self.path_component}`, but - the value `{self.target_component}` is a subdirectory. - """ - ) - ) - - # Split the path_component into a spec_path and relative_file_path at the appropriate - # position. - path_components = self.path_component.split(os.path.sep) - if len(path_components) <= parent_count: - raise InvalidTargetNameError( - softwrap( - f""" - Invalid address `{self.spec}` from {self.description_of_origin}. The target - name portion of the address `{self.target_component}` has too many `../`, which - means it refers to a directory above the file path `{self.path_component}`. - Expected no more than {len(path_components) -1 } instances of `../` in - `{self.target_component}`, but found {parent_count} instances. - """ - ) - ) - offset = -1 * (parent_count + 1) - spec_path = os.path.join(*path_components[:offset]) if path_components[:offset] else "" - relative_file_path = os.path.join(*path_components[offset:]) - target_name = os.path.basename(self.target_component) - return Address( - spec_path, - relative_file_path=relative_file_path, - target_name=target_name, - parameters=self.parameters, - ) - - def dir_to_address(self) -> Address: - """Converts to an Address by assuming that the path_component is a directory on disk.""" - return Address( - spec_path=self.path_component, - target_name=self.target_component, - generated_name=self.generated_component, - parameters=self.parameters, - ) - - @property - def spec(self) -> str: - rep = self.path_component or "//" - if self.generated_component: - rep += f"#{self.generated_component}" - if self.target_component: - rep += f":{self.target_component}" - if self.parameters: - params_vals = ",".join(f"{k}={v}" for k, v in self.parameters.items()) - rep += f"@{params_vals}" - return rep - - -class Address(EngineAwareParameter): - """The unique address for a `Target`. - - Targets explicitly declared in BUILD files use the format `path/to:tgt`, whereas targets - generated from other targets use the format `path/to:generator#generated`. - """ - - def __init__( - self, - spec_path: str, - *, - target_name: str | None = None, - parameters: Mapping[str, str] | None = None, - generated_name: str | None = None, - relative_file_path: str | None = None, - ) -> None: - """ - :param spec_path: The path from the build root to the directory containing the BUILD file - for the target. If the target is generated, this is the path to the generator target. - :param target_name: The name of the target. For generated targets, this is the name of - its target generator. If the `name` is left off (i.e. the default), set to `None`. - :param parameters: A series of key-value pairs which are incorporated into the identity of - the Address. - :param generated_name: The name of what is generated. You can use a file path if the - generated target represents an entity from the file system, such as `a/b/c` or - `subdir/f.ext`. - :param relative_file_path: The relative path from the spec_path to an addressed file, - if any. Because files must always be located below targets that apply metadata to - them, this will always be relative. - """ - self.spec_path = spec_path - self.parameters = FrozenDict(parameters) if parameters else FrozenDict() - self.generated_name = generated_name - self._relative_file_path = relative_file_path - if generated_name: - if relative_file_path: - raise AssertionError( - f"Do not use both `generated_name` ({generated_name}) and " - f"`relative_file_path` ({relative_file_path})." - ) - banned_chars = BANNED_CHARS_IN_GENERATED_NAME & set(generated_name) - if banned_chars: - raise InvalidTargetNameError( - f"The generated name `{generated_name}` (defined in directory " - f"{self.spec_path}, the part after `#`) contains banned characters " - f"(`{'`,`'.join(banned_chars)}`). Please replace " - "these characters with another separator character like `_`, `-`, or `/`." - ) - - # If the target_name is the same as the default name would be, we normalize to None. - self._target_name = None - if target_name and target_name != os.path.basename(self.spec_path): - banned_chars = BANNED_CHARS_IN_TARGET_NAME & set(target_name) - if banned_chars: - raise InvalidTargetNameError( - f"The target name {target_name} (defined in directory {self.spec_path}) " - f"contains banned characters (`{'`,`'.join(banned_chars)}`). Please replace " - "these characters with another separator character like `_` or `-`." - ) - self._target_name = target_name - - self._hash = hash( - (self.spec_path, self._target_name, self.generated_name, self._relative_file_path) - ) - if PurePath(spec_path).name.startswith("BUILD"): - raise InvalidSpecPathError( - f"The address {self.spec} has {PurePath(spec_path).name} as the last part of its " - f"path, but BUILD is a reserved name. Please make sure that you did not name any " - f"directories BUILD." - ) - - @property - def is_generated_target(self) -> bool: - return self.generated_name is not None or self.is_file_target - - @property - def is_file_target(self) -> bool: - return self._relative_file_path is not None - - @property - def is_parametrized(self) -> bool: - return bool(self.parameters) - - def is_parametrized_subset_of(self, other: Address) -> bool: - """True if this Address is == to the given Address, but with a subset of its parameters.""" - if not self._equal_without_parameters(other): - return False - return self.parameters.items() <= other.parameters.items() - - @property - def filename(self) -> str: - if self._relative_file_path is None: - raise AssertionError( - f"Only a file Address (`self.is_file_target`) has a filename: {self}" - ) - return os.path.join(self.spec_path, self._relative_file_path) - - @property - def target_name(self) -> str: - if self._target_name is None: - return os.path.basename(self.spec_path) - return self._target_name - - @property - def parameters_repr(self) -> str: - if not self.parameters: - return "" - rhs = ",".join(f"{k}={v}" for k, v in self.parameters.items()) - return f"@{rhs}" - - @property - def spec(self) -> str: - """The canonical string representation of the Address. - - Prepends '//' if the target is at the root, to disambiguate build root level targets - from "relative" spec notation. - - :API: public - """ - prefix = "//" if not self.spec_path else "" - if self._relative_file_path is None: - path = self.spec_path - target = ( - "" - if self._target_name is None and (self.generated_name or self.parameters) - else self.target_name - ) - else: - path = self.filename - parent_prefix = "../" * self._relative_file_path.count(os.path.sep) - target = ( - "" - if self._target_name is None and not parent_prefix - else f"{parent_prefix}{self.target_name}" - ) - target_sep = ":" if target else "" - generated = "" if self.generated_name is None else f"#{self.generated_name}" - return f"{prefix}{path}{target_sep}{target}{generated}{self.parameters_repr}" - - @property - def path_safe_spec(self) -> str: - """ - :API: public - """ - - def sanitize(s: str) -> str: - return s.replace(os.path.sep, ".") - - if self._relative_file_path: - parent_count = self._relative_file_path.count(os.path.sep) - parent_prefix = "@" * parent_count if parent_count else "." - path = f".{sanitize(self._relative_file_path)}" - else: - parent_prefix = "." - path = "" - if parent_prefix == ".": - target = f"{parent_prefix}{self._target_name}" if self._target_name else "" - else: - target = f"{parent_prefix}{self.target_name}" - if self.parameters: - params = f"@{sanitize(self.parameters_repr)}" - else: - params = "" - generated = f"@{sanitize(self.generated_name)}" if self.generated_name else "" - prefix = sanitize(self.spec_path) - return f"{prefix}{path}{target}{generated}{params}" - - def parametrize(self, parameters: Mapping[str, str]) -> Address: - """Creates a new Address with the given `parameters` merged over self.parameters.""" - merged_parameters = {**self.parameters, **parameters} - return self.__class__( - self.spec_path, - target_name=self._target_name, - generated_name=self.generated_name, - relative_file_path=self._relative_file_path, - parameters=merged_parameters, - ) - - def maybe_convert_to_target_generator(self) -> Address: - """If this address is generated or parametrized, convert it to its generator target. - - Otherwise, return self unmodified. - """ - if self.is_generated_target or self.is_parametrized: - return self.__class__(self.spec_path, target_name=self._target_name) - return self - - def create_generated(self, generated_name: str) -> Address: - if self.is_generated_target: - raise AssertionError(f"Cannot call `create_generated` on `{self}`.") - return self.__class__( - self.spec_path, - target_name=self._target_name, - parameters=self.parameters, - generated_name=generated_name, - ) - - def create_file(self, relative_file_path: str) -> Address: - if self.is_generated_target: - raise AssertionError(f"Cannot call `create_file` on `{self}`.") - return self.__class__( - self.spec_path, - target_name=self._target_name, - parameters=self.parameters, - relative_file_path=relative_file_path, - ) - - def _equal_without_parameters(self, other: Address) -> bool: - return ( - self.spec_path == other.spec_path - and self._target_name == other._target_name - and self.generated_name == other.generated_name - and self._relative_file_path == other._relative_file_path - ) - - def __eq__(self, other): - if self is other: - return True - if not isinstance(other, Address): - return False - return self._equal_without_parameters(other) and self.parameters == other.parameters - - def __hash__(self): - return self._hash - - def __repr__(self) -> str: - return f"Address({self.spec})" - - def __str__(self) -> str: - return self.spec - - def __lt__(self, other): - # NB: This ordering is intentional so that we match the spec format: - # `{spec_path}{relative_file_path}:{tgt_name}#{generated_name}`. - return ( - self.spec_path, - self._relative_file_path or "", - self._target_name or "", - self.parameters, - self.generated_name or "", - ) < ( - other.spec_path, - other._relative_file_path or "", - other._target_name or "", - self.parameters, - other.generated_name or "", - ) - - def debug_hint(self) -> str: - return self.spec - - def metadata(self) -> dict[str, Any]: - return {"address": self.spec} +from pants.engine.internals.native_engine import ( # noqa: F401 + InvalidAddressError as InvalidAddressError, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + InvalidParametersError as InvalidParametersError, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + InvalidSpecPathError as InvalidSpecPathError, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + InvalidTargetNameError as InvalidTargetNameError, +) +from pants.engine.internals.native_engine import ( # noqa: F401 + UnsupportedWildcardError as UnsupportedWildcardError, +) +from pants.util.strutil import bullet_list, softwrap @dataclass(frozen=True) diff --git a/src/python/pants/build_graph/address_test.py b/src/python/pants/build_graph/address_test.py index dcb66054f0a..2992eafa331 100644 --- a/src/python/pants/build_graph/address_test.py +++ b/src/python/pants/build_graph/address_test.py @@ -14,7 +14,6 @@ InvalidTargetNameError, UnsupportedWildcardError, ) -from pants.util.frozendict import FrozenDict def test_address_input_parse_spec() -> None: @@ -33,7 +32,7 @@ def assert_parsed( assert ai.target_component is None else: assert ai.target_component == target_component - assert ai.parameters == FrozenDict(parameters or {}) + assert ai.parameters == (parameters or {}) if generated_component is None: assert ai.generated_component is None else: @@ -221,75 +220,44 @@ def parse(spec, relative_to): def test_address_input_from_file() -> None: - assert AddressInput( - "a/b/c.txt", target_component=None, description_of_origin="tests" + assert AddressInput.parse( + "a/b/c.txt", description_of_origin="tests" ).file_to_address() == Address("a/b", relative_file_path="c.txt") - assert AddressInput( - "a/b/c.txt", target_component="original", description_of_origin="tests" + assert AddressInput.parse( + "a/b/c.txt:original", description_of_origin="tests" ).file_to_address() == Address("a/b", target_name="original", relative_file_path="c.txt") - assert AddressInput( - "a/b/c.txt", target_component="../original", description_of_origin="tests" + assert AddressInput.parse( + "a/b/c.txt:../original", description_of_origin="tests" ).file_to_address() == Address("a", target_name="original", relative_file_path="b/c.txt") - assert AddressInput( - "a/b/c.txt", target_component="../../original", description_of_origin="tests" + assert AddressInput.parse( + "a/b/c.txt:../../original", description_of_origin="tests" ).file_to_address() == Address("", target_name="original", relative_file_path="a/b/c.txt") # These refer to targets "below" the file, which is illegal. with pytest.raises(InvalidTargetNameError): - AddressInput( - "f.txt", target_component="subdir/tgt", description_of_origin="tests" - ).file_to_address() + AddressInput.parse("f.txt:subdir/tgt", description_of_origin="tests").file_to_address() with pytest.raises(InvalidTargetNameError): - AddressInput( - "f.txt", target_component="subdir../tgt", description_of_origin="tests" - ).file_to_address() + AddressInput.parse("f.txt:subdir../tgt", description_of_origin="tests").file_to_address() with pytest.raises(InvalidTargetNameError): - AddressInput( - "a/f.txt", target_component="../a/original", description_of_origin="tests" - ).file_to_address() + AddressInput.parse("a/f.txt:../a/original", description_of_origin="tests").file_to_address() # Top-level files must include a target_name. with pytest.raises(InvalidTargetNameError): - AddressInput("f.txt", description_of_origin="tests").file_to_address() - assert AddressInput( - "f.txt", target_component="tgt", description_of_origin="tests" + AddressInput.parse("f.txt", description_of_origin="tests").file_to_address() + assert AddressInput.parse( + "f.txt:tgt", description_of_origin="tests" ).file_to_address() == Address("", relative_file_path="f.txt", target_name="tgt") def test_address_input_from_dir() -> None: - assert AddressInput("a", description_of_origin="tests").dir_to_address() == Address("a") - assert AddressInput( - "a", target_component="b", description_of_origin="tests" - ).dir_to_address() == Address("a", target_name="b") - assert AddressInput( - "a", target_component="b", generated_component="gen", description_of_origin="tests" - ).dir_to_address() == Address("a", target_name="b", generated_name="gen") - - -@pytest.mark.parametrize( - "addr,expected", - [ - (AddressInput("dir", description_of_origin="tests"), "dir"), - (AddressInput("dir", "tgt", description_of_origin="tests"), "dir:tgt"), - (AddressInput("", "tgt", description_of_origin="tests"), "//:tgt"), - (AddressInput("dir", generated_component="gen", description_of_origin="tests"), "dir#gen"), - ( - AddressInput("dir", "tgt", generated_component="gen", description_of_origin="tests"), - "dir#gen:tgt", - ), - ( - AddressInput("dir", parameters={"k1": "v", "k2": "v"}, description_of_origin="tests"), - "dir@k1=v,k2=v", - ), - ( - AddressInput("dir", "tgt", parameters={"k": "v"}, description_of_origin="tests"), - "dir:tgt@k=v", - ), - ], -) -def test_address_input_spec(addr: AddressInput, expected: str) -> None: - assert addr.spec == expected + assert AddressInput.parse("a", description_of_origin="tests").dir_to_address() == Address("a") + assert AddressInput.parse("a:b", description_of_origin="tests").dir_to_address() == Address( + "a", target_name="b" + ) + assert AddressInput.parse("a:b#gen", description_of_origin="tests").dir_to_address() == Address( + "a", target_name="b", generated_name="gen" + ) def test_address_normalize_target_name() -> None: @@ -314,8 +282,6 @@ def test_address_validate_build_in_spec_path() -> None: def test_address_equality() -> None: - assert "Not really an address" != Address("a/b", target_name="c") - assert Address("dir") == Address("dir") assert Address("dir") == Address("dir", target_name="dir") assert Address("dir") != Address("another_dir") @@ -480,69 +446,77 @@ def test_address_create_generated() -> None: [ ( Address("a/b/c"), - AddressInput("a/b/c", target_component="c", description_of_origin="tests"), + AddressInput.parse("a/b/c:c", description_of_origin="tests"), ), ( Address("a/b/c", target_name="tgt"), - AddressInput("a/b/c", "tgt", description_of_origin="tests"), + AddressInput.parse("a/b/c:tgt", description_of_origin="tests"), ), ( Address("a/b/c", target_name="tgt", generated_name="gen"), - AddressInput("a/b/c", "tgt", generated_component="gen", description_of_origin="tests"), + AddressInput.parse("a/b/c:tgt#gen", description_of_origin="tests"), ), ( Address("a/b/c", target_name="tgt", generated_name="dir/gen"), - AddressInput( - "a/b/c", "tgt", generated_component="dir/gen", description_of_origin="tests" - ), + AddressInput.parse("a/b/c:tgt#dir/gen", description_of_origin="tests"), ), ( Address("a/b/c", relative_file_path="f.txt"), - AddressInput("a/b/c/f.txt", description_of_origin="tests"), + AddressInput.parse("a/b/c/f.txt", description_of_origin="tests"), ), ( Address("a/b/c", relative_file_path="f.txt", target_name="tgt"), - AddressInput("a/b/c/f.txt", "tgt", description_of_origin="tests"), + AddressInput.parse("a/b/c/f.txt:tgt", description_of_origin="tests"), + ), + ( + Address("", target_name="tgt"), + AddressInput.parse("//:tgt", description_of_origin="tests"), ), - (Address("", target_name="tgt"), AddressInput("", "tgt", description_of_origin="tests")), ( Address("", target_name="tgt", generated_name="gen"), - AddressInput("", "tgt", generated_component="gen", description_of_origin="tests"), + AddressInput.parse("//:tgt#gen", description_of_origin="tests"), ), ( Address("", target_name="tgt", relative_file_path="f.txt"), - AddressInput("f.txt", "tgt", description_of_origin="tests"), + AddressInput.parse("//f.txt:tgt", description_of_origin="tests"), ), ( Address("a/b/c", relative_file_path="subdir/f.txt"), - AddressInput("a/b/c/subdir/f.txt", "../c", description_of_origin="tests"), + AddressInput.parse("a/b/c/subdir/f.txt:../c", description_of_origin="tests"), ), ( Address("a/b/c", relative_file_path="subdir/f.txt", target_name="tgt"), - AddressInput("a/b/c/subdir/f.txt", "../tgt", description_of_origin="tests"), + AddressInput.parse( + "a/b/c/subdir/f.txt:../tgt", + description_of_origin="tests", + ), ), ( Address("", target_name="t", parameters={"k": "v"}), - AddressInput("", "t", parameters={"k": "v"}, description_of_origin="tests"), + AddressInput.parse( + "//:t@k=v", + description_of_origin="tests", + ), ), ( Address("", target_name="t", parameters={"k": "v"}, generated_name="gen"), - AddressInput( - "", - "t", - parameters={"k": "v"}, - generated_component="gen", + AddressInput.parse( + "//:t#gen@k=v", description_of_origin="tests", ), ), ( Address("", target_name="t", parameters={"k": ""}), - AddressInput("", "t", parameters={"k": ""}, description_of_origin="tests"), + AddressInput.parse( + "//:t@k=", + description_of_origin="tests", + ), ), ( Address("", target_name="t", parameters={"k1": "v1", "k2": "v2"}), - AddressInput( - "", "t", parameters={"k1": "v1", "k2": "v2"}, description_of_origin="tests" + AddressInput.parse( + "//:t@k1=v1,k2=v2", + description_of_origin="tests", ), ), ], diff --git a/src/python/pants/engine/internals/build_files.py b/src/python/pants/engine/internals/build_files.py index 407feecbebc..72de315c914 100644 --- a/src/python/pants/engine/internals/build_files.py +++ b/src/python/pants/engine/internals/build_files.py @@ -419,7 +419,7 @@ async def find_target_adaptor(request: TargetAdaptorRequest) -> TargetAdaptor: def _rules_path(address: Address) -> str: - if address.is_file_target and os.path.sep in address._relative_file_path: # type: ignore[operator] + if address.is_file_target and os.path.sep in address.relative_file_path: # type: ignore[operator] # The file is in a subdirectory of spec_path return os.path.dirname(address.filename) else: diff --git a/src/python/pants/engine/internals/build_files_test.py b/src/python/pants/engine/internals/build_files_test.py index ee9a9088829..a56be15bd8d 100644 --- a/src/python/pants/engine/internals/build_files_test.py +++ b/src/python/pants/engine/internals/build_files_test.py @@ -365,34 +365,34 @@ def assert_is_expected(address_input: AddressInput, expected: Address) -> None: assert rule_runner.request(Address, [address_input]) == expected assert_is_expected( - AddressInput("a/b/c.txt", description_of_origin="tests"), + AddressInput.parse("a/b/c.txt", description_of_origin="tests"), Address("a/b", target_name=None, relative_file_path="c.txt"), ) assert_is_expected( - AddressInput("a/b", description_of_origin="tests"), + AddressInput.parse("a/b", description_of_origin="tests"), Address("a/b", target_name=None, relative_file_path=None), ) assert_is_expected( - AddressInput("a/b", target_component="c", description_of_origin="tests"), + AddressInput.parse("a/b:c", description_of_origin="tests"), Address("a/b", target_name="c"), ) assert_is_expected( - AddressInput("a/b/c.txt", target_component="c", description_of_origin="tests"), + AddressInput.parse("a/b/c.txt:c", description_of_origin="tests"), Address("a/b", relative_file_path="c.txt", target_name="c"), ) # Top-level addresses will not have a path_component, unless they are a file address. assert_is_expected( - AddressInput("f.txt", target_component="original", description_of_origin="tests"), + AddressInput.parse("f.txt:original", description_of_origin="tests"), Address("", relative_file_path="f.txt", target_name="original"), ) assert_is_expected( - AddressInput("", target_component="t", description_of_origin="tests"), + AddressInput.parse("//:t", description_of_origin="tests"), Address("", target_name="t"), ) - bad_address_input = AddressInput("a/b/fake", description_of_origin="tests") + bad_address_input = AddressInput.parse("a/b/fake", description_of_origin="tests") expected_err = "'a/b/fake' does not exist on disk" with engine_error(ResolveError, contains=expected_err): rule_runner.request(Address, [bad_address_input]) diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index c3a4172402d..5768a9674b9 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -4,9 +4,20 @@ from __future__ import annotations from io import RawIOBase -from typing import Any, Generic, Iterable, Sequence, TextIO, Tuple, TypeVar, overload - -from typing_extensions import Protocol +from typing import ( + Any, + FrozenSet, + Generic, + Iterable, + Mapping, + Sequence, + TextIO, + Tuple, + TypeVar, + overload, +) + +from typing_extensions import Protocol, Self from pants.engine.internals.scheduler import Workunit, _PathGlobsAndRootCollection from pants.engine.internals.session import SessionValues @@ -27,13 +38,207 @@ class PyFailure: # Address (parsing) # ------------------------------------------------------------------------------ -class AddressParseException(Exception): - pass +BANNED_CHARS_IN_TARGET_NAME: FrozenSet +BANNED_CHARS_IN_GENERATED_NAME: FrozenSet +BANNED_CHARS_IN_PARAMETERS: FrozenSet def address_spec_parse( spec: str, ) -> tuple[tuple[str, str | None, str | None, tuple[tuple[str, str], ...]], str | None]: ... +class AddressParseException(Exception): + pass + +class InvalidAddressError(Exception): + pass + +class InvalidSpecPathError(Exception): + pass + +class InvalidTargetNameError(Exception): + pass + +class InvalidParametersError(Exception): + pass + +class UnsupportedWildcardError(Exception): + pass + +class AddressInput: + """A string that has been parsed and normalized using the Address syntax. + + An AddressInput must be resolved into an Address using the engine (which involves inspecting + disk to determine the types of its path component). + """ + + def __init__( + self, + original_spec: str, + path_component: str, + description_of_origin: str, + target_component: str | None = None, + generated_component: str | None = None, + parameters: Mapping[str, str] | None = None, + ) -> None: ... + @classmethod + def parse( + cls, + spec: str, + *, + description_of_origin: str, + relative_to: str | None = None, + subproject_roots: Sequence[str] | None = None, + ) -> Self: + """Parse a string into an AddressInput. + + :param spec: Target address spec. + :param relative_to: path to use for sibling specs, ie: ':another_in_same_build_family', + interprets the missing spec_path part as `relative_to`. + :param subproject_roots: Paths that correspond with embedded build roots under + the current build root. + :param description_of_origin: where the AddressInput comes from, e.g. "CLI arguments" or + "the option `--paths-from`". This is used for better error messages. + + For example: + + some_target( + name='mytarget', + dependencies=['path/to/buildfile:targetname'], + ) + + Where `path/to/buildfile:targetname` is the dependent target address spec. + + In there is no target name component, it defaults the default target in the resulting + Address's spec_path. + + Optionally, specs can be prefixed with '//' to denote an absolute spec path. This is + normally not significant except when a spec referring to a root level target is needed + from deeper in the tree. For example, in `path/to/buildfile/BUILD`: + + some_target( + name='mytarget', + dependencies=[':targetname'], + ) + + The `targetname` spec refers to a target defined in `path/to/buildfile/BUILD*`. If instead + you want to reference `targetname` in a root level BUILD file, use the absolute form. + For example: + + some_target( + name='mytarget', + dependencies=['//:targetname'], + ) + + The spec may be for a generated target: `dir:generator#generated`. + + The spec may be a file, such as `a/b/c.txt`. It may include a relative address spec at the + end, such as `a/b/c.txt:original` or `a/b/c.txt:../original`, to disambiguate which target + the file comes from; otherwise, it will be assumed to come from the default target in the + directory, i.e. a target which leaves off `name`. + """ + ... + @property + def spec(self) -> str: ... + @property + def path_component(self) -> str: ... + @property + def target_component(self) -> str | None: ... + @property + def generated_component(self) -> str | None: ... + @property + def parameters(self) -> dict[str, str]: ... + @property + def description_of_origin(self) -> str: ... + def file_to_address(self) -> Address: + """Converts to an Address by assuming that the path_component is a file on disk.""" + ... + def dir_to_address(self) -> Address: + """Converts to an Address by assuming that the path_component is a directory on disk.""" + ... + +class Address: + """The unique address for a `Target`. + + Targets explicitly declared in BUILD files use the format `path/to:tgt`, whereas targets + generated from other targets use the format `path/to:generator#generated`. + """ + + def __init__( + self, + spec_path: str, + *, + target_name: str | None = None, + parameters: Mapping[str, str] | None = None, + generated_name: str | None = None, + relative_file_path: str | None = None, + ) -> None: + """ + :param spec_path: The path from the build root to the directory containing the BUILD file + for the target. If the target is generated, this is the path to the generator target. + :param target_name: The name of the target. For generated targets, this is the name of + its target generator. If the `name` is left off (i.e. the default), set to `None`. + :param parameters: A series of key-value pairs which are incorporated into the identity of + the Address. + :param generated_name: The name of what is generated. You can use a file path if the + generated target represents an entity from the file system, such as `a/b/c` or + `subdir/f.ext`. + :param relative_file_path: The relative path from the spec_path to an addressed file, + if any. Because files must always be located below targets that apply metadata to + them, this will always be relative. + """ + ... + @property + def spec_path(self) -> str: ... + @property + def generated_name(self) -> str | None: ... + @property + def relative_file_path(self) -> str | None: ... + @property + def parameters(self) -> dict[str, str]: ... + @property + def is_generated_target(self) -> bool: ... + @property + def is_file_target(self) -> bool: ... + @property + def is_parametrized(self) -> bool: ... + def is_parametrized_subset_of(self, other: Address) -> bool: + """True if this Address is == to the given Address, but with a subset of its parameters.""" + ... + @property + def filename(self) -> str: ... + @property + def target_name(self) -> str: ... + @property + def parameters_repr(self) -> str: ... + @property + def spec(self) -> str: + """The canonical string representation of the Address. + + Prepends '//' if the target is at the root, to disambiguate build root level targets from + "relative" spec notation. + """ + ... + @property + def path_safe_spec(self) -> str: ... + def parametrize(self, parameters: Mapping[str, str]) -> Address: + """Creates a new Address with the given `parameters` merged over self.parameters.""" + ... + def maybe_convert_to_target_generator(self) -> Address: + """If this address is generated or parametrized, convert it to its generator target. + + Otherwise, return self unmodified. + """ + ... + def create_generated(self, generated_name: str) -> Address: ... + def create_file(self, relative_file_path: str) -> Address: ... + def debug_hint(self) -> str: ... + def metadata(self) -> dict[str, Any]: ... + + # NB: These methods are provided by our `__richcmp__` implementation, but must be declared in + # the stub in order for mypy to accept them as comparable. + def __lt__(self, other: Any) -> bool: ... + def __gt__(self, other: Any) -> bool: ... + # ------------------------------------------------------------------------------ # Scheduler # ------------------------------------------------------------------------------ diff --git a/src/python/pants/engine/internals/specs_rules.py b/src/python/pants/engine/internals/specs_rules.py index 643332f3431..b64e84b53a5 100644 --- a/src/python/pants/engine/internals/specs_rules.py +++ b/src/python/pants/engine/internals/specs_rules.py @@ -84,11 +84,12 @@ async def _determine_literal_addresses_from_raw_specs( Get( Address, AddressInput( + str(spec), spec.path_component, - spec.target_component, - generated_component=spec.generated_component, - parameters=spec.parameters, description_of_origin=description_of_origin, + target_component=spec.target_component, + generated_component=spec.generated_component, + parameters=dict(spec.parameters), ), ) for spec in literal_specs diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index e7b8ae84f0d..5c7ff2725cf 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -1343,7 +1343,7 @@ def gen_tgt(address: Address, full_fp: str, generated_target_fields: dict[str, A fast_relpath(fp, template_address.spec_path) for fp in normalized_overrides ) all_valid_relative_paths = sorted( - cast(str, tgt.address._relative_file_path or tgt.address.generated_name) + cast(str, tgt.address.relative_file_path or tgt.address.generated_name) for tgt in result ) raise InvalidFieldException( diff --git a/src/python/pants/init/specs_calculator.py b/src/python/pants/init/specs_calculator.py index 9ad01aa2516..2f4aa864b4d 100644 --- a/src/python/pants/init/specs_calculator.py +++ b/src/python/pants/init/specs_calculator.py @@ -15,6 +15,7 @@ from pants.engine.rules import QueryRule from pants.option.options import Options from pants.option.options_bootstrapper import OptionsBootstrapper +from pants.util.frozendict import FrozenDict from pants.vcs.changed import ChangedAddresses, ChangedOptions, ChangedRequest from pants.vcs.git import GitWorktreeRequest, MaybeGitWorktree @@ -85,7 +86,7 @@ def calculate_specs( path_component=address_input.path_component, target_component=address_input.target_component, generated_component=address_input.generated_component, - parameters=address_input.parameters, + parameters=FrozenDict(address_input.parameters), ) ) diff --git a/src/rust/engine/Cargo.toml b/src/rust/engine/Cargo.toml index 32ea8709375..413e3ad4480 100644 --- a/src/rust/engine/Cargo.toml +++ b/src/rust/engine/Cargo.toml @@ -155,7 +155,7 @@ parking_lot = "0.12" petgraph = "0.6" process_execution = { path = "process_execution" } # TODO: Change to ^0.18.4 once https://github.com/PyO3/pyo3/pull/3142 is released -pyo3 = { git = "https://github.com/PyO3/pyo3.git", rev = "c7d541b9ad2a9ef0255ab69541e67b22fe3fbf32"} +pyo3 = { git = "https://github.com/PyO3/pyo3.git", rev = "c7d541b9ad2a9ef0255ab69541e67b22fe3fbf32" } rand = "0.8" # See https://github.com/PyO3/pyo3/issues/3151 syn = "1.0.85" diff --git a/src/rust/engine/src/externs/address.rs b/src/rust/engine/src/externs/address.rs index c4b587c39c0..bd8a81109e8 100644 --- a/src/rust/engine/src/externs/address.rs +++ b/src/rust/engine/src/externs/address.rs @@ -1,21 +1,784 @@ // Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). +use std::borrow::Cow; +use std::collections::{BTreeMap, HashSet}; +use std::ffi::OsStr; +use std::hash::{Hash, Hasher}; +use std::path::{Path, PathBuf}; + +use pyo3::basic::CompareOp; use pyo3::create_exception; -use pyo3::exceptions::PyException; +use pyo3::exceptions::{PyAssertionError, PyException}; use pyo3::prelude::*; +use pyo3::types::{PyDict, PyFrozenSet, PyType}; + +use fnv::FnvHasher; +use lazy_static::lazy_static; create_exception!(native_engine, AddressParseException, PyException); +create_exception!(native_engine, InvalidAddressError, AddressParseException); +create_exception!(native_engine, InvalidSpecPathError, InvalidAddressError); +create_exception!(native_engine, InvalidTargetNameError, InvalidAddressError); +create_exception!(native_engine, InvalidParametersError, InvalidAddressError); +create_exception!(native_engine, UnsupportedWildcardError, InvalidAddressError); pub fn register(py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(address_spec_parse, m)?)?; + m.add( "AddressParseException", py.get_type::(), )?; - m.add_function(wrap_pyfunction!(address_spec_parse, m)?)?; + m.add("InvalidAddressError", py.get_type::())?; + m.add( + "InvalidSpecPathError", + py.get_type::(), + )?; + m.add( + "InvalidTargetNameError", + py.get_type::(), + )?; + m.add( + "InvalidParametersError", + py.get_type::(), + )?; + m.add( + "UnsupportedWildcardError", + py.get_type::(), + )?; + + m.add_class::()?; + m.add_class::
()?; + + m.add( + "BANNED_CHARS_IN_TARGET_NAME", + PyFrozenSet::new(py, BANNED_CHARS_IN_TARGET_NAME.iter())?, + )?; + m.add( + "BANNED_CHARS_IN_GENERATED_NAME", + PyFrozenSet::new(py, BANNED_CHARS_IN_GENERATED_NAME.iter())?, + )?; + m.add( + "BANNED_CHARS_IN_PARAMETERS", + PyFrozenSet::new(py, BANNED_CHARS_IN_PARAMETERS.iter())?, + )?; + Ok(()) } +lazy_static! { + // `:`, `#`, `@` are used as delimiters already. Others are reserved for possible future needs. + pub static ref BANNED_CHARS_IN_TARGET_NAME: HashSet = + [':', '#', '!', '@', '?', '/', '\\', '='].into(); + pub static ref BANNED_CHARS_IN_GENERATED_NAME: HashSet = + [':', '#', '!', '@', '?', '='].into(); + pub static ref BANNED_CHARS_IN_PARAMETERS: HashSet = + [':', '#', '!', '@', '?', '=', ',', ' '].into(); +} + +#[pyclass(name = "AddressInput")] +#[derive(Debug, Hash, Eq, PartialEq)] +pub struct AddressInput { + original_spec: String, + path_component: PathBuf, + target_component: Option, + generated_component: Option, + parameters: BTreeMap, + description_of_origin: String, +} + +#[pymethods] +impl AddressInput { + #[new] + fn __new__( + original_spec: String, + path_component: PathBuf, + description_of_origin: String, + target_component: Option, + generated_component: Option, + parameters: Option>, + ) -> PyResult { + if let Some(target) = target_component.as_ref() { + if target.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( + "Address `{original_spec}` from {description_of_origin} sets \ + the name component to the empty string, which is not legal." + ))); + } + } else if path_component.components().next().is_none() { + return Err(InvalidTargetNameError::new_err(format!( + "Address `{original_spec}` from {description_of_origin} has no name part, \ + but it's necessary because the path is the build root." + ))); + } + + if path_component.components().next().is_some() && path_component.is_absolute() { + return Err(InvalidSpecPathError::new_err(format!( + "Invalid address {original_spec} from {description_of_origin}. Cannot use \ + absolute paths." + ))); + } + + if let Some(parameters) = parameters.as_ref() { + for (k, v) in parameters { + let banned = k + .chars() + .chain(v.chars()) + .filter(|c| BANNED_CHARS_IN_PARAMETERS.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidParametersError::new_err(format!( + "Invalid address `{original_spec}` from {description_of_origin}. It has + illegal characters in parameter entries: `{}` in `{k}={v}`.", + banned.join(","), + ))); + } + } + } + + Ok(Self { + original_spec, + path_component, + target_component, + generated_component, + parameters: parameters.unwrap_or_default(), + description_of_origin, + }) + } + + #[classmethod] + fn parse( + _cls: &PyType, + spec: &str, + description_of_origin: &str, + relative_to: Option<&str>, + subproject_roots: Option>, + ) -> PyResult { + let subproject_info = subproject_roots + .zip(relative_to) + .and_then(|(roots, relative_to)| split_on_longest_dir_prefix(relative_to, &roots)); + + let parsed_spec = address::parse_address_spec(spec).map_err(AddressParseException::new_err)?; + if let Some(wildcard) = parsed_spec.wildcard { + return Err(UnsupportedWildcardError::new_err(format!( + "The address `{spec}` from {description_of_origin} ended in a wildcard \ + (`{wildcard}`), which is not supported." + ))); + } + let address = parsed_spec.address; + + let normalized_relative_to = if let Some((_, normalized_relative_to)) = subproject_info { + Some(normalized_relative_to) + } else { + relative_to + }; + + let mut path_component: Cow = address.path.into(); + if let Some(normalized_relative_to) = normalized_relative_to { + if let Some(stripped) = path_component.strip_prefix("./") { + path_component = format!( + "{normalized_relative_to}{}{stripped}", + std::path::MAIN_SEPARATOR + ) + .into(); + } + if path_component.is_empty() { + path_component = normalized_relative_to.into(); + } + } + if let Some(stripped) = path_component.strip_prefix("//") { + path_component = stripped.to_owned().into(); + } + + // NB: We confirm that the path_component is normalized while still in `str` form because + // `Path` hides many of the components we're attempting to validate. + if !path_component.is_empty() { + for component in path_component.split(std::path::MAIN_SEPARATOR) { + if matches!(component, "." | ".." | "") { + return Err(InvalidSpecPathError::new_err(format!( + "Invalid address `{spec}` from {description_of_origin}. It has an \ + un-normalized path part: `{component:?}`." + ))); + } + } + } + + let path_component = if let Some((subproject, _)) = subproject_info { + Path::new(subproject).join(Path::new(&*path_component)) + } else { + PathBuf::from(path_component.into_owned()) + }; + + Self::__new__( + spec.to_owned(), + path_component, + description_of_origin.to_owned(), + address.target.map(|t| t.to_owned()), + address.generated.map(|t| t.to_owned()), + Some( + address + .parameters + .into_iter() + .map(|(k, v)| (k.to_owned(), v.to_owned())) + .collect(), + ), + ) + } + + #[getter] + fn path_component(&self) -> &Path { + &self.path_component + } + + #[getter] + fn target_component(&self) -> Option<&str> { + self.target_component.as_deref() + } + + #[getter] + fn generated_component(&self) -> Option<&str> { + self.generated_component.as_deref() + } + + #[getter] + fn parameters(&self) -> BTreeMap { + // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. + self.parameters.clone() + } + + #[getter] + fn description_of_origin(&self) -> &str { + &self.description_of_origin + } + + fn file_to_address(&self) -> PyResult
{ + let Some(target_component) = self.target_component.as_ref() else { + // Use the default target in the same directory as the file. + match (self.path_component.parent(), self.path_component.file_name()) { + (Some(spec_path), Some(relative_file_path)) if !spec_path.as_os_str().is_empty() => { + return Address::__new__( + spec_path.to_owned(), + None, + Some(self.parameters.clone()), + None, + Some(relative_file_path.into()), + ); + } + _ => { + // We validate that this is not a top-level file. We couldn't do this earlier in the + // AddressSpec constructor because we weren't sure if the path_spec referred to a file + // vs. a directory. + return Err(InvalidTargetNameError::new_err(format!( + "Addresses for generated first-party targets in the build root must include \ + which target generator they come from, such as `{}:original_target`. However, \ + `{}` from {} did not have a target name.", + self.path_component.display(), + self.original_spec, + self.description_of_origin, + ))); + } + } + }; + + // The target component may be "above" (but not below) the file in the filesystem. + // Determine how many levels above the file it is, and validate that the path is relative. + let parent_count = Path::new(&target_component).components().count() - 1; + if parent_count == 0 { + return Address::__new__( + self + .path_component + .parent() + .unwrap_or_else(|| Path::new("")) + .to_owned(), + Some(target_component.clone()), + Some(self.parameters.clone()), + None, + self.path_component.file_name().map(|f| f.into()), + ); + } + + if parent_count + != Path::new(&target_component) + .components() + .take_while(|c| c.as_os_str() == OsStr::new("..")) + .count() + { + return Err(InvalidTargetNameError::new_err(format!( + "Invalid address `{}` from {}. The target name portion of the address must refer \ + to a target defined in the same directory or a parent directory of the file path \ + `{}`, but the value `{target_component}` is a subdirectory.", + self.original_spec, + self.description_of_origin, + self.path_component.display(), + ))); + } + + // Split the path_component into a spec_path and relative_file_path at the appropriate + // position. + let path_components = self + .path_component + .components() + .map(|c| c.as_os_str().to_str().unwrap()) + .collect::>(); + if path_components.len() <= parent_count { + return Err(InvalidTargetNameError::new_err(format!( + "Invalid address `{}` from {}. The target name portion of the address \ + `{target_component}` has too many `../`, which means it refers to a directory \ + above the file path `{}`. Expected no more than {} instances of `../` in + `{target_component}`, but found {parent_count} instances.", + self.original_spec, + self.description_of_origin, + self.path_component.display(), + path_components.len(), + ))); + } + + let offset = path_components.len() - (parent_count + 1); + let spec_path = path_components[..offset].join(std::path::MAIN_SEPARATOR_STR); + let relative_file_path = path_components[offset..].join(std::path::MAIN_SEPARATOR_STR); + let target_name = Path::new(&target_component).file_name(); + Address::__new__( + spec_path.into(), + target_name.and_then(|t| t.to_str()).map(|t| t.to_owned()), + Some(self.parameters.clone()), + None, + Some(relative_file_path.into()), + ) + } + + fn dir_to_address(&self) -> PyResult
{ + Address::__new__( + self.path_component.clone(), + self.target_component.clone(), + Some(self.parameters.clone()), + self.generated_component.clone(), + None, + ) + } + + #[getter] + fn spec(&self) -> &str { + &self.original_spec + } + + fn __hash__(&self) -> u64 { + let mut s = FnvHasher::default(); + self.hash(&mut s); + s.finish() + } + + fn __str__(&self) -> String { + format!("{self:?}") + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python) -> PyObject { + match op { + CompareOp::Eq => (self == other).into_py(py), + CompareOp::Ne => (self != other).into_py(py), + _ => py.NotImplemented(), + } + } +} + +fn split_on_longest_dir_prefix<'a, 'b>( + path: &'a str, + prefixes: &[&'b str], +) -> Option<(&'b str, &'a str)> { + let mut longest_match = 0; + let mut matched = None; + for prefix in prefixes { + if prefix.len() > longest_match { + if let Ok(stripped) = Path::new(path).strip_prefix(prefix) { + longest_match = prefix.len(); + matched = Some((*prefix, stripped.to_str().unwrap())); + } + } + } + matched +} + +#[pyclass(name = "Address")] +#[derive(Clone, Hash, Eq, PartialEq, Ord, PartialOrd)] +pub struct Address { + // NB: Field ordering is deliberate, so that Ord will roughly match `self.spec`. + spec_path: PathBuf, + relative_file_path: Option, + target_name: Option, + parameters: BTreeMap, + generated_name: Option, +} + +#[pymethods] +impl Address { + #[new] + fn __new__( + spec_path: PathBuf, + target_name: Option, + parameters: Option>, + generated_name: Option, + relative_file_path: Option, + ) -> PyResult { + if let Some(generated_name) = generated_name.as_ref() { + if let Some(relative_file_path) = relative_file_path { + return Err(PyAssertionError::new_err(format!( + "Do not use both `generated_name` ({generated_name}) and \ + `relative_file_path` ({}).", + relative_file_path.display() + ))); + } + let banned = generated_name + .chars() + .filter(|c| BANNED_CHARS_IN_GENERATED_NAME.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( + "The generated name `{generated_name}` (defined in directory {}, the part after \ + `#`) contains banned characters (`{}`). Please replace \ + these characters with another separator character like `_`, `-`, or `/`.", + spec_path.display(), + banned.join(","), + ))); + } + } + + let target_name = if let Some(target_name) = target_name { + if Some(OsStr::new(&target_name)) == spec_path.file_name() { + // If the target_name is the same as the default name would be, we normalize to None. + None + } else { + let banned = target_name + .chars() + .filter(|c| BANNED_CHARS_IN_TARGET_NAME.contains(c)) + .map(|c| c.to_string()) + .collect::>(); + if !banned.is_empty() { + return Err(InvalidTargetNameError::new_err(format!( + "The target name {target_name} (defined in directory {}) \ + contains banned characters (`{}`). Please replace \ + these characters with another separator character like `_` or `-`.", + spec_path.display(), + banned.join(","), + ))); + } + Some(target_name) + } + } else { + None + }; + + let address = Self { + spec_path, + target_name, + parameters: parameters.unwrap_or_default(), + generated_name, + relative_file_path, + }; + + if let Some(file_name) = address.spec_path.file_name().and_then(|n| n.to_str()) { + if file_name.starts_with("BUILD") { + return Err(InvalidSpecPathError::new_err(format!( + "The address {address} has {} as the last part of its \ + path, but BUILD is a reserved name. Please make sure that you did not name any \ + directories BUILD.", + Path::new(file_name).display(), + ))); + } + } + + Ok(address) + } + + #[getter] + fn spec_path(&self) -> &Path { + &self.spec_path + } + + #[getter] + fn generated_name(&self) -> Option<&str> { + self.generated_name.as_deref() + } + + #[getter] + fn relative_file_path(&self) -> Option<&Path> { + self.relative_file_path.as_deref() + } + + #[getter] + fn parameters(&self) -> BTreeMap { + // TODO: For some reason, `IntoPy` is not implemented for `&BTreeMap<_, _>`. + self.parameters.clone() + } + + #[getter] + fn is_generated_target(&self) -> bool { + self.generated_name.is_some() || self.is_file_target() + } + + #[getter] + fn is_file_target(&self) -> bool { + self.relative_file_path.is_some() + } + + #[getter] + fn is_parametrized(&self) -> bool { + !self.parameters.is_empty() + } + + fn is_parametrized_subset_of(&self, other: &Address) -> bool { + self.equal_without_parameters(other) + && self + .parameters + .iter() + .all(|(k, v)| other.parameters.get(k) == Some(v)) + } + + #[getter] + fn filename(&self) -> PyResult { + if let Some(relative_file_path) = self.relative_file_path.as_ref() { + Ok(self.spec_path.join(relative_file_path)) + } else { + Err(PyException::new_err(format!( + "Only a file Address (`self.is_file_target`) has a filename: {self}", + ))) + } + } + + #[getter] + fn target_name(&self) -> &str { + if let Some(target_name) = self.target_name.as_ref() { + target_name + } else if let Some(file_name) = self.spec_path.file_name() { + file_name + .to_str() + .unwrap_or_else(|| panic!("{} could not be viewed as UTF8.", self.spec_path.display())) + } else { + // TODO: This case is preserved from the original implementation (because `os.path.basename` + // returns the empty output for an empty input), but should likely be ruled out in the + // constructor. + "" + } + } + + #[getter] + fn parameters_repr(&self) -> Cow { + if self.parameters.is_empty() { + return Cow::from(""); + } + + let rhs = self + .parameters + .iter() + .map(|(k, v)| format!("{k}={v}")) + .collect::>() + .join(","); + Cow::from(format!("@{rhs}")) + } + + #[getter] + fn spec(&self) -> String { + let prefix = if self.spec_path.as_os_str().is_empty() { + "//" + } else { + "" + }; + + let (path, target): (Cow, Cow) = + if let Some(relative_file_path) = self.relative_file_path.as_ref() { + let parent_prefix = "../".repeat(relative_file_path.components().count() - 1); + let target = if self.target_name.is_none() && parent_prefix.is_empty() { + "" + } else { + self.target_name() + }; + ( + self.spec_path.join(relative_file_path).into(), + format!("{parent_prefix}{target}").into(), + ) + } else { + let target_name = if self.target_name.is_none() + && (self.generated_name.is_some() || !self.parameters.is_empty()) + { + "".into() + } else { + self.target_name().into() + }; + ((&self.spec_path).into(), target_name) + }; + + let target_sep = if target.is_empty() { "" } else { ":" }; + let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { + format!("#{generated_name}").into() + } else { + "".into() + }; + + format!( + "{prefix}{}{target_sep}{target}{generated}{}", + path.display(), + self.parameters_repr() + ) + } + + #[getter] + fn path_safe_spec(&self) -> PyResult { + fn sanitize(s: D) -> String { + s.to_string().replace(std::path::MAIN_SEPARATOR, ".") + } + + let (parent_prefix, path): (Cow, Cow) = match self.relative_file_path.as_ref() { + Some(relative_file_path) if !relative_file_path.as_os_str().is_empty() => { + let parent_count = relative_file_path.components().count() - 1; + let parent_prefix = if parent_count > 0 { + "@".repeat(parent_count).into() + } else { + ".".into() + }; + ( + parent_prefix, + format!(".{}", sanitize(relative_file_path.display())).into(), + ) + } + _ => (".".into(), "".into()), + }; + + let target: Cow = if parent_prefix == "." { + if let Some(target_name) = self.target_name.as_ref() { + format!("{parent_prefix}{target_name}").into() + } else { + "".into() + } + } else { + format!("{parent_prefix}{}", self.target_name()).into() + }; + + let params: Cow = if self.parameters.is_empty() { + "".into() + } else { + format!("@{}", sanitize(self.parameters_repr())).into() + }; + + let generated: Cow = if let Some(generated_name) = self.generated_name.as_ref() { + format!("@{}", sanitize(generated_name)).into() + } else { + "".into() + }; + + let prefix = sanitize(self.spec_path.display()); + + Ok(format!("{prefix}{path}{target}{generated}{params}")) + } + + fn parametrize(&self, parameters: BTreeMap) -> Self { + let mut merged_parameters = self.parameters.clone(); + merged_parameters.extend(parameters); + + Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: merged_parameters, + generated_name: self.generated_name.clone(), + relative_file_path: self.relative_file_path.clone(), + } + } + + fn maybe_convert_to_target_generator(self_: PyRef, py: Python) -> PyObject { + if !self_.is_generated_target() && !self_.is_parametrized() { + return self_.into_py(py); + } + + Self { + spec_path: self_.spec_path.clone(), + target_name: self_.target_name.clone(), + parameters: BTreeMap::default(), + generated_name: None, + relative_file_path: None, + } + .into_py(py) + } + + fn create_generated(&self, generated_name: String) -> PyResult { + if self.is_generated_target() { + return Err(PyAssertionError::new_err(format!( + "Cannot call `create_generated` on `{self}`." + ))); + } + + Ok(Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: self.parameters.clone(), + generated_name: Some(generated_name), + relative_file_path: None, + }) + } + + fn create_file(&self, relative_file_path: PathBuf) -> PyResult { + if self.is_generated_target() { + return Err(PyAssertionError::new_err(format!( + "Cannot call `create_file` on `{self}`." + ))); + } + + Ok(Self { + spec_path: self.spec_path.clone(), + target_name: self.target_name.clone(), + parameters: self.parameters.clone(), + generated_name: None, + relative_file_path: Some(relative_file_path), + }) + } + + fn debug_hint(&self) -> String { + self.spec() + } + + fn metadata<'p>(&self, py: Python<'p>) -> PyResult<&'p PyDict> { + let dict = PyDict::new(py); + dict.set_item(pyo3::intern!(py, "address"), self.spec())?; + Ok(dict) + } + + fn __hash__(&self) -> u64 { + let mut s = FnvHasher::default(); + self.hash(&mut s); + s.finish() + } + + fn __str__(&self) -> String { + format!("{self}") + } + + fn __repr__(&self) -> String { + format!("Address({self})") + } + + fn __richcmp__(&self, other: &Self, op: CompareOp) -> bool { + op.matches(self.cmp(other)) + } +} + +impl Address { + fn equal_without_parameters(&self, other: &Address) -> bool { + self.spec_path == other.spec_path + && self.target_name == other.target_name + && self.generated_name == other.generated_name + && self.relative_file_path == other.relative_file_path + } +} + +impl std::fmt::Display for Address { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.spec()) + } +} + /// 1. a path component /// 2. a target component /// 3. a generated component @@ -31,15 +794,7 @@ type ParsedAddress<'a> = ( /// 2. an optional wildcard component (`:` or `::`) type ParsedSpec<'a> = (ParsedAddress<'a>, Option<&'a str>); -/// Parses an "address spec", which may come from the CLI or from a BUILD file. -/// -/// The underlying parser will accept some combinations of syntax which may not (yet) be legal in -/// certain contexts. For example: a `!` ignore or `::` wildcard may be successfully parsed even -/// when other address syntax is used: if the combination of syntax is not legal in a particular -/// context, the caller should validate that. -/// -/// TODO: If more of spec/address validation is ported to Rust, it might be worth defining a -/// pyclass for the return type. +/// Parses an "address spec" from the CLI. #[pyfunction] fn address_spec_parse(spec_str: &str) -> PyResult { let spec = address::parse_address_spec(spec_str).map_err(AddressParseException::new_err)?;