Skip to content

Commit

Permalink
Port Field to Rust (pantsbuild#19143)
Browse files Browse the repository at this point in the history
This change ports `Field` to Rust.

One important takeaway was that `pyo3` gives us less of an advantage
when subclassing is in use, because most methods of the class need to
accept themselves as `PyCell` or `PyRef` (in order to gain access to
their subclass instance, which isn't a member of the wrapped struct).
Those methods can then only be _called_ via `pyo3` as well.

Despite that, I'll likely still port `Target` to Rust for consistency,
and because it has some longer methods which I expect to see actual
performance benefit from porting.
  • Loading branch information
stuhood authored Jun 1, 2023
1 parent b1df245 commit d97e4ef
Show file tree
Hide file tree
Showing 9 changed files with 387 additions and 154 deletions.
2 changes: 1 addition & 1 deletion src/python/pants/backend/awslambda/python/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def test_layer_must_have_dependencies(rule_runner: PythonRuleRunner) -> None:
{"BUILD": "python_aws_lambda_layer(name='lambda', runtime='python3.7')"}
)
with pytest.raises(
ExecutionError, match="The 'dependencies' field in target //:lambda must be defined"
ExecutionError, match="The `dependencies` field in target //:lambda must be defined"
):
create_python_awslambda(
rule_runner,
Expand Down
23 changes: 12 additions & 11 deletions src/python/pants/backend/javascript/goals/test_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
JSTestBatchCompatibilityTagField,
JSTestExtraEnvVarsField,
)
from pants.build_graph.address import Address
from pants.testutil.rule_runner import MockGet, run_rule_with_mocks


Expand All @@ -34,13 +35,13 @@ def given_field_set(


def test_batches_are_seperated_on_owning_packages() -> None:
field_set_1 = given_field_set(sentinel.address_1, batch_compatibility_tag="default")
field_set_2 = given_field_set(sentinel.address_2, batch_compatibility_tag="default")
field_set_1 = given_field_set(Address("1"), batch_compatibility_tag="default")
field_set_2 = given_field_set(Address("2"), batch_compatibility_tag="default")

request = JSTestRequest.PartitionRequest(field_sets=(field_set_1, field_set_2))

def mocked_owning_node_package(r: OwningNodePackageRequest) -> Any:
if r.address == sentinel.address_1:
if r.address == Address("1"):
return OwningNodePackage(sentinel.owning_package_1)
else:
return OwningNodePackage(sentinel.owning_package_2)
Expand All @@ -60,20 +61,20 @@ def mocked_owning_node_package(r: OwningNodePackageRequest) -> Any:
"field_set_1, field_set_2",
[
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag="1"),
given_field_set(sentinel.address_2, batch_compatibility_tag="2"),
given_field_set(Address("1"), batch_compatibility_tag="1"),
given_field_set(Address("2"), batch_compatibility_tag="2"),
id="compatibility_tag",
),
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag="default"),
given_field_set(Address("1"), batch_compatibility_tag="default"),
given_field_set(
sentinel.address_2, batch_compatibility_tag="default", env_vars=("NODE_ENV=dev",)
Address("2"), batch_compatibility_tag="default", env_vars=("NODE_ENV=dev",)
),
id="extra_env_vars",
),
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag=None),
given_field_set(sentinel.address_2, batch_compatibility_tag=None),
given_field_set(Address("1"), batch_compatibility_tag=None),
given_field_set(Address("2"), batch_compatibility_tag=None),
id="no_compatibility_tag",
),
],
Expand All @@ -96,9 +97,9 @@ def mocked_owning_node_package(_: OwningNodePackageRequest) -> Any:


def test_batches_are_the_same_for_same_compat_and_package() -> None:
field_set_1 = given_field_set(sentinel.address_1, batch_compatibility_tag="default")
field_set_1 = given_field_set(Address("1"), batch_compatibility_tag="default")

field_set_2 = given_field_set(sentinel.address_2, batch_compatibility_tag="default")
field_set_2 = given_field_set(Address("2"), batch_compatibility_tag="default")
request = JSTestRequest.PartitionRequest(field_sets=(field_set_1, field_set_2))

def mocked_owning_node_package(_: OwningNodePackageRequest) -> Any:
Expand Down
101 changes: 100 additions & 1 deletion src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ from __future__ import annotations
from io import RawIOBase
from typing import (
Any,
Callable,
ClassVar,
FrozenSet,
Generic,
Iterable,
Expand Down Expand Up @@ -35,7 +37,7 @@ class PyFailure:
def get_error(self) -> Exception | None: ...

# ------------------------------------------------------------------------------
# Address (parsing)
# Address
# ------------------------------------------------------------------------------

BANNED_CHARS_IN_TARGET_NAME: FrozenSet
Expand Down Expand Up @@ -248,6 +250,103 @@ class PyExecutor:
def to_borrowed(self) -> PyExecutor: ...
def shutdown(self, duration_secs: float) -> None: ...

# ------------------------------------------------------------------------------
# Target
# ------------------------------------------------------------------------------

# Type alias to express the intent that the type should be immutable and hashable. There's nothing
# to actually enforce this, outside of convention.
ImmutableValue = Any

class _NoValue:
def __bool__(self) -> bool:
"""NB: Always returns `False`."""
...
def __repr__(self) -> str: ...

# Marker for unspecified field values that should use the default value if applicable.
NO_VALUE: _NoValue

class Field:
"""A Field.
The majority of fields should use field templates like `BoolField`, `StringField`, and
`StringSequenceField`. These subclasses will provide sensible type hints and validation
automatically.
If you are directly subclassing `Field`, you should likely override `compute_value()`
to perform any custom hydration and/or validation, such as converting unhashable types to
hashable types or checking for banned values. The returned value must be hashable
(and should be immutable) so that this Field may be used by the engine. This means, for
example, using tuples rather than lists and using `FrozenOrderedSet` rather than `set`.
If you plan to use the engine to fully hydrate the value, you can also inherit
`AsyncFieldMixin`, which will store an `address: Address` property on the `Field` instance.
Subclasses should also override the type hints for `value` and `raw_value` to be more precise
than `Any`. The type hint for `raw_value` is used to generate documentation, e.g. for
`./pants help $target_type`.
Set the `help` class property with a description, which will be used in `./pants help`. For the
best rendering, use soft wrapping (e.g. implicit string concatenation) within paragraphs, but
hard wrapping (`\n`) to separate distinct paragraphs and/or lists.
Example:
# NB: Really, this should subclass IntField. We only use Field as an example.
class Timeout(Field):
alias = "timeout"
value: Optional[int]
default = None
help = "A timeout field.\n\nMore information."
@classmethod
def compute_value(cls, raw_value: Optional[int], address: Address) -> Optional[int]:
value_or_default = super().compute_value(raw_value, address=address)
if value_or_default is not None and not isinstance(value_or_default, int):
raise ValueError(
"The `timeout` field expects an integer, but was given"
f"{value_or_default} for target {address}."
)
return value_or_default
"""

# Opt-in per field class to use a "no value" marker for the `raw_value` in `compute_value()` in
# case the field was not represented in the BUILD file.
#
# This will allow users to provide `None` as the field value (when applicable) without getting
# the fields default value.
none_is_valid_value: ClassVar[bool] = False

# Subclasses must define these.
alias: ClassVar[str]
help: ClassVar[str | Callable[[], str]]

# Subclasses must define at least one of these two.
default: ClassVar[ImmutableValue]
required: ClassVar[bool] = False

# Subclasses may define these.
removal_version: ClassVar[str | None] = None
removal_hint: ClassVar[str | None] = None

deprecated_alias: ClassVar[str | None] = None
deprecated_alias_removal_version: ClassVar[str | None] = None

value: ImmutableValue | None

def __init__(self, raw_value: Any | None, address: Address) -> None: ...
@classmethod
def compute_value(cls, raw_value: Any | None, address: Address) -> ImmutableValue:
"""Convert the `raw_value` into `self.value`.
You should perform any optional validation and/or hydration here. For example, you may want
to check that an integer is > 0 or convert an `Iterable[str]` to `List[str]`.
The resulting value must be hashable (and should be immutable).
"""
...

# ------------------------------------------------------------------------------
# FS
# ------------------------------------------------------------------------------
Expand Down
158 changes: 20 additions & 138 deletions src/python/pants/engine/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
get_type_hints,
)

from typing_extensions import Protocol, final
from typing_extensions import Protocol, Self, final

from pants.base.deprecated import warn_or_error
from pants.engine.addresses import Address, Addresses, UnparsedAddressInputs, assert_single_address
Expand All @@ -57,6 +57,8 @@
DependencyRuleActionDeniedError,
DependencyRuleApplication,
)
from pants.engine.internals.native_engine import NO_VALUE as NO_VALUE # noqa: F401
from pants.engine.internals.native_engine import Field as Field # noqa: F401
from pants.engine.unions import UnionMembership, UnionRule, distinct_union_type_per_subclass, union
from pants.option.global_options import UnmatchedBuildFileGlobs
from pants.source.filespec import Filespec, FilespecMatcher
Expand All @@ -79,141 +81,8 @@
ImmutableValue = Any


class _NoValue:
def __bool__(self) -> bool:
return False

def __repr__(self) -> str:
return "<NO_VALUE>"


# Marker for unspecified field values that should use the default value if applicable.
NO_VALUE = _NoValue()


@dataclass(frozen=True)
class Field:
"""A Field.
The majority of fields should use field templates like `BoolField`, `StringField`, and
`StringSequenceField`. These subclasses will provide sensible type hints and validation
automatically.
If you are directly subclassing `Field`, you should likely override `compute_value()`
to perform any custom hydration and/or validation, such as converting unhashable types to
hashable types or checking for banned values. The returned value must be hashable
(and should be immutable) so that this Field may be used by the engine. This means, for
example, using tuples rather than lists and using `FrozenOrderedSet` rather than `set`.
If you plan to use the engine to fully hydrate the value, you can also inherit
`AsyncFieldMixin`, which will store an `address: Address` property on the `Field` instance.
Subclasses should also override the type hints for `value` and `raw_value` to be more precise
than `Any`. The type hint for `raw_value` is used to generate documentation, e.g. for
`./pants help $target_type`.
Set the `help` class property with a description, which will be used in `./pants help`. For the
best rendering, use soft wrapping (e.g. implicit string concatenation) within paragraphs, but
hard wrapping (`\n`) to separate distinct paragraphs and/or lists.
Example:
# NB: Really, this should subclass IntField. We only use Field as an example.
class Timeout(Field):
alias = "timeout"
value: Optional[int]
default = None
help = "A timeout field.\n\nMore information."
@classmethod
def compute_value(cls, raw_value: Optional[int], address: Address) -> Optional[int]:
value_or_default = super().compute_value(raw_value, address=address)
if value_or_default is not None and not isinstance(value_or_default, int):
raise ValueError(
"The `timeout` field expects an integer, but was given"
f"{value_or_default} for target {address}."
)
return value_or_default
"""

# Opt-in per field class to use a "no value" marker for the `raw_value` in `compute_value()` in
# case the field was not represented in the BUILD file.
#
# This will allow users to provide `None` as the field value (when applicable) without getting
# the fields default value.
none_is_valid_value: ClassVar[bool] = False

# Subclasses must define these.
alias: ClassVar[str]
help: ClassVar[str | Callable[[], str]]

# Subclasses must define at least one of these two.
default: ClassVar[ImmutableValue]
required: ClassVar[bool] = False

# Subclasses may define these.
removal_version: ClassVar[str | None] = None
removal_hint: ClassVar[str | None] = None

deprecated_alias: ClassVar[str | None] = None
deprecated_alias_removal_version: ClassVar[str | None] = None

value: Optional[ImmutableValue]

@final
def __init__(self, raw_value: Optional[Any], address: Address) -> None:
if raw_value is NO_VALUE and not self.none_is_valid_value:
raw_value = None
self._check_deprecated(raw_value, address)

object.__setattr__(self, "value", self.compute_value(raw_value, address))

@classmethod
def compute_value(cls, raw_value: Optional[Any], address: Address) -> ImmutableValue:
"""Convert the `raw_value` into `self.value`.
You should perform any optional validation and/or hydration here. For example, you may want
to check that an integer is > 0 or convert an `Iterable[str]` to `List[str]`.
The resulting value must be hashable (and should be immutable).
"""
if raw_value is (NO_VALUE if cls.none_is_valid_value else None):
if cls.required:
raise RequiredFieldMissingException(address, cls.alias)
return cls.default
return raw_value

@classmethod
def _check_deprecated(cls, raw_value: Optional[Any], address: Address) -> None:
if not cls.removal_version or address.is_generated_target or raw_value in (NO_VALUE, None):
return
if not cls.removal_hint:
raise ValueError(
f"You specified `removal_version` for {cls}, but not the class property "
"`removal_hint`."
)
warn_or_error(
cls.removal_version,
entity=f"the {repr(cls.alias)} field",
hint=(f"Using the `{cls.alias}` field in the target {address}. {cls.removal_hint}"),
)

def __repr__(self) -> str:
params = [f"alias={self.alias!r}", f"value={self.value!r}"]
if hasattr(self, "default"):
params.append(f"default={self.default!r}")
return f"{self.__class__}({', '.join(params)})"

def __str__(self) -> str:
return f"{self.alias}={self.value}"

def __hash__(self) -> int:
return hash((self.__class__, self.value))


# NB: By subclassing `Field`, MyPy understands our type hints, and it means it doesn't matter which
# order you use for inheriting the field template vs. the mixin.
@dataclass(frozen=True)
class AsyncFieldMixin(Field):
"""A mixin to store the field's original `Address` for use during hydration by the engine.
Expand Down Expand Up @@ -262,13 +131,14 @@ def hydrate_sources(request: HydrateSourcesRequest) -> HydratedSources:

address: Address

@final # type: ignore[misc]
def __init__(self, raw_value: Optional[Any], address: Address) -> None:
@final
def __new__(cls, raw_value: Optional[Any], address: Address) -> Self:
obj = super().__new__(cls, raw_value, address) # type: ignore[call-arg]
# N.B.: We store the address here and not in the Field base class, because the memory usage
# of storing this value in every field was shown to be excessive / lead to performance
# issues.
object.__setattr__(self, "address", address)
super().__init__(raw_value, address)
object.__setattr__(obj, "address", address)
return obj

def __repr__(self) -> str:
params = [
Expand All @@ -283,6 +153,18 @@ def __repr__(self) -> str:
def __hash__(self) -> int:
return hash((self.__class__, self.value, self.address))

def __eq__(self, other: Any) -> bool:
if not isinstance(other, AsyncFieldMixin):
return False
return (
self.__class__ == other.__class__
and self.value == other.value
and self.address == other.address
)

def __ne__(self, other: Any) -> bool:
return not (self == other)


@union
@dataclass(frozen=True)
Expand Down
Loading

0 comments on commit d97e4ef

Please sign in to comment.