Skip to content

Commit

Permalink
Add one-partition-per-input partitioning strategy. (pantsbuild#17255)
Browse files Browse the repository at this point in the history
I would like to implement batched testing using the same partitioning infrastructure as `lint`/`fmt`/`fix`. Unlike those goals, it's not necessarily safe/possible for Pants to assume that all tests can run in the same process by default - instead, Pants will need to continue testing one-file-per-process unless a plugin specifically implements the logic for batching compatible test files.

This commit adds a new partitioning type to our generic types to support this use-case. In addition to extending the enum with a new value, it:

  * Adds default `@rule`s for the new value, and
  * Consolidates the logic for deciding which default rules (if any) should be registered into a new method on the enum type

[ci skip-build-wheels]
  • Loading branch information
danxmoran authored Oct 24, 2022
1 parent aa4a99f commit 48250f6
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 30 deletions.
5 changes: 1 addition & 4 deletions src/python/pants/core/goals/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pants.core.goals.multi_tool_goal_helper import BatchSizeOption, OnlyOption
from pants.core.util_rules.partitions import PartitionerType, PartitionMetadataT
from pants.core.util_rules.partitions import Partitions as UntypedPartitions
from pants.core.util_rules.partitions import _single_partition_field_sets_by_file_partitioner_rules
from pants.engine.collection import Collection
from pants.engine.console import Console
from pants.engine.engine_aware import EngineAwareReturnType
Expand Down Expand Up @@ -137,9 +136,7 @@ def _get_rules(cls) -> Iterable[UnionRule]:
class FixTargetsRequest(FixRequest, LintTargetsRequest):
@classmethod
def _get_rules(cls) -> Iterable:
if cls.partitioner_type is PartitionerType.DEFAULT_SINGLE_PARTITION:
yield from _single_partition_field_sets_by_file_partitioner_rules(cls)

yield from cls.partitioner_type.default_rules(cls, by_file=True)
yield from (
rule
for rule in super()._get_rules()
Expand Down
5 changes: 1 addition & 4 deletions src/python/pants/core/goals/fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pants.core.goals.multi_tool_goal_helper import BatchSizeOption, OnlyOption
from pants.core.util_rules.partitions import PartitionerType, PartitionMetadataT
from pants.core.util_rules.partitions import Partitions as UntypedPartitions
from pants.core.util_rules.partitions import _single_partition_field_sets_by_file_partitioner_rules
from pants.engine.collection import Collection
from pants.engine.console import Console
from pants.engine.engine_aware import EngineAwareReturnType
Expand Down Expand Up @@ -140,9 +139,7 @@ def _get_rules(cls) -> Iterable[UnionRule]:
class FmtTargetsRequest(FmtRequest, LintTargetsRequest):
@classmethod
def _get_rules(cls) -> Iterable:
if cls.partitioner_type is PartitionerType.DEFAULT_SINGLE_PARTITION:
yield from _single_partition_field_sets_by_file_partitioner_rules(cls)

yield from cls.partitioner_type.default_rules(cls, by_file=True)
yield from (
rule
for rule in super()._get_rules()
Expand Down
5 changes: 1 addition & 4 deletions src/python/pants/core/goals/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
_BatchBase,
_PartitionFieldSetsRequestBase,
_PartitionFilesRequestBase,
_single_partition_field_sets_partitioner_rules,
)
from pants.engine.console import Console
from pants.engine.engine_aware import EngineAwareParameter, EngineAwareReturnType
Expand Down Expand Up @@ -200,9 +199,7 @@ class PartitionRequest(_PartitionFieldSetsRequestBase[_FieldSetT]):

@classmethod
def _get_rules(cls) -> Iterable:
if cls.partitioner_type is PartitionerType.DEFAULT_SINGLE_PARTITION:
yield from _single_partition_field_sets_partitioner_rules(cls)

yield from cls.partitioner_type.default_rules(cls, by_file=False)
yield from super()._get_rules()
yield UnionRule(LintTargetsRequest.PartitionRequest, cls.PartitionRequest)

Expand Down
143 changes: 125 additions & 18 deletions src/python/pants/core/util_rules/partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,42 @@ class PartitionerType(Enum):
"""The plugin author has a rule to go from `RequestType.PartitionRequest` -> `Partitions`."""

DEFAULT_SINGLE_PARTITION = "default_single_partition"
"""Registers a partitioner which returns the inputs as a single partition."""
"""Registers a partitioner which returns the inputs as a single partition
The returned partition will have no metadata.
"""

DEFAULT_ONE_PARTITION_PER_INPUT = "default_one_partition_per_input"
"""Registers a partitioner which returns a single-element partition per input.
Each of the returned partitions will have no metadata.
"""

def default_rules(self, cls, *, by_file: bool) -> Iterable:
"""Return an iterable of rules defining the default partitioning logic for this
`PartitionerType`.
NOTE: Not all `PartitionerType`s have default logic, so this method can return an empty iterable.
:param by_file: If `True`, rules returned from this method (if any) will compute partitions with
`str`-type elements, where each `str` value is the path to the resolved source field. If `False`, rule will compute
partitions with `FieldSet`-type elements.
"""
if self == PartitionerType.CUSTOM:
# No default rules.
return
elif self == PartitionerType.DEFAULT_SINGLE_PARTITION:
rules_generator = (
_single_partition_file_rules if by_file else _single_partition_field_set_rules
)
yield from rules_generator(cls)
elif self == PartitionerType.DEFAULT_ONE_PARTITION_PER_INPUT:
rules_generator = (
_partition_per_input_file_rules if by_file else _partition_per_input_field_set_rules
)
yield from rules_generator(cls)
else:
raise NotImplementedError(f"Partitioner type {self} is missing default rules!")


class PartitionMetadata(Protocol):
Expand Down Expand Up @@ -137,7 +172,7 @@ class _PartitionFilesRequestBase:


@memoized
def _single_partition_field_sets_partitioner_rules(cls) -> Iterable:
def _single_partition_field_set_rules(cls) -> Iterable:
"""Returns a rule that implements a "partitioner" for `PartitionFieldSetsRequest`, which returns
one partition."""

Expand All @@ -156,27 +191,13 @@ async def partitioner(


@memoized
def _single_partition_field_sets_by_file_partitioner_rules(cls) -> Iterable:
def _single_partition_file_rules(cls) -> Iterable:
"""Returns a rule that implements a "partitioner" for `PartitionFieldSetsRequest`, which returns
one partition."""

# NB: This only works if the FieldSet has a single `SourcesField` field. We check here for
# a better user experience.
sources_field_name = None
for fieldname, fieldtype in _get_field_set_fields(cls.field_set_type).items():
if issubclass(fieldtype, SourcesField):
if sources_field_name is None:
sources_field_name = fieldname
break
raise TypeError(
f"Type {cls.field_set_type} has multiple `SourcesField` fields."
+ " Pants can't provide a default partitioner."
)
else:
raise TypeError(
f"Type {cls.field_set_type} has does not have a `SourcesField` field."
+ " Pants can't provide a default partitioner."
)
sources_field_name = _get_sources_field_name(cls.field_set_type)

@rule(
_param_type_overrides={
Expand Down Expand Up @@ -204,3 +225,89 @@ async def partitioner(
)

return collect_rules(locals())


@memoized
def _partition_per_input_field_set_rules(cls) -> Iterable:
"""Returns a rule that implements a "partitioner" for `PartitionFieldSetsRequest`, which returns
a single-element partition per input."""

@rule(
_param_type_overrides={
"request": cls.PartitionRequest,
"subsystem": cls.tool_subsystem,
}
)
async def partitioner(
request: _PartitionFieldSetsRequestBase, subsystem: SkippableSubsystem
) -> Partitions[FieldSet, Any]:
return (
Partitions()
if subsystem.skip
else Partitions(Partition((field_set,), None) for field_set in request.field_sets)
)

return collect_rules(locals())


@memoized
def _partition_per_input_file_rules(cls) -> Iterable:
"""Returns a rule that implements a "partitioner" for `PartitionFieldSetsRequest`, which returns
a single-element partition per input."""

# NB: This only works if the FieldSet has a single `SourcesField` field. We check here for
# a better user experience.
sources_field_name = _get_sources_field_name(cls.field_set_type)

@rule(
_param_type_overrides={
"request": cls.PartitionRequest,
"subsystem": cls.tool_subsystem,
}
)
async def partitioner(
request: _PartitionFieldSetsRequestBase, subsystem: SkippableSubsystem
) -> Partitions[str, Any]:
assert sources_field_name is not None
all_sources_paths = await MultiGet(
Get(SourcesPaths, SourcesPathsRequest(getattr(field_set, sources_field_name)))
for field_set in request.field_sets
)

return (
Partitions()
if subsystem.skip
else Partitions(
Partition((path,), None)
for path in itertools.chain.from_iterable(
sources_paths.files for sources_paths in all_sources_paths
)
)
)

return collect_rules(locals())


def _get_sources_field_name(field_set_type: type[FieldSet]) -> str:
"""Get the name of the one `SourcesField` belonging to the given target type.
NOTE: The input target type's fields must contain exactly one `SourcesField`.
Otherwise this method will raise a `TypeError`.
"""

sources_field_name = None
for fieldname, fieldtype in _get_field_set_fields(field_set_type).items():
if issubclass(fieldtype, SourcesField):
if sources_field_name is None:
sources_field_name = fieldname
break
raise TypeError(
f"Type {field_set_type} has multiple `SourcesField` fields."
+ " Pants can't provide a default partitioner."
)
else:
raise TypeError(
f"Type {field_set_type} has does not have a `SourcesField` field."
+ " Pants can't provide a default partitioner."
)
return sources_field_name
154 changes: 154 additions & 0 deletions src/python/pants/core/util_rules/partitions_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from dataclasses import dataclass
from typing import Any

import pytest

from pants.build_graph.address import Address
from pants.core.goals.fix import Partitions
from pants.core.util_rules.partitions import (
Partition,
PartitionerType,
_PartitionFieldSetsRequestBase,
)
from pants.engine.rules import QueryRule
from pants.engine.target import FieldSet, MultipleSourcesField, SingleSourceField
from pants.option.option_types import SkipOption
from pants.option.subsystem import Subsystem
from pants.testutil.rule_runner import RuleRunner


class KitchenSource(SingleSourceField):
pass


@dataclass(frozen=True)
class KitchenSingleUtensilFieldSet(FieldSet):
required_fields = (KitchenSource,)

utensil: SingleSourceField


@dataclass(frozen=True)
class KitchenMultipleUtensilsFieldSet(FieldSet):
required_fields = (KitchenSource,)

utensils: MultipleSourcesField


class KitchenSubsystem(Subsystem):
options_scope = "kitchen"
help = "a cookbook might help"
name = "The Kitchen"
skip = SkipOption("cook")


@pytest.mark.parametrize(
"kitchen_field_set_type, field_sets",
[
(
KitchenSingleUtensilFieldSet,
(
KitchenSingleUtensilFieldSet(
Address("//:bowl"), SingleSourceField("bowl.utensil", Address(""))
),
KitchenSingleUtensilFieldSet(
Address("//:knife"), SingleSourceField("knife.utensil", Address(""))
),
),
),
(
KitchenMultipleUtensilsFieldSet,
(
KitchenMultipleUtensilsFieldSet(
Address("//:utensils"),
MultipleSourcesField(["*.utensil"], Address("")),
),
),
),
],
)
def test_default_single_partition_partitioner(kitchen_field_set_type, field_sets) -> None:
class CookRequest:
class PartitionRequest(_PartitionFieldSetsRequestBase[Any]):
pass

tool_subsystem = KitchenSubsystem
field_set_type = kitchen_field_set_type

rules = [
*PartitionerType.DEFAULT_SINGLE_PARTITION.default_rules(CookRequest, by_file=True),
QueryRule(Partitions, [CookRequest.PartitionRequest]),
]
rule_runner = RuleRunner(rules=rules)
rule_runner.write_files({"BUILD": "", "knife.utensil": "", "bowl.utensil": ""})
partitions = rule_runner.request(Partitions, [CookRequest.PartitionRequest(field_sets)])
assert partitions == Partitions(
[
Partition(
(
"bowl.utensil",
"knife.utensil",
),
None,
)
]
)

rule_runner.set_options(["--kitchen-skip"])
partitions = rule_runner.request(Partitions, [CookRequest.PartitionRequest(field_sets)])
assert partitions == Partitions([])


@pytest.mark.parametrize(
"kitchen_field_set_type, field_sets",
[
(
KitchenSingleUtensilFieldSet,
(
KitchenSingleUtensilFieldSet(
Address("//:bowl"), SingleSourceField("bowl.utensil", Address(""))
),
KitchenSingleUtensilFieldSet(
Address("//:knife"), SingleSourceField("knife.utensil", Address(""))
),
),
),
(
KitchenMultipleUtensilsFieldSet,
(
KitchenMultipleUtensilsFieldSet(
Address("//:utensils"),
MultipleSourcesField(["*.utensil"], Address("")),
),
),
),
],
)
def test_default_one_partition_per_input_partitioner(kitchen_field_set_type, field_sets) -> None:
class CookRequest:
class PartitionRequest(_PartitionFieldSetsRequestBase[Any]):
pass

tool_subsystem = KitchenSubsystem
field_set_type = kitchen_field_set_type

rules = [
*PartitionerType.DEFAULT_ONE_PARTITION_PER_INPUT.default_rules(CookRequest, by_file=True),
QueryRule(Partitions, [CookRequest.PartitionRequest]),
]
rule_runner = RuleRunner(rules=rules)
rule_runner.write_files({"BUILD": "", "knife.utensil": "", "bowl.utensil": ""})
partitions = rule_runner.request(Partitions, [CookRequest.PartitionRequest(field_sets)])
assert partitions == Partitions(
[
Partition(("bowl.utensil",), None),
Partition(("knife.utensil",), None),
]
)

rule_runner.set_options(["--kitchen-skip"])
partitions = rule_runner.request(Partitions, [CookRequest.PartitionRequest(field_sets)])
assert partitions == Partitions([])

0 comments on commit 48250f6

Please sign in to comment.