Skip to content

Commit

Permalink
Resolve Python dep inference ambiguity via locality. (pantsbuild#17931)
Browse files Browse the repository at this point in the history
When there are multiple providers of a symbol, prefer the one with the closest common ancestor to the requester.

This supports the case of multiple projects merged into a single monorepo that have overlapping requirements.txt or repeated first-party module paths.

This is opt-in, so that users are aware that there are multiple providers of the same symbol, when there often need not be.
  • Loading branch information
benjyw authored Jan 10, 2023
1 parent 0dec547 commit cabc705
Show file tree
Hide file tree
Showing 9 changed files with 334 additions and 155 deletions.
21 changes: 19 additions & 2 deletions src/python/pants/backend/awslambda/python/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
PythonModuleOwners,
PythonModuleOwnersRequest,
)
from pants.backend.python.dependency_inference.rules import PythonInferSubsystem, import_rules
from pants.backend.python.dependency_inference.rules import import_rules
from pants.backend.python.dependency_inference.subsystem import (
AmbiguityResolution,
PythonInferSubsystem,
)
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import PexCompletePlatformsField, PythonResolveField
from pants.core.goals.package import OutputPathField
Expand Down Expand Up @@ -177,10 +181,23 @@ async def infer_lambda_handler_dependency(
),
)
module, _, _func = handler.val.partition(":")

# Only set locality if needed, to avoid unnecessary rule graph memoization misses.
# When set, use the source root, which is useful in practice, but incurs fewer memoization
# misses than using the full spec_path.
locality = None
if python_infer_subsystem.ambiguity_resolution == AmbiguityResolution.by_source_root:
source_root = await Get(
SourceRoot, SourceRootRequest, SourceRootRequest.for_address(request.field_set.address)
)
locality = source_root.path

owners = await Get(
PythonModuleOwners,
PythonModuleOwnersRequest(
module, resolve=request.field_set.resolve.normalized_value(python_setup)
module,
resolve=request.field_set.resolve.normalized_value(python_setup),
locality=locality,
),
)
address = request.field_set.address
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
PythonModuleOwners,
PythonModuleOwnersRequest,
)
from pants.backend.python.dependency_inference.rules import PythonInferSubsystem, import_rules
from pants.backend.python.dependency_inference.rules import import_rules
from pants.backend.python.dependency_inference.subsystem import (
AmbiguityResolution,
PythonInferSubsystem,
)
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import PexCompletePlatformsField, PythonResolveField
from pants.core.goals.package import OutputPathField
Expand Down Expand Up @@ -168,10 +172,23 @@ async def infer_cloud_function_handler_dependency(
),
)
module, _, _func = handler.val.partition(":")

# Only set locality if needed, to avoid unnecessary rule graph memoization misses.
# When set, use the source root, which is useful in practice, but incurs fewer memoization
# misses than using the full spec_path.
locality = None
if python_infer_subsystem.ambiguity_resolution == AmbiguityResolution.by_source_root:
source_root = await Get(
SourceRoot, SourceRootRequest, SourceRootRequest.for_address(request.field_set.address)
)
locality = source_root.path

owners = await Get(
PythonModuleOwners,
PythonModuleOwnersRequest(
module, resolve=request.field_set.resolve.normalized_value(python_setup)
module,
resolve=request.field_set.resolve.normalized_value(python_setup),
locality=locality,
),
)
address = request.field_set.address
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import enum
import itertools
import logging
import os
from collections import defaultdict
from dataclasses import dataclass
from functools import total_ordering
Expand Down Expand Up @@ -410,6 +411,9 @@ def __post_init__(self) -> None:
class PythonModuleOwnersRequest:
module: str
resolve: str | None
# If specified, resolve ambiguity by choosing the symbol provider with the
# closest common ancestor to this path. Must be a path relative to the build root.
locality: str | None = None


@rule
Expand All @@ -423,10 +427,11 @@ async def map_module_to_address(
*first_party_mapping.providers_for_module(request.module, resolve=request.resolve),
)

# We attempt to disambiguate conflicting providers by taking - for each provider type -
# the providers for the closest ancestors to the requested modules. This prevents
# issues with namespace packages that are split between first-party and third-party
# (e.g., https://github.com/pantsbuild/pants/discussions/17286).
# We first attempt to disambiguate conflicting providers by taking - for each provider type -
# the providers of the closest ancestors to the requested modules.
# E.g., if we have a provider for foo.bar and for foo.bar.baz, prefer the latter.
# This prevents issues with namespace packages that are split between first-party and
# third-party (e.g., https://github.com/pantsbuild/pants/discussions/17286).

# Map from provider type to mutable pair of
# [closest ancestry, list of provider of that type at that ancestry level].
Expand All @@ -441,12 +446,31 @@ async def map_module_to_address(
if possible_provider.ancestry == val[0]:
val[1].append(possible_provider.provider)

closest_providers: list[ModuleProvider] = list(
if request.locality:
# For each provider type, if we have more than one provider left, prefer
# the one with the closest common ancestor to the requester.
for val in type_to_closest_providers.values():
providers = val[1]
if len(providers) < 2:
continue
providers_with_closest_common_ancestor: list[ModuleProvider] = []
closest_common_ancestor_len = 0
for provider in providers:
common_ancestor_len = len(
os.path.commonpath([request.locality, provider.addr.spec_path])
)
if common_ancestor_len > closest_common_ancestor_len:
closest_common_ancestor_len = common_ancestor_len
providers_with_closest_common_ancestor = []
if common_ancestor_len == closest_common_ancestor_len:
providers_with_closest_common_ancestor.append(provider)
providers[:] = providers_with_closest_common_ancestor

remaining_providers: list[ModuleProvider] = list(
itertools.chain(*[val[1] for val in type_to_closest_providers.values()])
)
addresses = tuple(provider.addr for provider in closest_providers)

# Check that we have at most one closest provider for each provider type.
addresses = tuple(provider.addr for provider in remaining_providers)
# Check that we have at most one remaining provider for each provider type.
# If we have more than one, signal ambiguity.
if any(len(val[1]) > 1 for val in type_to_closest_providers.values()):
return PythonModuleOwners((), ambiguous=addresses)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,58 @@ def assert_owners(
)


def test_resolving_ambiguity_by_filesystem_proximity(rule_runner: RuleRunner) -> None:
rule_runner.set_options(
[
"--source-root-patterns=['root1', 'root2', 'root3']",
"--python-infer-ambiguity-resolution=by_source_root",
]
)
rule_runner.write_files(
{
"root1/aa/bb/BUILD": "python_sources()",
"root1/aa/bb/foo.py": "",
"root1/aa/cc/BUILD": "python_sources()",
"root1/aa/cc/bar.py": "from aa.bb import foo",
"root2/aa/bb/BUILD": "python_sources()",
"root2/aa/bb/foo.py": "",
"root2/aa/dd/baz.py": "from aa.bb import foo",
"root3/aa/ee/BUILD": "python_sources()",
"root3/aa/ee/foo.py": "from aa.bb import foo",
}
)

owners = rule_runner.request(
PythonModuleOwners, [PythonModuleOwnersRequest("aa.bb.foo", None, locality=None)]
)
assert list(owners.unambiguous) == []
assert list(owners.ambiguous) == [
Address("root1/aa/bb", relative_file_path="foo.py"),
Address("root2/aa/bb", relative_file_path="foo.py"),
]

owners = rule_runner.request(
PythonModuleOwners, [PythonModuleOwnersRequest("aa.bb.foo", None, locality="root1/")]
)
assert list(owners.unambiguous) == [Address("root1/aa/bb", relative_file_path="foo.py")]
assert list(owners.ambiguous) == []

owners = rule_runner.request(
PythonModuleOwners, [PythonModuleOwnersRequest("aa.bb.foo", None, locality="root2/")]
)
assert list(owners.unambiguous) == [Address("root2/aa/bb", relative_file_path="foo.py")]
assert list(owners.ambiguous) == []

owners = rule_runner.request(
PythonModuleOwners, [PythonModuleOwnersRequest("aa.bb.foo", None, locality="root3/")]
)
assert list(owners.unambiguous) == []
assert list(owners.ambiguous) == [
Address("root1/aa/bb", relative_file_path="foo.py"),
Address("root2/aa/bb", relative_file_path="foo.py"),
]


def test_map_module_considers_resolves(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
{
Expand Down Expand Up @@ -702,7 +754,7 @@ def get_owners(resolve: str | None) -> PythonModuleOwners:


def test_issue_15111(rule_runner: RuleRunner) -> None:
"""Ensure we can handle when a single address implement multiple modules.
"""Ensure we can handle when a single address provides multiple modules.
This is currently only possible with third-party targets.
"""
Expand Down
Loading

0 comments on commit cabc705

Please sign in to comment.