Skip to content

Commit

Permalink
JVM: Adds jar field to jvm_artifact (pantsbuild#13834)
Browse files Browse the repository at this point in the history
Adds the `jar` field to `jvm_artifact`, which allows users to check a JAR file into the repository, and use that to fulfil dependencies as needed. This is because sandboxing is generally incompatible with file URLs, so we make use of Pants' existing filesystem support.

This requires a small bit of weirdness. Firstly, Coursier requires absolute file URLs, which Pants' sandboxing doesn't really like, so `coursier_wrapper.sh` now preprocesses any args containing a placeholder to use the actual current working directory. Secondly, `ArtifactRequirements` are now preprocessed to check whether a requirement uses a local JVM artifact; if so, Pants builds a digest containing those JARs and sets the requirement's URL to a file URL that uses the working directory placeholder.

This closes pantsbuild#13820
  • Loading branch information
Christopher Neugebauer authored Dec 8, 2021
1 parent c1632cc commit f6a46d7
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 7 deletions.
92 changes: 87 additions & 5 deletions src/python/pants/jvm/resolve/coursier_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@

from __future__ import annotations

import dataclasses
import json
import logging
import operator
import os
from dataclasses import dataclass
from functools import reduce
from typing import Any, Iterable, Iterator
from typing import Any, Iterable, Iterator, List
from urllib.parse import quote_plus as url_quote_plus
from urllib.parse import unquote as url_unquote

from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.engine.collection import Collection, DeduplicatedCollection
from pants.engine.fs import (
AddPrefix,
Expand All @@ -28,7 +30,13 @@
)
from pants.engine.process import BashBinary, Process, ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import Target, Targets, TransitiveTargets, TransitiveTargetsRequest
from pants.engine.target import (
AllTargets,
Target,
Targets,
TransitiveTargets,
TransitiveTargetsRequest,
)
from pants.engine.unions import UnionRule
from pants.jvm.compile import (
ClasspathEntry,
Expand All @@ -44,12 +52,14 @@
JvmArtifactArtifactField,
JvmArtifactFieldSet,
JvmArtifactGroupField,
JvmArtifactJarSourceField,
JvmArtifactUrlField,
JvmArtifactVersionField,
JvmCompatibleResolveNamesField,
JvmRequirementsField,
)
from pants.jvm.util_rules import ExtractFileDigest
from pants.util.frozendict import FrozenDict
from pants.util.logging import LogLevel
from pants.util.strutil import pluralize

Expand Down Expand Up @@ -136,8 +146,11 @@ def from_jvm_artifact_target(target: Target) -> Coordinate:
version = target[JvmArtifactVersionField].value
url = target[JvmArtifactUrlField].value

if url and url.startswith("file:/"):
raise CoursierError("Pants does not currently support `file:` URLS")
if url and url.startswith("file:"):
raise CoursierError(
"Pants does not support `file:` URLS. Instead, use the `jar` field to specify the "
"relative path to the local jar file."
)

# These are all required, but mypy doesn't think so.
assert group is not None and artifact is not None and version is not None
Expand All @@ -148,6 +161,25 @@ class Coordinates(DeduplicatedCollection[Coordinate]):
"""An ordered list of `Coordinate`s."""


@dataclass(frozen=True)
class AllJarTargets:
    """Lookup table from a `Coordinate` to the target that supplies a JAR file for it."""

    # Immutable mapping; each key is a coordinate and each value is the target providing its JAR.
    by_coordinate: FrozenDict[Coordinate, Target]


@rule
async def all_jar_targets(all_targets: AllTargets) -> AllJarTargets:
    """Index every target that sets the `jar` field by the `Coordinate` derived from it.

    Targets that lack `JvmArtifactJarSourceField`, or that leave it unset, are not indexed.
    """
    coordinate_to_target = {}
    for candidate in all_targets:
        if not candidate.has_field(JvmArtifactJarSourceField):
            continue
        if candidate[JvmArtifactJarSourceField].value is None:
            continue
        coordinate_to_target[Coordinate.from_jvm_artifact_target(candidate)] = candidate
    return AllJarTargets(by_coordinate=FrozenDict(coordinate_to_target))


# TODO: Consider whether to carry classpath scope in some fashion via ArtifactRequirements.
class ArtifactRequirements(DeduplicatedCollection[Coordinate]):
"""An ordered list of Coordinates used as requirements."""
Expand Down Expand Up @@ -312,6 +344,46 @@ def classpath_dest_filename(coord: str, src_filename: str) -> str:
return f"{dest_name}{ext}"


@dataclass(frozen=True)
class ArtifactRequirementsWithLocalFiles:
    """Artifact requirements paired with a digest of the local files they refer to."""

    # The requirements, as produced by `use_local_artifacts_where_possible`.
    artifact_requirements: ArtifactRequirements
    # Digest that accompanies the requirements (the local JAR files, when there are any).
    digest: Digest


@rule
async def use_local_artifacts_where_possible(
    input_requirements: ArtifactRequirements,
    all_jars: AllJarTargets,
) -> ArtifactRequirementsWithLocalFiles:
    """Rewrite requirements that are backed by a local JAR target to use a `file:` URL.

    Requirements with no entry in `all_jars` are passed through unchanged. Each requirement
    that does have an entry is replaced by the target's coordinate with its `url` set to
    `file:<working directory placeholder>/<jar path>`.

    :param input_requirements: The coordinates requested for resolution.
    :param all_jars: Index of targets that provide a JAR file, keyed by coordinate.
    :return: The rewritten requirements (pass-through entries first, then the local ones)
        together with a digest containing the local JAR files.
    :raises CoursierError: If a `jar` field does not resolve to exactly one file.
    """
    passthrough: List[Coordinate] = []
    local_jar_targets: List[Target] = []

    for req in input_requirements:
        tgt = all_jars.by_coordinate.get(req)
        if tgt is None:
            passthrough.append(req)
        else:
            local_jar_targets.append(tgt)

    # Resolve each JAR with its own request. A single combined request returns its paths in
    # snapshot (path-sorted) order, which need not match the order of `local_jar_targets`,
    # so zipping the two could attach one target's JAR to another target's coordinate.
    per_target_files = await MultiGet(
        Get(SourceFiles, SourceFilesRequest([tgt[JvmArtifactJarSourceField]]))
        for tgt in local_jar_targets
    )

    rewritten: List[Coordinate] = []
    for tgt, source_files in zip(local_jar_targets, per_target_files):
        if len(source_files.files) != 1:
            raise CoursierError(
                f"Expected the `jar` field of {tgt} to resolve to exactly one file, but it "
                f"resolved to {len(source_files.files)}."
            )
        jar_path = source_files.files[0]
        rewritten.append(
            dataclasses.replace(
                Coordinate.from_jvm_artifact_target(tgt),
                # Coursier requires an absolute URL; the wrapper script substitutes the
                # placeholder with the real working directory.
                url=f"file:{Coursier.working_directory_placeholder}/{jar_path}",
            )
        )

    # Merging an empty sequence of digests yields the empty digest.
    digest = await Get(
        Digest, MergeDigests(source_files.snapshot.digest for source_files in per_target_files)
    )

    return ArtifactRequirementsWithLocalFiles(
        artifact_requirements=ArtifactRequirements([*passthrough, *rewritten]),
        digest=digest,
    )


@rule(level=LogLevel.DEBUG)
async def coursier_resolve_lockfile(
bash: BashBinary,
Expand Down Expand Up @@ -345,6 +417,16 @@ async def coursier_resolve_lockfile(
if len(artifact_requirements) == 0:
return CoursierResolvedLockfile(entries=())

# Transform requirements that correspond to local JAR files into coordinates with `file:/`
# URLs, and put the files in the place specified by the URLs.
artifacts_with_local_files = await Get(
ArtifactRequirementsWithLocalFiles, ArtifactRequirements, artifact_requirements
)
artifact_requirements = artifacts_with_local_files.artifact_requirements
input_digest = await Get(
Digest, MergeDigests([artifacts_with_local_files.digest, coursier.digest])
)

coursier_report_file_name = "coursier_report.json"
process_result = await Get(
ProcessResult,
Expand All @@ -364,7 +446,7 @@ async def coursier_resolve_lockfile(
],
wrapper=[bash.path, coursier.wrapper_script],
),
input_digest=coursier.digest,
input_digest=input_digest,
output_directories=("classpath",),
output_files=(coursier_report_file_name,),
append_only_caches=coursier.append_only_caches,
Expand Down
7 changes: 6 additions & 1 deletion src/python/pants/jvm/resolve/coursier_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class Coursier:
post_processing_script: ClassVar[str] = "coursier_post_processing_script.py"
cache_name: ClassVar[str] = "coursier"
cache_dir: ClassVar[str] = ".cache"
working_directory_placeholder: ClassVar[str] = "___COURSIER_WORKING_DIRECTORY___"

def args(self, args: Iterable[str], *, wrapper: Iterable[str] = ()) -> tuple[str, ...]:
return tuple((*wrapper, self.coursier.exe, *args))
Expand Down Expand Up @@ -142,8 +143,12 @@ async def setup_coursier(
json_output_file="$1"
shift
"$coursier_exe" fetch {repos_args} --json-output-file="$json_output_file" "$@"
WORKING_DIRECTORY=$(pwd)
ARGS=$*
ARGS=$(echo $ARGS | /usr/bin/sed 's|{Coursier.working_directory_placeholder}|'$WORKING_DIRECTORY'|g')
"$coursier_exe" fetch {repos_args} --json-output-file="$json_output_file" $ARGS
/bin/mkdir -p classpath
{python.path} coursier_post_processing_script.py "$json_output_file"
"""
Expand Down
12 changes: 11 additions & 1 deletion src/python/pants/jvm/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pants.engine.target import (
COMMON_TARGET_FIELDS,
FieldSet,
SingleSourceField,
SpecialCasedDependencies,
StringField,
StringSequenceField,
Expand Down Expand Up @@ -51,10 +52,18 @@ class JvmArtifactUrlField(StringField):
help = (
"A URL that points to the location of this artifact. If specified, Pants will not fetch this artifact "
"from default maven repositories, and instead fetch the artifact from this URL. To use default maven "
"repositories, do not set this value. \n\nNote that `file:` URLs are not presently supported."
"repositories, do not set this value. \n\nNote that `file:` URLs are not supported due to Pants' "
"sandboxing feature. To use a local `JAR` file, use the `jar` field instead."
)


class JvmArtifactJarSourceField(SingleSourceField):
    # Optional `jar` field of a `jvm_artifact` target: a checked-in JAR file that supplies
    # the artifact in place of a maven repository.
    alias = "jar"
    required = False
    expected_file_extensions = (".jar",)
    help = "A JAR file that provides this artifact to the lockfile resolver, instead of a maven repository."


class JvmArtifactPackagesField(StringSequenceField):
alias = "packages"
help = (
Expand Down Expand Up @@ -106,6 +115,7 @@ class JvmArtifact(Target):
*COMMON_TARGET_FIELDS,
*JvmArtifactFieldSet.required_fields,
JvmArtifactUrlField, # TODO: should `JvmArtifactFieldSet` have an `all_fields` field?
JvmArtifactJarSourceField,
)
help = (
"Represents a third-party JVM artifact as identified by its Maven-compatible coordinate, "
Expand Down

0 comments on commit f6a46d7

Please sign in to comment.