Skip to content

Commit

Permalink
Fix JVM resources jar file to be deterministic by default (pantsbuild#…
Browse files Browse the repository at this point in the history
…16950)

At the moment, Java and Scala projects do not benefit much from the remote cache, because the local process that builds the resources jar will usually win the race against the remote cache read. Because zip output is non-deterministic (the archive embeds file timestamps), the locally built resources jar differs between builds and invalidates all of its dependents in the graph.

Using reproducible_jars would solve that, but at the cost of running the jar-stripping step on every jar.

This PR solves this by changing the dates of all the files in the zip file before archiving.

[ci skip-rust]
[ci skip-build-wheels]
  • Loading branch information
somdoron authored Sep 27, 2022
1 parent 81d07b3 commit f055fc7
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 4 deletions.
12 changes: 12 additions & 0 deletions src/python/pants/core/util_rules/system_binaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ class MkdirBinary(BinaryPath):
pass


class TouchBinary(BinaryPath):
    """A `BinaryPath` identifying the `touch` binary (path plus fingerprint)."""


class CpBinary(BinaryPath):
    """A `BinaryPath` identifying the `cp` binary."""

Expand Down Expand Up @@ -713,6 +717,14 @@ async def find_mkdir() -> MkdirBinary:
return MkdirBinary(first_path.path, first_path.fingerprint)


@rule(desc="Finding the `touch` binary", level=LogLevel.DEBUG)
async def find_touch() -> TouchBinary:
    """Look up `touch` on `SEARCH_PATHS` and return it as a `TouchBinary`.

    The first matching path is used; `first_path_or_raise` is responsible for
    reporting the failure when no candidate is found.
    """
    touch_request = BinaryPathRequest(binary_name="touch", search_path=SEARCH_PATHS)
    candidates = await Get(BinaryPaths, BinaryPathRequest, touch_request)
    located = candidates.first_path_or_raise(touch_request, rationale="touch file")
    return TouchBinary(located.path, located.fingerprint)


@rule(desc="Finding the `cp` binary", level=LogLevel.DEBUG)
async def find_cp() -> CpBinary:
request = BinaryPathRequest(binary_name="cp", search_path=SEARCH_PATHS)
Expand Down
26 changes: 22 additions & 4 deletions src/python/pants/jvm/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

import itertools
import logging
import shlex
from itertools import chain
from pathlib import Path

from pants.core.target_types import ResourcesFieldSet, ResourcesGeneratorFieldSet
from pants.core.util_rules import stripped_source_files
from pants.core.util_rules.source_files import SourceFilesRequest
from pants.core.util_rules.stripped_source_files import StrippedSourceFiles
from pants.core.util_rules.system_binaries import ZipBinary
from pants.core.util_rules.system_binaries import BashBinary, TouchBinary, ZipBinary
from pants.engine.fs import Digest, MergeDigests
from pants.engine.internals.selectors import MultiGet
from pants.engine.process import Process, ProcessResult
Expand Down Expand Up @@ -44,6 +45,8 @@ class JvmResourcesRequest(ClasspathEntryRequest):
@rule(desc="Assemble resources")
async def assemble_resources_jar(
zip: ZipBinary,
bash: BashBinary,
touch: TouchBinary,
jvm: JvmSubsystem,
request: JvmResourcesRequest,
) -> FallibleClasspathEntry:
Expand Down Expand Up @@ -85,13 +88,28 @@ async def assemble_resources_jar(
input_files = {str(path) for path in chain(paths, directories)}

resources_jar_input_digest = source_files.snapshot.digest

input_filenames = " ".join(shlex.quote(file) for file in sorted(input_files))

resources_jar_result = await Get(
ProcessResult,
Process(
argv=[
zip.path,
output_filename,
*sorted(input_files),
bash.path,
"-c",
" ".join(
[
touch.path,
"-d 1980-01-01T00:00:00Z",
input_filenames,
"&&",
"TZ=UTC",
zip.path,
"-oX",
output_filename,
input_filenames,
]
),
],
description="Build resources JAR for {request.component}",
input_digest=resources_jar_input_digest,
Expand Down
25 changes: 25 additions & 0 deletions src/python/pants/jvm/resources_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,28 @@ def test_resources(rule_runner: RuleRunner) -> None:
contents = rule_runner.request(DigestContents, list(classpath.digests()))
assert contents[0].path == ".one.txt.root.resources.jar"
assert filenames_from_zip(contents[0]) == ["one.txt"]


@maybe_skip_jdk_test
def test_resources_jar_is_determinstic(rule_runner: RuleRunner) -> None:
    """Check that every entry of the resources jar carries the fixed timestamp.

    Builds the classpath for a `resources` target containing files at several
    directory depths, then inspects the first produced jar and asserts that
    each entry's `date_time` is 1980-01-01 00:00:00.
    """
    rule_runner.write_files(
        {
            "BUILD": "resources(name='root', sources=['**/*.txt'])",
            "one.txt": "",
            "two.txt": "",
            "three/four.txt": "",
            "three/five.txt": "",
            "three/six/seven/eight.txt": "",
            "3rdparty/jvm/default.lock": EMPTY_JVM_LOCKFILE,
        }
    )

    classpath = rule_runner.request(
        Classpath, [Addresses([Address(spec_path="", target_name="root")])]
    )

    contents = rule_runner.request(DigestContents, list(classpath.digests()))

    # Use a context manager so the archive handle is closed even if an
    # assertion below fails.
    with ZipFile(BytesIO(contents[0].content)) as jar:
        entries = jar.infolist()

    # An empty archive would make the loop below pass vacuously.
    assert entries
    for info in entries:
        assert info.date_time == (1980, 1, 1, 0, 0, 0)

0 comments on commit f055fc7

Please sign in to comment.