Skip to content

Commit

Permalink
Improve Noop resolution performance (pantsbuild#6577)
Browse files Browse the repository at this point in the history
### Problem

In preparation for remote execution, pantsbuild#6544 introduced many calls to `capture_snapshots`, which added a significant overhead to coursier fetching.

### Solution

A partial solution is to unify all the snapshot capturing into a single call to `capture_snapshots`.

### Result

- Both coursier and ivy resolvers now make a single call to `capture_snapshots`, which reduced the overhead somewhat, though not completely. The synchronous call to `capture_snapshots` adds a 4-5 seconds of overhead to fetching from coursier on a relatively big project. A solution is discussed in a separate comment, below.

**EDIT** This is not true anymore, it was a faulty measurement on my part:
> 
> These are the noop measured times in my laptop. They all clean all and reset the daemon after every run:
> ```
> Measured execution time: 73s for commit 46e9dd9 (this pr)
> Measured execution time: 96s for commit 6268a77 (master with the regression)
> Measured execution time: 38s for commit f60c40a (master straight before the regression) 
> ```


- Separate test cases to cover ivy and coursier resolution (coursier wasn't covered before).
- The test cases now include 2 3rdparty dependencies, to cover for a logic bug introduced while developing this.



### Acknowledgements
Credit to @dotordogh for pairing on the issue and coming up with the final fix.
  • Loading branch information
blorente authored and illicitonion committed Oct 3, 2018
1 parent fd7c2ee commit d86c3cc
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 24 deletions.
16 changes: 12 additions & 4 deletions src/python/pants/backend/jvm/tasks/coursier_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from pants.backend.jvm.tasks.classpath_products import ClasspathProducts
from pants.backend.jvm.tasks.coursier.coursier_subsystem import CoursierSubsystem
from pants.backend.jvm.tasks.nailgun_task import NailgunTask
from pants.backend.jvm.tasks.resolve_shared import ResolveBase
from pants.backend.jvm.tasks.resolve_shared import JvmResolverBase
from pants.base.exceptions import TaskError
from pants.base.fingerprint_strategy import FingerprintStrategy
from pants.base.workunit import WorkUnitLabel
Expand All @@ -38,7 +38,7 @@ class CoursierResultNotFound(Exception):
pass


class CoursierMixin(NailgunTask, ResolveBase):
class CoursierMixin(NailgunTask, JvmResolverBase):
"""
Experimental 3rdparty resolver using coursier.
Expand Down Expand Up @@ -443,8 +443,11 @@ def _load_json_result(self, conf, compile_classpath, coursier_cache_path, invali
for coord in coord_to_resolved_jars.keys():
org_name_to_org_name_rev['{}:{}'.format(coord.org, coord.name)] = coord

jars_per_target = []

for vt in invalidation_check.all_vts:
t = vt.target
jars_to_digest = []
if isinstance(t, JarLibrary):
def get_transitive_resolved_jars(my_coord, resolved_jars):
transitive_jar_path_for_coord = []
Expand Down Expand Up @@ -479,8 +482,13 @@ def get_transitive_resolved_jars(my_coord, resolved_jars):
for coord in coord_candidates:
transitive_resolved_jars = get_transitive_resolved_jars(coord, coord_to_resolved_jars)
if transitive_resolved_jars:
jars_to_add = self.add_directory_digests_for_jars(transitive_resolved_jars)
compile_classpath.add_jars_for_targets([t], conf, jars_to_add)
for jar in transitive_resolved_jars:
jars_to_digest.append(jar)

jars_per_target.append((t, jars_to_digest))

for target, jars_to_add in self.add_directory_digests_for_jars(jars_per_target):
compile_classpath.add_jars_for_targets([target], conf, jars_to_add)

def _populate_results_dir(self, vts_results_dir, results):
mode = 'w' if PY3 else 'wb'
Expand Down
11 changes: 5 additions & 6 deletions src/python/pants/backend/jvm/tasks/ivy_task_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
from pants.backend.jvm.subsystems.jar_dependency_management import JarDependencyManagement
from pants.backend.jvm.targets.jar_library import JarLibrary
from pants.backend.jvm.targets.jvm_target import JvmTarget
from pants.backend.jvm.tasks.resolve_shared import ResolveBase
from pants.backend.jvm.tasks.resolve_shared import JvmResolverBase
from pants.base.exceptions import TaskError
from pants.base.fingerprint_strategy import FingerprintStrategy
from pants.invalidation.cache_manager import VersionedTargetSet
from pants.ivy.ivy_subsystem import IvySubsystem
from pants.task.task import TaskBase
from pants.util.memo import memoized_property


Expand Down Expand Up @@ -70,7 +69,7 @@ def __eq__(self, other):
return type(self) == type(other) and self._confs == other._confs


class IvyTaskMixin(TaskBase, ResolveBase):
class IvyTaskMixin(JvmResolverBase):
"""A mixin for Tasks that execute resolves via Ivy.
Must be mixed in to a task that registers a --jvm-options option (typically by
Expand Down Expand Up @@ -178,9 +177,9 @@ def _resolve_subset(self, executor, targets, classpath_products, confs=None, ext
# appropriately.
classpath_products.add_excludes_for_targets(targets)
for conf in confs:
for target, resolved_jars in result.resolved_jars_for_each_target(conf, targets):
jars_to_add = self.add_directory_digests_for_jars(resolved_jars)
classpath_products.add_jars_for_targets([target], conf, jars_to_add)
resolved_jars_per_target = result.resolved_jars_for_each_target(conf, targets)
for target, resolved_jars in self.add_directory_digests_for_jars(resolved_jars_per_target):
classpath_products.add_jars_for_targets([target], conf, resolved_jars)

return result

Expand Down
61 changes: 48 additions & 13 deletions src/python/pants/backend/jvm/tasks/resolve_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,59 @@
from pants.base.build_environment import get_buildroot
from pants.engine.fs import PathGlobs, PathGlobsAndRoot
from pants.java.jar.jar_dependency_utils import ResolvedJar
from pants.task.task import TaskBase
from pants.util.dirutil import fast_relpath


class ResolveBase(object):
class JvmResolverBase(TaskBase):
"""Common methods for both Ivy and Coursier resolves."""

def add_directory_digests_for_jars(self, jars):
"""Get DirectoryDigests for jars and return them zipped with the jars.
@classmethod
def register_options(cls, register):
"""Register an option to make capturing snapshots optional.
This class is intended to be extended by Jvm resolvers (coursier and ivy), and the option name should reflect that.
"""
super(JvmResolverBase, cls).register_options(register)
# TODO This flag should be defaulted to True when we are doing hermetic execution,
# and should probably go away as we move forward into that direction.
register('--capture-snapshots', type=bool, default=False,
help='Enable capturing snapshots to add directory digests to dependency jars.'
'Note that this is necessary when hermetic execution is enabled.')

def add_directory_digests_for_jars(self, targets_and_jars):
"""For each target, get DirectoryDigests for its jars and return them zipped with the jars.
:param jars: List of pants.java.jar.jar_dependency_utils.ResolveJar
:return: List of ResolveJars.
:param targets_and_jars: List of tuples of the form (Target, [pants.java.jar.jar_dependency_utils.ResolveJar])
:return: list[tuple[(Target, list[pants.java.jar.jar_dependency_utils.ResolveJar])]
"""

targets_and_jars=list(targets_and_jars)

if not targets_and_jars or not self.get_options().capture_snapshots:
return targets_and_jars

jar_paths = []
for target, jars_to_snapshot in targets_and_jars:
for jar in jars_to_snapshot:
jar_paths.append(fast_relpath(jar.pants_path, get_buildroot()))

snapshots = self.context._scheduler.capture_snapshots(
tuple(PathGlobsAndRoot(
PathGlobs([fast_relpath(jar.pants_path, get_buildroot())]), get_buildroot()) for jar in jars)
)
return [ResolvedJar(coordinate=jar.coordinate,
cache_path=jar.cache_path,
pants_path=jar.pants_path,
directory_digest=directory_digest) for jar, directory_digest in
list(zip(jars, [snapshot.directory_digest for snapshot in snapshots]))]
tuple(
PathGlobsAndRoot(PathGlobs([jar]), get_buildroot()) for jar in jar_paths
))

# We want to map back the list[Snapshot] to targets_and_jars
# We assume that (1) jars_to_snapshot has the same number of ResolveJars as snapshots does Snapshots,
# and that (2) capture_snapshots preserves ordering.
digests = [snapshot.directory_digest for snapshot in snapshots]
digest_iterator = iter(digests)

snapshotted_targets_and_jars = []
for target, jars_to_snapshot in targets_and_jars:
snapshotted_jars = [ResolvedJar(coordinate=jar.coordinate,
cache_path=jar.cache_path,
pants_path=jar.pants_path,
directory_digest=digest_iterator.next()) for jar in jars_to_snapshot]
snapshotted_targets_and_jars.append((target, snapshotted_jars))

return snapshotted_targets_and_jars
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ jvm_binary(
main='org.pantsbuild.testproject.cwdexample.ExampleCwd',
dependencies=[
"3rdparty:guava",
"testprojects/3rdparty/checkstyle"
]
)
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,68 @@ def main(args: Array[String]) {
path = os.path.join(compile_dir, path_suffix)
self.assertTrue(os.path.exists(path), "Want path {} to exist".format(path))

def test_hermetic_binary_with_3rdparty_dependencies(self):
def test_hermetic_binary_with_capturing_off(self):
capture_snapshots = False
config = {
'resolve.ivy': {'capture_snapshots': capture_snapshots},
'resolve.coursier': {'capture_snapshots': capture_snapshots},
'compile.zinc': {
'execution_strategy': 'hermetic',
'use_classpath_jars': False,
'incremental': False,
},
}
with self.temporary_workdir() as workdir:
with self.temporary_file_content("readme.txt", "yo"):
pants_run = self.run_pants_with_workdir(
[
'run',
'testprojects/src/java/org/pantsbuild/testproject/cwdexample',
],
workdir,
config,
)
self.assert_failure(pants_run)

def test_hermetic_binary_with_3rdparty_dependencies_ivy(self):
config = {
'resolve.ivy': {'capture_snapshots': True},
'compile.zinc': {
'execution_strategy': 'hermetic',
'use_classpath_jars': False,
'incremental': False,
},
'resolver': {
'resolver': 'ivy',
}
}

with self.temporary_workdir() as workdir:
with self.temporary_file_content("readme.txt", "yo"):
pants_run = self.run_pants_with_workdir(
[
'run',
'testprojects/src/java/org/pantsbuild/testproject/cwdexample',
],
workdir,
config,
)
self.assert_success(pants_run)
self.assertIn(
'Found readme.txt',
pants_run.stdout_data,
)

def test_hermetic_binary_with_3rdparty_dependencies_coursier(self):
config = {
'resolve.coursier': {'capture_snapshots': True},
'compile.zinc': {
'execution_strategy': 'hermetic',
'use_classpath_jars': False,
'incremental': False,
},
'resolver': {
'resolver': 'coursier',
}
}

Expand Down

0 comments on commit d86c3cc

Please sign in to comment.