Skip to content

Commit

Permalink
Add optional support for auto-shading jvm tools.
Browse files Browse the repository at this point in the history
When registering a jvm tool, the main classname of the tool can be
specified to trigger auto-shading of the tool classpath.

This functionality is dogfooded by JUnitRun for its junit-runner tool.

This change adds a test for bootstrap_jvm_tools.py that exercises the
normal (legacy) and shading paths as well as updating the
jvm_tool_task_test_base.py infra to work with this new shading option.
The latter enables the pre-existing JUnitRunTest to operate, now using
an auto-shaded jar.

Testing Done:
I manually tested shaded jar invalidation.  It is costly (~6s for the
junit-runner tool), so having the invalidation work correctly is
important.

Decent coverage for the bulk of the new code:
```
$ PANTS_PY_COVERAGE=modules:pants.backend.jvm.tasks.bootstrap_jvm_tools pants.dev test tests/python/pants_test/backend/jvm/tasks:bootstrap_jvm_tools
...
02:29:21 00:00   [test]
02:29:21 00:00     [run_prep_command]
02:29:21 00:00     [test]
02:29:21 00:00     [pytest]
02:29:21 00:00       [run]
                     ============== test session starts ===============
                     platform linux2 -- Python 2.7.8 -- py-1.4.26 -- pytest-2.6.4
                     plugins: timeout
                     collected 1 items

                     tests/python/pants_test/backend/jvm/tasks/test_bootstrap_jvm_tools.py .

                     =========== 1 passed in 10.86 seconds ============
                     Name                                                     Stmts   Miss Branch BrMiss  Cover
                     ------------------------------------------------------------------------------------------
                     src/python/pants/backend/jvm/tasks/bootstrap_jvm_tools     125     16     38     12    83%

02:29:34 00:13     [junit]
02:29:34 00:13     [specs]
               SUCCESS
```

CI went green here:
  https://travis-ci.org/pantsbuild/pants/builds/57984477

Bugs closed: 663, 1362, 1391

Reviewed at https://rbcommons.com/s/twitter/r/2052/
  • Loading branch information
jsirois committed Apr 10, 2015
1 parent b60fe25 commit dcb2de2
Show file tree
Hide file tree
Showing 12 changed files with 356 additions and 67 deletions.
7 changes: 6 additions & 1 deletion src/python/pants/backend/jvm/tasks/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,14 @@ python_library(
sources = ['bootstrap_jvm_tools.py'],
dependencies = [
':ivy_task_mixin',
':jar_task',
':jvm_tool_task_mixin',
'src/python/pants/backend/core/tasks:task',
'src/python/pants/base:exceptions',
'src/python/pants/base:workunit',
'src/python/pants/java:executor',
'src/python/pants/java:util',
'src/python/pants/java/jar:shader',
'src/python/pants/util:dirutil',
],
)

Expand Down Expand Up @@ -249,6 +253,7 @@ python_library(
'src/python/pants/base:build_environment',
'src/python/pants/base:workunit',
'src/python/pants/java:util',
'src/python/pants/java/jar:shader',
'src/python/pants/util:contextutil',
'src/python/pants/util:dirutil',
'src/python/pants:binary_util',
Expand Down
154 changes: 140 additions & 14 deletions src/python/pants/backend/jvm/tasks/bootstrap_jvm_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,80 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import hashlib
import os
import shutil
import threading
from collections import defaultdict

from pants.backend.core.tasks.task import Task
from pants.backend.jvm.tasks.ivy_task_mixin import IvyTaskMixin
from pants.backend.jvm.tasks.ivy_task_mixin import IvyResolveFingerprintStrategy, IvyTaskMixin
from pants.backend.jvm.tasks.jar_task import JarTask
from pants.backend.jvm.tasks.jvm_tool_task_mixin import JvmToolTaskMixin
from pants.base.address_lookup_error import AddressLookupError
from pants.base.exceptions import TaskError
from pants.java import util
from pants.java.executor import Executor
from pants.java.jar.shader import Shader
from pants.util.dirutil import safe_mkdir_for


class BootstrapJvmTools(IvyTaskMixin, Task):
class ShadedToolFingerprintStrategy(IvyResolveFingerprintStrategy):
  """Fingerprints a tool's targets together with the parameters used to shade the tool.

  The fingerprint extends the base ivy resolve fingerprint with the tool `key`, its `scope`, the
  `main` class name and the rendered form of any custom shading rules, so a change to any of
  these yields a different fingerprint.

  :param key: The key the tool was registered under.
  :param scope: The options scope the tool was registered in.
  :param main: The main class name of the tool.
  :param custom_rules: An optional iterable of shading rules; each must support `render()`.
  """

  def __init__(self, key, scope, main, custom_rules=None):
    # The bootstrapper uses no custom confs in its resolves.
    super(ShadedToolFingerprintStrategy, self).__init__(confs=None)

    self._key = key
    self._scope = scope
    self._main = main
    self._custom_rules = custom_rules

  def compute_fingerprint(self, target):
    """Returns the hex digest for `target`, or `None` when the base strategy yields `None`."""
    hasher = hashlib.sha1()
    base_fingerprint = super(ShadedToolFingerprintStrategy, self).compute_fingerprint(target)
    if base_fingerprint is None:
      return None

    hasher.update(base_fingerprint)

    # NB: this series of updates must always cover the same fields that populate `_tuple`'s slots
    # to ensure proper invalidation.
    hasher.update(self._key)
    hasher.update(self._scope)
    hasher.update(self._main)
    if self._custom_rules:
      for rule in self._custom_rules:
        hasher.update(rule.render())

    return hasher.hexdigest()

  def _tuple(self):
    # NB: this tuple's slots - used for `==/hash()` - must be kept in agreement with the hashed
    # fields in `compute_fingerprint` to ensure proper invalidation.
    return self._key, self._scope, self._main, tuple(self._custom_rules or ())

  def __hash__(self):
    return hash((type(self),) + self._tuple())

  def __eq__(self, other):
    return type(self) == type(other) and self._tuple() == other._tuple()

  def __ne__(self, other):
    # Python 2 does not derive `!=` from `__eq__`; without this, two equal strategies would
    # still compare as `!=` (identity comparison), contradicting `==` and `hash()`.
    return not self == other


class BootstrapJvmTools(IvyTaskMixin, JarTask):

@classmethod
def product_types(cls):
  """Returns the list of product type names this task declares."""
  return ['jvm_build_tools_classpath_callbacks']

@classmethod
def register_options(cls, register):
  """Registers the superclass options and then the `jarjar` jvm tool.

  :param register: The option registration function handed to this task.
  """
  super(BootstrapJvmTools, cls).register_options(register)
  # The `shader` property later looks this tool up via `tool_classpath('jarjar')`.
  cls.register_jvm_tool(register, 'jarjar')

def __init__(self, *args, **kwargs):
  """Forwards all arguments to the superclass and sets up shading state."""
  super(BootstrapJvmTools, self).__init__(*args, **kwargs)
  # Populated lazily on first access of the `shader` property.
  self._shader = None
  # Root directory, under this task's workdir, for the binary and shaded tool jars.
  self._tool_cache_path = os.path.join(self.workdir, 'tool_cache')

def execute(self):
context = self.context
if JvmToolTaskMixin.get_registered_tools():
Expand All @@ -34,11 +92,11 @@ def execute(self):
# the bootstrap tools. It would be awkward and possibly incorrect to call
# self.invalidated twice on a Task that does meaningful invalidation on its
# targets. -pl
for scope, key in JvmToolTaskMixin.get_registered_tools():
for scope, key, main, custom_rules in JvmToolTaskMixin.get_registered_tools():
option = key.replace('-', '_')
deplist = self.context.options.for_scope(scope)[option]
callback_product_map[scope][key] = \
self.cached_bootstrap_classpath_callback(key, scope, deplist)
callback_product_map[scope][key] = self.cached_bootstrap_classpath_callback(
key, scope, deplist, main=main, custom_rules=custom_rules)
context.products.safe_create_data('jvm_build_tools_classpath_callbacks',
lambda: callback_product_map)

Expand All @@ -52,27 +110,95 @@ def _resolve_tool_targets(self, tools, key, scope):
targets = list(self.context.resolve(tool))
if not targets:
raise KeyError
except (KeyError, AddressLookupError) as e:
except (KeyError, AddressLookupError):
self.context.log.error("Failed to resolve target for tool: {tool}.\n"
"This target was obtained from option {option} in scope {scope}.\n"
"You probably need to add this target to your tools "
"BUILD file(s), usually located in the workspace root.\n"
"".format(tool=tool, e=e, scope=scope, option=key))
"".format(tool=tool, scope=scope, option=key))
raise TaskError()
for target in targets:
yield target

def cached_bootstrap_classpath_callback(self, key, scope, tools):
def _bootstrap_classpath(self, key, targets):
  """Resolves `targets` via ivy and returns the resulting classpath.

  The resolve is run silently in a workunit named `bootstrap-<key>`.

  :param key: The tool key; only used to name the workunit.
  :param targets: The targets to resolve.
  """
  workunit_name = 'bootstrap-{}'.format(key)
  # `ivy_resolve` returns a pair here; only its first item is used.
  classpath, _ = self.ivy_resolve(targets, silent=True, workunit_name=workunit_name)
  return classpath

def _bootstrap_tool_classpath(self, key, scope, tools):
  """Resolves the tool specs to targets and returns their (unshaded) classpath.

  :param key: The key the tool was registered under.
  :param scope: The options scope the tool was registered in.
  :param tools: The tool specs to resolve to targets.
  """
  # Materialize the generator so every tool spec is resolved before the ivy resolve starts.
  targets = list(self._resolve_tool_targets(tools, key, scope))
  return self._bootstrap_classpath(key, targets)

@property
def shader(self):
  """The lazily created `Shader`, built from the single jar on the `jarjar` tool classpath.

  :raises: :class:`TaskError` if the `jarjar` tool does not resolve to exactly one jar.
  """
  if self._shader is None:
    jarjar_classpath = self.tool_classpath('jarjar')
    if len(jarjar_classpath) != 1:
      raise TaskError('Expected jarjar to resolve to one jar, instead found {}:\n\t{}'
                      .format(len(jarjar_classpath), '\n\t'.join(jarjar_classpath)))
    # Index rather than `pop()`: the classpath callbacks hand out their cached list, so
    # mutating it here could leave later `jarjar` classpath lookups with an empty list.
    self._shader = Shader(jarjar_classpath[0])
  return self._shader

def _bootstrap_shaded_jvm_tool(self, key, scope, tools, main, custom_rules=None):
  """Builds (or re-uses) a single shaded jar for a tool and returns it as a one-item classpath.

  The tool's classpath is first collapsed into one binary jar, which is then shaded into a jar
  under the tool cache at `shaded_jars/<scope>/<key>/<main>.jar`.

  :param key: The key the tool was registered under.
  :param scope: The options scope the tool was registered in.
  :param tools: The tool specs to resolve to targets.
  :param main: The main class name of the tool.
  :param custom_rules: Optional custom shading rules passed through to the shader.
  :returns: A list containing just the path of the shaded jar.
  :raises: :class:`TaskError` if the shader exits non-zero or fails to execute.
  """
  shaded_jar = os.path.join(self._tool_cache_path,
                            'shaded_jars', scope, key, '{}.jar'.format(main))

  targets = list(self._resolve_tool_targets(tools, key, scope))
  # The strategy folds key, scope, main and the custom rules into the fingerprint so that a
  # change to any of them invalidates the targets below.
  fingerprint_strategy = ShadedToolFingerprintStrategy(key, scope, main,
                                                       custom_rules=custom_rules)
  with self.invalidated(targets,
                        # We're the only dependent in reality since we shade.
                        invalidate_dependents=False,
                        fingerprint_strategy=fingerprint_strategy) as invalidation_check:

    # Re-use the previous result only if nothing was invalidated AND the jar is still on disk.
    if not invalidation_check.invalid_vts and os.path.exists(shaded_jar):
      return [shaded_jar]

    # Ensure we have a single binary jar we can shade.
    binary_jar = os.path.join(self._tool_cache_path,
                              'binary_jars', scope, key, '{}.jar'.format(main))
    safe_mkdir_for(binary_jar)

    classpath = self._bootstrap_classpath(key, targets)
    if len(classpath) == 1:
      # A lone jar can be used as-is; just copy it into place.
      shutil.copy(classpath[0], binary_jar)
    else:
      # Otherwise merge every classpath jar into one jar and set its main class.
      with self.open_jar(binary_jar) as jar:
        for classpath_jar in classpath:
          jar.writejar(classpath_jar)
        jar.main(main)

    # Now shade the binary jar and return that single jar as the safe tool classpath.
    safe_mkdir_for(shaded_jar)
    with self.shader.binary_shader(shaded_jar, main, binary_jar,
                                   custom_rules=custom_rules) as shader:
      try:
        result = util.execute_runner(shader,
                                     workunit_factory=self.context.new_workunit,
                                     workunit_name='shade-{}'.format(key))
        if result != 0:
          raise TaskError("Shading of tool '{key}' with main class {main} for {scope} failed "
                          "with exit code {result}".format(key=key, main=main, scope=scope,
                                                           result=result))
      except Executor.Error as e:
        # Surface executor failures as a TaskError carrying the same tool context.
        raise TaskError("Shading of tool '{key}' with main class {main} for {scope} failed "
                        "with: {exception}".format(key=key, main=main, scope=scope, exception=e))
    return [shaded_jar]

def _bootstrap_jvm_tool(self, key, scope, tools, main, custom_rules=None):
if main is None:
return self._bootstrap_tool_classpath(key, scope, tools)
else:
return self._bootstrap_shaded_jvm_tool(key, scope, tools, main, custom_rules=custom_rules)

def cached_bootstrap_classpath_callback(self, key, scope, tools, main=None, custom_rules=None):
cache = {}
cache_lock = threading.Lock()

def bootstrap_classpath():
with cache_lock:
if 'classpath' not in cache:
targets = list(self._resolve_tool_targets(tools, key, scope))
workunit_name = 'bootstrap-{}'.format(key)
cache['classpath'] = self.ivy_resolve(targets,
silent=True,
workunit_name=workunit_name)[0]
cache['classpath'] = self._bootstrap_jvm_tool(key, scope, tools,
main=main, custom_rules=custom_rules)
return cache['classpath']
return bootstrap_classpath
9 changes: 7 additions & 2 deletions src/python/pants/backend/jvm/tasks/jar_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,15 @@ def register_options(cls, register):
def product_types(cls):
return ['jars']

@classmethod
def prepare(cls, options, round_manager):
super(JarCreate, cls).prepare(options, round_manager)
cls.JarBuilder.prepare(round_manager)

def __init__(self, *args, **kwargs):
super(JarCreate, self).__init__(*args, **kwargs)

self.compressed = self.get_options().compressed
self._jar_builder = self.prepare_jar_builder()
self._jars = {}

def execute(self):
Expand All @@ -71,7 +75,8 @@ def execute(self):
jar_name = jarname(target)
jar_path = os.path.join(self.workdir, jar_name)
with self.create_jar(target, jar_path) as jarfile:
if target in self._jar_builder.add_target(jarfile, target):
jar_builder = self.create_jar_builder(jarfile)
if target in jar_builder.add_target(target):
self.context.products.get('jars').add(target, self.workdir).append(jar_name)

@contextmanager
Expand Down
49 changes: 27 additions & 22 deletions src/python/pants/backend/jvm/tasks/jar_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def as_cli_entry(entry):

yield args


class JarTask(NailgunTask):
"""A baseclass for tasks that need to create or update jars.
Expand Down Expand Up @@ -228,12 +229,6 @@ def register_options(cls, register):
super(JarTask, cls).register_options(register)
cls.register_jvm_tool(register, 'jar-tool')

@classmethod
def prepare(cls, options, round_manager):
super(JarTask, cls).prepare(options, round_manager)
round_manager.require_data('resources_by_target')
round_manager.require_data('classes_by_target')

def __init__(self, *args, **kwargs):
super(JarTask, self).__init__(*args, **kwargs)
self.set_distribution(jdk=True)
Expand Down Expand Up @@ -319,11 +314,24 @@ def _write_agent_manifest(agent, jar):
manifest.addentry('Can-Set-Native-Method-Prefix', 'true')
jar.writestr(Manifest.PATH, manifest.contents())

@abstractproperty
def _context(self):
"""Implementations must supply a context."""
@staticmethod
def prepare(round_manager):
"""Prepares the products needed to use `create_jar_builder`.
This method should be called during task preparation to ensure the classes and resources
needed for jarring targets are mapped by upstream tasks that generate these.
def add_target(self, jar, target, recursive=False):
Later, in execute context, the `create_jar_builder` method can be called to get back a
prepared ``JarTask.JarBuilder`` ready for use.
"""
round_manager.require_data('resources_by_target')
round_manager.require_data('classes_by_target')

def __init__(self, context, jar):
self._context = context
self._jar = jar

def add_target(self, target, recursive=False):
"""Adds the classes and resources for a target to an open jar.
:param jar: An open jar to add to.
Expand Down Expand Up @@ -358,14 +366,14 @@ def add_products(target_products):
if target_products:
for root, products in target_products.rel_paths():
for prod in products:
jar.write(os.path.join(root, prod), prod)
self._jar.write(os.path.join(root, prod), prod)

add_products(target_classes)
for resources_target in target_resources:
add_products(resources_target)

if isinstance(tgt, JavaAgent):
self._write_agent_manifest(tgt, jar)
self._write_agent_manifest(tgt, self._jar)

if recursive:
target.walk(add_to_jar)
Expand All @@ -374,15 +382,12 @@ def add_products(target_products):

return targets_added

def prepare_jar_builder(self):
"""Prepares a ``JarTask.JarBuilder`` for use during ``execute``.
def create_jar_builder(self, jar):
"""Creates a ``JarTask.JarBuilder`` ready for use.
This method should be called during task preparation to ensure the classes and resources needed
for jarring targets are mapped by upstream tasks that generate these.
"""
class PreparedJarBuilder(self.JarBuilder):
@property
def _context(me):
return self.context
This method should be called in `execute` context and only after ensuring
`JarTask.JarBuilder.prepare` has already been called in `prepare` context.
return PreparedJarBuilder()
:param jar: An opened ``pants.backend.jvm.tasks.jar_task.Jar``.
"""
return self.JarBuilder(self.context, jar)
10 changes: 9 additions & 1 deletion src/python/pants/backend/jvm/tasks/junit_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.base.workunit import WorkUnit
from pants.java.jar.shader import Shader
from pants.java.util import execute_java
from pants.util.contextutil import temporary_file_path
from pants.util.dirutil import (relativize_paths, safe_delete, safe_mkdir, safe_open, safe_rmtree,
Expand Down Expand Up @@ -90,7 +91,14 @@ def register_options(cls, register, register_jvm_tool):
help='Redirect test output to files in .pants.d/test/junit. Implied by --xml-report.')
register('--cwd', default=_CWD_NOT_PRESENT, nargs='?',
help='Set the working directory. If no argument is passed, use the first target path.')
register_jvm_tool(register, 'junit')
register_jvm_tool(register,
'junit',
main=JUnitRun._MAIN,
# TODO(John Sirois): Investigate how much less we can get away with.
# Clearly both tests and the runner need access to the same @Test, @Before,
# as well as other annotations, but there is also the Assert class and some
# subset of the @Rules, @Theories and @RunWith APIs.
custom_rules=[Shader.exclude_package('org.junit', recursive=True)])

def __init__(self, task_exports, context):
self._task_exports = task_exports
Expand Down
7 changes: 2 additions & 5 deletions src/python/pants/backend/jvm/tasks/jvm_binary_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ def add_main_manifest_entry(jar, binary):
def prepare(cls, options, round_manager):
super(JvmBinaryTask, cls).prepare(options, round_manager)
round_manager.require('jar_dependencies', predicate=cls.is_binary)

def __init__(self, *args, **kwargs):
super(JvmBinaryTask, self).__init__(*args, **kwargs)
self._jar_builder = self.prepare_jar_builder()
cls.JarBuilder.prepare(round_manager)

def list_external_jar_dependencies(self, binary, confs=None):
"""Returns the external jar dependencies of the given binary.
Expand Down Expand Up @@ -72,7 +69,7 @@ def monolithic_jar(self, binary, path, with_external_deps):
compressed=True) as jar:

with self.context.new_workunit(name='add-internal-classes'):
self._jar_builder.add_target(jar, binary, recursive=True)
self.create_jar_builder(jar).add_target(binary, recursive=True)

if with_external_deps:
with self.context.new_workunit(name='add-dependency-jars'):
Expand Down
Loading

0 comments on commit dcb2de2

Please sign in to comment.