Skip to content

Commit

Permalink
Improved artifact cache usability
Browse files Browse the repository at this point in the history
Provided a simpler interface for caching, which tasks may opt into: each VersionedTarget (VT) is given a directory in which to place its results, so that when executing a task on invalid VersionedTargets, the task may place a VT's artifacts within its corresponding directory. The act of writing the VT / artifact files to the cache is factored out of individual tasks, and is done "automatically" by Task->execute.

Implemented the above functionality in various tasks to showcase various use cases (CppBinaryCreate, CppCompile, CppLibraryCreate, Checkstyle, Scalastyle, JarCreate). Note: this interface only works with isolated task-execution strategies, not with the global strategy.

Wrote a test utility for verifying that arbitrary unit tests cache their results.

Testing Done:
CI: https://travis-ci.org/pantsbuild/pants/builds/70269784

Added C++ integration tests for caching, and applied above mentioned test utility to various tests to verify caching.

Note: There was an issue with Antlr unit tests, which began to fail when using the artifact cache. These tests were disabled: see lengthy comment in tests/python/pants_test/backend/codegen/tasks/test_antlr_gen.py for more details.

Reviewed at https://rbcommons.com/s/twitter/r/2449/
  • Loading branch information
codygibb authored and stuhood committed Jul 12, 2015
1 parent f17710c commit b220248
Show file tree
Hide file tree
Showing 22 changed files with 280 additions and 145 deletions.
58 changes: 30 additions & 28 deletions contrib/cpp/src/python/pants/contrib/cpp/tasks/cpp_binary_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os

from pants.base.workunit import WorkUnit
from pants.util.dirutil import safe_mkdir

from pants.contrib.cpp.tasks.cpp_task import CppTask

Expand All @@ -31,57 +30,60 @@ def prepare(cls, options, round_manager):
round_manager.require('lib')
round_manager.require('objs')

@property
def cache_target_dirs(self):
return True

def execute(self):
with self.context.new_workunit(name='cpp-binary', labels=[WorkUnit.TASK]):
targets = self.context.targets(self.is_binary)
for target in targets:
target.workdir = self._workdir

with self.invalidated(targets, invalidate_dependents=True) as invalidation_check:
invalid_targets = []
for vt in invalidation_check.invalid_vts:
invalid_targets.extend(vt.targets)
for target in invalid_targets:
binary = self._create_binary(target)
self.context.products.get('exe').add(target, self.workdir).append(binary)

def _create_binary(self, binary):
binary_mapping = self.context.products.get('exe')
for vt in invalidation_check.all_vts:
binary_path = os.path.join(vt.results_dir, vt.target.name)
if not vt.valid:
self._create_binary(vt.target, binary_path)
binary_mapping.add(vt.target, vt.results_dir).append(binary_path)

def _create_binary(self, target, binary_path):
objects = []
for basedir, objs in self.context.products.get('objs').get(binary).items():
for basedir, objs in self.context.products.get('objs').get(target).items():
objects.extend([os.path.join(basedir, obj) for obj in objs])
output = self._link_binary(binary, objects)
self.context.log.info('Built c++ binary: {0}'.format(output))
return output
self._link_binary(target, binary_path, objects)
self.context.log.info('Built c++ binary: {0}'.format(binary_path))

def _link_binary(self, target, objects):
output = os.path.join(self.workdir, target.id, target.name)
safe_mkdir(os.path.dirname(output))
def _libname(self, libpath):
"""Converts a full library filepath to the library's name.
Ex: /path/to/libhello.a --> hello
"""
# Cut off 'lib' at the beginning of filename, and '.a' at end.
return os.path.basename(libpath)[3:-2]

def _link_binary(self, target, binary_path, objects):
cmd = [self.cpp_toolchain.compiler]

library_dirs = []
libraries = []

# TODO(dhamon): should this use self.context.products.get('lib').get(binary).items()
def add_library(tgt):
for dep in tgt.dependencies:
if self.is_library(dep):
library_dirs.extend([os.path.join(dep.workdir, dep.id)])
libraries.extend([dep.name])
def add_library(target):
product_map = self.context.products.get('lib').get(target)
if product_map:
for dir, libs in product_map.items():
library_dirs.append(dir)
libraries.extend((self._libname(l) for l in libs))

target.walk(add_library)

if target.libraries != None:
if target.libraries:
libraries.extend(target.libraries)

cmd.extend(objects)
cmd.extend(('-L{0}'.format(L) for L in library_dirs))
cmd.extend(('-l{0}'.format(l) for l in libraries))
cmd.extend(['-o' + output])
cmd.extend(['-o' + binary_path])
if self.get_options().ld_options != None:
cmd.extend(('-Wl,{0}'.format(o) for o in self.get_options().ld_options.split(' ')))

with self.context.new_workunit(name='cpp-link', labels=[WorkUnit.COMPILER]) as workunit:
self.run_command(cmd, workunit)

return output
46 changes: 22 additions & 24 deletions contrib/cpp/src/python/pants/contrib/cpp/tasks/cpp_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,49 +31,47 @@ def register_options(cls, register):
def product_types(cls):
return ['objs']

@property
def cache_target_dirs(self):
return True

def execute(self):
"""Compile all sources in a given target to object files."""

def is_cc(source):
_, ext = os.path.splitext(source)
return ext[1:] in self.get_options().cc_extensions

# Collect all the products we might build.
targets = self.context.targets(self.is_cpp)
for target in targets:
for source in target.sources_relative_to_buildroot():
if is_cc(source):
self.context.products.get('objs').add(target, self.workdir).append(
self._objpath(target, source))

# Compile source files to objects.
with self.invalidated(targets, invalidate_dependents=True) as invalidation_check:
invalid_targets = []
for vt in invalidation_check.invalid_vts:
invalid_targets.extend(vt.targets)
for target in invalid_targets:
with self.context.new_workunit(name='cpp-compile', labels=[WorkUnit.MULTITOOL]):
for source in target.sources_relative_to_buildroot():
if is_cc(source):
# TODO: Parallelise the compilation.
# TODO: Only recompile source files that have changed since the
# object file was last written. Also use the output from
# gcc -M to track dependencies on headers.
self._compile(target, source)

def _objpath(self, target, source):
obj_mapping = self.context.products.get('objs')
for vt in invalidation_check.all_vts:
for source in vt.target.sources_relative_to_buildroot():
if is_cc(source):
if not vt.valid:
with self.context.new_workunit(name='cpp-compile', labels=[WorkUnit.MULTITOOL]):
# TODO: Parallelise the compilation.
# TODO: Only recompile source files that have changed since the
# object file was last written. Also use the output from
# gcc -M to track dependencies on headers.
self._compile(vt.target, vt.results_dir, source)
objpath = self._objpath(vt.target, vt.results_dir, source)
obj_mapping.add(vt.target, vt.results_dir).append(objpath)

def _objpath(self, target, results_dir, source):
abs_source_root = os.path.join(get_buildroot(), target.target_base)
abs_source = os.path.join(get_buildroot(), source)
rel_source = os.path.relpath(abs_source, abs_source_root)
root, _ = os.path.splitext(rel_source)
obj_name = root + '.o'

return os.path.join(self.workdir, target.id, obj_name)
return os.path.join(results_dir, obj_name)

def _compile(self, target, source):
def _compile(self, target, results_dir, source):
"""Compile given source to an object file."""
obj = self._objpath(target, source)
safe_mkdir(os.path.dirname(obj))
obj = self._objpath(target, results_dir, source)

abs_source = os.path.join(get_buildroot(), source)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,36 +28,34 @@ def prepare(cls, options, round_manager):
def __init__(self, *args, **kwargs):
super(CppLibraryCreate, self).__init__(*args, **kwargs)

@property
def cache_target_dirs(self):
return True

def execute(self):
with self.context.new_workunit(name='cpp-library', labels=[WorkUnit.TASK]):
targets = self.context.targets(self.is_library)
for target in targets:
target.workdir = self._workdir
self.context.products.get('lib').add(target, self.workdir).append(self._libpath(target))

with self.invalidated(targets, invalidate_dependents=True) as invalidation_check:
invalid_targets = []
for vt in invalidation_check.invalid_vts:
invalid_targets.extend(vt.targets)
for target in invalid_targets:
self._create_library(target)
lib_mapping = self.context.products.get('lib')
for vt in invalidation_check.all_vts:
if not vt.valid:
self._create_library(vt.target, vt.results_dir)
lib_mapping.add(vt.target, vt.results_dir).append(self._libpath(vt.target, vt.results_dir))

def _create_library(self, library):
def _create_library(self, target, results_dir):
objects = []
for basedir, objs in self.context.products.get('objs').get(library).items():
for basedir, objs in self.context.products.get('objs').get(target).items():
objects = [os.path.join(basedir, obj) for obj in objs]
# TODO: copy public headers to work dir.
output = self._link_library(library, objects)
output = self._link_library(target, results_dir, objects)
self.context.log.info('Built c++ library: {0}'.format(output))
return output

def _libpath(self, target):
output_dir = os.path.join(self.workdir, target.id)
return os.path.join(output_dir, 'lib' + target.name + '.a')
def _libpath(self, target, results_dir):
return os.path.join(results_dir, 'lib' + target.name + '.a')

def _link_library(self, target, objects):
output = self._libpath(target)
safe_mkdir(os.path.dirname(output))
def _link_library(self, target, results_dir, objects):
output = self._libpath(target, results_dir)

cmd = [self.cpp_toolchain.register_tool('ar')]
cmd.extend(['rcs'])
Expand Down
7 changes: 4 additions & 3 deletions contrib/cpp/src/python/pants/contrib/cpp/tasks/cpp_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ def prepare(cls, options, round_manager):
round_manager.require_data('exe')

def execute(self):
binary = self.require_single_root_target()
if isinstance(binary, CppBinary):
binary_target = self.require_single_root_target()
if isinstance(binary_target, CppBinary):
with self.context.new_workunit(name='cpp-run', labels=[WorkUnit.RUN]) as workunit:
cmd = [os.path.join(binary.workdir, binary.id, binary.name)]
cmd = [self.context.products.get_only('exe', binary_target)]

args = self.get_options().args + self.get_passthru_args()
if args != None:
cmd.extend(args)
Expand Down
1 change: 1 addition & 0 deletions contrib/cpp/tests/python/pants_test/contrib/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ python_tests(
],
dependencies=[
'contrib/cpp/src/python/pants/contrib/cpp/toolchain:toolchain',
'src/python/pants/util:contextutil',
'tests/python/pants_test:int-test',
],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
unicode_literals, with_statement)

import pytest
from pants.util.contextutil import temporary_dir
from pants_test.pants_run_integration_test import PantsRunIntegrationTest

from pants.contrib.cpp.toolchain.cpp_toolchain import CppToolchain
Expand Down Expand Up @@ -67,10 +68,29 @@ def test_cpp_run(self):
self.assertIn('[cpp-run]\nHello, pants!\nGoodbye, pants!\n',
pants_run.stdout_data)

def _run_with_cache(self, task, target):
with temporary_dir(root_dir=self.workdir_root()) as cache:
args = [
'clean-all',
task,
"--cache-write-to=['{}']".format(cache),
"--cache-read-from=['{}']".format(cache),
target,
'-ldebug',
]

pants_run = self.run_pants(args)
self.assert_success(pants_run)
self.assertIn('No cached artifacts', pants_run.stdout_data)
self.assertIn('Caching artifacts', pants_run.stdout_data)

pants_run = self.run_pants(args)
self.assert_success(pants_run)
self.assertIn('Using cached artifacts', pants_run.stdout_data)
self.assertNotIn('No cached artifacts', pants_run.stdout_data)

def _binary_test(self, target):
pants_run = self.run_pants(['binary', target])
self.assert_success(pants_run)
self._run_with_cache('binary', target)

def _compile_test(self, target):
pants_run = self.run_pants(['compile', target])
self.assert_success(pants_run)
self._run_with_cache('compile', target)
26 changes: 26 additions & 0 deletions src/python/pants/backend/core/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,18 @@ def create_cache_manager(self, invalidate_dependents, fingerprint_strategy=None)
invalidate_dependents,
fingerprint_strategy=fingerprint_strategy)

@property
def cache_target_dirs(self):
"""Whether to cache files in VersionedTarget's results_dir after exiting an invalidated block.
Subclasses may override this method to return True if they wish to use this style
of "automated" caching, where each VersionedTarget is given an associated results directory,
which will automatically be uploaded to the cache. Tasks should place the output files
for each VersionedTarget in said results directory. It is highly suggested to follow this
schema for caching, rather than manually making updates to the artifact cache.
"""
return False

@contextmanager
def invalidated(self,
targets,
Expand Down Expand Up @@ -304,6 +316,10 @@ def invalidated(self,
invalidation_check = \
InvalidationCheck(invalidation_check.all_vts, uncached_vts, partition_size_hint, colors)

if self.cache_target_dirs:
for vt in invalidation_check.all_vts:
vt.create_results_dir(os.path.join(self.workdir, vt.cache_key.hash))

if not silent:
targets = []
num_invalid_partitions = len(invalidation_check.invalid_vts_partitioned)
Expand All @@ -320,9 +336,19 @@ def invalidated(self,

# Yield the result, and then mark the targets as up to date.
yield invalidation_check

for vt in invalidation_check.invalid_vts:
vt.update() # In case the caller doesn't update.

write_to_cache = (self.cache_target_dirs
and self.artifact_cache_writes_enabled()
and invalidation_check.invalid_vts)
if write_to_cache:
def result_files(vt):
return [os.path.join(vt.results_dir, f) for f in os.listdir(vt.results_dir)]
pairs = [(vt, result_files(vt)) for vt in invalidation_check.invalid_vts]
self.update_artifact_cache(pairs)

def check_artifact_cache_for(self, invalidation_check):
"""Decides which VTS to check the artifact cache for.
Expand Down
21 changes: 4 additions & 17 deletions src/python/pants/backend/jvm/tasks/checkstyle.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,41 +45,28 @@ def prepare(cls, options, round_manager):
super(Checkstyle, cls).prepare(options, round_manager)
round_manager.require_data('compile_classpath')

def __init__(self, *args, **kwargs):
super(Checkstyle, self).__init__(*args, **kwargs)

self._results_dir = os.path.join(self.workdir, 'results')

def _is_checked(self, target):
return (isinstance(target, Target) and
target.has_sources(self._JAVA_SOURCE_EXTENSION) and
(not target.is_synthetic))

def _create_result_file(self, target):
result_file = os.path.join(self._results_dir, target.id)
touch(result_file)
return result_file
@property
def cache_target_dirs(self):
return True

def execute(self):
if self.get_options().skip:
return
targets = self.context.targets(self._is_checked)
with self.invalidated(targets) as invalidation_check:
invalid_targets = []
for vt in invalidation_check.invalid_vts:
invalid_targets.extend(vt.targets)
invalid_targets = [vt.target for vt in invalidation_check.invalid_vts]
sources = self.calculate_sources(invalid_targets)
if sources:
result = self.checkstyle(targets, sources)
if result != 0:
raise TaskError('java {main} ... exited non-zero ({result})'.format(
main=self._CHECKSTYLE_MAIN, result=result))

if self.artifact_cache_writes_enabled():
result_files = lambda vt: map(lambda t: self._create_result_file(t), vt.targets)
pairs = [(vt, result_files(vt)) for vt in invalidation_check.invalid_vts]
self.update_artifact_cache(pairs)

def calculate_sources(self, targets):
sources = set()
for target in targets:
Expand Down
Loading

0 comments on commit b220248

Please sign in to comment.