Add native support for incremental caching, and use it in jvm_compile
This moves `jvm_compile` to the `cache_target_dirs` setting in order to help kill the manual caching API before the engine refactor. In order to do that, it was necessary to add native support for incremental compilation to `cache_manager.py` and `task.py`.

Optional incremental builds are implemented by copying from the previous `results_dir` for a VersionedTarget to the current one. This has the advantage that `results_dir`s are immutable (concurrent `./pants run.jvm` invocations should now be safe, regardless of changing ivy resolves).

- Implement optional incrementalism for tasks, with copy-on-write `results_dir`s
- Switch jvm_compile to cache_target_dirs, and drop all usage of `self.workdir`
- Replace the cache_hit_callback with an `rmtree` of a target's `results_dir` before extracting a cache hit
- Privatize stale methods and delete unused methods in `task.py`
- Move the `no_cache` tag from jvm_target to target in order to implement it in `task.py`
- Add `vt.id` to the path for `vt.results_dir`, which fixed an issue where two targets with identical fingerprints would share a `results_dir` (even though that will "usually" work, it's confusing)
- Add `mark_dirty` methods to Payload and PayloadField, for testing purposes.
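
A minimal sketch of the copy-on-write scheme described above: only `create_results_dir`, `vt.id`, `vt.cache_key.hash`, and `is_incremental` appear in this commit; `find_previous_results_dir` and the exact control flow are assumptions, not the real `cache_manager.py` code.

```python
import os
import shutil

def create_results_dir(vt, root_dir, allow_incremental):
  # Each (target id, fingerprint) pair gets its own directory, so two targets
  # with identical fingerprints no longer share (and race on) one results_dir.
  new_dir = os.path.join(root_dir, vt.id, vt.cache_key.hash)
  if os.path.isdir(new_dir):
    return new_dir
  previous_dir = find_previous_results_dir(vt)  # hypothetical lookup of the last build
  if allow_incremental and previous_dir is not None:
    # Clone the previous outputs: the current build starts from them, while the
    # old results_dir stays immutable for any concurrent reader.
    shutil.copytree(previous_dir, new_dir)
    vt.is_incremental = True
  else:
    os.makedirs(new_dir)
  return new_dir
```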

Testing Done:
https://travis-ci.org/pantsbuild/pants/builds/86336104

Bugs closed: 2394

Reviewed at https://rbcommons.com/s/twitter/r/2991/
stuhood committed Oct 22, 2015
1 parent 64d6484 commit ab571f1
Showing 26 changed files with 468 additions and 434 deletions.
9 changes: 9 additions & 0 deletions pants.cache.ini
@@ -0,0 +1,9 @@
# A config to globally enable caching for the purposes of manual testing.

[DEFAULT]
local_artifact_cache = %(buildroot)s/.buildcache


[cache]
read_from = ["%(local_artifact_cache)s"]
write_to = ["%(local_artifact_cache)s"]
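
The `%(...)s` values are ConfigParser-style interpolations, with `buildroot` seeded into `[DEFAULT]` by Pants itself. A standalone sketch of how they resolve, in modern Python 3 for illustration (Pants at the time ran on Python 2):

```python
from configparser import ConfigParser

config_text = """\
[DEFAULT]
buildroot = /repo
local_artifact_cache = %(buildroot)s/.buildcache

[cache]
read_from = ["%(local_artifact_cache)s"]
write_to = ["%(local_artifact_cache)s"]
"""

parser = ConfigParser()
parser.read_string(config_text)
# Interpolation recurses through [DEFAULT]:
print(parser.get('cache', 'read_from'))  # ["/repo/.buildcache"]
```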
7 changes: 0 additions & 7 deletions src/python/pants/backend/codegen/tasks/apache_thrift_gen.py
@@ -66,13 +66,6 @@ def _thrift_binary(self):
thrift_binary = ThriftBinary.Factory.scoped_instance(self).create()
return thrift_binary.path

def invalidate_for_files(self):
# TODO: This will prevent artifact caching across platforms.
# Find some cross-platform way to assert the thrift binary version.
# NB: We have access to the version via the ThriftBinary instance's `version`, we just need
# support for invalidation based on non-files.
return [self._thrift_binary]

@memoized_property
def _deps(self):
deps = self.get_options().deps
69 changes: 47 additions & 22 deletions src/python/pants/backend/core/tasks/task.py
@@ -236,15 +236,6 @@ def artifact_cache_reads_enabled(self):
def artifact_cache_writes_enabled(self):
return self._cache_factory.write_cache_available()

def invalidate_for_files(self):
"""Provides extra files that participate in invalidation.
Subclasses can override and return a list of full paths to extra, non-source files that should
be checked for changes when managing target invalidation. This is useful for tracking
changes to pre-built build tools, e.g., the thrift compiler.
"""
return []

def invalidate(self):
"""Invalidates all targets for this task."""
BuildInvalidator(self._build_invalidator_dir).force_invalidate_all()
@@ -279,6 +270,26 @@ def cache_target_dirs(self):
"""
return False

@property
def incremental(self):
"""Whether this Task implements incremental building of individual targets.
Incremental tasks with `cache_target_dirs` set will have the results_dir of the previous build
for a target cloned into the results_dir for the current build (where possible). This
copy-on-write behaviour allows for immutability of the results_dir once a target has been
marked valid.
"""
return False

@property
def cache_incremental(self):
"""For incremental tasks, indicates whether the results of incremental builds should be cached.
Deterministic per-target incremental compilation is a relatively difficult thing to implement,
so this property provides an escape hatch to avoid caching things in that riskier case.
"""
return False

@contextmanager
def invalidated(self,
targets,
@@ -355,9 +366,7 @@ def invalidated(self,
invalidation_check = \
InvalidationCheck(invalidation_check.all_vts, uncached_vts, partition_size_hint, colors)

if self.cache_target_dirs:
for vt in invalidation_check.all_vts:
vt.create_results_dir(os.path.join(self.workdir, vt.cache_key.hash))
self._maybe_create_results_dirs(invalidation_check.all_vts)

if not silent:
targets = []
@@ -393,11 +402,27 @@ def invalidated(self,
and self.artifact_cache_writes_enabled()
and invalidation_check.invalid_vts)
if write_to_cache:
def result_files(vt):
return [os.path.join(vt.results_dir, f) for f in os.listdir(vt.results_dir)]
pairs = [(vt, result_files(vt)) for vt in invalidation_check.invalid_vts]
pairs = []
for vt in invalidation_check.invalid_vts:
if self._should_cache(vt):
pairs.append((vt, [vt.results_dir]))
self.update_artifact_cache(pairs)

def _should_cache(self, vt):
"""Return true if the given vt should be written to a cache (if configured)."""
if vt.target.has_label('no_cache'):
return False
elif not vt.is_incremental or self.cache_incremental:
return True
else:
return False

def _maybe_create_results_dirs(self, vts):
"""If `cache_target_dirs`, create results_dirs for the given versioned targets."""
if self.cache_target_dirs:
for vt in vts:
vt.create_results_dir(self.workdir, allow_incremental=self.incremental)

def check_artifact_cache_for(self, invalidation_check):
"""Decides which VTS to check the artifact cache for.
@@ -414,14 +439,11 @@ def check_artifact_cache(self, vts):
"""
return self.do_check_artifact_cache(vts)

def do_check_artifact_cache(self, vts, post_process_cached_vts=None, cache_hit_callback=None):
def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
"""Checks the artifact cache for the specified list of VersionedTargetSets.
Returns a pair (cached, uncached) of VersionedTargets that were
satisfied/unsatisfied from the cache.
:param cache_hit_callback: A serializable function that expects a CacheKey as an argument.
Called after a cache hit, but before the cached artifact is extracted.
"""
if not vts:
return [], []
@@ -430,7 +452,8 @@ def do_check_artifact_cache(self, vts, post_process_cached_vts=None, cache_hit_c
uncached_vts = OrderedSet(vts)

read_cache = self._cache_factory.get_read_cache()
items = [(read_cache, vt.cache_key, cache_hit_callback) for vt in vts]
items = [(read_cache, vt.cache_key, vt.results_dir if vt.has_results_dir else None)
for vt in vts]

res = self.context.subproc_map(call_use_cached_files, items)

@@ -441,6 +464,8 @@ def do_check_artifact_cache(self, vts, post_process_cached_vts=None, cache_hit_c
elif isinstance(was_in_cache, UnreadableArtifact):
self._cache_key_errors.update(was_in_cache.key)

self._maybe_create_results_dirs(vts)

# Note that while the input vts may represent multiple targets (for tasks that override
# check_artifact_cache_for), the ones we return must represent single targets.
def flatten(vts):
@@ -459,12 +484,12 @@ def update_artifact_cache(self, vts_artifactfiles_pairs):
- vts is single VersionedTargetSet.
- artifactfiles is a list of absolute paths to artifacts for the VersionedTargetSet.
"""
update_artifact_cache_work = self.get_update_artifact_cache_work(vts_artifactfiles_pairs)
update_artifact_cache_work = self._get_update_artifact_cache_work(vts_artifactfiles_pairs)
if update_artifact_cache_work:
self.context.submit_background_work_chain([update_artifact_cache_work],
parent_workunit_name='cache')

def get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
"""Create a Work instance to update an artifact cache, if we're configured to.
vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
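
Taken together, `cache_target_dirs`, `incremental`, and `cache_incremental` form the whole opt-in surface added to `task.py` above. A hypothetical task (not part of this commit) that wants framework-managed, incremental, but uncached results_dirs would override them like so:

```python
from pants.backend.core.tasks.task import Task

class MyIncrementalTask(Task):
  """Illustrative only: a task with framework-managed, incremental results_dirs."""

  @property
  def cache_target_dirs(self):
    # invalidated() will create a results_dir per VersionedTarget and cache it.
    return True

  @property
  def incremental(self):
    # New results_dirs are seeded with a copy of the previous build's outputs.
    return True

  @property
  def cache_incremental(self):
    # Incremental outputs may not be deterministic, so _should_cache() will
    # skip the artifact cache for any vt that was built incrementally.
    return False
```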
4 changes: 0 additions & 4 deletions src/python/pants/backend/jvm/targets/jvm_target.py
@@ -32,7 +32,6 @@ def __init__(self,
provides=None,
excludes=None,
resources=None,
no_cache=False,
services=None,
platform=None,
**kwargs):
@@ -42,7 +41,6 @@ def __init__(self,
:param sources: Source code files to build. Paths are relative to the BUILD
file's directory.
:type sources: ``Fileset`` (from globs or rglobs) or list of strings
:param no_cache: If True, this should not be stored in the artifact cache
:param services: A dict mapping service interface names to the classes owned by this target
that implement them. Keys are fully qualified service class names, values are
lists of strings, each string the fully qualified class name of a class owned
@@ -74,8 +72,6 @@ def __init__(self,
self._services = services or {}

self.add_labels('jvm')
if no_cache:
self.add_labels('no_cache')

@property
def platform(self):
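
With the `no_cache` tag now applied in the base `Target`, any target type can opt out of caching, not just JVM targets. A hypothetical BUILD file entry (names invented, assuming the keyword still propagates through `**kwargs`):

```python
java_library(
  name='scratch-lib',
  sources=globs('*.java'),
  no_cache=True,  # sets the 'no_cache' label; _should_cache() in task.py then skips it
)
```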
@@ -122,21 +122,20 @@ def parse_num_items(self, line):
raise ParseError('Expected: "<num> items". Found: "{0}"'.format(line))
return int(matchobj.group(1))

def rebase_from_path(self, infile_path, outfile_path, pants_home_from, pants_home_to,
java_home=None):
def rebase_from_path(self, infile_path, outfile_path, old_base, new_base, java_home=None):
"""Rebase an analysis at infile_path, writing the result to outfile_path.
See rebase() below for an explanation of rebasing.
"""
with open(infile_path, 'rb') as infile:
with open(outfile_path, 'wb') as outfile:
self.rebase(infile, outfile, pants_home_from, pants_home_to, java_home)
self.rebase(infile, outfile, old_base, new_base, java_home)

def rebase(self, infile, outfile, pants_home_from, pants_home_to, java_home=None):
def rebase(self, infile, outfile, old_base, new_base, java_home=None):
"""Rebase an analysis read from infile and write the result to outfile.
Rebasing means replacing references to paths under pants_home_from with references to
equivalent paths under pants_home_to.
Rebasing means replacing references to paths under old_base with references to
equivalent paths under new_base.
If java_home is specified then any references to paths under it are scrubbed entirely.
"""
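
A usage sketch of the renamed API (paths are invented; per the docstring, passing a real `java_home` would scrub references under it instead of rebasing them):

```python
# Rewrite absolute paths in a compile analysis so it is valid under a new buildroot.
parser.rebase_from_path(
  infile_path='/old/buildroot/.pants.d/compile/z.analysis',
  outfile_path='/new/buildroot/.pants.d/compile/z.analysis',
  old_base='/old/buildroot',
  new_base='/new/buildroot',
  java_home=None,
)
```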
@@ -51,6 +51,9 @@ def merge_from_paths(self, analysis_paths, merged_analysis_path):
merged_analysis = self._analysis_cls.merge(analyses)
merged_analysis.write_to_path(merged_analysis_path)

def rebase_from_path(self, infile_path, outfile_path, old_base, new_base):
self.parser.rebase_from_path(infile_path, outfile_path, old_base, new_base, java_home=None)

def relativize(self, src_analysis, relativized_analysis):
with temporary_dir() as tmp_analysis_dir:
tmp_analysis_file = os.path.join(tmp_analysis_dir, 'analysis.relativized')
@@ -18,11 +18,14 @@ class CompileContext(object):
and a finalized compile in its permanent location.
"""

def __init__(self, target, analysis_file, classes_dir, jar_file, sources):
def __init__(self, target, analysis_file, portable_analysis_file, classes_dir, jar_file,
log_file, sources):
self.target = target
self.analysis_file = analysis_file
self.portable_analysis_file = portable_analysis_file
self.classes_dir = classes_dir
self.jar_file = jar_file
self.log_file = log_file
self.sources = sources

@contextmanager
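
The widened constructor keeps every per-target output location in one place. A hypothetical instantiation (file names invented; `results_dir` and `target` assumed in scope, and `sources_relative_to_buildroot()` assumed available on the target):

```python
import os

ctx = CompileContext(
  target=target,
  analysis_file=os.path.join(results_dir, 'z.analysis'),
  portable_analysis_file=os.path.join(results_dir, 'z.analysis.portable'),
  classes_dir=os.path.join(results_dir, 'classes'),
  jar_file=os.path.join(results_dir, 'z.jar'),
  log_file=os.path.join(results_dir, 'debug.log'),
  sources=target.sources_relative_to_buildroot(),
)
```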