Populate classes_by_target using the context jar for the isolated strategy

- Remove the option to not jar classpath outputs in the isolated strategy
- Populate classes_by_target using the context jar
- Add a simplified `relpath` implementation (`fast_relpath`), because `os.path.relpath` was showing up inordinately hot in profiles

This avoids a filesystem walk during the population of products. That means we do one less filesystem walk when we hit the cache, or when we're populating products for an already-built target.
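
A rough sketch of the mechanism (the helper name and parameters below are illustrative, not the actual task API): instead of walking `classes_dir` on disk, the strategy enumerates the entries of a target's context jar and joins them to `classes_dir` to recover the classfile paths needed for `classes_by_target`:

    import os
    import zipfile

    def classfiles_from_context_jar(jar_path, classes_dir):
      """Illustrative helper: list the classfile paths recorded in a context jar.

      Reading the jar's central directory replaces a recursive walk of classes_dir.
      """
      classfiles = set()
      with zipfile.ZipFile(jar_path, 'r') as jar:
        for name in jar.namelist():
          if not name.endswith('/'):  # skip directory entries
            classfiles.add(os.path.join(classes_dir, name))
      return classfiles

In the actual change this happens inside `compute_classes_by_source`, via the `open_jar` context manager added to `IsolatedCompileContext` (see the diffs below).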

Testing Done:
pantsbuild#2082

----

For a large classpath:
    $ find .pants.d/compile/jvm -name '*.class' | wc -l
      180463

Before:

    21:31:49 00:14         [apt-pre]
    21:31:49 00:14         [java-pre]
    21:31:49 00:14         [zinc-pre]
    21:31:51 00:16         [zinc-prepare]
    21:31:51 00:16           [validate-zinc-analysis]
    21:31:51 00:16           [isolation-zinc-pool-bootstrap]
    21:31:51 00:16         [apt-prepare]
    21:31:51 00:16           [validate-java-analysis]
    21:31:51 00:16           [isolation-java-pool-bootstrap]
    21:31:51 00:16         [zinc-execute]
    21:32:17 00:42         [apt-execute]
    21:32:17 00:42         [zinc-execute]
    21:32:25 00:50         [zinc-finalize]
    21:32:26 00:51         [apt-finalize]
    21:32:26 00:51         [apt-post]
    21:32:26 00:51         [java-post]
    21:32:26 00:51         [zinc-post]

After:

    23:26:01 00:13         [apt-pre]
    23:26:01 00:13         [java-pre]
    23:26:01 00:13         [zinc-pre]
    23:26:02 00:14         [apt-prepare]
    23:26:02 00:14           [validate-java-analysis]
    23:26:02 00:14           [isolation-java-pool-bootstrap]
    23:26:02 00:14         [zinc-prepare]
    23:26:02 00:14           [validate-zinc-analysis]
    23:26:03 00:15           [isolation-zinc-pool-bootstrap]
    23:26:03 00:15         [apt-execute]
    23:26:03 00:15         [zinc-execute]
    23:26:16 00:28         [apt-finalize]
    23:26:16 00:28         [zinc-finalize]
    23:26:16 00:28         [apt-post]
    23:26:16 00:28         [java-post]
    23:26:16 00:28         [zinc-post]

Reviewed at https://rbcommons.com/s/twitter/r/2720/
stuhood committed Aug 29, 2015
1 parent b23edfe commit f619b46
Showing 8 changed files with 76 additions and 29 deletions.
11 changes: 11 additions & 0 deletions migrations/options/src/python/migrate_config.py
@@ -264,6 +264,11 @@
('compile.cpp-compile', 'cc_extensions'): ('compile.cpp', 'cc_extensions'),

('test.junit', 'coverage_html_open'): ('test.junit', 'coverage_open'),

# On by default.
('compile.apt', 'jar'): None,
('compile.java', 'jar'): None,
('compile.zinc', 'jar'): None,
}

ng_daemons_note = ('The global "ng_daemons" option has been replaced by a "use_nailgun" option '
@@ -277,6 +282,8 @@

scrooge_gen_deps_note = ('The scrooge-gen per-language config fields have been refactored into '
'two options: one for service deps, and one for structs deps.')
compile_jar_note = ('The isolated jvm compile `jar` option is critical to performant operation '
'and can no longer be disabled.')

notes = {
('jvm', 'missing_deps_target_whitelist'): 'This should be split into compile.java or '
@@ -369,6 +376,10 @@
('test.junit', 'coverage_console'): 'Option no longer exists. Coverage always written to stdout.',
('test.junit', 'coverage_html'): 'Option no longer exists. Coverage always written to html file.',
('test.junit', 'coverage_xml'): 'Option no longer exists. Coverage always written to xml file.',

('compile.apt', 'jar'): compile_jar_note,
('compile.java', 'jar'): compile_jar_note,
('compile.zinc', 'jar'): compile_jar_note,
}


3 changes: 3 additions & 0 deletions src/python/pants/backend/jvm/tasks/jvm_compile/BUILD
@@ -39,6 +39,9 @@ python_library(
python_library(
name = 'compile_context',
sources = ['compile_context.py'],
dependencies = [
'src/python/pants/util:contextutil',
]
)

# TODO(Eric Ayers) Create a BUILD file in the java/ subdirectory?
10 changes: 10 additions & 0 deletions src/python/pants/backend/jvm/tasks/jvm_compile/compile_context.py
@@ -5,6 +5,11 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import zipfile
from contextlib import contextmanager

from pants.util.contextutil import open_zip


class CompileContext(object):
"""A context for the compilation of a target.
@@ -37,3 +42,8 @@ class IsolatedCompileContext(CompileContext):
def __init__(self, target, analysis_file, classes_dir, jar_file, sources):
super(IsolatedCompileContext, self).__init__(target, analysis_file, classes_dir, sources)
self.jar_file = jar_file

@contextmanager
def open_jar(self, mode):
with open_zip(self.jar_file, mode=mode, compression=zipfile.ZIP_STORED) as jar:
yield jar
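
A minimal usage sketch of the new `open_jar` helper, assuming the module path matches the file header above; `target=None` and the temp paths are placeholders rather than real task inputs. Because the jar is opened with `ZIP_STORED`, entries are written and read without compression overhead:

    import os
    import tempfile

    # Assumed import path, taken from the file header above.
    from pants.backend.jvm.tasks.jvm_compile.compile_context import IsolatedCompileContext

    tmp = tempfile.mkdtemp()
    cc = IsolatedCompileContext(target=None,
                                analysis_file=os.path.join(tmp, 'z.analysis'),
                                classes_dir=os.path.join(tmp, 'classes'),
                                jar_file=os.path.join(tmp, 'z.jar'),
                                sources=[])

    # Record a (stub) classfile entry, then read the entry names back without
    # touching the classes directory on disk.
    with cc.open_jar(mode='w') as jar:
      jar.writestr('org/example/Foo.class', b'')
    with cc.open_jar(mode='r') as jar:
      print(jar.namelist())  # ['org/example/Foo.class']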
@@ -37,11 +37,6 @@ def register_options(cls, register, compile_task_name, supports_concurrent_execu
register('--capture-log', advanced=True, action='store_true', default=False,
fingerprint=True,
help='Capture compilation output to per-target logs.')
register('--jar', advanced=True, action='store_true', default=True,
fingerprint=True,
help='Place jar\'d outputs on the classpath after compilation. Because the JVM '
'performs poorly with directories on the classpath, this generally improves runtime '
'performance.')

def __init__(self, context, options, workdir, analysis_tools, compile_task_name,
sources_predicate):
@@ -55,7 +50,6 @@ def __init__(self, context, options, workdir, analysis_tools, compile_task_name,
self._jars_dir = os.path.join(workdir, 'jars')

self._capture_log = options.capture_log
self._jar = options.jar

try:
worker_count = options.worker_count
@@ -120,14 +114,13 @@ def prepare_compile(self, cache_manager, all_targets, relevant_targets):
self._worker_count)

def finalize_compile(self, targets):
if self._jar:
# Replace the classpath entry for each target with its jar'd representation.
compile_classpaths = self.context.products.get_data('compile_classpath')
for target in targets:
cc = self.compile_context(target)
for conf in self._confs:
compile_classpaths.remove_for_target(target, [(conf, cc.classes_dir)])
compile_classpaths.add_for_target(target, [(conf, cc.jar_file)])
# Replace the classpath entry for each target with its jar'd representation.
compile_classpaths = self.context.products.get_data('compile_classpath')
for target in targets:
cc = self.compile_context(target)
for conf in self._confs:
compile_classpaths.remove_for_target(target, [(conf, cc.classes_dir)])
compile_classpaths.add_for_target(target, [(conf, cc.jar_file)])

def invalidation_hints(self, relevant_targets):
# No partitioning.
@@ -138,10 +131,11 @@ def compute_classes_by_source(self, compile_contexts):
# Build a mapping of srcs to classes for each context.
classes_by_src_by_context = defaultdict(dict)
for compile_context in compile_contexts:
# Walk the class directory to build a set of unclaimed classfiles.
# Walk the context's jar to build a set of unclaimed classfiles.
unclaimed_classes = set()
for dirpath, _, filenames in safe_walk(compile_context.classes_dir):
unclaimed_classes.update(os.path.join(dirpath, f) for f in filenames)
with compile_context.open_jar(mode='r') as jar:
for name in jar.namelist():
unclaimed_classes.add(os.path.join(compile_context.classes_dir, name))

# Grab the analysis' view of which classfiles were generated.
classes_by_src = classes_by_src_by_context[compile_context]
@@ -225,13 +219,12 @@ def work():
target.platform)
atomic_copy(tmp_analysis_file, compile_context.analysis_file)

# Jar the compiled output.
self._create_context_jar(compile_context)

# Update the products with the latest classes.
register_vts([compile_context])

# If requested, jar the output.
if self._jar:
self._create_context_jar(compile_context)

# Kick off the background artifact cache write.
if update_artifact_cache_vts_work:
self._write_to_artifact_cache(vts, compile_context, update_artifact_cache_vts_work)
@@ -323,7 +316,7 @@ def _create_context_jar(self, compile_context):
see https://github.com/twitter-forks/sbt/tree/stuhood/output-jars
"""
root = compile_context.classes_dir
with open_zip(compile_context.jar_file, mode='w', compression=zipfile.ZIP_STORED) as jar:
with compile_context.open_jar(mode='w') as jar:
for abs_sub_dir, dirnames, filenames in safe_walk(root):
for name in dirnames + filenames:
abs_filename = os.path.join(abs_sub_dir, name)
@@ -362,8 +355,7 @@ def add_abs_products(p):
if log_file and os.path.exists(log_file):
artifacts.append(log_file)
# Jar.
if self._jar:
artifacts.append(compile_context.jar_file)
artifacts.append(compile_context.jar_file)

# Get the 'work' that will publish these artifacts to the cache.
# NB: the portable analysis_file won't exist until we finish.
1 change: 1 addition & 0 deletions src/python/pants/goal/BUILD
@@ -66,6 +66,7 @@ python_library(
sources = ['products.py'],
dependencies = [
'3rdparty/python/twitter/commons:twitter.common.collections',
'src/python/pants/util:dirutil',
],
)

6 changes: 3 additions & 3 deletions src/python/pants/goal/products.py
@@ -10,6 +10,8 @@

from twitter.common.collections import OrderedSet

from pants.util.dirutil import fast_relpath


class ProductError(Exception): pass

@@ -78,9 +80,7 @@ def __init__(self, root):

def add_abs_paths(self, abs_paths):
for abs_path in abs_paths:
if not abs_path.startswith(self._root):
raise Exception('{} is not under {}'.format(abs_path, self._root))
self._rel_paths.add(os.path.relpath(abs_path, self._root))
self._rel_paths.add(fast_relpath(abs_path, self._root))

def add_rel_paths(self, rel_paths):
self._rel_paths.update(rel_paths)
16 changes: 16 additions & 0 deletions src/python/pants/util/dirutil.py
@@ -18,6 +18,22 @@
from pants.util.strutil import ensure_text


def fast_relpath(path, start):
"""A prefix-based relpath, with no normalization or support for returning `..`."""
if not path.startswith(start):
raise ValueError('{} is not a prefix of {}'.format(start, path))

# Confirm that the split occurs on a directory boundary.
if start[-1] == '/':
slash_offset = 0
elif path[len(start)] == '/':
slash_offset = 1
else:
raise ValueError('{} is not a directory containing {}'.format(start, path))

return path[len(start)+slash_offset:]


def safe_mkdir(directory, clean=False):
"""Ensure a directory is present.
18 changes: 16 additions & 2 deletions tests/python/pants_test/util/test_dirutil.py
@@ -14,8 +14,8 @@

from pants.util import dirutil
from pants.util.contextutil import temporary_dir
from pants.util.dirutil import (_mkdtemp_unregister_cleaner, get_basedir, relative_symlink,
relativize_paths, safe_mkdir)
from pants.util.dirutil import (_mkdtemp_unregister_cleaner, fast_relpath, get_basedir,
relative_symlink, relativize_paths, safe_mkdir)


class DirutilTest(unittest.TestCase):
@@ -28,6 +28,20 @@ def setUp(self):
def tearDown(self):
self._mox.UnsetStubs()

def test_fast_relpath(self):
def assertRelpathC(path, start):
self.assertEquals('c', fast_relpath(path, start))
assertRelpathC('/a/b/c', '/a/b')
assertRelpathC('/a/b/c', '/a/b/')
assertRelpathC('b/c', 'b')
assertRelpathC('b/c', 'b/')

def test_fast_relpath_invalid(self):
with self.assertRaises(ValueError):
fast_relpath('/a/b', '/a/baseball')
with self.assertRaises(ValueError):
fast_relpath('/a/baseball', '/a/b')

def test_mkdtemp_setup_teardown(self):
def faux_cleaner():
pass
