Skip to content

Commit

Permalink
Support caching chroots for reuse across pants runs.
Browse files Browse the repository at this point in the history
- Creates huge speedups e.g., when running tests.

- Not turned on for any code yet.  To work safely
  this requires a change to the pex library to support
  creating chroots via copying instead of hard-linking,
  and we're stil waiting on upgrading to a version of pex
  with that change. However this has undergone enough
  "unsafe" testing to verify that it works and provides
  peformance benefits, so I'm submitting it now, to prevent
  it from drifting.

- Incidental changes that this commit required:
  * PythonChroot no longer deletes itself when GC'd. It's generally
    a bad idea to rely on cleanup in __del__ anyway, as there's no
    guarantee it'll ever be called.
  * Simplified some of the interface to PythonChroot. For example,
    you no longer specify an executable name - the chroot creation
    code hard-codes one for you.  The only caller we had for this
    was hard-coding a name anyway, so no real loss of functionality
    there.
  * Modified the backtrace munging trick in python_eval to rely only
    on the parent of the chroot dir, not the chroot dir itself, as
    that is no longer known when we generate the eval's entry point: we need
    to generate that entry point so we can hash it and use that hash
    as input to the function that generates well-known chroot paths.

Testing Done:
CI passes: https://travis-ci.org/pantsbuild/pants/builds/66601000.

Bugs closed: 1669

Reviewed at https://rbcommons.com/s/twitter/r/2349/
  • Loading branch information
benjyw authored and Benjy committed Jun 12, 2015
1 parent 141fe61 commit f593380
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 68 deletions.
13 changes: 2 additions & 11 deletions src/python/pants/backend/python/python_chroot.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,15 @@ def delete(self):
"""Deletes this chroot from disk if it has been dumped."""
safe_rmtree(self.path())

def __del__(self):
if os.getenv('PANTS_LEAVE_CHROOT') is None:
self.delete()
else:
self.debug('Left chroot at {}'.format(self.path()))

def debug(self, msg, indent=0):
if os.getenv('PANTS_VERBOSE') is not None:
print('{}{}'.format(' ' * indent, msg))

def path(self):
return os.path.realpath(self._builder.path())

def set_executable(self, filename, env_filename=None):
self._builder.set_executable(filename, env_filename)

def pex(self):
return PEX(os.path.realpath(self._builder.path()), interpreter=self._interpreter)
return PEX(self.path(), interpreter=self._interpreter)

def package_pex(self, filename):
"""Package into a PEX zipfile.
Expand All @@ -121,7 +112,7 @@ def copy_to_chroot(base, path, add_function):
for relpath in library.sources_relative_to_source_root():
try:
copy_to_chroot(library.target_base, relpath, self._builder.add_source)
except OSError as e:
except OSError:
logger.error("Failed to copy {path} for library {library}"
.format(path=os.path.join(library.target_base, relpath),
library=library))
Expand Down
11 changes: 10 additions & 1 deletion src/python/pants/backend/python/python_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ def register_options(cls, register):
register('--interpreter-cache-dir', advanced=True, default=None, metavar='<dir>',
help='The parent directory for the interpreter cache. '
'If unspecified, a standard path under the workdir is used.')
register('--chroot-cache-dir', advanced=True, default=None, metavar='<dir>',
help='The parent directory for the chroot cache. '
'If unspecified, a standard path under the workdir is used.')
register('--resolver-cache-dir', advanced=True, default=None, metavar='<dir>',
help='The parent directory for the requirement resolver cache. '
'If unspecified, a standard path under the workdir is used.')
Expand All @@ -44,7 +47,8 @@ def register_options(cls, register):
help='The time in seconds before we consider re-resolving an open-ended requirement, '
'e.g. "flask>=0.2" if a matching distribution is available on disk.')
register('--artifact-cache-dir', advanced=True, default=None, metavar='<dir>',
help='The parent directory for the python artifact cache. ')
help='The parent directory for the python artifact cache. '
'If unspecified, a standard path under the workdir is used.')

@property
def interpreter_requirement(self):
Expand All @@ -67,6 +71,11 @@ def interpreter_cache_dir(self):
return (self.get_options().interpreter_cache_dir or
os.path.join(self.scratch_dir, 'interpreters'))

@property
def chroot_cache_dir(self):
return (self.get_options().chroot_cache_dir or
os.path.join(self.scratch_dir, 'chroots'))

@property
def resolver_cache_dir(self):
return (self.get_options().resolver_cache_dir or
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/backend/python/tasks/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ python_library(
'src/python/pants/base:deprecated',
'src/python/pants/base:exceptions',
'src/python/pants/base:generator',
'src/python/pants/base:hash_utils',
'src/python/pants/base:target',
'src/python/pants/base:workunit',
'src/python/pants/console:stty_utils',
Expand Down
39 changes: 16 additions & 23 deletions src/python/pants/backend/python/tasks/python_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@
from pants.base.exceptions import TaskError
from pants.base.generator import Generator, TemplateData
from pants.base.workunit import WorkUnit
from pants.util.contextutil import temporary_file


class PythonEval(PythonTask):
class Error(TaskError):
"""A richer failure exception type useful for tests."""

def __init__(self, *args, **kwargs):
compiled = kwargs.pop('compiled')
failed = kwargs.pop('failed')
Expand Down Expand Up @@ -131,24 +129,19 @@ def _compile_target(self, target):
else:
pexinfo, platforms = None, None

with temporary_file() as imports_file:
def pre_freeze(chroot):
generator = Generator(pkgutil.get_data(__name__, self._EVAL_TEMPLATE_PATH),
chroot=chroot.path(),
modules=modules)
generator.write(imports_file)
imports_file.close()
chroot.set_executable(imports_file.name, '__pants_python_eval__.py')

with self.temporary_chroot(interpreter=interpreter, pex_info=pexinfo,
targets=[target], platforms=platforms,
pre_freeze=pre_freeze) as chroot:
pex = chroot.pex()
with self.context.new_workunit(name='eval',
labels=[WorkUnit.COMPILER, WorkUnit.RUN, WorkUnit.TOOL],
cmd=' '.join(pex.cmdline())) as workunit:
returncode = pex.run(stdout=workunit.output('stdout'), stderr=workunit.output('stderr'))
workunit.set_outcome(WorkUnit.SUCCESS if returncode == 0 else WorkUnit.FAILURE)
if returncode != 0:
self.context.log.error('Failed to eval {}'.format(target.address.spec))
return returncode
generator = Generator(pkgutil.get_data(__name__, self._EVAL_TEMPLATE_PATH),
chroot_parent=self.chroot_cache_dir, modules=modules)
executable_file_content = generator.render()

with self.temporary_chroot(interpreter=interpreter, pex_info=pexinfo,
targets=[target], platforms=platforms,
executable_file_content=executable_file_content) as chroot:
pex = chroot.pex()
with self.context.new_workunit(name='eval',
labels=[WorkUnit.COMPILER, WorkUnit.RUN, WorkUnit.TOOL],
cmd=' '.join(pex.cmdline())) as workunit:
returncode = pex.run(stdout=workunit.output('stdout'), stderr=workunit.output('stderr'))
workunit.set_outcome(WorkUnit.SUCCESS if returncode == 0 else WorkUnit.FAILURE)
if returncode != 0:
self.context.log.error('Failed to eval {}'.format(target.address.spec))
return returncode
104 changes: 94 additions & 10 deletions src/python/pants/backend/python/tasks/python_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,27 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import os
import shutil
import tempfile
from contextlib import contextmanager

from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo
from twitter.common.collections import OrderedSet

from pants.backend.core.tasks.task import Task
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.python_chroot import PythonChroot
from pants.backend.python.python_setup import PythonRepos, PythonSetup
from pants.base import hash_utils
from pants.base.exceptions import TaskError


class PythonTask(Task):
# If needed, we set this as the executable entry point of any chroots we create.
CHROOT_EXECUTABLE_NAME = '__pants_executable__'

@classmethod
def global_subsystems(cls):
return super(PythonTask, cls).global_subsystems() + (PythonSetup, PythonRepos)
Expand Down Expand Up @@ -87,15 +94,66 @@ def select_interpreter(self, filters):
self.context.log.debug('Selected {}'.format(interpreter))
return interpreter

@property
def chroot_cache_dir(self):
return PythonSetup.global_instance().chroot_cache_dir

@contextmanager
def temporary_chroot(self, interpreter, pex_info, targets, platforms,
extra_requirements=None, pre_freeze=None):
"""Yields a temporary PythonChroot created with the specified args.
def cached_chroot(self, interpreter, pex_info, targets, platforms,
extra_requirements=None, executable_file_content=None):
"""Returns a cached PythonChroot created with the specified args.
The returned chroot will be cached for future use.
pre_freeze is an optional function run on the chroot just before freezing its builder,
to allow for any extra modification.
TODO: Garbage-collect old chroots, so they don't pile up?
TODO: Ideally chroots would just be products produced by some other task. But that's
a bit too complicated to implement right now, as we'd need a way to request
chroots for a variety of sets of targets.
"""
path = tempfile.mkdtemp()
# This PexInfo contains any customizations specified by the caller.
# The process of building a pex modifies it further.
pex_info = pex_info or PexInfo.default()

path = self._chroot_path(PythonSetup.global_instance(), interpreter, pex_info, targets,
platforms, extra_requirements, executable_file_content)
if not os.path.exists(path):
path_tmp = path + '.tmp'
self._build_chroot(path_tmp, interpreter, pex_info, targets, platforms,
extra_requirements, executable_file_content)
shutil.move(path_tmp, path)

# We must read the PexInfo that was frozen into the pex, so we get the modifications
# created when that pex was built.
pex_info = PexInfo.from_pex(path)
# Now create a PythonChroot wrapper without dumping it.
builder = PEXBuilder(path=path, interpreter=interpreter, pex_info=pex_info)
chroot = PythonChroot(
context=self.context,
python_setup=PythonSetup.global_instance(),
python_repos=PythonRepos.global_instance(),
interpreter=interpreter,
builder=builder,
targets=targets,
platforms=platforms,
extra_requirements=extra_requirements)
# TODO: Doesn't really need to be a contextmanager, but it's convenient to make it so
# while transitioning calls to temporary_chroot to calls to cached_chroot.
# We can revisit after that transition is complete.
yield chroot

@contextmanager
def temporary_chroot(self, interpreter, pex_info, targets, platforms,
extra_requirements=None, executable_file_content=None):
path = tempfile.mkdtemp() # Not a contextmanager: chroot.delete() will clean this up anyway.
pex_info = pex_info or PexInfo.default()
chroot = self._build_chroot(path, interpreter, pex_info, targets, platforms,
extra_requirements, executable_file_content)
yield chroot
chroot.delete()

def _build_chroot(self, path, interpreter, pex_info, targets, platforms,
extra_requirements=None, executable_file_content=None):
"""Create a PythonChroot with the specified args."""
builder = PEXBuilder(path=path, interpreter=interpreter, pex_info=pex_info)
with self.context.new_workunit('chroot'):
chroot = PythonChroot(
Expand All @@ -108,8 +166,34 @@ def temporary_chroot(self, interpreter, pex_info, targets, platforms,
platforms=platforms,
extra_requirements=extra_requirements)
chroot.dump()
if pre_freeze:
pre_freeze(chroot)
if executable_file_content is not None:
with open(os.path.join(path, '{}.py'.format(self.CHROOT_EXECUTABLE_NAME)), 'w') as outfile:
outfile.write(executable_file_content)
# Override any user-specified entry point, under the assumption that the
# executable_file_content does what the user intends (including, probably, calling that
# underlying entry point).
pex_info.entry_point = self.CHROOT_EXECUTABLE_NAME
builder.freeze()
yield chroot
chroot.delete()
return chroot

def _chroot_path(self, python_setup, interpreter, pex_info, targets, platforms,
extra_requirements, executable_file_content):
"""Pick a unique, well-known directory name for the chroot with the specified parameters.
TODO: How many of these do we expect to have? Currently they are all under a single
directory, and some filesystems (E.g., HFS+) don't handle directories with thousands of
entries well. GC'ing old chroots may be enough of a solution, assuming this is even a problem.
"""
fingerprint_components = [str(interpreter.identity)]
if pex_info:
fingerprint_components.append(pex_info.dump())
fingerprint_components.extend(filter(None, [t.payload.fingerprint() for t in targets]))
if platforms:
fingerprint_components.extend(platforms)
if extra_requirements:
fingerprint_components.extend([r.cache_key() for r in extra_requirements])
if executable_file_content is not None:
fingerprint_components.append(executable_file_content)

fingerprint = hash_utils.hash_all(fingerprint_components)
return os.path.join(python_setup.chroot_cache_dir, fingerprint)
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ def backtrace_to_here():
frame = info[0]
tb = inspect.getframeinfo(frame)
filename = tb.filename
if filename.startswith('{{chroot}}'):
relpath = os.path.relpath(filename, '{{chroot}}')
if filename.startswith('{{chroot_parent}}'):
relpath_parent = os.path.relpath(filename, '{{chroot_parent}}')
relpath = relpath_parent.split(os.sep, 1)[1]
filename = os.path.join('[srcroot]', relpath)
line_text = tb.code_context[tb.index]
pre_processed.append((filename, tb.lineno, tb.function, line_text))
Expand Down
3 changes: 1 addition & 2 deletions src/python/pants/backend/python/thrift_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def install_requires(self):
return ['thrift']

def run_thrifts(self):
"""
Generate Python thrift code.
"""Generate Python thrift code.
Thrift fields conflicting with Python keywords are suffixed with a trailing
underscore (e.g.: from_).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def setUp(self):
self.set_options_for_scope('python-setup',
interpreter_cache_dir=os.path.join(self.real_build_root, '.pants.d',
'python-setup', 'interpreters'),
chroot_cache_dir=os.path.join(self.real_build_root, '.pants.d',
'python-setup', 'chroots'),
resolver_cache_ttl=1000000000) # TODO: Do we need this now that there's a default?

@property
Expand Down
32 changes: 13 additions & 19 deletions tests/python/pants_test/backend/python/tasks/test_python_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,15 @@ def task_type(cls):

def setUp(self):
super(PythonEvalTest, self).setUp()

SourceRoot.register('src', PythonBinary, PythonLibrary)
self._create_graph(broken_b_library=True)

def tearDown(self):
super(PythonEvalTest, self).tearDown()
SourceRoot.reset()

def _create_graph(self, broken_b_library):
self.reset_build_graph()

self.a_library = self.create_python_library('src/a', 'a', {'a.py': dedent("""
import inspect
Expand All @@ -43,25 +50,16 @@ class BarB(object):
pass
""")})

# TODO: Presumably this was supposed to be c_library, not override b_library. Unravel and fix.
self.b_library = self.create_python_library('src/c', 'c', {'c.py': dedent("""
from a.a import compile_time_check_decorator
@compile_time_check_decorator
def baz_c():
{}:
pass
""")}, dependencies=['//src/a'])

def fix_c_source():
self.create_file('src/c/c.py', contents=dedent("""
from a.a import compile_time_check_decorator
# Change from decorated function baz_c to decorated class BazC.
@compile_time_check_decorator
class BazC(object):
pass
"""))
self.fix_c_source = fix_c_source
""".format('def baz_c()' if broken_b_library else 'class BazC(object)')
)}, dependencies=['//src/a'])

self.d_library = self.create_python_library('src/d', 'd', { 'd.py': dedent("""
from a.a import compile_time_check_decorator
Expand All @@ -79,10 +77,6 @@ class BazD(object):
dependencies=['//src/a'])
self.h_binary = self.create_python_binary('src/h', 'h', 'a.a')

def tearDown(self):
super(PythonEvalTest, self).tearDown()
SourceRoot.reset()

def _create_task(self, target_roots, options=None):
if options:
self.set_options(**options)
Expand Down Expand Up @@ -128,7 +122,7 @@ def test_compile_incremental_progress(self):
self.assertEqual([self.a_library], e.exception.compiled)
self.assertEqual([self.b_library], e.exception.failed)

self.fix_c_source()
self._create_graph(broken_b_library=False)
python_eval = self._create_task(target_roots=[self.b_library], options={'closure': True})

compiled = python_eval.execute()
Expand Down

0 comments on commit f593380

Please sign in to comment.