Skip to content

Commit

Permalink
Speed up typical go builds. (pantsbuild#4362)
Browse files Browse the repository at this point in the history
Resolving remote Go libraries involves injecting targets representing
the remote sources into the build graph, so they can be compiled locally.
This must happen for every target, valid or not, in every build.

Previously, however, this involved two time-consuming operations
per item in the transitive closure of all remote deps:

- A network call to fetch the go meta tag, to figure out the
  remote library root from the import path (at least for remote libraries
  fetched by cloning).

- Shelling out to `go list` to get a library's imports.

This change caches both results in workdir files, for reuse in
subsequent pants runs.  This restores some of the snappiness of
Go when run in Pants.

Also tweaks a few other go concerns, such as more expressive logging.
  • Loading branch information
benjyw authored Mar 23, 2017
1 parent 58b724d commit 9a0066b
Showing 8 changed files with 110 additions and 48 deletions.
5 changes: 3 additions & 2 deletions contrib/go/src/python/pants/contrib/go/tasks/go_checkstyle.py
Original file line number Diff line number Diff line change
@@ -38,9 +38,10 @@ def execute(self):
try:
output = subprocess.check_output(args)
except subprocess.CalledProcessError as e:
raise TaskError('{} failed with exit code {}'.format(args, e.returncode), exit_code=e.returncode)
raise TaskError('{} failed with exit code {}'.format(' '.join(args), e.returncode),
exit_code=e.returncode)
if output:
raise TaskError('gofmt command {} failed with output {}'.format(args, output))
raise TaskError('gofmt command {} failed with output {}'.format(' '.join(args), output))

def calculate_sources(self, targets):
sources = set()
8 changes: 5 additions & 3 deletions contrib/go/src/python/pants/contrib/go/tasks/go_compile.py
Original file line number Diff line number Diff line change
@@ -56,9 +56,11 @@ def execute(self):

def _go_install(self, target, gopath):
args = self.get_options().build_flags.split() + [target.import_path]
result, go_cmd = self.go_dist.execute_go_cmd('install', gopath=gopath, args=args,
workunit_factory=self.context.new_workunit,
workunit_labels=[WorkUnitLabel.COMPILER])
result, go_cmd = self.go_dist.execute_go_cmd(
'install', gopath=gopath, args=args,
workunit_factory=self.context.new_workunit,
workunit_name='install {}'.format(target.address.spec),
workunit_labels=[WorkUnitLabel.COMPILER])
if result != 0:
raise TaskError('{} failed with exit code {}'.format(go_cmd, result))

94 changes: 77 additions & 17 deletions contrib/go/src/python/pants/contrib/go/tasks/go_fetch.py
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@
from pants.build_graph.address import Address
from pants.build_graph.address_lookup_error import AddressLookupError
from pants.util.contextutil import temporary_dir
from pants.util.dirutil import safe_mkdir
from pants.util.dirutil import safe_mkdir, safe_concurrent_creation

from pants.contrib.go.subsystems.fetcher_factory import FetcherFactory
from pants.contrib.go.targets.go_remote_library import GoRemoteLibrary
@@ -23,6 +23,10 @@
class GoFetch(GoTask):
"""Fetches third-party Go libraries."""

@classmethod
def implementation_version(cls):
return super(GoFetch, cls).implementation_version() + [('GoFetch', 2)]

@classmethod
def subsystem_dependencies(cls):
return super(GoFetch, cls).subsystem_dependencies() + (FetcherFactory,)
@@ -37,8 +41,7 @@ def register_options(cls, register):

@property
def cache_target_dirs(self):
# TODO(John Sirois): See TODO in _transitive_download_remote_libs, re-consider how artifact
# caching works for fetches.
# TODO(John Sirois): See TODO in _fetch_pkg, re-consider how artifact caching works for fetches.
return True

def execute(self):
@@ -61,7 +64,8 @@ def _log_undeclared_deps(self, undeclared_deps):
'at {address})'.format(import_path=dep_import_path,
address=address.reference()))

def _get_fetcher(self, import_path):
@staticmethod
def _get_fetcher(import_path):
return FetcherFactory.global_instance().get_fetcher(import_path)

def _fetch_pkg(self, gopath, pkg, rev):
@@ -73,10 +77,11 @@ def _fetch_pkg(self, gopath, pkg, rev):
# Only fetch each remote root once.
if not os.path.exists(root_dir):
with temporary_dir() as tmp_fetch_root:
fetcher.fetch(dest=tmp_fetch_root, rev=rev)
safe_mkdir(root_dir)
for path in os.listdir(tmp_fetch_root):
shutil.move(os.path.join(tmp_fetch_root, path), os.path.join(root_dir, path))
with self.context.new_workunit('fetch {}'.format(pkg)):
fetcher.fetch(dest=tmp_fetch_root, rev=rev)
safe_mkdir(root_dir)
for path in os.listdir(tmp_fetch_root):
shutil.move(os.path.join(tmp_fetch_root, path), os.path.join(root_dir, path))

# TODO(John Sirois): Circle back and get rid of this symlink tree.
# GoWorkspaceTask will further symlink a single package from the tree below into a
@@ -93,10 +98,29 @@ def _fetch_pkg(self, gopath, pkg, rev):
for path in os.listdir(root_dir):
os.symlink(os.path.join(root_dir, path), os.path.join(dest_dir, path))

def _map_fetched_remote_source(self, go_remote_lib, gopath, all_known_remote_libs, resolved_remote_libs, undeclared_deps):
for remote_import_path in self._get_remote_import_paths(go_remote_lib.import_path, gopath=gopath):
fetcher = self._get_fetcher(remote_import_path)
remote_root = fetcher.root()
# Note: Will update import_root_map.
def _map_fetched_remote_source(self, go_remote_lib, gopath, all_known_remote_libs,
resolved_remote_libs, undeclared_deps, import_root_map):
# See if we've computed the remote import paths for this rev of this lib in a previous run.
remote_import_paths_cache = os.path.join(os.path.dirname(gopath), 'remote_import_paths.txt')
if os.path.exists(remote_import_paths_cache):
with open(remote_import_paths_cache, 'r') as fp:
remote_import_paths = [line.decode('utf8').strip() for line in fp.readlines()]
else:
remote_import_paths = self._get_remote_import_paths(go_remote_lib.import_path,
gopath=gopath)
with safe_concurrent_creation(remote_import_paths_cache) as safe_path:
with open(safe_path, 'w') as fp:
for path in remote_import_paths:
fp.write('{}\n'.format(path).encode('utf8'))

for remote_import_path in remote_import_paths:
remote_root = import_root_map.get(remote_import_path)
if remote_root is None:
fetcher = self._get_fetcher(remote_import_path)
remote_root = fetcher.root()
import_root_map[remote_import_path] = remote_root

spec_path = os.path.join(go_remote_lib.target_base, remote_root)

package_path = GoRemoteLibrary.remote_package_path(remote_root, remote_import_path)
@@ -107,7 +131,8 @@ def _map_fetched_remote_source(self, go_remote_lib, gopath, all_known_remote_lib
try:
# If we've already resolved a package from this remote root, its ok to define an
# implicit synthetic remote target for all other packages in the same remote root.
same_remote_libs = [lib for lib in all_known_remote_libs if spec_path == lib.address.spec_path]
same_remote_libs = [lib for lib in all_known_remote_libs
if spec_path == lib.address.spec_path]
implicit_ok = any(same_remote_libs)

# If we're creating a synthetic remote target, we should pin it to the same
@@ -150,17 +175,34 @@ def _transitive_download_remote_libs(self, go_remote_libs, all_known_remote_libs
go_remote_lib_src = self.context.products.get_data('go_remote_lib_src')

with self.invalidated(go_remote_libs) as invalidation_check:
# We accumulate mappings from import path to root (e.g., example.org/pkg/foo -> example.org)
# from all targets in this map, so that targets share as much of this information as
# possible during this run.
# We cache these mappings, to avoid repeatedly fetching them over the network via the
# meta tag protocol. Note that this mapping is unversioned: It's defined as "whatever meta
# tag is currently being served at the relevant URL", which is inherently independent of
# the rev of the remote library. We (and the entire Go ecosystem) assume that this mapping
# never changes, in practice.
import_root_map = {}
for vt in invalidation_check.all_vts:
go_remote_lib = vt.target
gopath = vt.results_dir
import_root_map_path = os.path.join(vt.results_dir, 'pkg_root_map.txt')
import_root_map.update(self._read_import_root_map_file(import_root_map_path))

go_remote_lib = vt.target
gopath = os.path.join(vt.results_dir, 'gopath')
if not vt.valid:
self._fetch_pkg(gopath, go_remote_lib.import_path, go_remote_lib.rev)
# _map_fetched_remote_source() will modify import_root_map.
self._map_fetched_remote_source(go_remote_lib, gopath, all_known_remote_libs,
resolved_remote_libs, undeclared_deps)

resolved_remote_libs, undeclared_deps, import_root_map)
go_remote_lib_src[go_remote_lib] = os.path.join(gopath, 'src', go_remote_lib.import_path)

# Cache the mapping against this target's key. Note that because we accumulate
# mappings across targets, the file may contain mappings that this target doesn't
# need or care about (although it will contain all the mappings this target does need).
# But the file is small, so there's no harm in this redundancy.
self._write_import_root_map_file(import_root_map_path, import_root_map)

# Recurse after the invalidated block, so the libraries we downloaded are now "valid"
# and thus we don't try to download a library twice.
trans_undeclared_deps = self._transitive_download_remote_libs(resolved_remote_libs,
@@ -222,3 +264,21 @@ def _get_remote_import_paths(self, pkg, gopath=None):
# We assume relative imports are local to the package and skip attempts to
# recursively resolve them.
not self._is_relative(imp))]

@staticmethod
def _read_import_root_map_file(path):
"""Reads a file mapping import paths to roots (e.g., example.org/pkg/foo -> example.org)."""
if os.path.exists(path):
with open(path, 'r') as fp:
return dict({import_path: root for import_path, root in
(x.decode('utf8').strip().split('\t') for x in fp.readlines())})
else:
return {}

@staticmethod
def _write_import_root_map_file(path, import_root_map):
"""Writes a file mapping import paths to roots."""
with safe_concurrent_creation(path) as safe_path:
with open(safe_path, 'w') as fp:
for import_path, root in sorted(import_root_map.items()):
fp.write('{}\t{}\n'.format(import_path, root).encode('utf8'))
3 changes: 2 additions & 1 deletion contrib/go/src/python/pants/contrib/go/tasks/go_task.py
Original file line number Diff line number Diff line change
@@ -143,7 +143,8 @@ def list_imports(self, pkg, gopath=None):
of `pkg`.
"""
go_cmd = self._go_dist.create_go_cmd('list', args=['-json', pkg], gopath=gopath)
with self._workunit_factory(pkg, cmd=str(go_cmd), labels=[WorkUnitLabel.TOOL]) as workunit:
with self._workunit_factory('list {}'.format(pkg), cmd=str(go_cmd),
labels=[WorkUnitLabel.TOOL]) as workunit:
# TODO(John Sirois): It would be nice to be able to tee the stdout to the workunit so we have
# a capture of the json available for inspection in the server console.
process = go_cmd.spawn(stdout=subprocess.PIPE, stderr=workunit.output('stderr'))
2 changes: 1 addition & 1 deletion src/python/pants/backend/jvm/tasks/bootstrap_jvm_tools.py
Original file line number Diff line number Diff line change
@@ -109,7 +109,7 @@ def _tool_resolve_error(cls, error, dep_spec, jvm_tool):
return cls.ToolResolveError(msg)

@classmethod
def _alternate_target_roots(cls, options, address_mapper, build_graph):
def get_alternate_target_roots(cls, options, address_mapper, build_graph):
processed = set()
for jvm_tool in JvmToolMixin.get_registered_tools():
dep_spec = jvm_tool.dep_spec(options)
8 changes: 4 additions & 4 deletions src/python/pants/engine/round_engine.py
Original file line number Diff line number Diff line change
@@ -143,13 +143,13 @@ def _visit_goal(self, goal, context, goal_info_by_goal, target_roots_replacement
tasktypes_by_name[task_name] = task_type
visited_task_types.add(task_type)

alternate_target_roots = task_type._alternate_target_roots(context.options,
context.address_mapper,
context.build_graph)
alternate_target_roots = task_type.get_alternate_target_roots(context.options,
context.address_mapper,
context.build_graph)
target_roots_replacement.propose_alternates(task_type, alternate_target_roots)

round_manager = RoundManager(context)
task_type._prepare(context.options, round_manager)
task_type.invoke_prepare(context.options, round_manager)
try:
dependencies = round_manager.get_dependencies()
for producer_info in dependencies:
32 changes: 15 additions & 17 deletions src/python/pants/task/task.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,6 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import logging
import os
from abc import abstractmethod
from contextlib import contextmanager
@@ -28,9 +27,6 @@
from pants.util.meta import AbstractClass


logger = logging.getLogger(__name__)


class TaskBase(SubsystemClientMixin, Optionable, AbstractClass):
"""Defines a lifecycle that prepares a task for execution and provides the base machinery
needed to execute it.
@@ -126,9 +122,8 @@ def _scoped_options(cls, options):
return options[cls.options_scope]

@classmethod
def _alternate_target_roots(cls, options, address_mapper, build_graph):
def get_alternate_target_roots(cls, options, address_mapper, build_graph):
# Subclasses should not generally need to override this method.
# TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
return cls.alternate_target_roots(cls._scoped_options(options), address_mapper, build_graph)

@classmethod
@@ -144,9 +139,8 @@ def alternate_target_roots(cls, options, address_mapper, build_graph):
"""

@classmethod
def _prepare(cls, options, round_manager):
def invoke_prepare(cls, options, round_manager):
# Subclasses should not generally need to override this method.
# TODO(John Sirois): Kill when killing GroupTask as part of RoundEngine parallelization.
return cls.prepare(cls._scoped_options(options), round_manager)

@classmethod
@@ -317,10 +311,13 @@ def invalidated(self,
:API: public
:param targets: The targets to check for changes.
:param invalidate_dependents: If True then any targets depending on changed targets are invalidated.
:param fingerprint_strategy: A FingerprintStrategy instance, which can do per task, finer grained
fingerprinting of a given Target.
:param targets: The targets to check for changes.
:param invalidate_dependents: If True then any targets depending on changed targets are
invalidated.
:param silent: If true, suppress logging information about target invalidation.
:param fingerprint_strategy: A FingerprintStrategy instance, which can do per task,
finer grained fingerprinting of a given Target.
:param topological_order: Whether to invalidate in dependency order.
If no exceptions are thrown by work in the block, the build cache is updated for the targets.
Note: the artifact cache is not updated. That must be done manually.
@@ -374,8 +371,8 @@ def invalidated(self,

if len(targets):
msg_elements = ['Invalidated ',
items_to_report_element([t.address.reference() for t in targets], 'target')]
msg_elements.append('.')
items_to_report_element([t.address.reference() for t in targets], 'target'),
'.']
self.context.log.info(*msg_elements)

invalidation_report = self.context.invalidation_report
@@ -568,9 +565,10 @@ def determine_target_roots(self, goal_name, predicate=None):
:param callable predicate: The predicate to pass to `context.scan().targets(predicate=X)`.
"""
if not self.context.target_roots and not self.get_options().enable_v2_engine:
logger.warn('The behavior of `./pants {0}` (no explicit targets) will soon become a no-op. '
'To remove this warning, please specify one or more explicit target specs (e.g. '
'`./pants {0} ::`).'.format(goal_name))
self.context.log.warn(
'The behavior of `./pants {0}` (no explicit targets) will soon become a no-op. '
'To remove this warning, please specify one or more explicit target specs (e.g. '
'`./pants {0} ::`).'.format(goal_name))
# For the v1 path, continue the behavior of e.g. `./pants list` implies `./pants list ::`.
return self.context.scan().targets(predicate=predicate)

6 changes: 3 additions & 3 deletions tests/python/pants_test/jvm/jvm_tool_task_test_base.py
Original file line number Diff line number Diff line change
@@ -115,9 +115,9 @@ def prepare_execute(self, context):
# Bootstrap the tools needed by the task under test.
# We need the bootstrap task's workdir to be under the test's .pants.d, so that it can
# use artifact caching. Making it a sibling of the main task's workdir achieves this.
self.bootstrap_task_type._alternate_target_roots(context.options,
self.address_mapper,
self.build_graph)
self.bootstrap_task_type.get_alternate_target_roots(context.options,
self.address_mapper,
self.build_graph)
bootstrap_workdir = os.path.join(os.path.dirname(task.workdir), 'bootstrap_jvm_tools')
self.bootstrap_task_type(context, bootstrap_workdir).execute()
return task

0 comments on commit 9a0066b

Please sign in to comment.