diff --git a/src/python/pants/backend/core/tasks/group_task.py b/src/python/pants/backend/core/tasks/group_task.py index 16ec374598a..1e39715bd99 100644 --- a/src/python/pants/backend/core/tasks/group_task.py +++ b/src/python/pants/backend/core/tasks/group_task.py @@ -7,12 +7,10 @@ import os from abc import abstractmethod, abstractproperty -from collections import defaultdict - -from six.moves import range +from collections import defaultdict, deque from pants.backend.core.tasks.task import Task, TaskBase -from pants.base.build_graph import sort_targets +from pants.base.build_graph import invert_dependencies from pants.base.workunit import WorkUnit from pants.goal.goal import Goal @@ -77,7 +75,7 @@ class GroupIterator(object): @staticmethod def coalesce_targets(targets, discriminator): - """Returns a list of Targets that `targets` depend on sorted from most dependent to least. + """Returns a list of Targets that `targets` depend on sorted from least dependent to most. The targets are grouped where possible by target type as categorized by the given discriminator. @@ -85,50 +83,65 @@ def coalesce_targets(targets, discriminator): optionally enabled by appending a '!' (bang) to the command line target. """ - sorted_targets = filter(discriminator, sort_targets(targets)) - - # can do no better for any of these: - # [] - # [a] - # [a,b] - if len(sorted_targets) <= 2: - return sorted_targets - - # For these, we'd like to coalesce if possible, like: - # [a,b,a,c,a,c] -> [a,a,a,b,c,c] - # adopt a quadratic worst case solution, when we find a type change edge, scan forward for - # the opposite edge and then try to swap dependency pairs to move the type back left to its - # grouping. If the leftwards migration fails due to a dependency constraint, we just stop - # and move on leaving "type islands". + # We want to sort targets topologically, grouping targets of the same type if possible. + # Algorithm: BFS on the dependency graph with a separate queue per each type. + # First, enqueue the least dependent targets (roots). Choose a type with a non-empty queue, + # and process nodes from this queue until it's exhausted, then move on to the next non-empty + # queue. "To process" means to add the node to the resulting list, and to increment + # the number of "satisfied" dependencies for all its direct dependees. For every dependee + # that has all its dependencies satisfied, enqueue it in the corresponding queue. + # Since it's a directed acyclic graph, eventually all targets will be processed and added + # to the resulting list. + # + # This linear-complexity algorithm replaces the worst-case-quadratic-complexity algorithm + # that used DFS for topological sort, then trying to rearrange the targets in the resulting + # list without breaking the sorting order, repeatedly computing full dependency closure + # for the targets in the list. + # + # For benchmarking, "./pants compile" command was executed on a large target with about 1K nodes + # in the dependency graph. The machine was 2013 MPB with SSD. + # The quadratic implementation took on average about 18 seconds. The linear implementation + # took on average about 1 second. + + roots, inverted_deps = invert_dependencies(targets) + + queues = defaultdict(deque) + queues_total_size = 0 + + # Enqueue roots. + for root in roots: + root_type = discriminator(root) + queues[root_type].append(root) + queues_total_size += 1 + + sorted_targets = [] + satisfied_deps = defaultdict(int) current_type = None - - # main scan left to right no backtracking - for i in range(len(sorted_targets) - 1): - current_target = sorted_targets[i] - if current_type != discriminator(current_target): - scanned_back = False - - # scan ahead for next type match - for j in range(i + 1, len(sorted_targets)): - look_ahead_target = sorted_targets[j] - if current_type == discriminator(look_ahead_target): - scanned_back = True - - # swap this guy as far back as we can - for k in range(j, i, -1): - previous_target = sorted_targets[k - 1] - mismatching_types = current_type != discriminator(previous_target) - not_a_dependency = look_ahead_target not in previous_target.closure() - if mismatching_types and not_a_dependency: - sorted_targets[k] = sorted_targets[k - 1] - sorted_targets[k - 1] = look_ahead_target - else: - break # out of k - - break # out of j - - if not scanned_back: # done with coalescing the current type, move on to next - current_type = discriminator(current_target) + # Is there anything left to process? + while queues_total_size > 0: + # Choose a type with a non-empty queue. + for potential_type in queues.keys(): + if queues[potential_type]: + current_type = potential_type + break + # Process targets of this type while possible - they will form a single chunk. + while queues[current_type]: + target = queues[current_type].popleft() + queues_total_size -= 1 + sorted_targets.append(target) + + # Let the dependees know that one more dependency is satisfied. + if target in inverted_deps: + for dependee in inverted_deps[target]: + satisfied_deps[dependee] += 1 + # Does the dependee have all its dependencies satisfied now? + if satisfied_deps[dependee] == len(dependee.dependencies): + dependee_type = discriminator(dependee) + queues[dependee_type].append(dependee) + queues_total_size += 1 + + # Remove targets that are not claimed by any member. + sorted_targets = filter(discriminator, sorted_targets) return sorted_targets @@ -149,13 +162,20 @@ def __iter__(self): yield group_member, chunk def _create_chunks(self): + # memoized mapping from target to its type (i.e. member) + target_to_member = dict() + def discriminator(tgt): + if tgt in target_to_member: + return target_to_member[tgt] for member in self._group_members: if member.select(tgt): + target_to_member[tgt] = member return member + target_to_member[tgt] = None return None - coalesced = list(reversed(self.coalesce_targets(self._targets, discriminator))) + coalesced = self.coalesce_targets(self._targets, discriminator) chunks = [] diff --git a/src/python/pants/base/build_graph.py b/src/python/pants/base/build_graph.py index 6977b092588..459bbc205e9 100644 --- a/src/python/pants/base/build_graph.py +++ b/src/python/pants/base/build_graph.py @@ -409,10 +409,9 @@ def __init__(self, cycle): ' ->\n\t'.join(target.address.spec for target in cycle) )) - -def sort_targets(targets): - """:return: the targets that targets depend on sorted from most dependent to least.""" - roots = OrderedSet() +def invert_dependencies(targets): + """:return: the full graph of dependencies for `targets` and the list of roots.""" + roots = set() inverted_deps = defaultdict(OrderedSet) # target -> dependent targets visited = set() path = OrderedSet() @@ -426,18 +425,26 @@ def invert(target): path.add(target) if target not in visited: visited.add(target) - for dependency in target.dependencies: - inverted_deps[dependency].add(target) - invert(dependency) + if target.dependencies: + for dependency in target.dependencies: + inverted_deps[dependency].add(target) + invert(dependency) else: roots.add(target) + path.remove(target) for target in targets: invert(target) + return roots, inverted_deps + +def sort_targets(targets): + """:return: the targets that `targets` depend on sorted from most dependent to least.""" + + roots, inverted_deps = invert_dependencies(targets) ordered = [] - visited.clear() + visited = set() def topological_sort(target): if target not in visited: diff --git a/tests/python/pants_test/tasks/test_group_task.py b/tests/python/pants_test/tasks/test_group_task.py index 3b81e0fa401..8d585f9a817 100644 --- a/tests/python/pants_test/tasks/test_group_task.py +++ b/tests/python/pants_test/tasks/test_group_task.py @@ -91,13 +91,13 @@ def test(self): self.assertEqual({a_blue}, set(targets)) group_member, targets = chunks[2] - self.assertEqual(self.green, type(group_member)) - self.assertEqual({a_green}, set(targets)) - - group_member, targets = chunks[3] self.assertEqual(self.red, type(group_member)) self.assertEqual({b_red, c_red}, set(targets)) + group_member, targets = chunks[3] + self.assertEqual(self.green, type(group_member)) + self.assertEqual({a_green}, set(targets)) + class BaseGroupTaskTest(BaseTest): def create_targets(self):