Cache files created by (specially designed) annotation processors.

These annotation processors (so far, only twitter.common.args) write a file into META-INF which lists all of the files they created. Make pants process that file to decide which files should go in the artifact cache. Testing Done: Ran integration tests. Bugs closed: 132 Reviewed at https://rbcommons.com/s/twitter/r/1250/
benjyw · Nov 19, 2014 · e556eaa · e556eaa
1 parent 68c2f6b
commit e556eaa
Show file tree

Hide file tree

Showing 7 changed files with 368 additions and 4 deletions.
diff --git a/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py b/src/python/pants/backend/jvm/tasks/jvm_compile/jvm_compile.py
@@ -6,8 +6,10 @@
                         print_function, unicode_literals)
 
 from collections import defaultdict
+from textwrap import dedent
 import itertools
 import os
+import re
 import shutil
 import sys
 import uuid
@@ -17,6 +19,7 @@
 from pants.backend.core.tasks.group_task import GroupMember
 from pants.backend.jvm.tasks.jvm_compile.jvm_dependency_analyzer import JvmDependencyAnalyzer
 from pants.backend.jvm.tasks.jvm_compile.jvm_fingerprint_strategy import JvmFingerprintStrategy
+from pants.backend.jvm.tasks.jvm_compile.resource_mapping import ResourceMapping
 from pants.backend.jvm.tasks.jvm_tool_task_mixin import JvmToolTaskMixin
 from pants.backend.jvm.tasks.nailgun_task import NailgunTaskBase
 from pants.base.build_environment import get_buildroot, get_scm
@@ -81,7 +84,7 @@ def name(cls):
 
   @classmethod
   def product_types(cls):
-    return ['classes_by_target', 'classes_by_source']
+    return ['classes_by_target', 'classes_by_source', 'resources_by_target']
 
   def select(self, target):
     return target.has_sources(self._file_suffix)
@@ -573,12 +576,18 @@ def _write_to_artifact_cache(self, analysis_file, vts, sources_by_target):
     # Set up args for artifact cache updating.
     vts_artifactfiles_pairs = []
     classes_by_source = self._compute_classes_by_source(analysis_file)
+    resources_by_target = self.context.products.get_data('resources_by_target')
     for target, sources in sources_by_target.items():
       if target.has_label('no_cache'):
         continue
       artifacts = []
+      if resources_by_target is not None:
+        for _, paths in resources_by_target.get(target).abs_paths():
+          artifacts.extend(paths)
       for source in sources:
-        artifacts.extend(classes_by_source.get(source, []))
+        classes = classes_by_source.get(source, [])
+        artifacts.extend(classes)
+
       vt = vt_by_target.get(target)
       if vt is not None:
         # NOTE: analysis_file doesn't exist yet.
@@ -774,8 +783,17 @@ def _create_empty_products(self):
       self.context.products.safe_create_data('classes_by_source', make_products)
     if self.context.products.is_required_data('classes_by_target'):
       self.context.products.safe_create_data('classes_by_target', make_products)
-    if self.context.products.is_required_data('resources_by_target'):
-      self.context.products.safe_create_data('resources_by_target', make_products)
+
+    # Whether or not anything else requires resources_by_target, this task
+    # uses it internally.
+    self.context.products.safe_create_data('resources_by_target', make_products)
+
+  def _resources_by_class_file(self, class_file_name, resource_mapping):
+    assert class_file_name.endswith(".class")
+    assert class_file_name.startswith(self.workdir)
+    class_file_name = class_file_name[len(self._classes_dir) + 1:-len(".class")]
+    class_name = class_file_name.replace("/", ".")
+    return resource_mapping.get(class_name, [])
 
   def _register_products(self, targets, analysis_file):
     classes_by_source = self.context.products.get_data('classes_by_source')
@@ -784,10 +802,15 @@ def _register_products(self, targets, analysis_file):
 
     if classes_by_source is not None or classes_by_target is not None:
       computed_classes_by_source = self._compute_classes_by_source(analysis_file)
+      resource_mapping = ResourceMapping(self._classes_dir)
       for target in targets:
         target_products = classes_by_target[target] if classes_by_target is not None else None
         for source in self._sources_by_target.get(target, []):  # Source is relative to buildroot.
           classes = computed_classes_by_source.get(source, [])  # Classes are absolute paths.
+          for cls in classes:
+            resources = self._resources_by_class_file(cls, resource_mapping)
+            resources_by_target[target].add_abs_paths(self._classes_dir, resources)
+
           if classes_by_target is not None:
             target_products.add_abs_paths(self._classes_dir, classes)
           if classes_by_source is not None:

diff --git a/src/python/pants/backend/jvm/tasks/jvm_compile/resource_mapping.py b/src/python/pants/backend/jvm/tasks/jvm_compile/resource_mapping.py
@@ -0,0 +1,100 @@
+# coding=utf-8
+# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+from __future__ import (nested_scopes, generators, division, absolute_import, with_statement,
+                        print_function, unicode_literals)
+
+from collections import defaultdict
+import os
+import re
+
+
class ResourceMapping(object):
  """Lazily loaded lookup from class name to the resource files recorded for that class.

  The data comes from the files under META-INF/compiler/resource-mappings inside the
  given classes dir.  The files are read on first access and the result is cached.
  """

  # '$' is accepted so that nested class names (Outer$Inner) can be parsed.
  RESOURCES_BY_CLASS_NAME_RE = re.compile(r'^(?P<classname>[\w+.$]+) -> (?P<path>.+)$')

  def __init__(self, classes_dir):
    """
    :param classes_dir: Directory that contains the META-INF/compiler/resource-mappings dir.
    """
    self._classes_dir = classes_dir
    self._resource_mappings = None

  def _read_resource_mappings(self, mappings, lines):
    """Parses one resource-mappings file and appends its entries to `mappings`.

    :param mappings: Dict-like of class name -> list of paths; must create the list on first
      access to a key (e.g. a defaultdict(list)).  Mutated in place.
    :param lines: Iterable of the lines of a single mapping file.
    :raises ValueError: If an item count or an item line is malformed.
    """
    # Imported here so that this class does not rely on the module's import list.
    from textwrap import dedent

    def parse_items(line):
      try:
        count, _ = line.split(" ")
        count = int(count)
        if count < 0:
          # A negative count would stay truthy forever and swallow every following line.
          raise ValueError("item count must not be negative")
        return count
      except ValueError as error:
        raise ValueError(dedent('''
          Unable to parse resource mappings.
          Expected "N items", got "{line}": {error}''').format(line=line, error=error))

    items_left = 0
    section = None
    for line in lines:
      line = line.strip()
      # Skip blank lines and comments.
      if not line or line.startswith("#"):
        continue

      # We have just read the section name and now want to read the number of items.
      if section:
        section = None
        items_left = parse_items(line)
        continue

      # This is the section we are looking for.
      if line == "resources by class name:":
        section = line
        continue

      # Here, we read the individual items.  Lines outside of the section are ignored.
      if items_left:
        items_left -= 1
        match = ResourceMapping.RESOURCES_BY_CLASS_NAME_RE.match(line)
        if not match:
          raise ValueError(dedent('''
            Unable to parse resource mappings.
            Expected classname -> path, got "{line}"''').format(line=line))
        mappings[match.group('classname')].append(match.group('path'))

  @property
  def mappings(self):
    """Returns a dict from class name to file names, from the resource-mappings in META-INF.

    The protocol is that annotation processors create files under
    META-INF/compiler/resource-mappings to describe any new files that
    they create and their relationship to class files.

    Each file contains some number of sections.  Each section starts
    with a section name followed by a colon and a newline.  The next
    line is an integer N followed by 'items' and a newline.  The next
    N lines contain [class name] ' -> ' [absolute output file path]

    Blank lines and lines with leading # (comment lines) are skipped.

    The section this method handles is "resources by class name".

    So far, this protocol is only implemented by
    com.twitter.tools.args.apt.CmdLineProcessor from Twitter Commons;
    that's not enabled by default in pants.

    """
    if self._resource_mappings is not None:
      return self._resource_mappings

    mapping_dir = os.path.join(self._classes_dir, "META-INF", "compiler", "resource-mappings")
    mappings = defaultdict(list)
    if os.path.isdir(mapping_dir):
      # os.listdir returns entries in arbitrary order; sort so the result is deterministic.
      for filename in sorted(os.listdir(mapping_dir)):
        path = os.path.join(mapping_dir, filename)
        if not os.path.isfile(path):
          continue
        with open(path) as f:
          self._read_resource_mappings(mappings, f)

    self._resource_mappings = mappings
    return self._resource_mappings

  def __getitem__(self, key):
    """Returns the paths mapped to `key`, or None (not KeyError) if there are none."""
    return self.mappings.get(key)

  def get(self, key, default=None):
    """Returns the paths mapped to `key`, or `default` if there are none."""
    return self.mappings.get(key, default)
diff --git a/testprojects/src/java/com/pants/testproject/annotation/main/BUILD b/testprojects/src/java/com/pants/testproject/annotation/main/BUILD
@@ -0,0 +1,13 @@
+# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+#
+#  Test project for resource mapping feature
+
+jvm_binary(name='main',
+  source='Main.java',
+  main='com.pants.testproject.annotation.main.Main',
+  basename = 'annotation-example',
+  dependencies=[
+    'testprojects/src/java/com/pants/testproject/annotation/processor',
+  ],
+)
diff --git a/testprojects/src/java/com/pants/testproject/annotation/main/Main.java b/testprojects/src/java/com/pants/testproject/annotation/main/Main.java
@@ -0,0 +1,17 @@
+// Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
+// Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+package com.pants.testproject.annotation.main;
+
+/**
+ * We need this so that there is some class that ResourceMappingProcessor can
+ * put in to the resource-mappings file.  It's marked @Deprecated because it
+ * needs to be annotated with something so that the annotation processor gets
+ * run, and Deprecated is built-in to java.
+ */
+@Deprecated
+public class Main {
+  public static void main(String args[]) {
+    System.out.println("Hello World!");
+  }
+}
diff --git a/testprojects/src/java/com/pants/testproject/annotation/processor/BUILD b/testprojects/src/java/com/pants/testproject/annotation/processor/BUILD
@@ -0,0 +1,13 @@
+# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+#
+#  annotation_processor() target to test resource mapping
+
+annotation_processor(name='processor',
+  sources=globs('*.java'),
+  processors=['com.pants.testproject.annotation.processor.ResourceMappingProcessor'],
+  dependencies=[
+    '3rdparty:guava',
+  ],
+)
+