Skip to content

Commit

Permalink
Support zinc name hashing.
Browse files Browse the repository at this point in the history
- Upgrade to a recent version of zinc.
- Support version 5 of the analysis serialization format.
- This version moves the CompileSetup sections from the end of
the file to the beginning.
- This version adds a "name hashing" section to the CompileSetup.
- Add a pants option to turn name-hashing on.
- Fix the implementation of is_nonempty_analysis(), which can no
longer simply look at a fixed prefix (because the order of elements in the zinc
analysis file has changed).

We had already added support for splitting/merging the analysis sections
used by name hashing, under the assumption that their structure and semantics
were the same as for their equivalent pre-name-hashing sections.  We had been
told by TypeSafe that this assumption was correct, but had never tested it.
I have now verified that it is indeed true.

Note that when name hashing is turned on, the member* and inheritance* sections
are populated by zinc INSTEAD OF the direct* and public* sections. However the
"used names" section is populated AS WELL AS the "class names" section. This means
that turning on name hashing will cause analysis files to be larger. Whether this
is significant, in particular with respect to split/merge times, needs to be measured. I suspect
it should be OK, since split/merge of these sections is simple - they don't have
the complicated internalization/externalization logic.

Testing Done:
CI passes: https://travis-ci.org/pantsbuild/pants/builds/50907336

Reviewed at https://rbcommons.com/s/twitter/r/1779/
  • Loading branch information
Benjy committed Feb 25, 2015
1 parent 1da125c commit 2d9972a
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 141 deletions.
2 changes: 1 addition & 1 deletion BUILD.tools
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jar_library(name = 'benchmark-java-allocation-instrumenter-2.1',

jar_library(name = 'zinc',
jars = [
jar(org = 'com.typesafe.zinc', name = 'zinc', rev = '0.3.2-M1')
jar(org = 'com.typesafe.zinc', name = 'zinc', rev = '0.3.7')
.exclude(org = 'com.martiansoftware', name = 'nailgun-server')
.exclude(org = 'org.ensime', name = 'ensime-sbt-cmd')
])
Expand Down
37 changes: 26 additions & 11 deletions src/python/pants/backend/jvm/tasks/jvm_compile/analysis_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
unicode_literals, with_statement)

import os
import re

from pants.base.exceptions import TaskError

Expand All @@ -16,24 +17,29 @@ class ParseError(TaskError):

class AnalysisParser(object):
"""Parse a file containing representation of an analysis for some JVM language."""

def __init__(self, classes_dir):
  """
  :param classes_dir: The output dir for classes covered by this analysis.
  """
  self.classes_dir = classes_dir  # The output dir for classes in this analysis.

@property
def empty_test_header(self):
  """The header of a section that will be nonempty iff the analysis is nonempty.

  We look at this section to determine whether the analysis contains any useful data.
  Subclasses return the section name without the trailing colon (is_nonempty_analysis
  appends ':\n' when matching lines).
  """
  raise NotImplementedError('Subclasses must implement.')

def is_nonempty_analysis(self, path):
  """Does the specified analysis file contain information for at least one source file.

  :param path: Path to an analysis file; need not exist.
  :returns: True iff the file exists and the empty-test section has at least one item.

  The analysis format no longer allows a simple fixed-prefix check (the order of
  sections changed), so we scan for the section named by self.empty_test_header
  and inspect its item count instead.
  """
  if not os.path.exists(path):
    return False
  with open(path, 'r') as infile:
    # Skip until we get to the section that will be nonempty iff the analysis is nonempty.
    expected_header = '{0}:\n'.format(self.empty_test_header)
    # NOTE(review): file iteration via .next() is Python 2 style; this loop raises
    # StopIteration if the header is missing entirely — assumes well-formed analysis
    # files. TODO: confirm that malformed files cannot reach this code path.
    while infile.next() != expected_header:
      pass
    # Now see if this section is empty or not.
    return self.parse_num_items(infile.next()) > 0

def parse_from_path(self, infile_path):
"""Parse an analysis instance from a text file."""
Expand Down Expand Up @@ -82,3 +88,12 @@ def parse_deps(self, infile, classpath_indexer):
All paths are absolute.
"""
raise NotImplementedError()

# Matches section-size lines such as '3 items\n'; compiled once at class scope.
_num_items_re = re.compile(r'(\d+) items\n')

def parse_num_items(self, line):
  """Parse a line of the form '<num> items' and returns <num> as an int.

  Raises ParseError if the line does not have the expected form.
  """
  match = self._num_items_re.match(line)
  if match:
    return int(match.group(1))
  raise ParseError('Expected: "<num> items". Found: "{0}"'.format(line))
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
unicode_literals, with_statement)

import os
import re
from collections import defaultdict

from pants.backend.jvm.tasks.jvm_compile.analysis_parser import AnalysisParser, ParseError
Expand All @@ -17,12 +16,11 @@
class JMakeAnalysisParser(AnalysisParser):
"""Parse a file containing representation of an analysis for some JVM language."""

# Pre-change implementation (removed by this commit): a trivial jmake analysis
# always began with exactly this prefix, so emptiness was a fixed-prefix check.
def empty_prefix(self):
  return 'pcd entries:\n0 items\n'
# Post-change replacement: AnalysisParser.is_nonempty_analysis now scans for this
# section header and checks its item count instead of comparing a fixed prefix.
empty_test_header = 'pcd entries'

def parse(self, infile):
self._expect_header(infile.readline(), 'pcd entries')
num_pcd_entries = self._parse_num_items(infile.readline())
num_pcd_entries = self.parse_num_items(infile.readline())
pcd_entries = []
for i in xrange(0, num_pcd_entries):
line = infile.readline()
Expand All @@ -35,7 +33,7 @@ def parse(self, infile):

def parse_products(self, infile):
self._expect_header(infile.readline(), 'pcd entries')
num_pcd_entries = self._parse_num_items(infile.readline())
num_pcd_entries = self.parse_num_items(infile.readline())
ret = defaultdict(list)
# Parse more efficiently than above, since we only care about
# the first two elements in the line.
Expand All @@ -52,7 +50,7 @@ def parse_deps(self, infile, classpath_indexer):
buildroot = get_buildroot()
classpath_elements_by_class = classpath_indexer()
self._expect_header(infile.readline(), 'pcd entries')
num_pcd_entries = self._parse_num_items(infile.readline())
num_pcd_entries = self.parse_num_items(infile.readline())
for _ in xrange(0, num_pcd_entries):
infile.readline() # Skip these lines.
src_to_deps = self._parse_deps_at_position(infile)
Expand All @@ -73,7 +71,7 @@ def parse_deps(self, infile, classpath_indexer):

def _parse_deps_at_position(self, infile):
self._expect_header(infile.readline(), 'dependencies')
num_deps = self._parse_num_items(infile.readline())
num_deps = self.parse_num_items(infile.readline())
src_to_deps = {}
for i in xrange(0, num_deps):
tpl = infile.readline().split('\t')
Expand All @@ -83,15 +81,6 @@ def _parse_deps_at_position(self, infile):
src_to_deps[src] = deps
return src_to_deps

# Matches section-size lines such as '3 items\n'; compiled once at class scope.
num_items_re = re.compile(r'(\d+) items\n')

def _parse_num_items(self, line):
  """Parse a line of the form '<num> items' and returns <num> as an int.

  Raises ParseError for any line not matching the expected form.
  """
  match = JMakeAnalysisParser.num_items_re.match(line)
  if match is None:
    raise ParseError('Expected: "<num> items". Found: "%s"' % line)
  return int(match.group(1))

def _expect_header(self, line, header):
expected = header + ':\n'
if line != expected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def register_options(cls, register):
# Note: Used in ZincUtils.
# TODO: Revisit this. It's unintuitive for ZincUtils to reach back into the task for options.
register('--plugins', action='append', help='Use these scalac plugins.')
register('--name-hashing', action='store_true', default=False, help='Use zinc name hashing.')
ZincUtils.register_options(register, cls.register_jvm_tool)

def __init__(self, *args, **kwargs):
Expand Down
131 changes: 58 additions & 73 deletions src/python/pants/backend/jvm/tasks/jvm_compile/scala/zinc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
unicode_literals, with_statement)

import itertools
import json
import os
from collections import defaultdict

Expand Down Expand Up @@ -40,10 +39,6 @@ class ZincAnalysisElement(object):
# The section names for the sections in this element. Subclasses override.
headers = ()

@classmethod
def from_json_obj(cls, obj):
  """Create an instance from a JSON dict keyed by this class's section headers."""
  args = [obj[header] for header in cls.headers]
  return cls(args)

def __init__(self, args):
# self.args is a list of maps from key to list of values. Each map corresponds to a
# section in the analysis file. E.g.,
Expand Down Expand Up @@ -136,7 +131,7 @@ class ZincAnalysis(Analysis):

# Implementation of class method required by Analysis.

# Pre-change value (removed by this commit): the format written by the older zinc.
FORMAT_VERSION_LINE = 'format version: 4\n'
# Post-change value: version 5 moves CompileSetup to the front of the file and
# adds a 'name hashing' section (see the commit description).
FORMAT_VERSION_LINE = 'format version: 5\n'

@staticmethod
def merge_dicts(dicts):
Expand All @@ -153,6 +148,10 @@ def merge_dicts(dicts):
def merge(cls, analyses):
# Note: correctly handles "internalizing" external deps that must be internal post-merge.

# "Merge" compile setup. We assume that all merged analyses have the same setup, so we just take the
# setup of the first analysis. TODO: Validate that all analyses have the same setup.
compile_setup = analyses[0].compile_setup if len(analyses) > 0 else CompileSetup((defaultdict(list), ))

# Merge relations.
src_prod = ZincAnalysis.merge_dicts([a.relations.src_prod for a in analyses])
binary_dep = ZincAnalysis.merge_dicts([a.relations.binary_dep for a in analyses])
Expand Down Expand Up @@ -235,38 +234,37 @@ def merge_dependencies(internals, externals):
compilations_dict['%03d' % i] = [v]
compilations = Compilations((compilations_dict, ))

compile_setup = analyses[0].compile_setup if len(analyses) > 0 else CompileSetup((defaultdict(list), ))
return ZincAnalysis(relations, stamps, apis, source_infos, compilations, compile_setup)
return ZincAnalysis(compile_setup, relations, stamps, apis, source_infos, compilations)

# Pre-change constructor (removed by this commit): compile_setup came last.
def __init__(self, relations, stamps, apis, source_infos, compilations, compile_setup):
  (self.relations, self.stamps, self.apis, self.source_infos, self.compilations, self.compile_setup) = \
    (relations, stamps, apis, source_infos, compilations, compile_setup)

# Post-change constructor: compile_setup is now first, mirroring its new position
# at the beginning of a version 5 analysis file.
def __init__(self, compile_setup, relations, stamps, apis, source_infos, compilations):
  (self.compile_setup, self.relations, self.stamps, self.apis, self.source_infos, self.compilations) = \
    (compile_setup, relations, stamps, apis, source_infos, compilations)

def diff(self, other):
  """Returns a list of element diffs, one per element where self and other differ.

  :param other: Another analysis to compare against, element by element.
  :returns: A list of element-diff objects (possibly empty).
  """
  element_diffs = []
  # Compare elements pairwise, in the same order they appear in a version 5
  # analysis file (compile_setup first).
  element_pairs = zip(
      (self.compile_setup, self.relations, self.stamps, self.apis,
       self.source_infos, self.compilations),
      (other.compile_setup, other.relations, other.stamps, other.apis,
       other.source_infos, other.compilations))
  for self_elem, other_elem in element_pairs:
    element_diff = self_elem.diff(other_elem)
    if element_diff.is_different():
      element_diffs.append(element_diff)
  return element_diffs

def __eq__(self, other):
  """Structural equality: all six elements must compare equal, pairwise."""
  return ((self.compile_setup, self.relations, self.stamps, self.apis,
           self.source_infos, self.compilations) ==
          (other.compile_setup, other.relations, other.stamps, other.apis,
           other.source_infos, other.compilations))

def __ne__(self, other):
  # Python 2 does not derive __ne__ from __eq__, so define it explicitly.
  return not self.__eq__(other)

def __hash__(self):
  # Keep in sync with __eq__: hash over the same tuple of elements.
  return hash((self.compile_setup, self.relations, self.stamps, self.apis,
               self.source_infos, self.compilations))

# Implementation of methods required by Analysis.

Expand All @@ -284,9 +282,7 @@ def split(self, splits, catchall=False):
binary_dep_splits = self._split_dict(self.relations.binary_dep, splits)
classes_splits = self._split_dict(self.relations.classes, splits)

# For historical reasons, external deps are specified as src->class while internal deps are
# specified as src->src. So we pick a representative class for each src.
representatives = dict((k, min(vs)) for k, vs in self.relations.classes.items())
representatives = dict((k, self.representative(k, vs)) for k, vs in self.relations.classes.items())

def split_dependencies(all_internal, all_external):
internals = []
Expand Down Expand Up @@ -373,40 +369,30 @@ def split_dependencies(all_internal, all_external):

analyses = []
for relations, stamps, apis, source_infos in zip(relations_splits, stamps_splits, apis_splits, source_info_splits):
analyses.append(ZincAnalysis(relations, stamps, apis, source_infos, self.compilations, self.compile_setup))
analyses.append(ZincAnalysis(self.compile_setup, relations, stamps, apis, source_infos, self.compilations))

return analyses

def write(self, outfile, rebasings=None):
  """Write this analysis in version 5 text format.

  :param outfile: An open file-like object to write to.
  :param rebasings: Optional path-rebasing rules, passed through to each element.
  """
  outfile.write(ZincAnalysis.FORMAT_VERSION_LINE)
  # Version 5 moved the CompileSetup sections from the end of the file to the
  # beginning, so compile_setup is written first (and exactly once).
  self.compile_setup.write(outfile, inline_vals=True, rebasings=rebasings)
  self.relations.write(outfile, rebasings=rebasings)
  self.stamps.write(outfile, rebasings=rebasings)
  self.apis.write(outfile, inline_vals=False, rebasings=rebasings)
  self.source_infos.write(outfile, inline_vals=False, rebasings=rebasings)
  self.compilations.write(outfile, inline_vals=True, rebasings=rebasings)

# Extra methods on this class only.

# Anonymize the contents of this analysis. Useful for creating test data.
# Translate the contents of this analysis. Useful for creating anonymized test data.
# Note that the resulting file is not a valid analysis, as the base64-encoded serialized objects
# will be replaced with random base64 strings. So these are useful for testing analysis parsing,
# splitting and merging, but not for actually reading into Zinc.
def translate(self, token_translator):
  """Translate (e.g. anonymize) the contents of every element of this analysis.

  :param token_translator: Object each element uses to rewrite its tokens.
  Useful for creating anonymized test data; the result is not a valid analysis
  (see the notes preceding this method).
  """
  # Same element order as a version 5 analysis file: compile_setup first.
  for element in [self.compile_setup, self.relations, self.stamps, self.apis,
                  self.source_infos, self.compilations]:
    element.translate(token_translator)

# Write this analysis to JSON. (These helpers were removed by this commit, along
# with the json import and the ZincAnalysisJSONEncoder they depend on.)
def write_json_to_path(self, outfile_path):
  """Serialize this analysis as JSON to the file at outfile_path."""
  with open(outfile_path, 'w') as outfile:
    self.write_json(outfile)

def write_json(self, outfile):
  """Dump all six elements as one JSON object keyed by element name."""
  obj = dict(zip(('relations', 'stamps', 'apis', 'source_infos', 'compilations', 'compile_setup'),
                 (self.relations, self.stamps, self.apis, self.source_infos, self.compilations, self.compile_setup)))
  json.dump(obj, outfile, cls=ZincAnalysisJSONEncoder, sort_keys=True, indent=2)

def _split_dict(self, d, splits):
"""Split a dict by its keys.
Expand All @@ -422,6 +408,39 @@ def _split_dict(self, d, splits):
ret.append(dict_split)
return ret

def representative(self, src, classes):
  """Pick a representative class for a source file.

  For historical reasons, external deps are specified as src->class while internal deps are
  specified as src->src. So when splitting we need to pick a representative class for each
  src. We must pick consistently.

  :param src: Path of a source file.
  :param classes: Non-empty collection of fully-qualified class names produced by src.
  :returns: One fully-qualified class name drawn from classes.
  """
  primary_class_name = os.path.splitext(os.path.basename(src))[0]
  for fqcn in classes:
    if fqcn.rsplit('.', 1)[-1] == primary_class_name:
      # For ease of debugging, pick the class with the same name as the source file, if it exists.
      # Bug fix: return the fully-qualified name (fqcn), not the bare class name, so the
      # result is always an actual member of `classes`, consistent with the min() fallback.
      return fqcn
  # Pick the class that sorts lowest in dictionary order.
  return min(classes)


class CompileSetup(ZincAnalysisElement):
  """The compile-setup element of an analysis.

  In format version 5 this element appears at the beginning of the file and
  includes the new 'name hashing' section.
  """

  headers = ('output mode', 'output directories','compile options','javac options',
             'compiler version', 'compile order', 'name hashing')

  def __init__(self, args):
    super(CompileSetup, self).__init__(args)
    (self.output_mode, self.output_dirs, self.compile_options, self.javac_options,
     self.compiler_version, self.compile_order, self.name_hashing) = self.args

  def translate(self, token_translator):
    """Anonymize output dirs and drop compile options that mention custom plugins."""
    self.translate_values(token_translator, self.output_dirs)
    for k, vs in list(self.compile_options.items()):  # Make a copy, so we can del as we go.
      # Remove mentions of custom plugins. Bug fix: delete the key at most once — the
      # original deleted inside the inner value loop, raising KeyError whenever two
      # values matched (e.g. both an -Xplugin and a -P option under the same key).
      if any(v.startswith('-Xplugin') or v.startswith('-P') for v in vs):
        del self.compile_options[k]


class Relations(ZincAnalysisElement):
headers = ('products', 'binary dependencies',
Expand Down Expand Up @@ -513,37 +532,3 @@ def __init__(self, args):

def translate(self, token_translator):
  # No-op: this element has no values that need translating/anonymizing.
  # NOTE(review): the enclosing class header is outside this diff hunk — confirm
  # which element this belongs to before relying on this comment.
  pass


class CompileSetup(ZincAnalysisElement):
  """Pre-change CompileSetup (removed by this commit): no 'name hashing' section."""

  headers = ('output mode', 'output directories','compile options','javac options',
             'compiler version', 'compile order')

  def __init__(self, args):
    super(CompileSetup, self).__init__(args)
    (self.output_mode, self.output_dirs, self.compile_options, self.javac_options,
     self.compiler_version, self.compile_order) = self.args

  def translate(self, token_translator):
    """Anonymize output dirs and drop compile options that mention custom plugins."""
    self.translate_values(token_translator, self.output_dirs)
    for k, vs in list(self.compile_options.items()):  # Make a copy, so we can del as we go.
      # Remove mentions of custom plugins. Bug fix: delete the key at most once — the
      # original deleted inside the inner value loop, raising KeyError whenever two
      # values matched under the same key.
      if any(v.startswith('-Xplugin') or v.startswith('-P') for v in vs):
        del self.compile_options[k]


class ZincAnalysisJSONEncoder(json.JSONEncoder):
  """A custom encoder for writing analysis elements as JSON.

  Not currently used, but might be useful in the future, e.g., for creating javascript-y
  analysis browsing tools.
  """

  def default(self, obj):
    """Convert a ZincAnalysisElement to a dict keyed by its section headers."""
    if isinstance(obj, ZincAnalysisElement):
      # Map each section header to the corresponding entry of the element's args.
      return dict(zip(type(obj).headers, obj.args))
    # Bug fix: return the base-class result instead of discarding it. The base
    # implementation raises TypeError for unserializable objects, so behavior was
    # accidentally correct, but dropping the return value breaks cooperative
    # subclassing (a further subclass's default() result would be lost as None).
    return super(ZincAnalysisJSONEncoder, self).default(obj)
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __unicode__(self):
for header, arg_diff in self._arg_diffs.items():
if arg_diff.is_different():
parts.append('Section "%s" differs:\n' % header)
parts.append(arg_diff)
parts.append(str(arg_diff))
parts.append('\n\n')
return ''.join(parts) # '' is a unicode, so the entire result will be.

Expand Down
Loading

0 comments on commit 2d9972a

Please sign in to comment.