Skip to content
This repository has been archived by the owner on Dec 10, 2020. It is now read-only.

Commit

Permalink
Make more glob usages lazy; Pass FilesetWithSpec through source field…
Browse files Browse the repository at this point in the history
… validation, Make BundleProps.filemap lazy

One of the things that consumes a lot of time while parsing BUILD build files is loading the values of globs eagerly. Two of the places this happens are during sources field validation and generating the filemap for jvm app bundles.

This changes SourcesField so it allows FilesetWithSpec, ie globs, to be passed through without being converted into a list. This allows globs to remain unfulfilled until use rather than eagerly doing directory scanning on target initialization.

It also changes jvm_app's bundle so that its filemap is also lazily created rather than eagerly.

Testing Done:
Added a test case for passthrough, ran ci.

Travis passed https://travis-ci.org/pantsbuild/pants/builds/103508419

Bugs closed: 2794

Reviewed at https://rbcommons.com/s/twitter/r/3344/
  • Loading branch information
baroquebobcat committed Jan 20, 2016
1 parent d08e3e9 commit 2463f3a
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 29 deletions.
32 changes: 20 additions & 12 deletions src/python/pants/backend/jvm/targets/jvm_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pants.base.payload import Payload
from pants.base.payload_field import PayloadField, PrimitiveField, combine_hashes
from pants.build_graph.target import Target
from pants.util.memo import memoized_property


class RelativeToMapper(object):
Expand Down Expand Up @@ -61,7 +62,24 @@ def __repr__(self):
return 'DirectoryReMapper({0}, {1})'.format(self.base, self.dest)


BundleProps = namedtuple('_BundleProps', ['rel_path', 'mapper', 'filemap', 'fileset'])
class BundleProps(namedtuple('_BundleProps', ['rel_path', 'mapper', 'fileset'])):
@memoized_property
def filemap(self):
filemap = {}
if self.fileset is not None:
paths = self.fileset() if isinstance(self.fileset, Fileset) \
else self.fileset if hasattr(self.fileset, '__iter__') \
else [self.fileset]
for path in paths:
abspath = path
if not os.path.isabs(abspath):
abspath = os.path.join(get_buildroot(), self.rel_path, path)
filemap[abspath] = self.mapper(abspath)
return filemap

def __hash__(self):
# Leave out fileset from hash calculation since it may not be hashable.
return hash((self.rel_path, self.mapper))


class Bundle(object):
Expand Down Expand Up @@ -117,17 +135,7 @@ def __call__(self, rel_path=None, mapper=None, relative_to=None, fileset=None):
else:
mapper = mapper or RelativeToMapper(os.path.join(get_buildroot(), rel_path))

if fileset is not None:
paths = fileset() if isinstance(fileset, Fileset) \
else fileset if hasattr(fileset, '__iter__') \
else [fileset]
for path in paths:
abspath = path
if not os.path.isabs(abspath):
abspath = os.path.join(get_buildroot(), rel_path, path)
filemap[abspath] = mapper(abspath)

return BundleProps(self._rel_path, mapper, filemap, fileset)
return BundleProps(self._rel_path, mapper, fileset)


class BundleField(tuple, PayloadField):
Expand Down
7 changes: 2 additions & 5 deletions src/python/pants/base/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@


def assert_list(obj, expected_type=string_types, can_be_none=True, default=(), key_arg=None,
allowable=(list, Fileset, OrderedSet, set, tuple), allowable_add=(),
raise_type=ValueError):
allowable=(list, Fileset, OrderedSet, set, tuple), raise_type=ValueError):
"""
This function is used to ensure that parameters set by users in BUILD files are of acceptable types.
:param obj : the object that may be a list. It will pass if it is of type in allowable.
Expand All @@ -21,8 +20,6 @@ def assert_list(obj, expected_type=string_types, can_be_none=True, default=(), k
:param default : this is the default to return if can_be_none is True and obj is None.
:param key_arg : this is the name of the key to which obj belongs to
:param allowable : the acceptable types for obj. We do not want to allow any iterable (eg string).
:param allowable_add : additional acceptable types; pass this to add to the default allowable types,
rather than to replace them.
:param raise_type : the error to throw if the type is not correct.
"""
def get_key_msg(key=None):
Expand All @@ -31,7 +28,7 @@ def get_key_msg(key=None):
else:
return "In key '{}': ".format(key)

allowable = tuple(allowable) + tuple(allowable_add)
allowable = tuple(allowable)

key_msg = get_key_msg(key_arg)
val = obj
Expand Down
10 changes: 8 additions & 2 deletions src/python/pants/source/payload_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, sources_rel_path, sources, ref_address=None, filespec=None):
:param filespec: glob and exclude data that generated this set of sources
"""
self._rel_path = sources_rel_path
self._source_paths = assert_list(sources, key_arg='sources', allowable_add=(FilesetWithSpec,))
self._source_paths = self._validate_source_paths(sources)
self._ref_address = ref_address
self._filespec = filespec

Expand Down Expand Up @@ -87,6 +87,12 @@ def _compute_fingerprint(self):
hasher.update(f.read())
return hasher.hexdigest()

def _validate_source_paths(self, sources):
if isinstance(sources, FilesetWithSpec):
return sources
else:
return assert_list(sources, key_arg='sources')


class DeferredSourcesField(SourcesField):
"""A SourcesField that isn't populated immediately when the graph is constructed.
Expand Down Expand Up @@ -125,7 +131,7 @@ def populate(self, sources, rel_path=None):
raise self.AlreadyPopulatedError("Called with rel_path={rel_path} sources={sources}"
.format(rel_path=rel_path, sources=sources))
self._rel_path = rel_path
self._source_paths = assert_list(sources, key_arg='sources', allowable_add=(FilesetWithSpec,))
self._source_paths = self._validate_source_paths(sources)
self._populated = True

def _validate_populated(self):
Expand Down
11 changes: 1 addition & 10 deletions tests/python/pants_test/base/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,4 @@ def test_invalid_inputs_with_key_arg(self):
with self.assertRaisesRegexp(ValueError, "In key 'artifacts':"):
assert_list([["file3.txt"]], key_arg='artifacts') # All values most be strings
with self.assertRaisesRegexp(ValueError, "In key 'jars':"):
assert_list(None, can_be_none=False, key_arg='jars') # The default is ok as None only when can_be_noe is true

def test_additional_allowables(self):
def generator():
for x in range(11):
yield str(x)
assert_list(generator(), allowable_add=(GeneratorType,))
assert_list(['1', '2'], allowable_add=(GeneratorType,))
with self.assertRaises(ValueError):
assert_list(generator(), allowable_add=[])
assert_list(None, can_be_none=False, key_arg='jars') # The default is ok as None only when can_be_none is true
16 changes: 16 additions & 0 deletions tests/python/pants_test/source/test_payload_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
unicode_literals, with_statement)

from pants.source.payload_fields import SourcesField
from pants.source.wrapped_globs import FilesetWithSpec
from pants_test.base_test import BaseTest


Expand Down Expand Up @@ -69,3 +70,18 @@ def test_sources_field(self):
).fingerprint()

self.assertNotEqual(fp1, fp2)

def test_fails_on_invalid_sources_kwarg(self):
with self.assertRaises(ValueError):
SourcesField(sources_rel_path='anything',
sources='not-a-list')

def test_passes_fileset_with_spec_through(self):
self.create_file('foo/a.txt', 'a_contents')

fileset = FilesetWithSpec('foo', 'a.txt', lambda: ['a.txt'])
sf = SourcesField(sources_rel_path='foo',
sources=fileset)

self.assertIs(fileset, sf.source_paths)
self.assertEqual(['a.txt'], list(sf.source_paths))

0 comments on commit 2463f3a

Please sign in to comment.