Skip to content

Commit

Permalink
[engine] Proper implementation of ** globs in the v2 engine
Browse files Browse the repository at this point in the history
We don't have a proper implementation of trailing ** matching in the new engine.
Currently "**" matches 1 or more levels of dirs, while in gitignore syntax and also in zsh spec, "**" should match 0 or more levels of dirs.

For example, we expect "**" behaves like following:
** -> matches everything, recursively.
dir/** -> matches everything under dir, recursively.
**/*.py -> matches every dir recursively, searching for files with a .py extension (even in cwd).

This review does following:
1. implement "**" matching logic as specified above.
2. add test cases in test_fs.py and test_path_globs.py.
3. add some missing dependencies in test BUILD files.

Testing Done:
ci green:
https://travis-ci.org/pantsbuild/pants/builds/141203951

Bugs closed: 3413, 3589, 3608
Fixes: pantsbuild#3413
Fixes: pantsbuild#3589

Reviewed at https://rbcommons.com/s/twitter/r/4034/
  • Loading branch information
JieGhost authored and kwlzn committed Jul 1, 2016
1 parent 6ac25c8 commit c7cd71d
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 49 deletions.
47 changes: 28 additions & 19 deletions src/python/pants/engine/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from abc import abstractproperty
from fnmatch import fnmatch
from hashlib import sha1
from itertools import chain
from os import sep as os_sep
from os.path import basename, join, normpath

Expand Down Expand Up @@ -115,7 +116,6 @@ class PathGlob(AbstractClass):
"""

_DOUBLE = '**'
_SINGLE = '*'

@abstractproperty
def canonical_stat(self):
Expand All @@ -127,7 +127,7 @@ def symbolic_path(self):

@classmethod
def create_from_spec(cls, canonical_stat, symbolic_path, filespec):
"""Given a filespec, return a PathGlob object.
"""Given a filespec, return a tuple of PathGlob objects.
:param canonical_stat: A canonical Dir relative to the ProjectTree, to which the filespec
is relative.
Expand All @@ -141,24 +141,35 @@ def create_from_spec(cls, canonical_stat, symbolic_path, filespec):
parts = normpath(filespec).split(os_sep)
if canonical_stat == Dir('') and len(parts) == 1 and parts[0] == '.':
# A request for the root path.
return PathRoot()
return (PathRoot(),)
elif parts[0] == cls._DOUBLE and len(parts) == 1:
# Per https://git-scm.com/docs/gitignore:
#
# "A trailing '/**' matches everything inside. For example, 'abc/**' matches all files inside
# directory "abc", relative to the location of the .gitignore file, with infinite depth."
#
return (PathDirWildcard(canonical_stat, symbolic_path, '*', '**'),
PathWildcard(canonical_stat, symbolic_path, '*'))
elif cls._DOUBLE in parts[0]:
if parts[0] != cls._DOUBLE:
raise ValueError(
'Illegal component "{}" in filespec under {}: {}'.format(
parts[0], symbolic_path, filespec))
raise ValueError('Illegal component "{}" in filespec under {}: {}'
.format(parts[0], symbolic_path, filespec))

# There is a double-wildcard in a dirname of the path: double wildcards are recursive,
# so there are two remainder possibilities: one with the double wildcard included, and the
# other without.
remainders = (join(*parts[1:]), join(*parts[0:]))
return PathDirWildcard(canonical_stat, symbolic_path, parts[0], remainders)
pathglob_with_doublestar = PathDirWildcard(canonical_stat, symbolic_path, '*', join(*parts[0:]))
if len(parts) == 2:
pathglob_no_doublestar = PathWildcard(canonical_stat, symbolic_path, parts[1])
else:
pathglob_no_doublestar = PathDirWildcard(canonical_stat, symbolic_path, parts[1], join(*parts[2:]))
return (pathglob_with_doublestar, pathglob_no_doublestar)
elif len(parts) == 1:
# This is the path basename.
return PathWildcard(canonical_stat, symbolic_path, parts[0])
return (PathWildcard(canonical_stat, symbolic_path, parts[0]),)
else:
# This is a path dirname.
remainders = (join(*parts[1:]),)
return PathDirWildcard(canonical_stat, symbolic_path, parts[0], remainders)
return (PathDirWildcard(canonical_stat, symbolic_path, parts[0], join(*parts[1:])),)


class PathRoot(datatype('PathRoot', []), PathGlob):
Expand All @@ -177,10 +188,10 @@ class PathWildcard(datatype('PathWildcard', ['canonical_stat', 'symbolic_path',
"""A PathGlob matching a basename."""


class PathDirWildcard(datatype('PathDirWildcard', ['canonical_stat', 'symbolic_path', 'wildcard', 'remainders']), PathGlob):
class PathDirWildcard(datatype('PathDirWildcard', ['canonical_stat', 'symbolic_path', 'wildcard', 'remainder']), PathGlob):
"""A PathGlob matching a dirname.
Each remainders value is applied relative to each directory matched by the wildcard.
Remainder value is applied relative to each directory matched by the wildcard.
"""


Expand Down Expand Up @@ -223,8 +234,8 @@ def create(cls, relative_to, files=None, globs=None, rglobs=None, zglobs=None):

@classmethod
def create_from_specs(cls, relative_to, filespecs):
return cls(tuple(PathGlob.create_from_spec(Dir(relative_to), relative_to, filespec)
for filespec in filespecs))
path_globs = (PathGlob.create_from_spec(Dir(relative_to), relative_to, filespec) for filespec in filespecs)
return cls(tuple(chain.from_iterable(path_globs)))


class DirectoryListing(datatype('DirectoryListing', ['directory', 'dependencies', 'exists'])):
Expand Down Expand Up @@ -272,10 +283,8 @@ def apply_path_dir_wildcard(dirs, path_dir_wildcard):
sense that they will be relative to known-canonical subdirectories.
"""
# For each matching Path, create a PathGlob per remainder.
path_globs = tuple(PathGlob.create_from_spec(d.stat, d.path, remainder)
for d in dirs.dependencies
for remainder in path_dir_wildcard.remainders)
return PathGlobs(path_globs)
path_globs = (PathGlob.create_from_spec(d.stat, d.path, path_dir_wildcard.remainder) for d in dirs.dependencies)
return PathGlobs(tuple(chain.from_iterable(path_globs)))


def _zip_links(links, linked_paths):
Expand Down
4 changes: 4 additions & 0 deletions testprojects/tests/java/org/pantsbuild/build_parsing/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
jvm_app(
name='trailing_glob_doublestar',
bundles=[bundle(fileset=globs('test_dir/**'))]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This (empty) directory is a dependency of a test, so we use a hidden file to allow git to track the directory.
1 change: 1 addition & 0 deletions tests/python/pants_test/engine/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ python_tests(
sources=['scheduler_test_base.py'],
dependencies=[
'src/python/pants/base:file_system_project_tree',
'src/python/pants/engine:engine',
'src/python/pants/engine:fs',
'src/python/pants/engine:parser',
'src/python/pants/engine:scheduler',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,11 @@ def test_exclude_rglobs(self):

def test_exclude_zglobs(self):
test_out = self._extract_exclude_output('exclude_zglobs')
# TODO: ZGlobs.to_filespecs does not match files in the current directory when a pattern
# starts with `**`; see: https://github.com/pantsbuild/pants/issues/3413
self.assertEquals(self.TEST_EXCLUDE_FILES - {'dir1/ab.py', 'dir1/aabb.py', 'dir1/dirdir1/ab.py'},
self.assertEquals(self.TEST_EXCLUDE_FILES - {'ab.py', 'aabb.py', 'dir1/ab.py', 'dir1/aabb.py', 'dir1/dirdir1/ab.py'},
test_out)

def test_exclude_composite(self):
test_out = self._extract_exclude_output('exclude_composite')
# TODO: ZGlobs.to_filespecs does not match files in the current directory when a pattern
# starts with `**`; see: https://github.com/pantsbuild/pants/issues/3413
self.assertEquals(self.TEST_EXCLUDE_FILES -
{'aaa.py', 'dir1/a.py', 'dir1/dirdir1/a.py'},
{'a.py', 'aaa.py', 'dir1/a.py', 'dir1/dirdir1/a.py'},
test_out)
10 changes: 10 additions & 0 deletions tests/python/pants_test/engine/legacy/test_list_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,13 @@ def test_list_nested_function_scopes(self):
pants_run.stdout_data.strip(),
'testprojects/tests/python/pants/build_parsing:test-nested-variable-access-in-function-call'
)

def test_list_parse_java_targets(self):
pants_run = self.do_command('list',
'testprojects/tests/java/org/pantsbuild/build_parsing::',
success=True,
enable_v2_engine=True)
self.assertRegexpMatches(
pants_run.stdout_data,
r'testprojects/tests/java/org/pantsbuild/build_parsing:trailing_glob_doublestar'
)
85 changes: 65 additions & 20 deletions tests/python/pants_test/engine/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,34 +90,67 @@ def test_walk_siblings(self):

def test_walk_recursive(self):
self.assert_walk(Files, ['**/*.txt.ln'], ['a/4.txt.ln', 'd.ln/4.txt.ln'])
self.assert_walk(Files,
['**/*.txt'],
['a/3.txt', 'a/b/1.txt', 'c.ln/1.txt', 'd.ln/3.txt', 'd.ln/b/1.txt'])
self.assert_walk(Files,
['**/*.txt', '*.txt'],
['a/3.txt', 'a/b/1.txt', 'c.ln/1.txt', 'd.ln/3.txt', 'd.ln/b/1.txt', '4.txt'])
self.assert_walk(Files, ['**/*.txt'], ['4.txt',
'a/3.txt',
'a/b/1.txt',
'c.ln/1.txt',
'd.ln/3.txt',
'd.ln/b/1.txt'])
self.assert_walk(Files, ['**/*.txt', '*.txt'], ['a/3.txt',
'a/b/1.txt',
'c.ln/1.txt',
'd.ln/3.txt',
'd.ln/b/1.txt',
'4.txt'])
self.assert_walk(Files, ['**/3.t*t'], ['a/3.txt', 'd.ln/3.txt'])
self.assert_walk(Files, ['**/*.zzz'], [])

def test_walk_recursive_all(self):
self.assert_walk(Files, ['*', '**/*'], [
'4.txt',
'a/3.txt',
'a/4.txt.ln',
'a/b/1.txt',
'a/b/2',
'c.ln/1.txt',
'c.ln/2',
'd.ln/3.txt',
'd.ln/4.txt.ln',
'd.ln/b/1.txt',
'd.ln/b/2',
])
self.assert_walk(Files, ['*', '**/*'], ['4.txt',
'a/3.txt',
'a/4.txt.ln',
'a/b/1.txt',
'a/b/2',
'c.ln/1.txt',
'c.ln/2',
'd.ln/3.txt',
'd.ln/4.txt.ln',
'd.ln/b/1.txt',
'd.ln/b/2'])

def test_walk_recursive_trailing_greedy_doublestar(self):
self.assert_walk(Files, ['**'], ['4.txt',
'a/3.txt',
'a/4.txt.ln',
'a/b/1.txt',
'a/b/2',
'c.ln/1.txt',
'c.ln/2',
'd.ln/3.txt',
'd.ln/4.txt.ln',
'd.ln/b/1.txt',
'd.ln/b/2'])

def test_walk_recursive_trailing_doublestar(self):
self.assert_walk(Files, ['a/**'], ['a/3.txt',
'a/4.txt.ln',
'a/b/1.txt',
'a/b/2'])
self.assert_walk(Files, ['d.ln/**'], ['d.ln/3.txt',
'd.ln/4.txt.ln',
'd.ln/b/1.txt',
'd.ln/b/2'])
self.assert_walk(Dirs, ['a/**'], ['a/b'])

def test_walk_recursive_slash_doublestar_slash(self):
self.assert_walk(Files, ['a/**/3.txt'], ['a/3.txt'])
self.assert_walk(Files, ['a/**/b/1.txt'], ['a/b/1.txt'])
self.assert_walk(Files, ['a/**/2'], ['a/b/2'])

def test_walk_recursive_directory(self):
self.assert_walk(Dirs, ['*'], ['a', 'c.ln', 'd.ln'])
self.assert_walk(Dirs, ['*/*'], ['a/b', 'd.ln/b'])
self.assert_walk(Dirs, ['**/*'], ['a/b', 'd.ln/b'])
self.assert_walk(Dirs, ['**/*'], ['a', 'c.ln', 'd.ln', 'a/b', 'd.ln/b'])
self.assert_walk(Dirs, ['*/*/*'], [])

def test_files_content_literal(self):
Expand Down Expand Up @@ -204,3 +237,15 @@ def test_walk_recursive_all(self):
@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_files_content_literal(self):
super(GitFSTest, self).test_files_content_literal()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_greedy_doublestar_current(self):
super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar_current()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_greedy_doublestar(self):
super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_doublestar(self):
super(GitFSTest, self).test_walk_recursive_trailing_doublestar()
19 changes: 15 additions & 4 deletions tests/python/pants_test/engine/test_path_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_literal(self):
name = 'Blah.java'
self.assert_pg_equals([pw('', name)], '', [name])
self.assert_pg_equals([pw(subdir, name)], subdir, [name])
self.assert_pg_equals([pdw('', subdir, (name,))], '', [join(subdir, name)])
self.assert_pg_equals([pdw('', subdir, name)], '', [join(subdir, name)])

def test_wildcard(self):
name = '*.java'
Expand All @@ -45,7 +45,7 @@ def test_dir_wildcard(self):
name = 'Blah.java'
subdir = 'foo'
wildcard = '*'
self.assert_pg_equals([pdw(subdir, wildcard, (name,))],
self.assert_pg_equals([pdw(subdir, wildcard, name)],
subdir,
[join(wildcard, name)])

Expand All @@ -64,7 +64,18 @@ def test_recursive_dir_wildcard(self):
name = 'Blah.java'
subdir = 'foo'
wildcard = '**'
expected_remainders = (name, join(wildcard, name))
self.assert_pg_equals([pdw(subdir, wildcard, expected_remainders)],
self.assert_pg_equals([pdw(subdir, '*', join(wildcard, name)), pw(subdir, name)],
subdir,
[join(wildcard, name)])

def test_trailing_doublestar(self):
subdir = 'foo'
wildcard = '**'
self.assert_pg_equals([pdw(subdir, '*', wildcard), pw(subdir, '*')],
subdir,
[wildcard])

def test_invalid_wildcard(self):
subdir = 'foo'
self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc'])
self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc/xyz'])
1 change: 1 addition & 0 deletions tests/python/pants_test/pantsd/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ python_tests(
name = 'pantsd_integration',
sources = ['test_pantsd_integration.py'],
dependencies = [
'src/python/pants/pantsd:process_manager',
'tests/python/pants_test:int-test'
],
tags = {'integration'}
Expand Down

0 comments on commit c7cd71d

Please sign in to comment.