From c7cd71d25a7a1d1c499a2debc558c95fef352561 Mon Sep 17 00:00:00 2001 From: Yujie Chen Date: Fri, 1 Jul 2016 11:08:52 -0700 Subject: [PATCH] [engine] Proper implementation of `**` globs in the v2 engine We don't have a proper implementation of trailing ** matching in the new engine. Currently "**" matches 1 or more levels of dirs, while in gitignore syntax and also in zsh spec, "**" should match 0 or more levels of dirs. For example, we expect "**" behaves like following: ** -> matches everything, recursively. dir/** -> matches everything under dir, recursively. **/*.py -> matches every dir recursively, searching for files with a .py extension (even in cwd). This review does following: 1. implement "**" matching logic as specified above. 2. add test cases in test_fs.py and test_path_globs.py. 3. add some missing dependencies in test BUILD files. Testing Done: ci green: https://travis-ci.org/pantsbuild/pants/builds/141203951 Bugs closed: 3413, 3589, 3608 Fixes: #3413 Fixes: #3589 Reviewed at https://rbcommons.com/s/twitter/r/4034/ --- src/python/pants/engine/fs.py | 47 +++++----- .../java/org/pantsbuild/build_parsing/BUILD | 4 + .../pantsbuild/build_parsing/test_dir/.keep | 1 + tests/python/pants_test/engine/BUILD | 1 + .../engine/legacy/test_filemap_integration.py | 8 +- .../engine/legacy/test_list_integration.py | 10 +++ tests/python/pants_test/engine/test_fs.py | 85 ++++++++++++++----- .../pants_test/engine/test_path_globs.py | 19 ++++- tests/python/pants_test/pantsd/BUILD | 1 + 9 files changed, 127 insertions(+), 49 deletions(-) create mode 100644 testprojects/tests/java/org/pantsbuild/build_parsing/BUILD create mode 100644 testprojects/tests/java/org/pantsbuild/build_parsing/test_dir/.keep diff --git a/src/python/pants/engine/fs.py b/src/python/pants/engine/fs.py index 7bd138d5d7d..5fb1fa2c0e6 100644 --- a/src/python/pants/engine/fs.py +++ b/src/python/pants/engine/fs.py @@ -9,6 +9,7 @@ from abc import abstractproperty from fnmatch import fnmatch from hashlib import sha1 +from itertools import chain from os import sep as os_sep from os.path import basename, join, normpath @@ -115,7 +116,6 @@ class PathGlob(AbstractClass): """ _DOUBLE = '**' - _SINGLE = '*' @abstractproperty def canonical_stat(self): @@ -127,7 +127,7 @@ def symbolic_path(self): @classmethod def create_from_spec(cls, canonical_stat, symbolic_path, filespec): - """Given a filespec, return a PathGlob object. + """Given a filespec, return a tuple of PathGlob objects. :param canonical_stat: A canonical Dir relative to the ProjectTree, to which the filespec is relative. @@ -141,24 +141,35 @@ def create_from_spec(cls, canonical_stat, symbolic_path, filespec): parts = normpath(filespec).split(os_sep) if canonical_stat == Dir('') and len(parts) == 1 and parts[0] == '.': # A request for the root path. - return PathRoot() + return (PathRoot(),) + elif parts[0] == cls._DOUBLE and len(parts) == 1: + # Per https://git-scm.com/docs/gitignore: + # + # "A trailing '/**' matches everything inside. For example, 'abc/**' matches all files inside + # directory "abc", relative to the location of the .gitignore file, with infinite depth." + # + return (PathDirWildcard(canonical_stat, symbolic_path, '*', '**'), + PathWildcard(canonical_stat, symbolic_path, '*')) elif cls._DOUBLE in parts[0]: if parts[0] != cls._DOUBLE: - raise ValueError( - 'Illegal component "{}" in filespec under {}: {}'.format( - parts[0], symbolic_path, filespec)) + raise ValueError('Illegal component "{}" in filespec under {}: {}' + .format(parts[0], symbolic_path, filespec)) + # There is a double-wildcard in a dirname of the path: double wildcards are recursive, # so there are two remainder possibilities: one with the double wildcard included, and the # other without. - remainders = (join(*parts[1:]), join(*parts[0:])) - return PathDirWildcard(canonical_stat, symbolic_path, parts[0], remainders) + pathglob_with_doublestar = PathDirWildcard(canonical_stat, symbolic_path, '*', join(*parts[0:])) + if len(parts) == 2: + pathglob_no_doublestar = PathWildcard(canonical_stat, symbolic_path, parts[1]) + else: + pathglob_no_doublestar = PathDirWildcard(canonical_stat, symbolic_path, parts[1], join(*parts[2:])) + return (pathglob_with_doublestar, pathglob_no_doublestar) elif len(parts) == 1: # This is the path basename. - return PathWildcard(canonical_stat, symbolic_path, parts[0]) + return (PathWildcard(canonical_stat, symbolic_path, parts[0]),) else: # This is a path dirname. - remainders = (join(*parts[1:]),) - return PathDirWildcard(canonical_stat, symbolic_path, parts[0], remainders) + return (PathDirWildcard(canonical_stat, symbolic_path, parts[0], join(*parts[1:])),) class PathRoot(datatype('PathRoot', []), PathGlob): @@ -177,10 +188,10 @@ class PathWildcard(datatype('PathWildcard', ['canonical_stat', 'symbolic_path', """A PathGlob matching a basename.""" -class PathDirWildcard(datatype('PathDirWildcard', ['canonical_stat', 'symbolic_path', 'wildcard', 'remainders']), PathGlob): +class PathDirWildcard(datatype('PathDirWildcard', ['canonical_stat', 'symbolic_path', 'wildcard', 'remainder']), PathGlob): """A PathGlob matching a dirname. - Each remainders value is applied relative to each directory matched by the wildcard. + Remainder value is applied relative to each directory matched by the wildcard. """ @@ -223,8 +234,8 @@ def create(cls, relative_to, files=None, globs=None, rglobs=None, zglobs=None): @classmethod def create_from_specs(cls, relative_to, filespecs): - return cls(tuple(PathGlob.create_from_spec(Dir(relative_to), relative_to, filespec) - for filespec in filespecs)) + path_globs = (PathGlob.create_from_spec(Dir(relative_to), relative_to, filespec) for filespec in filespecs) + return cls(tuple(chain.from_iterable(path_globs))) class DirectoryListing(datatype('DirectoryListing', ['directory', 'dependencies', 'exists'])): @@ -272,10 +283,8 @@ def apply_path_dir_wildcard(dirs, path_dir_wildcard): sense that they will be relative to known-canonical subdirectories. """ # For each matching Path, create a PathGlob per remainder. - path_globs = tuple(PathGlob.create_from_spec(d.stat, d.path, remainder) - for d in dirs.dependencies - for remainder in path_dir_wildcard.remainders) - return PathGlobs(path_globs) + path_globs = (PathGlob.create_from_spec(d.stat, d.path, path_dir_wildcard.remainder) for d in dirs.dependencies) + return PathGlobs(tuple(chain.from_iterable(path_globs))) def _zip_links(links, linked_paths): diff --git a/testprojects/tests/java/org/pantsbuild/build_parsing/BUILD b/testprojects/tests/java/org/pantsbuild/build_parsing/BUILD new file mode 100644 index 00000000000..5795231015e --- /dev/null +++ b/testprojects/tests/java/org/pantsbuild/build_parsing/BUILD @@ -0,0 +1,4 @@ +jvm_app( + name='trailing_glob_doublestar', + bundles=[bundle(fileset=globs('test_dir/**'))] +) diff --git a/testprojects/tests/java/org/pantsbuild/build_parsing/test_dir/.keep b/testprojects/tests/java/org/pantsbuild/build_parsing/test_dir/.keep new file mode 100644 index 00000000000..6f30d22d629 --- /dev/null +++ b/testprojects/tests/java/org/pantsbuild/build_parsing/test_dir/.keep @@ -0,0 +1 @@ +# This (empty) directory is a dependency of a test, so we use a hidden file to allow git to track the directory. diff --git a/tests/python/pants_test/engine/BUILD b/tests/python/pants_test/engine/BUILD index a94e4cd9108..2ab4aaba6e0 100644 --- a/tests/python/pants_test/engine/BUILD +++ b/tests/python/pants_test/engine/BUILD @@ -150,6 +150,7 @@ python_tests( sources=['scheduler_test_base.py'], dependencies=[ 'src/python/pants/base:file_system_project_tree', + 'src/python/pants/engine:engine', 'src/python/pants/engine:fs', 'src/python/pants/engine:parser', 'src/python/pants/engine:scheduler', diff --git a/tests/python/pants_test/engine/legacy/test_filemap_integration.py b/tests/python/pants_test/engine/legacy/test_filemap_integration.py index 59eb32ff3df..6160afc2af0 100644 --- a/tests/python/pants_test/engine/legacy/test_filemap_integration.py +++ b/tests/python/pants_test/engine/legacy/test_filemap_integration.py @@ -92,15 +92,11 @@ def test_exclude_rglobs(self): def test_exclude_zglobs(self): test_out = self._extract_exclude_output('exclude_zglobs') - # TODO: ZGlobs.to_filespecs does not match files in the current directory when a pattern - # starts with `**`; see: https://github.com/pantsbuild/pants/issues/3413 - self.assertEquals(self.TEST_EXCLUDE_FILES - {'dir1/ab.py', 'dir1/aabb.py', 'dir1/dirdir1/ab.py'}, + self.assertEquals(self.TEST_EXCLUDE_FILES - {'ab.py', 'aabb.py', 'dir1/ab.py', 'dir1/aabb.py', 'dir1/dirdir1/ab.py'}, test_out) def test_exclude_composite(self): test_out = self._extract_exclude_output('exclude_composite') - # TODO: ZGlobs.to_filespecs does not match files in the current directory when a pattern - # starts with `**`; see: https://github.com/pantsbuild/pants/issues/3413 self.assertEquals(self.TEST_EXCLUDE_FILES - - {'aaa.py', 'dir1/a.py', 'dir1/dirdir1/a.py'}, + {'a.py', 'aaa.py', 'dir1/a.py', 'dir1/dirdir1/a.py'}, test_out) diff --git a/tests/python/pants_test/engine/legacy/test_list_integration.py b/tests/python/pants_test/engine/legacy/test_list_integration.py index c38bd71a33a..75b91b77ea5 100644 --- a/tests/python/pants_test/engine/legacy/test_list_integration.py +++ b/tests/python/pants_test/engine/legacy/test_list_integration.py @@ -51,3 +51,13 @@ def test_list_nested_function_scopes(self): pants_run.stdout_data.strip(), 'testprojects/tests/python/pants/build_parsing:test-nested-variable-access-in-function-call' ) + + def test_list_parse_java_targets(self): + pants_run = self.do_command('list', + 'testprojects/tests/java/org/pantsbuild/build_parsing::', + success=True, + enable_v2_engine=True) + self.assertRegexpMatches( + pants_run.stdout_data, + r'testprojects/tests/java/org/pantsbuild/build_parsing:trailing_glob_doublestar' + ) diff --git a/tests/python/pants_test/engine/test_fs.py b/tests/python/pants_test/engine/test_fs.py index 6ab38cb482c..f364302ac93 100644 --- a/tests/python/pants_test/engine/test_fs.py +++ b/tests/python/pants_test/engine/test_fs.py @@ -90,34 +90,67 @@ def test_walk_siblings(self): def test_walk_recursive(self): self.assert_walk(Files, ['**/*.txt.ln'], ['a/4.txt.ln', 'd.ln/4.txt.ln']) - self.assert_walk(Files, - ['**/*.txt'], - ['a/3.txt', 'a/b/1.txt', 'c.ln/1.txt', 'd.ln/3.txt', 'd.ln/b/1.txt']) - self.assert_walk(Files, - ['**/*.txt', '*.txt'], - ['a/3.txt', 'a/b/1.txt', 'c.ln/1.txt', 'd.ln/3.txt', 'd.ln/b/1.txt', '4.txt']) + self.assert_walk(Files, ['**/*.txt'], ['4.txt', + 'a/3.txt', + 'a/b/1.txt', + 'c.ln/1.txt', + 'd.ln/3.txt', + 'd.ln/b/1.txt']) + self.assert_walk(Files, ['**/*.txt', '*.txt'], ['a/3.txt', + 'a/b/1.txt', + 'c.ln/1.txt', + 'd.ln/3.txt', + 'd.ln/b/1.txt', + '4.txt']) self.assert_walk(Files, ['**/3.t*t'], ['a/3.txt', 'd.ln/3.txt']) self.assert_walk(Files, ['**/*.zzz'], []) def test_walk_recursive_all(self): - self.assert_walk(Files, ['*', '**/*'], [ - '4.txt', - 'a/3.txt', - 'a/4.txt.ln', - 'a/b/1.txt', - 'a/b/2', - 'c.ln/1.txt', - 'c.ln/2', - 'd.ln/3.txt', - 'd.ln/4.txt.ln', - 'd.ln/b/1.txt', - 'd.ln/b/2', - ]) + self.assert_walk(Files, ['*', '**/*'], ['4.txt', + 'a/3.txt', + 'a/4.txt.ln', + 'a/b/1.txt', + 'a/b/2', + 'c.ln/1.txt', + 'c.ln/2', + 'd.ln/3.txt', + 'd.ln/4.txt.ln', + 'd.ln/b/1.txt', + 'd.ln/b/2']) + + def test_walk_recursive_trailing_greedy_doublestar(self): + self.assert_walk(Files, ['**'], ['4.txt', + 'a/3.txt', + 'a/4.txt.ln', + 'a/b/1.txt', + 'a/b/2', + 'c.ln/1.txt', + 'c.ln/2', + 'd.ln/3.txt', + 'd.ln/4.txt.ln', + 'd.ln/b/1.txt', + 'd.ln/b/2']) + + def test_walk_recursive_trailing_doublestar(self): + self.assert_walk(Files, ['a/**'], ['a/3.txt', + 'a/4.txt.ln', + 'a/b/1.txt', + 'a/b/2']) + self.assert_walk(Files, ['d.ln/**'], ['d.ln/3.txt', + 'd.ln/4.txt.ln', + 'd.ln/b/1.txt', + 'd.ln/b/2']) + self.assert_walk(Dirs, ['a/**'], ['a/b']) + + def test_walk_recursive_slash_doublestar_slash(self): + self.assert_walk(Files, ['a/**/3.txt'], ['a/3.txt']) + self.assert_walk(Files, ['a/**/b/1.txt'], ['a/b/1.txt']) + self.assert_walk(Files, ['a/**/2'], ['a/b/2']) def test_walk_recursive_directory(self): self.assert_walk(Dirs, ['*'], ['a', 'c.ln', 'd.ln']) self.assert_walk(Dirs, ['*/*'], ['a/b', 'd.ln/b']) - self.assert_walk(Dirs, ['**/*'], ['a/b', 'd.ln/b']) + self.assert_walk(Dirs, ['**/*'], ['a', 'c.ln', 'd.ln', 'a/b', 'd.ln/b']) self.assert_walk(Dirs, ['*/*/*'], []) def test_files_content_literal(self): @@ -204,3 +237,15 @@ def test_walk_recursive_all(self): @unittest.skip('https://github.com/pantsbuild/pants/issues/3281') def test_files_content_literal(self): super(GitFSTest, self).test_files_content_literal() + + @unittest.skip('https://github.com/pantsbuild/pants/issues/3281') + def test_walk_recursive_trailing_greedy_doublestar_current(self): + super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar_current() + + @unittest.skip('https://github.com/pantsbuild/pants/issues/3281') + def test_walk_recursive_trailing_greedy_doublestar(self): + super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar() + + @unittest.skip('https://github.com/pantsbuild/pants/issues/3281') + def test_walk_recursive_trailing_doublestar(self): + super(GitFSTest, self).test_walk_recursive_trailing_doublestar() diff --git a/tests/python/pants_test/engine/test_path_globs.py b/tests/python/pants_test/engine/test_path_globs.py index f76dc6be27c..36cbda13131 100644 --- a/tests/python/pants_test/engine/test_path_globs.py +++ b/tests/python/pants_test/engine/test_path_globs.py @@ -33,7 +33,7 @@ def test_literal(self): name = 'Blah.java' self.assert_pg_equals([pw('', name)], '', [name]) self.assert_pg_equals([pw(subdir, name)], subdir, [name]) - self.assert_pg_equals([pdw('', subdir, (name,))], '', [join(subdir, name)]) + self.assert_pg_equals([pdw('', subdir, name)], '', [join(subdir, name)]) def test_wildcard(self): name = '*.java' @@ -45,7 +45,7 @@ def test_dir_wildcard(self): name = 'Blah.java' subdir = 'foo' wildcard = '*' - self.assert_pg_equals([pdw(subdir, wildcard, (name,))], + self.assert_pg_equals([pdw(subdir, wildcard, name)], subdir, [join(wildcard, name)]) @@ -64,7 +64,18 @@ def test_recursive_dir_wildcard(self): name = 'Blah.java' subdir = 'foo' wildcard = '**' - expected_remainders = (name, join(wildcard, name)) - self.assert_pg_equals([pdw(subdir, wildcard, expected_remainders)], + self.assert_pg_equals([pdw(subdir, '*', join(wildcard, name)), pw(subdir, name)], subdir, [join(wildcard, name)]) + + def test_trailing_doublestar(self): + subdir = 'foo' + wildcard = '**' + self.assert_pg_equals([pdw(subdir, '*', wildcard), pw(subdir, '*')], + subdir, + [wildcard]) + + def test_invalid_wildcard(self): + subdir = 'foo' + self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc']) + self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc/xyz']) diff --git a/tests/python/pants_test/pantsd/BUILD b/tests/python/pants_test/pantsd/BUILD index 28226db1046..02d9f36e2f9 100644 --- a/tests/python/pants_test/pantsd/BUILD +++ b/tests/python/pants_test/pantsd/BUILD @@ -66,6 +66,7 @@ python_tests( name = 'pantsd_integration', sources = ['test_pantsd_integration.py'], dependencies = [ + 'src/python/pants/pantsd:process_manager', 'tests/python/pants_test:int-test' ], tags = {'integration'}