Skip to content

Commit

Permalink
RGlobs.to_filespec should generate filespecs that match git spec
Browse files Browse the repository at this point in the history
In v1 engine's rglobs implementation, '**' matches 1 or more dirs.

In v2 engine, we want to make rglobs follow git spec, which means '**' in BUILD file should match 0 or more dirs.

v2 engine used to treat '**' as matching 1 or more dirs. Thus for a pattern like 'a/**/*.py', RGlobs.to_filespec will generate 2 filespecs: 'a/**/*.py' and 'a/*.py', to make '**' from BUILD files match 0 or more dirs. However, after https://rbcommons.com/s/twitter/r/4034/, v2 engine started to match '**' with 0 or more dirs directly. The additional logic of handling '**' in RGlobs.to_filespec can be removed then.

In this patch,
1. RGlobs.to_filespec is simplified.
2. Add a logic in PathGlob.create to replace consecutive '**' with a single '**'.
3. Fix test failures caused by 1.

Testing Done:
ci green:
https://travis-ci.org/pantsbuild/pants/builds/144851041

Bugs closed: 3413, 3664

Reviewed at https://rbcommons.com/s/twitter/r/4078/
  • Loading branch information
JieGhost authored and stuhood committed Jul 15, 2016
1 parent 2f836ad commit cb799a7
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 94 deletions.
32 changes: 20 additions & 12 deletions src/python/pants/engine/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@ def canonical_stat(self):
def symbolic_path(self):
"""The symbolic name (specific to the execution of this PathGlob) for the canonical_stat."""

@classmethod
def _prune_doublestar(cls, parts):
# This is called only when parts[0] == '**'.
# Eliminating consecutive '**'s can prevent engine from doing repetitive traversing.
idx = 1
while idx < len(parts) and cls._DOUBLE == parts[idx]:
idx += 1
return parts[0:1] + parts[idx:]

@classmethod
def create_from_spec(cls, canonical_stat, symbolic_path, filespec):
"""Given a filespec, return a tuple of PathGlob objects.
Expand All @@ -142,18 +151,17 @@ def create_from_spec(cls, canonical_stat, symbolic_path, filespec):
if canonical_stat == Dir('') and len(parts) == 1 and parts[0] == '.':
# A request for the root path.
return (PathRoot(),)
elif parts[0] == cls._DOUBLE and len(parts) == 1:
# Per https://git-scm.com/docs/gitignore:
#
# "A trailing '/**' matches everything inside. For example, 'abc/**' matches all files inside
# directory "abc", relative to the location of the .gitignore file, with infinite depth."
#
return (PathDirWildcard(canonical_stat, symbolic_path, '*', '**'),
PathWildcard(canonical_stat, symbolic_path, '*'))
elif cls._DOUBLE in parts[0]:
if parts[0] != cls._DOUBLE:
raise ValueError('Illegal component "{}" in filespec under {}: {}'
.format(parts[0], symbolic_path, filespec))
elif cls._DOUBLE == parts[0]:
parts = cls._prune_doublestar(parts)

if len(parts) == 1:
# Per https://git-scm.com/docs/gitignore:
#
# "A trailing '/**' matches everything inside. For example, 'abc/**' matches all files inside
# directory "abc", relative to the location of the .gitignore file, with infinite depth."
#
return (PathDirWildcard(canonical_stat, symbolic_path, '*', '**'),
PathWildcard(canonical_stat, symbolic_path, '*'))

# There is a double-wildcard in a dirname of the path: double wildcards are recursive,
# so there are two remainder possibilities: one with the double wildcard included, and the
Expand Down
33 changes: 4 additions & 29 deletions src/python/pants/source/wrapped_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,8 @@ def rglobs_following_symlinked_dirs_by_default(*globspecs, **kw):
@classmethod
def to_filespec(cls, args, root='', excludes=None):
# In rglobs, * at the beginning of a path component means "any
# number of directories, including 0". Unfortunately, "**" in
# some other systems, e.g. git means "one or more directories".
# So every time we see ^* or **, we need to output both
# "**/whatever" and "whatever".
# number of directories, including 0". So every time we see ^*,
# we need to output "**/*whatever".
rglobs = []
for arg in args:
components = arg.split(os.path.sep)
Expand All @@ -279,37 +277,14 @@ def to_filespec(cls, args, root='', excludes=None):
out.append(component)
elif component[0] == '*':
if out and out[-1].startswith("**"):
# We want to translate **/*.py to **/*.py, not **/**/*.py
# We want to translate *.py to **/*.py, not **/**/*.py
out.append(component)
else:
out.append('**/' + component)
else:
out.append(component)

def rglob_path(beginning, rest):
if not rest:
return [beginning]
endings = []
for i, item in enumerate(rest):
if beginning and not beginning.endswith(os.path.sep):
beginning += os.path.sep
if item.startswith('**'):
# .../**/*.java
for ending in rglob_path(beginning + item, rest[i + 1:]):
endings.append(ending)
# .../*.java
for ending in rglob_path(beginning + item[3:], rest[i + 1:]):
endings.append(ending)
return endings
else:
if beginning and not beginning.endswith(os.path.sep):
beginning += os.path.sep + item
else:
beginning += item

return [beginning]

rglobs.extend(rglob_path('', out))
rglobs.append(os.path.join(*out))

return super(RGlobs, cls).to_filespec(rglobs, root=root, excludes=excludes)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def test_source_rglobs(self):
result = self.execute_export_json('src/python/y')

self.assertEqual(
{'globs': ['src/python/y/**/*.py', 'src/python/y/*.py']},
{'globs': ['src/python/y/**/*.py']},
result['targets']['src/python/y:y']['globs']
)

Expand All @@ -367,7 +367,7 @@ def test_source_rglobs_subdir(self):
result = self.execute_export_json('src/python/y:y2')

self.assertEqual(
{'globs': ['src/python/y/subdir/**/*.py', 'src/python/y/subdir/*.py']},
{'globs': ['src/python/y/subdir/**/*.py']},
result['targets']['src/python/y:y2']['globs']
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class FileSystemPantsIgnoreTest(unittest.TestCase, PantsIgnoreTestBase):
Special test cases can be defined here.
"""

def mk_project_tree(self, build_root, ignore_patterns=[]):
def mk_project_tree(self, build_root, ignore_patterns=None):
return FileSystemProjectTree(build_root, ignore_patterns)

def setUp(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/python/pants_test/base/test_pants_ignore_scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class ScmPantsIgnoreTest(unittest.TestCase, PantsIgnoreTestBase):
Special test cases can be defined here.
"""

def mk_project_tree(self, build_root, ignore_patterns=[]):
def mk_project_tree(self, build_root, ignore_patterns=None):
return ScmProjectTree(build_root, Git(worktree=build_root), 'HEAD', ignore_patterns)

def setUp(self):
Expand Down
38 changes: 10 additions & 28 deletions tests/python/pants_test/engine/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def assert_walk(self, ftype, filespecs, paths):
with self.mk_project_tree(self._original_src) as project_tree:
scheduler = self.mk_scheduler(project_tree=project_tree)
result = self.execute(scheduler, ftype, self.specs('', *filespecs))[0]
self.assertEquals(set([p.path for p in result.dependencies]), set(paths))
self.assertEquals(sorted([p.path for p in result.dependencies]), sorted(paths))

def assert_content(self, filespecs, expected_content):
with self.mk_project_tree(self._original_src) as project_tree:
Expand Down Expand Up @@ -96,29 +96,19 @@ def test_walk_recursive(self):
'c.ln/1.txt',
'd.ln/3.txt',
'd.ln/b/1.txt'])
self.assert_walk(Files, ['**/*.txt', '*.txt'], ['a/3.txt',
'a/b/1.txt',
'c.ln/1.txt',
'd.ln/3.txt',
'd.ln/b/1.txt',
'4.txt'])
self.assert_walk(Files, ['**/*.txt'], ['a/3.txt',
'a/b/1.txt',
'c.ln/1.txt',
'd.ln/3.txt',
'd.ln/b/1.txt',
'4.txt'])
self.assert_walk(Files, ['**/3.t*t'], ['a/3.txt', 'd.ln/3.txt'])
self.assert_walk(Files, ['**/*.zzz'], [])

def test_walk_single_star(self):
self.assert_walk(Files, ['*'], ['4.txt'])

def test_walk_recursive_all(self):
self.assert_walk(Files, ['*', '**/*'], ['4.txt',
'a/3.txt',
'a/4.txt.ln',
'a/b/1.txt',
'a/b/2',
'c.ln/1.txt',
'c.ln/2',
'd.ln/3.txt',
'd.ln/4.txt.ln',
'd.ln/b/1.txt',
'd.ln/b/2'])

def test_walk_recursive_trailing_greedy_doublestar(self):
self.assert_walk(Files, ['**'], ['4.txt',
'a/3.txt',
'a/4.txt.ln',
Expand Down Expand Up @@ -238,14 +228,6 @@ def test_walk_recursive_all(self):
def test_files_content_literal(self):
super(GitFSTest, self).test_files_content_literal()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_greedy_doublestar_current(self):
super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar_current()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_greedy_doublestar(self):
super(GitFSTest, self).test_walk_recursive_trailing_greedy_doublestar()

@unittest.skip('https://github.com/pantsbuild/pants/issues/3281')
def test_walk_recursive_trailing_doublestar(self):
super(GitFSTest, self).test_walk_recursive_trailing_doublestar()
9 changes: 6 additions & 3 deletions tests/python/pants_test/engine/test_path_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ def test_trailing_doublestar(self):
subdir,
[wildcard])

def test_invalid_wildcard(self):
def test_doublestar_mixed_wildcard(self):
subdir = 'foo'
self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc'])
self.assertRaises(ValueError, PathGlobs.create_from_specs, subdir, ['**abc/xyz'])
wildcard = '**abc'
name = 'Blah.java'

self.assert_pg_equals([pw(subdir, wildcard)], subdir, [wildcard])
self.assert_pg_equals([pdw(subdir, wildcard, name)], subdir, [join(wildcard, name)])
23 changes: 5 additions & 18 deletions tests/python/pants_test/source/test_wrapped_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,36 +83,23 @@ def test_rglob_to_spec_one(self):

def test_rglob_to_spec_simple(self):
self._spec_test('rglobs("*.java")',
{'globs': ['y/**/*.java', 'y/*.java']})
{'globs': ['y/**/*.java']})

def test_rglob_to_spec_multi(self):
self._spec_test('rglobs("a/**/b/*.java")',
{'globs': ['y/a/**/b/**/*.java',
'y/a/**/b/*.java',
'y/a/b/**/*.java',
'y/a/b/*.java']})
{'globs': ['y/a/**/b/**/*.java']})

def test_rglob_to_spec_multi_more(self):
self._spec_test('rglobs("a/**/b/**/c/*.java")',
{'globs': ['y/a/**/b/**/c/**/*.java',
'y/a/**/b/**/c/*.java',
'y/a/**/b/c/**/*.java',
'y/a/**/b/c/*.java',

'y/a/b/**/c/**/*.java',
'y/a/b/**/c/*.java',
'y/a/b/c/**/*.java',
'y/a/b/c/*.java']})
{'globs': ['y/a/**/b/**/c/**/*.java']})

def test_rglob_to_spec_mid(self):
self._spec_test('rglobs("a/**/b/Fleem.java")',
{'globs': ['y/a/**/b/Fleem.java',
'y/a/b/Fleem.java']})
{'globs': ['y/a/**/b/Fleem.java']})

def test_rglob_to_spec_explicit(self):
self._spec_test('rglobs("a/**/*.java")',
{'globs': ['y/a/**/*.java',
'y/a/*.java']})
{'globs': ['y/a/**/*.java']})

def test_glob_exclude(self):
self.add_to_build_file('y/BUILD', dedent("""
Expand Down

0 comments on commit cb799a7

Please sign in to comment.