forked from twitter/pants
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
modify matching logic in changed task
The "changed" task works by matching changed file paths against the filespecs in a target's "sources" field. It has its own matching logic, which is here: https://github.com/pantsbuild/pants/blob/master/src/python/pants/source/wrapped_globs.py#L22-L34 This logic uses the fnmatch module internally, which has 2 issues: 1. fnmatch does not distinguish between "**" and "*". 2. fnmatch matches "*" across directory boundaries. For example, fnmatch('a/b/c.py', '*.py') will return True. These 2 issues break the "changed" task in 2 ways: 1. After https://rbcommons.com/s/twitter/r/4078/, the v1 and v2 engines both match "**" with 0 or more dirs. That means that for rglobs('*.py'), only the filespec '**/*.py' will be generated, but fnmatch will always match a double star with 1 or more dirs. 2. For globs('*.py'), only py files in the current dir should be matched, but fnmatch will match files recursively in subdirs. This review fixes the matching logic using regular expressions and enhances the test cases. Note: the alternative would be matching the file path against the sources file list directly, but that may impact performance negatively. I would also like to hear reviewers' opinions on this. Testing Done: https://travis-ci.org/pantsbuild/pants/builds/161758103 Tested in Twitter's internal repo as well. Bugs closed: 3888 Reviewed at https://rbcommons.com/s/twitter/r/4248/
- Loading branch information
Showing
6 changed files
with
221 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# coding=utf-8 | ||
# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from __future__ import (absolute_import, division, generators, nested_scopes, print_function, | ||
unicode_literals, with_statement) | ||
|
||
import re | ||
|
||
|
||
# Regex metacharacters that have no glob meaning and therefore must be matched literally.
# '.', '*', '?' and '[' are handled separately in `_component_to_regex`.
_REGEX_SPECIAL_CHARS = frozenset('\\^$+(){}|')


def _component_to_regex(component):
  """Translate a single slash-free glob component into a regex fragment.

  :param string component: One path component of a glob (must not be "**").
  :returns: A regex fragment matching exactly what the component matches within one dir.
  """
  out = []
  i, n = 0, len(component)
  while i < n:
    char = component[i]
    i += 1
    if char == '*':
      # A single star never crosses a directory boundary.
      out.append('[^/]*')
    elif char == '?':
      # Glob "?" is exactly one character within a directory, not a regex quantifier.
      out.append('[^/]')
    elif char == '.':
      out.append('[.]')
    elif char == '[':
      start = i
      if start < n and component[start] == '!':
        start += 1
      # A "]" directly after the opening bracket (or after "!") is a member of the class,
      # so the search for the terminator begins one character later.
      close = component.find(']', start + 1)
      if close == -1:
        # Unterminated class: treat the bracket as a literal character.
        out.append('\\[')
      else:
        members = component[i:close].replace('\\', '\\\\')
        if members.startswith('!'):
          # Glob negation "[!...]" is spelled "[^...]" in a regex.
          members = '^' + members[1:]
        out.append('[' + members + ']')
        i = close + 1
    elif char in _REGEX_SPECIAL_CHARS:
      out.append('\\' + char)
    else:
      out.append(char)
  return ''.join(out)


def glob_to_regex(pattern):
  """Given a glob pattern, return an equivalent regex expression.

  Characters that are special in a regex but not in a glob (e.g. "+", "$", "(", ")") are
  escaped so that they match literally. "?" matches a single character within a dir and
  "[...]" / "[!...]" character classes are preserved.

  :param string pattern: The glob pattern. "**" matches 0 or more dirs recursively.
                         "*" only matches patterns in a single dir.
  :returns: A regex string that matches same paths as the input glob does.
  :raises ValueError: If "**" is combined with other characters in one path component.
  """
  out = ['^']
  if pattern.startswith('/'):
    # Leading and trailing slashes are stripped below; only a leading one is significant.
    out.append('/')

  doublestar = False
  for index, component in enumerate(pattern.strip('/').split('/')):
    # The regex emitted for "**" already ends with a slash, so no separator follows it.
    if index > 0 and not doublestar:
      out.append('/')

    if '**' in component:
      if component != '**':
        raise ValueError('Invalid usage of "**", use "*" instead.')

      # Consecutive "**" components collapse into a single recursive match.
      if not doublestar:
        out.append('(([^/]+/)*)')
        doublestar = True
    else:
      out.append(_component_to_regex(component))
      doublestar = False

  if doublestar:
    # A trailing "**" still has to consume the final path component.
    out.append('[^/]*')

  out.append('$')

  return ''.join(out)
|
||
|
||
def globs_matches(path, patterns):
  """Report whether `path` is matched by at least one glob in `patterns`.

  :param string path: The file path to test.
  :param patterns: An iterable of glob patterns; each is converted with `glob_to_regex`.
  :returns: True if any pattern matches the path, False otherwise (including when
            `patterns` is empty).
  """
  for glob in patterns:
    if re.match(glob_to_regex(glob), path):
      return True
  return False
|
||
|
||
def matches_filespec(path, spec):
  """Decide whether `path` is selected by the filespec `spec`.

  A path is selected when it matches one of the spec's 'globs' and is not selected by any
  of the nested filespecs listed under 'exclude'.

  :param string path: The file path to test.
  :param spec: A dict with an optional 'globs' list and an optional 'exclude' list of
               nested filespecs, or None.
  :returns: True if the path is included and not excluded; False otherwise, and always
            False when `spec` is None.
  """
  if spec is None:
    return False

  included = globs_matches(path, spec.get('globs', []))
  if not included:
    return False

  # Each exclude entry is itself a filespec, so it is evaluated recursively.
  return not any(matches_filespec(path, excluded) for excluded in spec.get('exclude', []))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# coding=utf-8 | ||
# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from __future__ import (absolute_import, division, generators, nested_scopes, print_function, | ||
unicode_literals, with_statement) | ||
|
||
import re | ||
import unittest | ||
|
||
from pants.source.filespec import glob_to_regex | ||
|
||
|
||
class GlobToRegexTest(unittest.TestCase):
  """Checks which paths the regex produced by `glob_to_regex` does and does not match."""

  def assert_rule_match(self, glob, expected_matches, negate=False):
    """Assert that every path in `expected_matches` matches (or fails to match) `glob`.

    :param string glob: The glob pattern to convert with `glob_to_regex`.
    :param expected_matches: An iterable of path strings. Pass a tuple or list; a bare
                             string would be iterated character by character.
    :param bool negate: If True, assert that none of the paths match instead.
    """
    if negate:
      asserter, match_state = self.assertIsNone, 'erroneously matches'
    else:
      asserter, match_state = self.assertIsNotNone, "doesn't match"

    regex = glob_to_regex(glob)
    for expected in expected_matches:
      asserter(re.match(regex, expected), 'glob_to_regex(`{}`) -> `{}` {} path `{}`'
                                          .format(glob, regex, match_state, expected))

  def test_glob_to_regex_single_star_0(self):
    self.assert_rule_match('a/b/*/f.py', ('a/b/c/f.py', 'a/b/q/f.py'))

  def test_glob_to_regex_single_star_0_neg(self):
    self.assert_rule_match('a/b/*/f.py', ('a/b/c/d/f.py', 'a/b/f.py'), negate=True)

  def test_glob_to_regex_single_star_1(self):
    self.assert_rule_match('foo/bar/*', ('foo/bar/baz', 'foo/bar/bar'))

  def test_glob_to_regex_single_star_2(self):
    self.assert_rule_match('*/bar/b*', ('foo/bar/baz', 'foo/bar/bar'))

  def test_glob_to_regex_single_star_2_neg(self):
    self.assert_rule_match('*/bar/b*', ('foo/koo/bar/baz', 'foo/bar/bar/zoo'), negate=True)

  def test_glob_to_regex_single_star_3(self):
    self.assert_rule_match('/*/[be]*/b*', ('/foo/bar/baz', '/foo/bar/bar'))

  def test_glob_to_regex_single_star_4(self):
    self.assert_rule_match('/foo*/bar', ('/foofighters/bar', '/foofighters.venv/bar'))

  def test_glob_to_regex_single_star_4_neg(self):
    self.assert_rule_match('/foo*/bar', ('/foofighters/baz/bar',), negate=True)

  def test_glob_to_regex_double_star_0(self):
    self.assert_rule_match('**', ('a/b/c', 'a'))

  def test_glob_to_regex_double_star_1(self):
    self.assert_rule_match('a/**/f', ('a/f', 'a/b/c/d/e/f'))

  def test_glob_to_regex_double_star_2(self):
    self.assert_rule_match('a/b/**', ('a/b/c', 'a/b/c/d/e/f'))

  def test_glob_to_regex_double_star_2_neg(self):
    # The trailing comma matters: without it the argument is the string 'a/b' and the
    # helper would check the characters 'a', '/' and 'b' instead of the path itself.
    self.assert_rule_match('a/b/**', ('a/b',), negate=True)

  def test_glob_to_regex_leading_slash_0(self):
    self.assert_rule_match('/a/*', ('/a/a', '/a/b.py'))

  def test_glob_to_regex_leading_slash_0_neg(self):
    self.assert_rule_match('/a/*', ('a/a', 'a/b.py'), negate=True)

  def test_glob_to_regex_leading_slash_1(self):
    self.assert_rule_match('/*', ('/a', '/a.py'))

  def test_glob_to_regex_leading_slash_1_neg(self):
    self.assert_rule_match('/*', ('a', 'a.py'), negate=True)

  def test_glob_to_regex_leading_slash_2(self):
    self.assert_rule_match('/**', ('/a', '/a/b/c/d/e/f'))

  def test_glob_to_regex_leading_slash_2_neg(self):
    self.assert_rule_match('/**', ('a', 'a/b/c/d/e/f'), negate=True)

  def test_glob_to_regex_dots(self):
    self.assert_rule_match('.*', ('.pants.d', '.', '..', '.pids'))

  def test_glob_to_regex_dots_neg(self):
    self.assert_rule_match(
      '.*',
      ('a', 'a/non/dot/dir/file.py', 'dist', 'all/nested/.dot', '.some/hidden/nested/dir/file.py'),
      negate=True
    )

  def test_glob_to_regex_dirs(self):
    self.assert_rule_match('dist/', ('dist',))

  def test_glob_to_regex_dirs_neg(self):
    self.assert_rule_match('dist/', ('not_dist', 'cdist', 'dist.py', 'dist/dist'), negate=True)

  def test_glob_to_regex_dirs_dots(self):
    self.assert_rule_match(
      'build-support/*.venv/',
      ('build-support/*.venv',
       'build-support/rbt.venv')
    )

  def test_glob_to_regex_dirs_dots_neg(self):
    self.assert_rule_match('build-support/*.venv/',
                           ('build-support/rbt.venv.but_actually_a_file',),
                           negate=True)

  def test_glob_to_regex_literals(self):
    self.assert_rule_match('a', ('a',))

  def test_glob_to_regex_literal_dir(self):
    self.assert_rule_match('a/b/c', ('a/b/c',))

  def test_glob_to_regex_literal_file(self):
    self.assert_rule_match('a/b/c.py', ('a/b/c.py',))