Skip to content

Commit

Permalink
modify matching logic in changed task
Browse files Browse the repository at this point in the history
"changed" task works by matching changed file path with filespecs in target's "sources" field. It has its own matching logic which is here:
https://github.com/pantsbuild/pants/blob/master/src/python/pants/source/wrapped_globs.py#L22-L34

This logic uses fnmatch module internally, which has 2 issues:
1. fnmatch does not distinguish between "**" and "*".
2. fnmatch matches "*" across directory boundry. For example, fnmatch('a/b/c.py', '*.py') will return True.

The above 2 issues break "changed" task in 2 ways:
1. After https://rbcommons.com/s/twitter/r/4078/, v1 and v2 engine both match "**" with 0 or more dirs. That means, for rglobs('*.py'), only filespec '**/*.py' will be generated, but fnmatch will always match double star with 1 or more dirs.
2. For globs('*.py'), only py files in the current dir should be matched, but fnmatch will match files recursively in subdirs.

This review fix the matching logic using regular expression and enhance the test cases.

Note:
the alternative will be matching file path with sources file list directly, but that may impact performance negatively. I would also like to hear about opinions from reviewers on this.

Testing Done:
https://travis-ci.org/pantsbuild/pants/builds/161758103

Test in Twitter's internal repo as well.

Bugs closed: 3888

Reviewed at https://rbcommons.com/s/twitter/r/4248/
  • Loading branch information
JieGhost committed Sep 21, 2016
1 parent eedb7cc commit 8565b63
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 21 deletions.
60 changes: 60 additions & 0 deletions src/python/pants/source/filespec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# coding=utf-8
# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import re


def glob_to_regex(pattern):
"""Given a glob pattern, return an equivalent regex expression.
:param string glob: The glob pattern. "**" matches 0 or more dirs recursively.
"*" only matches patterns in a single dir.
:returns: A regex string that matches same paths as the input glob does.
"""
out = ['^']
components = pattern.strip('/').replace('.', '[.]').split('/')
doublestar = False
for component in components:
if len(out) == 1:
if pattern.startswith('/'):
out.append('/')
else:
if not doublestar:
out.append('/')

if '**' in component:
if component != '**':
raise ValueError('Invalid usage of "**", use "*" instead.')

if not doublestar:
out.append('(([^/]+/)*)')
doublestar = True
else:
out.append(component.replace('*', '[^/]*'))
doublestar = False

if doublestar:
out.append('[^/]*')

out.append('$')

return ''.join(out)


def globs_matches(path, patterns):
return any(re.match(glob_to_regex(pattern), path) for pattern in patterns)


def matches_filespec(path, spec):
if spec is None:
return False
if not globs_matches(path, spec.get('globs', [])):
return False
for spec in spec.get('exclude', []):
if matches_filespec(path, spec):
return False
return True
3 changes: 2 additions & 1 deletion src/python/pants/source/payload_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from hashlib import sha1

from pants.base.payload_field import PayloadField
from pants.source.filespec import matches_filespec
from pants.source.source_root import SourceRootConfig
from pants.source.wrapped_globs import Files, FilesetWithSpec, matches_filespec
from pants.source.wrapped_globs import Files, FilesetWithSpec
from pants.util.memo import memoized_property


Expand Down
16 changes: 0 additions & 16 deletions src/python/pants/source/wrapped_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import fnmatch
import os
from abc import abstractmethod, abstractproperty
from hashlib import sha1
Expand All @@ -19,21 +18,6 @@
from pants.util.meta import AbstractClass


def globs_matches(path, patterns):
return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)


def matches_filespec(path, spec):
if spec is None:
return False
if not globs_matches(path, spec.get('globs', [])):
return False
for spec in spec.get('exclude', []):
if matches_filespec(path, spec):
return False
return True


class FilesetWithSpec(AbstractClass):
"""A set of files that keeps track of how we got it."""

Expand Down
34 changes: 33 additions & 1 deletion tests/python/pants_test/core_tasks/test_what_changed.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,13 @@ def setUp(self):
)
"""))

self.add_to_build_file('root/src/java/b', dedent("""
java_library(
name='b_java',
sources=globs("*.java"),
)
"""))

self.add_to_build_file('root/3rdparty/BUILD.twitter', dedent("""
jar_library(
name='dummy',
Expand Down Expand Up @@ -396,6 +403,27 @@ def test_deferred_sources_new(self):
workspace=self.workspace(files=['root/proto/BUILD'])
)

def test_rglobs_in_sources(self):
self.assert_console_output(
'root/src/java/a:a_java',
workspace=self.workspace(files=['root/src/java/a/foo.java'])
)

self.assert_console_output(
'root/src/java/a:a_java',
workspace=self.workspace(files=['root/src/java/a/b/foo.java'])
)

def test_globs_in_sources(self):
self.assert_console_output(
'root/src/java/b:b_java',
workspace=self.workspace(files=['root/src/java/b/foo.java'])
)

self.assert_console_output(
workspace=self.workspace(files=['root/src/java/b/b/foo.java'])
)

def test_globs_in_resources(self):
self.add_to_build_file('root/resources', dedent("""
resources(
Expand All @@ -405,10 +433,14 @@ def test_globs_in_resources(self):
"""))

self.assert_console_output(
'root/resources:resources',
workspace=self.workspace(files=['root/resources/foo/bar/baz.yml'])
)

self.assert_console_output(
'root/resources:resources',
workspace=self.workspace(files=['root/resources/baz.yml'])
)

def test_root_config(self):
self.assert_console_output(
'//:pants-config',
Expand Down
14 changes: 11 additions & 3 deletions tests/python/pants_test/source/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_tests(
name = 'source_root',
sources = ['test_source_root.py'],
name = 'filespec',
sources = ['test_filespec.py'],
dependencies = [
'src/python/pants/source',
'tests/python/pants_test:base_test',
]
)

Expand All @@ -20,6 +19,15 @@ python_tests(
]
)

python_tests(
name = 'source_root',
sources = ['test_source_root.py'],
dependencies = [
'src/python/pants/source',
'tests/python/pants_test:base_test',
]
)

python_tests(
name = 'wrapped_globs',
sources = ['test_wrapped_globs.py'],
Expand Down
115 changes: 115 additions & 0 deletions tests/python/pants_test/source/test_filespec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# coding=utf-8
# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import re
import unittest

from pants.source.filespec import glob_to_regex


class GlobToRegexTest(unittest.TestCase):
def assert_rule_match(self, glob, expected_matches, negate=False):
if negate:
asserter, match_state = self.assertIsNone, 'erroneously matches'
else:
asserter, match_state = self.assertIsNotNone, "doesn't match"

regex = glob_to_regex(glob)
for expected in expected_matches:
asserter(re.match(regex, expected), 'glob_to_regex(`{}`) -> `{}` {} path `{}`'
.format(glob, regex, match_state, expected))

def test_glob_to_regex_single_star_0(self):
self.assert_rule_match('a/b/*/f.py', ('a/b/c/f.py', 'a/b/q/f.py'))

def test_glob_to_regex_single_star_0_neg(self):
self.assert_rule_match('a/b/*/f.py', ('a/b/c/d/f.py','a/b/f.py'), negate=True)

def test_glob_to_regex_single_star_1(self):
self.assert_rule_match('foo/bar/*', ('foo/bar/baz', 'foo/bar/bar'))

def test_glob_to_regex_single_star_2(self):
self.assert_rule_match('*/bar/b*', ('foo/bar/baz', 'foo/bar/bar'))

def test_glob_to_regex_single_star_2_neg(self):
self.assert_rule_match('*/bar/b*', ('foo/koo/bar/baz', 'foo/bar/bar/zoo'), negate=True)

def test_glob_to_regex_single_star_3(self):
self.assert_rule_match('/*/[be]*/b*', ('/foo/bar/baz', '/foo/bar/bar'))

def test_glob_to_regex_single_star_4(self):
self.assert_rule_match('/foo*/bar', ('/foofighters/bar', '/foofighters.venv/bar'))

def test_glob_to_regex_single_star_4_neg(self):
self.assert_rule_match('/foo*/bar', ('/foofighters/baz/bar',), negate=True)

def test_glob_to_regex_double_star_0(self):
self.assert_rule_match('**', ('a/b/c', 'a'))

def test_glob_to_regex_double_star_1(self):
self.assert_rule_match('a/**/f', ('a/f', 'a/b/c/d/e/f'))

def test_glob_to_regex_double_star_2(self):
self.assert_rule_match('a/b/**', ('a/b/c', 'a/b/c/d/e/f'))

def test_glob_to_regex_double_star_2_neg(self):
self.assert_rule_match('a/b/**', ('a/b'), negate=True)

def test_glob_to_regex_leading_slash_0(self):
self.assert_rule_match('/a/*', ('/a/a', '/a/b.py'))

def test_glob_to_regex_leading_slash_0_neg(self):
self.assert_rule_match('/a/*', ('a/a', 'a/b.py'), negate=True)

def test_glob_to_regex_leading_slash_1(self):
self.assert_rule_match('/*', ('/a', '/a.py'))

def test_glob_to_regex_leading_slash_1_neg(self):
self.assert_rule_match('/*', ('a', 'a.py'), negate=True)

def test_glob_to_regex_leading_slash_2(self):
self.assert_rule_match('/**', ('/a', '/a/b/c/d/e/f'))

def test_glob_to_regex_leading_slash_2_neg(self):
self.assert_rule_match('/**', ('a', 'a/b/c/d/e/f'), negate=True)

def test_glob_to_regex_dots(self):
self.assert_rule_match('.*', ('.pants.d', '.', '..', '.pids'))

def test_glob_to_regex_dots_neg(self):
self.assert_rule_match(
'.*',
('a', 'a/non/dot/dir/file.py', 'dist', 'all/nested/.dot', '.some/hidden/nested/dir/file.py'),
negate=True
)

def test_glob_to_regex_dirs(self):
self.assert_rule_match('dist/', ('dist',))

def test_glob_to_regex_dirs_neg(self):
self.assert_rule_match('dist/', ('not_dist', 'cdist', 'dist.py', 'dist/dist'), negate=True)

def test_glob_to_regex_dirs_dots(self):
self.assert_rule_match(
'build-support/*.venv/',
('build-support/*.venv',
'build-support/rbt.venv')
)

def test_glob_to_regex_dirs_dots_neg(self):
self.assert_rule_match('build-support/*.venv/',
('build-support/rbt.venv.but_actually_a_file',),
negate=True)

def test_glob_to_regex_literals(self):
self.assert_rule_match('a', ('a',))

def test_glob_to_regex_literal_dir(self):
self.assert_rule_match('a/b/c', ('a/b/c',))

def test_glob_to_regex_literal_file(self):
self.assert_rule_match('a/b/c.py', ('a/b/c.py',))

0 comments on commit 8565b63

Please sign in to comment.