forked from open-source-parsers/jsoncpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathantglob.py
201 lines (183 loc) · 7.66 KB
/
antglob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python
# encoding: utf-8
# Baptiste Lepilleur, 2009
from dircache import listdir
import re
import fnmatch
import os.path
# These fnmatch expressions are used by default to prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
# These fnmatch expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl method of glob function.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
# These ant_glob expressions are used by default to exclude files and dirs and also prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )
def ant_pattern_to_re( ant_pattern ):
"""Generates a regular expression from the ant pattern.
Matching convention:
**/a: match 'a', 'dir/a', 'dir1/dir2/a'
a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
*.py: match 'script.py' but not 'a/script.py'
"""
rex = ['^']
next_pos = 0
sep_rex = r'(?:/|%s)' % re.escape( os.path.sep )
## print 'Converting', ant_pattern
for match in _ANT_RE.finditer( ant_pattern ):
## print 'Matched', match.group()
## print match.start(0), next_pos
if match.start(0) != next_pos:
raise ValueError( "Invalid ant pattern" )
if match.group(1): # /**/
rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
elif match.group(2): # **/
rex.append( '(?:.*%s)?' % sep_rex )
elif match.group(3): # /**
rex.append( sep_rex + '.*' )
elif match.group(4): # *
rex.append( '[^/%s]*' % re.escape(os.path.sep) )
elif match.group(5): # /
rex.append( sep_rex )
else: # somepath
rex.append( re.escape(match.group(6)) )
next_pos = match.end()
rex.append('$')
return re.compile( ''.join( rex ) )
def _as_list( l ):
if isinstance(l, basestring):
return l.split()
return l
def glob(dir_path,
includes = '**/*',
excludes = default_excludes,
entry_type = FILE,
prune_dirs = prune_dirs,
max_depth = 25):
include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
dir_path = dir_path.replace('/',os.path.sep)
entry_type_filter = entry_type
def is_pruned_dir( dir_name ):
for pattern in prune_dirs:
if fnmatch.fnmatch( dir_name, pattern ):
return True
return False
def apply_filter( full_path, filter_rexs ):
"""Return True if at least one of the filter regular expression match full_path."""
for rex in filter_rexs:
if rex.match( full_path ):
return True
return False
def glob_impl( root_dir_path ):
child_dirs = [root_dir_path]
while child_dirs:
dir_path = child_dirs.pop()
for entry in listdir( dir_path ):
full_path = os.path.join( dir_path, entry )
## print 'Testing:', full_path,
is_dir = os.path.isdir( full_path )
if is_dir and not is_pruned_dir( entry ): # explore child directory ?
## print '===> marked for recursion',
child_dirs.append( full_path )
included = apply_filter( full_path, include_filter )
rejected = apply_filter( full_path, exclude_filter )
if not included or rejected: # do not include entry ?
## print '=> not included or rejected'
continue
link = os.path.islink( full_path )
is_file = os.path.isfile( full_path )
if not is_file and not is_dir:
## print '=> unknown entry type'
continue
if link:
entry_type = is_file and FILE_LINK or DIR_LINK
else:
entry_type = is_file and FILE or DIR
## print '=> type: %d' % entry_type,
if (entry_type & entry_type_filter) != 0:
## print ' => KEEP'
yield os.path.join( dir_path, entry )
## else:
## print ' => TYPE REJECTED'
return list( glob_impl( dir_path ) )
if __name__ == "__main__":
import unittest
class AntPatternToRETest(unittest.TestCase):
## def test_conversion( self ):
## self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )
def test_matching( self ):
test_cases = [ ( 'path',
['path'],
['somepath', 'pathsuffix', '/path', '/path'] ),
( '*.py',
['source.py', 'source.ext.py', '.py'],
['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
( '**/path',
['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
( 'path/**',
['path/a', 'path/path/a', 'path//'],
['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
( '/**/path',
['/path', '/a/path', '/a/b/path/path', '/path/path'],
['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
( 'a/b',
['a/b'],
['somea/b', 'a/bsuffix', 'a/b/c'] ),
( '**/*.py',
['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
['script.pyc', 'script.pyo', 'a.py/b'] ),
( 'src/**/*.py',
['src/a.py', 'src/dir/a.py'],
['a/src/a.py', '/src/a.py'] ),
]
for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
def local_path( paths ):
return [ p.replace('/',os.path.sep) for p in paths ]
test_cases.append( (ant_pattern, local_path(accepted_matches), local_path( rejected_matches )) )
for ant_pattern, accepted_matches, rejected_matches in test_cases:
rex = ant_pattern_to_re( ant_pattern )
print 'ant_pattern:', ant_pattern, ' => ', rex.pattern
for accepted_match in accepted_matches:
print 'Accepted?:', accepted_match
self.assert_( rex.match( accepted_match ) is not None )
for rejected_match in rejected_matches:
print 'Rejected?:', rejected_match
self.assert_( rex.match( rejected_match ) is None )
unittest.main()