forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.jl
135 lines (115 loc) · 4.25 KB
/
regex.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
## object-oriented Regex interface ##
include("pcre.jl")
# Option bits used for every pattern that is built without an explicit options
# word (see the one- and two-String Regex constructors below); `show` also
# compares against this value to decide whether r"..." notation can be used.
# NOTE(review): the exact meaning of JAVASCRIPT_COMPAT and UTF8 is defined by
# pcre.jl / libpcre, not here -- confirm there before changing the default.
const DEFAULT_OPTS = PCRE.JAVASCRIPT_COMPAT | PCRE.UTF8
# A compiled regular expression.
#   pattern -- the pattern text, stored as a ByteString
#   options -- the full option word the regex was created with
#   regex   -- whatever PCRE.compile returned for this pattern
immutable Regex
    pattern::ByteString
    options::Uint32
    regex::Array{Uint8}

    # Build a Regex from pattern text and a raw option word. Any option bit
    # outside PCRE.OPTIONS_MASK is rejected; only the bits selected by
    # PCRE.COMPILE_MASK are handed to the compiler.
    function Regex(pattern::String, options::Integer)
        pat = bytestring(pattern)
        opts = uint32(options)
        (opts & ~PCRE.OPTIONS_MASK) == 0 || error("invalid regex options: $opts")
        compiled = PCRE.compile(pat, opts & PCRE.COMPILE_MASK)
        new(pat, opts, compiled)
    end
end
# Build a Regex from pattern text and a string of single-letter flags.
# Each flag adds one PCRE option on top of DEFAULT_OPTS:
#   i -> CASELESS, m -> MULTILINE, s -> DOTALL, x -> EXTENDED
# Any other character raises an error.
function Regex(pattern::String, flags::String)
    opts = DEFAULT_OPTS
    for flag in flags
        if flag == 'i'
            opts |= PCRE.CASELESS
        elseif flag == 'm'
            opts |= PCRE.MULTILINE
        elseif flag == 's'
            opts |= PCRE.DOTALL
        elseif flag == 'x'
            opts |= PCRE.EXTENDED
        else
            error("unknown regex flag: $flag")
        end
    end
    return Regex(pattern, opts)
end
# Build a Regex from pattern text alone, using DEFAULT_OPTS as the option word.
function Regex(pattern::String)
    return Regex(pattern, DEFAULT_OPTS)
end
# String-literal macro behind r"..." (with optional trailing flag letters).
# The Regex is constructed while the macro itself runs, and that finished
# object -- not an expression that would build one later -- is what the macro
# returns.
macro r_str(pattern, flags...)
    Regex(pattern, flags...)
end
# Regex is declared immutable, so a "copy" is simply the same object.
function copy(r::Regex)
    return r
end
# Print a Regex. When the only options set beyond DEFAULT_OPTS are the four
# flag bits (i, m, s, x) the compact r"pattern"flags form is used; otherwise
# the output is the explicit Regex(pattern,options) form.
function show(io::IO, re::Regex)
    flagbits = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED

    # Options that cannot be expressed with flag letters: spell everything out.
    if (re.options & ~flagbits) != DEFAULT_OPTS
        print(io, "Regex(")
        show(io, re.pattern)
        print(io, ',')
        show(io, re.options)
        print(io, ')')
        return
    end

    print(io, 'r')
    print_quoted_literal(io, re.pattern)
    # Flag letters are always emitted in the fixed order i, m, s, x.
    for (bit, letter) in ((PCRE.CASELESS, 'i'), (PCRE.MULTILINE, 'm'),
                          (PCRE.DOTALL, 's'), (PCRE.EXTENDED, 'x'))
        (re.options & bit) != 0 && print(io, letter)
    end
end
# TODO: map offsets into non-ByteStrings back to original indices.
# or maybe it's better to just fail since that would be quite slow
# Result of one successful match, as assembled by `match(re, str, idx)` below.
immutable RegexMatch
# the matched substring of the subject string
match::ByteString
# one entry per capture group; `nothing` where PCRE reported a negative start
# offset for the group
captures::Vector{Union(Nothing,ByteString)}
# 1-based index in the subject where the whole match starts (PCRE offset + 1);
# presumably a byte index, since the subject is a ByteString -- confirm
offset::Int
# 1-based start index of each capture group, computed the same way
# NOTE(review): for a group with a negative PCRE offset this is still
# "offset + 1" (so likely 0) -- confirm against PCRE.exec
offsets::Vector{Int}
end
# Print a RegexMatch as RegexMatch("text", 1="cap1", 2="cap2", ...).
# With no capture groups only the matched text appears inside the parentheses.
function show(io::IO, m::RegexMatch)
    print(io, "RegexMatch(")
    show(io, m.match)
    # Every capture is preceded by a separator, which yields the same text as
    # "separator between items, plus one after the matched string".
    for k = 1:length(m.captures)
        print(io, ", ")
        print(io, k, "=")
        show(io, m.captures[k])
    end
    print(io, ")")
end
# TODO: add ismatch with an offset.
# Test `s` against `r`, starting at PCRE offset 0. The string is converted with
# `bytestring` first, and PCRE.exec is called with its final argument `false`;
# its result is returned as-is (presumably a Bool -- confirm in pcre.jl).
function ismatch(r::Regex, s::String)
    execopts = r.options & PCRE.EXECUTE_MASK
    return PCRE.exec(r.regex, C_NULL, bytestring(s), 0, execopts, false)
end
# Search `str` for `re`, beginning at 1-based index `idx`.
# Returns `nothing` when PCRE reports no match, otherwise a RegexMatch holding
# the matched text, the capture-group texts, and 1-based start positions.
function match(re::Regex, str::ByteString, idx::Integer)
    execopts = re.options & PCRE.EXECUTE_MASK
    # PCRE works with 0-based offsets, hence idx-1 on the way in and +1 below.
    ovec, ngroups = PCRE.exec(re.regex, C_NULL, str, idx-1, execopts, true)
    if isempty(ovec)
        return nothing
    end
    whole = str[ovec[1]+1:ovec[2]]
    # A negative start offset marks a group that has no captured text.
    groups = Union(Nothing,ByteString)[
        ovec[2g+1] < 0 ? nothing : str[ovec[2g+1]+1:ovec[2g+2]] for g=1:ngroups ]
    starts = Int[ ovec[2g+1]::Int32+1 for g=1:ngroups ]
    return RegexMatch(whole, groups, ovec[1]+1, starts)
end
# Convenience method: match from the first index of `s`.
function match(r::Regex, s::String)
    return match(r, s, start(s))
end

# Fallback for string types that have no more specific three-argument method
# (the ByteString method above is the specific one): always raises an error.
function match(r::Regex, s::String, i::Integer)
    error("regex matching is only available for bytestrings; use bytestring(s) to convert")
end
# Find the first match of `re` in `str` at or after 1-based index `idx`.
# Returns the index range of the match, or the empty range 0:-1 when there is
# none. An `idx` exactly two past the data length also yields 0:-1; anything
# larger is reported through error(BoundsError).
function search(str::ByteString, re::Regex, idx::Integer)
    nbytes = length(str.data)
    if idx == nbytes+2
        return 0:-1
    elseif idx > nbytes+2
        error(BoundsError)
    end
    execopts = re.options & PCRE.EXECUTE_MASK
    # PCRE offsets are 0-based: shift by one going in and coming out.
    ovec, ngroups = PCRE.exec(re.regex, C_NULL, str, idx-1, execopts, true)
    if isempty(ovec)
        return 0:-1
    end
    return (ovec[1]+1):ovec[2]
end
# Fallback for string types that have no more specific three-argument method
# (the ByteString method above is the specific one): always raises an error.
function search(s::String, r::Regex, idx::Integer)
    error("regex search is only available for bytestrings; use bytestring(s) to convert")
end

# Convenience method: search from the first index of `s`.
function search(s::String, r::Regex)
    return search(s, r, start(s))
end
# Iterator over successive matches of a regex in a string; created by
# `eachmatch` and driven by the start/done/next methods that follow.
immutable RegexMatchIterator
# the pattern being searched for
regex::Regex
# the subject string
string::ByteString
# when true, the search for the following match resumes just after the START
# of the previous match; when false it resumes after the previous match's text
overlap::Bool
end
# Initial iteration state: the first match in the string, or `nothing` when
# the pattern does not match at all.
function start(itr::RegexMatchIterator)
    return match(itr.regex, itr.string)
end
# Iteration is finished once the pending state is `nothing` (no further match).
# The sentinel is tested by identity rather than with `==`, so the result can
# never depend on a user-defined equality method.
done(itr::RegexMatchIterator, m) = m === nothing
# Yield the pending match `m` and compute the following state (the next match,
# or `nothing` when the string is exhausted).
#
# Positions here are indices into the string's byte data, so the step is always
# measured in bytes or whole characters:
#   * non-overlapping, non-empty match: resume right after the matched bytes
#     (a character count would fall short for multi-byte UTF-8 text);
#   * overlapping mode, or an empty match: resume at the next character
#     boundary, so the position is always valid and an empty match can never
#     be found again at the same place (which would loop forever);
#   * a match that already starts past the last byte cannot be followed by
#     another one, so iteration ends instead of searching out of range.
function next(itr::RegexMatchIterator, m)
    matchbytes = length(m.match.data)
    if itr.overlap || matchbytes == 0
        if m.offset > length(itr.string.data)
            return (m, nothing)
        end
        idx = nextind(itr.string, m.offset)
    else
        idx = m.offset + matchbytes
    end
    return (m, match(itr.regex, itr.string, idx))
end
# Create an iterator over the matches of `re` in `str`. `ovr` selects
# overlapping iteration; the two-argument form iterates without overlap.
function eachmatch(re::Regex, str::String, ovr::Bool)
    return RegexMatchIterator(re, str, ovr)
end

function eachmatch(re::Regex, str::String)
    return RegexMatchIterator(re, str, false)
end
# miscellaneous methods that depend on Regex being defined
# Filter a collection in place, using "matches `r`" as the predicate.
function filter!(r::Regex, v)
    matches = x -> ismatch(r, x)
    return filter!(matches, v)
end

# Non-mutating counterpart: filter `v` with "matches `r`" as the predicate.
function filter(r::Regex, v)
    matches = x -> ismatch(r, x)
    return filter(matches, v)
end
# Filter a Dict in place; the predicate tests each KEY against `r` and ignores
# the value.
function filter!(r::Regex, d::Dict)
    keymatches = (k, v) -> ismatch(r, k)
    return filter!(keymatches, d)
end

# Non-mutating counterpart: apply the in-place filter to a copy of `d`.
function filter(r::Regex, d::Dict)
    return filter!(r, copy(d))
end