forked from sony/nmos-cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex_utils.h
195 lines (176 loc) · 6.65 KB
/
regex_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#ifndef CPPREST_REGEX_UTILS_H
#define CPPREST_REGEX_UTILS_H
#include <map>
#include "bst/regex.h"
// An implementation of named capture on top of bst::basic_regex (could be extracted from the cpprest module)
namespace xregex
{
// parse_regex_named_sub_matches parses a regular expression that is appropriate for bst::regex_match, etc.
// with the addition of handling the common extension to specify named sub_matches of the form (?<name>...)
// returning a regex with the name-specifier removed, and a map from each name to its sub_match index
// See http://xregexp.com/syntax/named_capture_comparison/
// Aliases parameterized on the character type, so the functions below work for any basic_string

// the string type for the character type
template <typename CharT>
using string_t = std::basic_string<CharT>;

// the output string stream type for the character type
template <typename CharT>
using ostringstream_t = std::basic_ostringstream<CharT>;

// the bst regex type for the character type
template <typename CharT>
using regex_t = bst::basic_regex<CharT>;

// the match results type for matches over a string_t
template <typename CharT>
using smatch_t = bst::match_results<typename string_t<CharT>::const_iterator>;

// the sub_match type for matches over a string_t
template <typename CharT>
using sub_match_t = bst::sub_match<typename string_t<CharT>::const_iterator>;

// a map from each sub_match name to its sub_match index
template <typename CharT>
using named_sub_matches_t = std::map<string_t<CharT>, typename smatch_t<CharT>::size_type>;

// a regex pattern (as a string) paired with the map of its named sub_matches
template <typename CharT>
using regex_named_sub_matches_t = std::pair<string_t<CharT>, named_sub_matches_t<CharT>>;
// The characters of regex syntax that have to be recognized, or generated, to handle named sub_matches
namespace regex_specials
{
    // makes the following character literal
    constexpr char escape = '\\';

    // the delimiters of a sub_match, i.e. (...)
    constexpr char sub_match_start = '(';
    constexpr char sub_match_finish = ')';

    // immediately after sub_match_start, introduces an extension, i.e. (?...)
    constexpr char sub_match_extension = '?';

    // the kinds of extension, identified by the character after sub_match_extension
    constexpr char sub_match_non_marking = ':';
    constexpr char sub_match_positive_lookahead = '=';
    constexpr char sub_match_negative_lookahead = '!';

    // the delimiters of the name in a named sub_match, i.e. (?<name>...)
    constexpr char sub_match_name_start = '<';
    constexpr char sub_match_name_finish = '>';
}
// Compose the pattern for a named sub_match, i.e. (?<name>sub_match)
// name is the name for the sub_match, and sub_match is the pattern to be enclosed
// Note that name is inserted as is; it isn't checked for an embedded sub_match_name_finish
template <typename Char>
string_t<Char> make_named_sub_match(const string_t<Char>& name, const string_t<Char>& sub_match)
{
    namespace specials = ::xregex::regex_specials;

    ostringstream_t<Char> pattern;
    // the opening of the extended sub_match, i.e. (?<
    pattern << specials::sub_match_start << specials::sub_match_extension << specials::sub_match_name_start;
    // the name and its terminator, i.e. name>
    pattern << name << specials::sub_match_name_finish;
    // the enclosed pattern and the end of the sub_match, i.e. sub_match)
    pattern << sub_match << specials::sub_match_finish;
    return pattern.str();
}
// Parse a regular expression that may contain named sub_matches of the form (?<name>...)
//
// Returns a pair of
// - first: the regex with each name-specifier removed, i.e. (?<name>...) becomes (...)
// - second: a map from each name to the index of its sub_match
//
// The regex is scanned once, character by character, with a small state machine:
// - an escaped character, e.g. \( is copied unchanged and never starts a sub_match
// - non-marking groups (?:...) and lookahead assertions (?=...) and (?!...) are copied unchanged
//   and are not counted as sub_matches
// - lookbehind assertions (?<=...) and (?<!...) are likewise copied unchanged and not counted,
//   rather than being mistaken for named sub_matches
// - any other (?... extension is copied unchanged up to the next ')' and is still counted as a sub_match
// - the content of a bracket expression [...] is copied unchanged, since '(' doesn't start a sub_match
//   there; the bracket expression is taken to finish at the first ']' that isn't escaped
//
// An empty name is accepted; if a name is repeated, the index of its last sub_match is kept.
// The regex isn't validated, e.g. a name-specifier that is never terminated is silently dropped.
template <typename Char>
regex_named_sub_matches_t<Char> parse_regex_named_sub_matches(const string_t<Char>& regex)
{
    using namespace regex_specials;

    // the delimiters of a bracket expression, e.g. [abc]
    const char char_class_start = '[';
    const char char_class_finish = ']';

    enum state_t
    {
        normal,
        escaped,             // after escape
        sub_match,           // after sub_match_start, i.e. (
        extended_sub_match,  // after (?
        named_sub_match,     // after (?<
        unknown_extension,   // after (? and an unrecognized character
        char_class,          // after an unescaped [
        char_class_escaped   // after escape inside a bracket expression
    };

    regex_named_sub_matches_t<Char> result;
    typename smatch_t<Char>::size_type sub_match_count = 0;
    string_t<Char> sub_match_name;
    state_t state = normal;

    for (auto ch : regex)
    {
        // whether ch belongs in the resulting regex; only the name-specifier characters are dropped
        bool copy = true;

        switch (state)
        {
        case normal:
            switch (ch)
            {
            case escape:
                state = escaped;
                break;
            case sub_match_start:
                ++sub_match_count;
                state = sub_match;
                break;
            case char_class_start:
                state = char_class;
                break;
            default:
                //state = normal;
                break;
            }
            break;

        case escaped:
            state = normal;
            break;

        case sub_match:
            switch (ch)
            {
            case sub_match_extension:
                // held back until the kind of extension is known
                copy = false;
                state = extended_sub_match;
                break;
            // else identical to normal transitions
            case escape:
                state = escaped;
                break;
            case sub_match_start:
                ++sub_match_count;
                state = sub_match;
                break;
            case char_class_start:
                state = char_class;
                break;
            default:
                state = normal;
                break;
            }
            break;

        case extended_sub_match:
            switch (ch)
            {
            case sub_match_name_start:
                copy = false;
                state = named_sub_match;
                break;
            case sub_match_positive_lookahead:
            case sub_match_negative_lookahead:
            case sub_match_non_marking:
                // See http://en.cppreference.com/w/cpp/regex/ecmascript#Assertions
                // and http://en.cppreference.com/w/cpp/regex/ecmascript#Atoms
                // restore the held back extension character; this group isn't a sub_match after all
                result.first.push_back(sub_match_extension);
                --sub_match_count;
                state = normal;
                break;
            default:
                result.first.push_back(sub_match_extension);
                state = unknown_extension;
                break;
            }
            break;

        case named_sub_match:
            copy = false;
            switch (ch)
            {
            case sub_match_name_finish:
                result.second[sub_match_name] = sub_match_count;
                sub_match_name.clear();
                state = normal;
                break;
            case sub_match_positive_lookahead:
            case sub_match_negative_lookahead:
                if (sub_match_name.empty())
                {
                    // (?<= or (?<! is a lookbehind assertion, not the start of a name, so restore
                    // the held back characters; this group isn't a sub_match after all
                    result.first.push_back(sub_match_extension);
                    result.first.push_back(sub_match_name_start);
                    copy = true;
                    --sub_match_count;
                    state = normal;
                }
                else
                {
                    sub_match_name.push_back(ch);
                    //state = named_sub_match;
                }
                break;
            default:
                sub_match_name.push_back(ch);
                //state = named_sub_match;
                break;
            }
            break;

        case unknown_extension:
            switch (ch)
            {
            case sub_match_finish:
                state = normal;
                break;
            default:
                //state = unknown_extension;
                break;
            }
            break;

        case char_class:
            // sub_match_start, etc. are ordinary characters inside a bracket expression
            switch (ch)
            {
            case escape:
                state = char_class_escaped;
                break;
            case char_class_finish:
                state = normal;
                break;
            default:
                //state = char_class;
                break;
            }
            break;

        case char_class_escaped:
            state = char_class;
            break;

        default: // unreachable
            break;
        }

        if (copy)
        {
            result.first.push_back(ch);
        }
    }

    return result;
}
}
#include "cpprest/details/basic_types.h"
// An implementation of named capture for utility::char_t
namespace utility
{
    // The xregex types for the character type utility::char_t
    using regex_t = ::xregex::regex_t<char_t>; // uregex
    using smatch_t = ::xregex::smatch_t<char_t>; // usmatch
    using sub_match_t = ::xregex::sub_match_t<char_t>; // ussub_match
    using named_sub_matches_t = ::xregex::named_sub_matches_t<char_t>;
    using regex_named_sub_matches_t = ::xregex::regex_named_sub_matches_t<char_t>;

    // Forwards to ::xregex::make_named_sub_match for utility::char_t
    inline string_t make_named_sub_match(const string_t& name, const string_t& sub_match)
    {
        return ::xregex::make_named_sub_match<char_t>(name, sub_match);
    }

    // Forwards to ::xregex::parse_regex_named_sub_matches for utility::char_t
    inline regex_named_sub_matches_t parse_regex_named_sub_matches(const string_t& regex)
    {
        return ::xregex::parse_regex_named_sub_matches<char_t>(regex);
    }
}
#endif