-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathjson_path.h
497 lines (402 loc) · 15.7 KB
/
json_path.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
#ifndef SQL_JSON_PATH_INCLUDED
#define SQL_JSON_PATH_INCLUDED
/* Copyright (c) 2015, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/**
@file json_path.h
This file contains interface support for the JSON path abstraction.
The path abstraction is described by the functional spec
attached to WL#7909.
*/
#include <assert.h>
#include <stddef.h>
#include <algorithm>
#include <new>
#include <string>
#include <utility>
#include "my_alloc.h" // MEM_ROOT
#include "mysql/psi/psi_memory.h"
#include "prealloced_array.h" // Prealloced_array
class String;
/**
The type of a Json_path_leg.
Each enumerator identifies one kind of step that a path expression can
take: selecting an object member, selecting one or more array cells, or
matching via a wildcard or an ellipsis.
*/
enum enum_json_path_leg_type {
/**
A path leg that represents a JSON object member (such as `.name`).
This path leg matches a single member in a JSON object.
*/
jpl_member,
/**
A path leg that represents a JSON array cell (such as `[10]`).
This path leg matches a single element in a JSON array.
*/
jpl_array_cell,
/**
A path leg that represents a range in a JSON array
(such as `[2 to 7]`).
*/
jpl_array_range,
/**
@brief A path leg that represents the member wildcard.
A path leg that represents the member wildcard (`.*`), which
matches all the members of a JSON object.
*/
jpl_member_wildcard,
/**
A path leg that represents the array wildcard (`[*]`), which
matches all the elements of a JSON array.
*/
jpl_array_cell_wildcard,
/**
A path leg that represents the ellipsis (`**`), which matches any
JSON value and recursively all the JSON values nested within it if
it is an object or an array.
*/
jpl_ellipsis
};
/**
  The resolved position of an element in a JSON array of a known length.

  The position is always expressed as a 0-based offset from the start of
  the array, even when the index it was built from was given relative to
  the end of the array.
*/
class Json_array_index final {
  /**
    The resolved position. Clamped to 0 when the requested index lies
    before the first element, and to the array length when it lies after
    the last element.
  */
  size_t m_index;

  /** Whether the requested index addresses an existing element. */
  bool m_within_bounds;

  /**
    Translate a possibly end-relative index into a position counted from
    the start of the array, clamping out-of-bounds requests.

    @param index         the requested index
    @param from_end      true if @a index counts backwards from the last
                         element
    @param array_length  the number of elements in the array
    @return the clamped, start-relative position
  */
  static size_t resolve_position(size_t index, bool from_end,
                                 size_t array_length) {
    if (!from_end) {
      // Forward index: anything past the end collapses to the length.
      return index < array_length ? index : array_length;
    }
    // Backward index reaching before the first element collapses to 0.
    if (index >= array_length) return 0;
    return array_length - 1 - index;
  }

 public:
  /**
    Resolve an index against an array of the given length.

    @param index         the array index
    @param from_end      true if @a index is relative to the end of the array
    @param array_length  the length of the array
  */
  Json_array_index(size_t index, bool from_end, size_t array_length)
      : m_index(resolve_position(index, from_end, array_length)),
        m_within_bounds(index < array_length) {}

  /**
    Tell whether the index addresses an element that exists in the array.

    @retval true   the index is within the bounds of the array
    @retval false  the index is outside the array
  */
  bool within_bounds() const { return m_within_bounds; }

  /**
    Get the 0-based position, relative to the start of the array.

    For an out-of-bounds index this is 0 if the index is before the first
    element, or the array length if it is after the last element.

    @return the position in the array
  */
  size_t position() const { return m_index; }
};
/**
  A single step ("leg") of a JSON path expression.

  Depending on its type, a leg names a member of an object, addresses a
  cell or an inclusive range of cells in an array (0-based, counted from
  either end of the array), or acts as a wildcard or an ellipsis.
*/
class Json_path_leg final {
  /// Which kind of leg this is.
  enum_json_path_leg_type m_leg_type;

  /// Index of the array cell, or of the first cell of an array range.
  size_t m_first_array_index = 0;

  /// True if #m_first_array_index counts from the end of the array.
  bool m_first_array_index_from_end = false;

  /// Index of the last cell of an array range (the range includes it).
  size_t m_last_array_index = 0;

  /// True if #m_last_array_index counts from the end of the array.
  bool m_last_array_index_from_end = false;

  /// Name of the member for a member leg.
  std::string m_member_name;

 public:
  /**
    Create a wildcard or ellipsis leg.

    @param leg_type  one of #jpl_member_wildcard, #jpl_array_cell_wildcard
                     or #jpl_ellipsis (checked by an assertion)
  */
  explicit Json_path_leg(enum_json_path_leg_type leg_type)
      : m_leg_type(leg_type) {
    assert(leg_type == jpl_member_wildcard ||
           leg_type == jpl_array_cell_wildcard || leg_type == jpl_ellipsis);
  }

  /**
    Create an array cell leg addressed from the beginning of the array.

    @param index  the 0-based index in the array, relative to the
                  beginning of the array
  */
  explicit Json_path_leg(size_t index)
      : m_leg_type(jpl_array_cell), m_first_array_index(index) {}

  /**
    Create an array cell leg.

    @param index     the 0-based index in the array
    @param from_end  true if @a index is relative to the end of the array
  */
  Json_path_leg(size_t index, bool from_end)
      : m_leg_type(jpl_array_cell),
        m_first_array_index(index),
        m_first_array_index_from_end(from_end) {}

  /**
    Create an array range leg.

    @param idx1           the start index of the range, inclusive
    @param idx1_from_end  true if the start index is relative to the end
                          of the array
    @param idx2           the last index of the range, inclusive
    @param idx2_from_end  true if the last index is relative to the end
                          of the array
  */
  Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
                bool idx2_from_end)
      : m_leg_type(jpl_array_range),
        m_first_array_index(idx1),
        m_first_array_index_from_end(idx1_from_end),
        m_last_array_index(idx2),
        m_last_array_index_from_end(idx2_from_end) {}

  /**
    Create an object member leg from a character buffer.

    @param member_name  the name of the object member
    @param length       the length of the member name
  */
  Json_path_leg(const char *member_name, size_t length)
      : m_leg_type(jpl_member), m_member_name(member_name, length) {}

  /**
    Create an object member leg from a string.

    @param member_name  the name of the object member
  */
  Json_path_leg(const std::string &member_name)
      : m_leg_type(jpl_member), m_member_name(member_name) {}

  /** Get the type of this leg. */
  enum_json_path_leg_type get_type() const { return m_leg_type; }

  /** Get the member name of a ::jpl_member leg. */
  const std::string &get_member_name() const { return m_member_name; }

  /** Append a human-readable form of this leg to @a buf. */
  bool to_string(String *buf) const;

  /**
    Is this leg an auto-wrapping array accessor?

    An auto-wrapping array accessor matches non-arrays by treating them as
    if they were wrapped in a single-element array before matching.

    Returns true for any ::jpl_array_cell or ::jpl_array_range leg that
    would match the element of a single-element array, and which would
    therefore also match a non-array that has been auto-wrapped.
  */
  bool is_autowrap() const;

  /**
    Resolve the cell addressed by an array cell leg, or the first cell of
    an array range leg, against an array of the given length.

    @param array_length  the length of the array
  */
  Json_array_index first_array_index(size_t array_length) const {
    assert(m_leg_type == jpl_array_range || m_leg_type == jpl_array_cell);
    return {m_first_array_index, m_first_array_index_from_end, array_length};
  }

  /**
    Resolve the last cell of an array range leg against an array of the
    given length. The range includes this cell.

    @param array_length  the length of the array
  */
  Json_array_index last_array_index(size_t array_length) const {
    assert(m_leg_type == jpl_array_range);
    return {m_last_array_index, m_last_array_index_from_end, array_length};
  }

  /** A half-open interval of array positions. */
  struct Array_range {
    size_t m_begin;  ///< First position in the range (inclusive).
    size_t m_end;    ///< Position just past the range (exclusive).
  };

  /**
    Get the array range covered by a leg of type ::jpl_array_range or
    ::jpl_array_cell_wildcard.

    @param array_length  the length of the array
  */
  Array_range get_array_range(size_t array_length) const;
};
/// Array of pointers to const Json_path_leg objects, used to store the legs
/// of a path. NOTE(review): the second template argument (8) is presumably
/// the number of preallocated slots of the Prealloced_array - confirm.
using Json_path_leg_pointers = Prealloced_array<const Json_path_leg *, 8>;
/// Read-only iterator over the leg pointers of a path.
using Json_path_iterator = Json_path_leg_pointers::const_iterator;
/**
  Base class for path expressions that can be used to seek to a position
  inside a JSON value.

  It holds the sequence of legs as pointers and exposes read-only access
  to them. It can only be constructed by derived classes.
*/
class Json_seekable_path {
 public:
  /** Get the number of legs in this path. */
  size_t leg_count() const { return m_path_legs.size(); }

  /** Get an iterator positioned at the first leg. */
  Json_path_iterator begin() const { return m_path_legs.begin(); }

  /** Get an iterator positioned just past the last leg. */
  Json_path_iterator end() const { return m_path_legs.end(); }

  /** Get a pointer to the last leg. The path must not be empty. */
  const Json_path_leg *last_leg() const { return m_path_legs.back(); }

 protected:
  /** Pointers to the legs that make up the path, in order. */
  Json_path_leg_pointers m_path_legs;

  /**
    Construct a path with the given memory instrumentation key.

    @param key  the PSI memory key passed in by the derived class
  */
  explicit Json_seekable_path(PSI_memory_key key);
};
/**
  A JSON path expression.

  From the user's point of view, a path expression is a string literal
  with the following structure. We parse this structure into a
  Json_path object:

      pathExpression ::= scope pathLeg (pathLeg)*

      scope ::= dollarSign

      pathLeg ::= member | arrayLocation | doubleAsterisk

      member ::= period (keyName | asterisk)

      arrayLocation ::=
        leftBracket
          (arrayIndex | arrayRange | asterisk)
        rightBracket

      arrayIndex ::=
        non-negative-integer |
        last [ minus non-negative-integer ]

      arrayRange ::= arrayIndex to arrayIndex

      keyName ::= ECMAScript-identifier | ECMAScript-string-literal

      doubleAsterisk ::= **

      to ::= "to"

      last ::= "last"

  The Json_path_leg objects are constructed inside #m_mem_root and are
  destroyed explicitly by this class (see the destructor, append() and
  clear()).
*/
class Json_path final : public Json_seekable_path {
 private:
  /**
    A MEM_ROOT in which the Json_path_leg objects pointed to by
    #Json_seekable_path::m_path_legs are allocated.
  */
  MEM_ROOT m_mem_root;

  /**
    Key used to instrument memory usage.
  */
  PSI_memory_key m_psi_key;

 public:
  /**
    Construct an empty path.

    @param key  the key used to instrument memory usage
  */
  explicit Json_path(PSI_memory_key key);

  /**
    Destroy every leg registered in the path. The legs were created with
    placement new, so their destructors have to be invoked explicitly.
  */
  ~Json_path() {
    for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
  }

  /** Move constructor. */
  Json_path(Json_path &&other)
      : Json_seekable_path(other.m_psi_key),
        m_mem_root(std::move(other.m_mem_root)),
        m_psi_key(other.m_psi_key) {
    // Move the contents of m_path_legs from other into this.
    m_path_legs = std::move(other.m_path_legs);

    /*
      Must also make sure that other.m_path_legs is empty, so that we
      don't end up destroying the same objects twice; once from this's
      destructor and once from other's destructor.

      Move-constructing a vector would usually leave "other" empty,
      but it is not guaranteed. Furthermore, m_path_legs is a
      Prealloced_array, not a std::vector, so often moving will mean
      copying from one prealloced area to another instead of simply
      swapping pointers to the backing array. (And at the time of
      writing Prealloced_array doesn't even have a move-assignment
      operator, so the above assignment will always copy and leave
      "other" unchanged.)
    */
    other.m_path_legs.clear();
  }

  /**
    Move assignment. Implemented by destroying this object and
    move-constructing a new one in its place; self-assignment is a no-op.
  */
  Json_path &operator=(Json_path &&other) {
    if (&other != this) {
      this->~Json_path();
      new (this) Json_path(std::move(other));
    }
    return *this;
  }

  /**
    Add a path leg to the end of this path.

    A copy of @a leg is constructed in #m_mem_root and a pointer to it is
    appended to the path. If the pointer cannot be appended, the copy is
    destroyed again, so that resources held by the leg (such as its member
    name) are not leaked; only legs registered in the path are destroyed
    by the destructor and clear().

    @param[in] leg the leg to add
    @return false on success, true on error
  */
  bool append(const Json_path_leg &leg) {
    auto ptr = new (&m_mem_root) Json_path_leg(leg);
    if (ptr == nullptr) return true;  // Could not allocate the leg.

    if (m_path_legs.push_back(ptr)) {
      // The leg never became part of the path, so nobody else will ever
      // run its destructor. Do it here.
      ptr->~Json_path_leg();
      return true;
    }

    return false;
  }

  /**
    Resets this to an empty path with no legs.
  */
  void clear() {
    // Destruct all the Json_path_leg objects, and clear the pointers to them.
    for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
    m_path_legs.clear();
    // Mark the memory as ready for reuse.
    m_mem_root.ClearForReuse();
  }

  /**
    Return true if the path can match more than one value in a JSON document.

    @retval true   if the path contains a path leg which is a wildcard,
                   ellipsis or array range
    @retval false  otherwise
  */
  bool can_match_many() const;

  /** Turn into a human-readable string. */
  bool to_string(String *buf) const;
};
/**
  A lightweight, non-owning path expression.

  It lets a path be assembled from the legs of other paths without
  allocating heap memory to copy those legs. The Json_path_leg objects
  pointed to by #Json_seekable_path::m_path_legs are not owned by this
  class; they belong to a Json_path instance.
*/
class Json_path_clone final : public Json_seekable_path {
 public:
  /**
    Construct an empty cloned path.

    @param key  the memory instrumentation key forwarded to the base class
  */
  explicit Json_path_clone(PSI_memory_key key) : Json_seekable_path(key) {}

  /**
    Append a pointer to an existing leg to the end of this cloned path.
    The leg itself is not copied.

    @param[in] leg the leg to add
    @return false on success, true on error
  */
  bool append(const Json_path_leg *leg) {
    const bool failed = m_path_legs.push_back(leg);
    return failed;
  }

  /**
    Reset this to an empty path with no legs. Only the pointers are
    dropped; the legs they point to are left untouched.
  */
  void clear() {
    m_path_legs.clear();
  }
};
/**
Initialize a Json_path from a path expression.
Stops parsing on the first error. It initializes the Json_path and
returns false if the path is parsed successfully. Otherwise, it
returns true. In that case, the output bad_index argument will
contain an index into the path expression. The parsing failed near
that index.
@param[in] path_length The length of the path expression.
@param[in] path_expression The string form of the path expression.
@param[out] path The Json_path object to be initialized.
@param[out] bad_index If true is returned, the parsing failed around here.
@return false on success, true on error
*/
bool parse_path(size_t path_length, const char *path_expression,
Json_path *path, size_t *bad_index);
/**
A helper function that uses the
parse_path(size_t, const char *, Json_path *, size_t *) overload as its
workhorse. Entry point for JSON_TABLE (Table_function_json class) and
Json_path_cache. Raises an error if the path expression is syntactically
incorrect. Raises an error if the path expression contains wildcard
tokens but is not supposed to. Otherwise updates the supplied Json_path
object with the parsed path.
@param[in] path_value A String to be interpreted as a path.
@param[in] forbid_wildcards True if the path shouldn't contain * or **
@param[out] json_path The object that will hold the parsed path
@returns false on success (valid path or NULL), true on error
*/
bool parse_path(const String &path_value, bool forbid_wildcards,
Json_path *json_path);
#endif /* SQL_JSON_PATH_INCLUDED */