Skip to content

Commit

Permalink
Add support for "hasIndices" RegExp flag (facebook#968)
Browse files Browse the repository at this point in the history
Summary:
This change adds support for the `hasIndices` (`d`) RegExp flag, as defined in the ES2022 specification.

References:
https://262.ecma-international.org/13.0/#sec-regexp-regular-expression-objects
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices

This change is also related to this issue: facebook#932

Pull Request resolved: facebook#968

Test Plan:
``` javascript
var str = "The Quick Brown Fox Jumps Over The Lazy Brown Dog";
var regex = /quick\s(?<color>brown).+?(?<action>jumps).+?(?<where>over_)*/dgi;
var res = regex.exec(str);

print(Object.getOwnPropertyNames(res)); // 0,1,2,3,length,index,input,groups,indices

print(Object.getOwnPropertyNames(res.indices)); // 0,1,2,3,length,groups
print(res.indices[0]); // 4,26
print(res.indices[1]); // 10,15
print(res.indices[2]); // 20,25
print(res.indices[3]); // undefined

print(Object.getOwnPropertyNames(res.indices.groups)); // color,action,where
print(res.indices.groups.constructor); // undefined
print(res.indices.groups.color); // 10,15
print(res.indices.groups.action); // 20,25
print(res.indices.groups.where); // undefined
```

Reviewed By: neildhar

Differential Revision: D48396577

Pulled By: fbmal7

fbshipit-source-id: 11f2e52630d3ddbc7e34c3cdbcc80849df458bc5
  • Loading branch information
fabioh8010 authored and facebook-github-bot committed Aug 29, 2023
1 parent 3b5784a commit 2afc7b0
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 23 deletions.
4 changes: 2 additions & 2 deletions include/hermes/BCGen/HBC/BytecodeVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ namespace hermes {
namespace hbc {

// Bytecode version generated by this version of the compiler.
// Updated: Mar 22, 2023
const static uint32_t BYTECODE_VERSION = 95;
// Updated: Aug 16, 2023
const static uint32_t BYTECODE_VERSION = 96;

} // namespace hbc
} // namespace hermes
Expand Down
30 changes: 22 additions & 8 deletions include/hermes/Regex/RegexTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ class SyntaxFlags {
MULTILINE = 1 << 2,
UCODE = 1 << 3,
DOTALL = 1 << 4,
STICKY = 1 << 5
STICKY = 1 << 5,
INDICES = 1 << 6,
};

public:
Expand All @@ -233,6 +234,7 @@ class SyntaxFlags {
uint8_t unicode : 1;
uint8_t dotAll : 1;
uint8_t sticky : 1;
uint8_t hasIndices : 1;

/// \return a byte representing the flags. Bits are set based on the offsets
/// specified above. This is used for serialising the flags to bytecode.
Expand All @@ -250,6 +252,8 @@ class SyntaxFlags {
ret |= STICKY;
if (dotAll)
ret |= DOTALL;
if (hasIndices)
ret |= INDICES;
return ret;
}

Expand All @@ -270,27 +274,32 @@ class SyntaxFlags {
ret.sticky = 1;
if (byte & DOTALL)
ret.dotAll = 1;
if (byte & INDICES)
ret.hasIndices = 1;
return ret;
}

/// \return a string representing the flags
/// The characters are returned in the order given in ES 6 21.2.5.3
/// (specifically global, ignoreCase, multiline, unicode, sticky)
/// Note this may differ in order from the string passed in construction
llvh::SmallString<6> toString() const {
llvh::SmallString<6> result;
/// The characters are returned in the order given in ES2022 22.2.5.4
/// (specifically hasIndices, global, ignoreCase, multiline, dotAll, unicode,
/// sticky) Note this may differ in order from the string passed in
/// construction
llvh::SmallString<7> toString() const {
llvh::SmallString<7> result;
if (hasIndices)
result.push_back('d');
if (global)
result.push_back('g');
if (ignoreCase)
result.push_back('i');
if (multiline)
result.push_back('m');
if (dotAll)
result.push_back('s');
if (unicode)
result.push_back('u');
if (sticky)
result.push_back('y');
if (dotAll)
result.push_back('s');
return result;
}

Expand Down Expand Up @@ -334,6 +343,11 @@ class SyntaxFlags {
return error;
ret.dotAll = 1;
break;
case u'd':
if (ret.hasIndices)
return error;
ret.hasIndices = 1;
break;
default:
return error;
}
Expand Down
2 changes: 2 additions & 0 deletions include/hermes/VM/PredefinedStrings.def
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ STR(multiline, "multiline")
STR(unicode, "unicode")
STR(sticky, "sticky")
STR(dotAll, "dotAll")
STR(indices, "indices")
STR(hasIndices, "hasIndices")
STR(lastIndex, "lastIndex")
STR(dollar1, "$1")
STR(dollar2, "$2")
Expand Down
139 changes: 136 additions & 3 deletions lib/VM/JSLib/RegExp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Handle<JSObject> createRegExpConstructor(Runtime &runtime) {
defineGetter(proto, Predefined::unicode, regExpFlagPropertyGetter, 'u');
defineGetter(proto, Predefined::sticky, regExpFlagPropertyGetter, 'y');
defineGetter(proto, Predefined::dotAll, regExpFlagPropertyGetter, 's');
defineGetter(proto, Predefined::hasIndices, regExpFlagPropertyGetter, 'd');

defineGetter(cons, Predefined::dollar1, regExpDollarNumberGetter, 1);
defineGetter(cons, Predefined::dollar2, regExpDollarNumberGetter, 2);
Expand Down Expand Up @@ -460,6 +461,105 @@ static void createGroupsObject(
JSObject::setNamedSlotValueUnsafe(matchObj.get(), runtime, groupsDesc, shv);
}

/// ES2022 22.2.7.8 MakeMatchIndicesIndexPairArray
/// Builds the array that directRegExpExec stores as the "indices" property of
/// a match result when the `d` (hasIndices) flag is set. Element i is either a
/// two-element array [start, end] for capture i (end = location + length), or
/// undefined when capture i has no match. The array also gets an own "groups"
/// property: an object keyed by group name when \p hasGroups is true,
/// otherwise undefined.
/// \param runtime the runtime used for allocations and handles.
/// \param S the matched input string. It is not read by this implementation;
///   presumably kept so the signature mirrors the spec - TODO confirm.
/// \param indices the captures of the match; entry 0 is the full match. An
///   empty entry means the capture did not participate.
///   NOTE(review): taken by value, so the container is copied per call -
///   confirm whether a const reference would be preferable.
/// \param mappingObj object whose named properties map a group name to the
///   numeric index of its capture. Only dereferenced when \p hasGroups is true.
/// \param hasGroups whether the regexp has named capture groups.
/// \return the populated array, or ExecutionStatus::EXCEPTION if creating an
///   array or defining the "groups" property raised.
static CallResult<Handle<JSArray>> makeMatchIndicesIndexPairArray(
Runtime &runtime,
Handle<StringPrimitive> S,
RegExpMatch indices,
Handle<JSObject> mappingObj,
bool hasGroups) {
// 1. Let n be the number of elements in indices.
size_t n = indices.size();
// 2-4 skipped because mappingObj is not a list, but an object in our
// implementation.
// 5. Let A be ! ArrayCreate(n).
auto arrRes = JSArray::create(runtime, n, n);
if (LLVM_UNLIKELY(arrRes == ExecutionStatus::EXCEPTION)) {
return ExecutionStatus::EXCEPTION;
}
Handle<JSArray> A = runtime.makeHandle<JSArray>(*arrRes);
// indices.size() is the same value as n above. Presumably this marks the n
// element slots as in use so the unsafeSetExistingElementAt calls below may
// write them - TODO confirm against JSArray.
JSArray::setStorageEndIndex(A, runtime, indices.size());

// 6-8. done later. We can't code exactly to spec here because mappingObj
// (roughly groupNames in the spec) is not an array, but an object containing
// group name and index.

// Reused across iterations to hold either the freshly built pair or undefined.
MutableHandle<> matchIndexPair{runtime};
// 9. For each integer i such that 0 ≤ i < n, in ascending order, do
// NOTE(review): each matched capture creates a new handle (`pair`) and no
// GCScope marker is flushed inside this loop, unlike the capture loop in
// directRegExpExec - confirm the handle count stays within limits for regexps
// with many capture groups.
for (size_t i = 0; i < n; i++) {
// a. Let matchIndices be indices[i]
auto matchIndices = indices[i];
// b. If matchIndices is not undefined, then
if (matchIndices) {
// i. Let matchIndexPair be GetMatchIndexPair(S, matchIndices).
auto matchIndexPairRes = JSArray::create(runtime, 2, 2);
if (LLVM_UNLIKELY(matchIndexPairRes == ExecutionStatus::EXCEPTION)) {
return ExecutionStatus::EXCEPTION;
}
auto pair = runtime.makeHandle<JSArray>(*matchIndexPairRes);
JSArray::setStorageEndIndex(pair, runtime, 2);
// Each encoded number is stored into the pair immediately after it is
// produced, before the next value is encoded. Keep this ordering when
// editing; presumably encodeNumberValue may allocate - TODO confirm.
auto firstIdx =
SmallHermesValue::encodeNumberValue(matchIndices->location, runtime);
JSArray::unsafeSetExistingElementAt(*pair, runtime, 0, firstIdx);
// The end index is the start offset plus the capture length.
auto secondIdx = SmallHermesValue::encodeNumberValue(
matchIndices->location + matchIndices->length, runtime);
JSArray::unsafeSetExistingElementAt(*pair, runtime, 1, secondIdx);
matchIndexPair = pair.getHermesValue();
} else {
// c. Else,
// i. Let matchIndexPair be undefined.
matchIndexPair = Runtime::getUndefinedValue();
}
// d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)),
// matchIndexPair).
auto shv = SmallHermesValue::encodeHermesValue(*matchIndexPair, runtime);
JSArray::unsafeSetExistingElementAt(*A, runtime, i, shv);
}

// This is done out of order. See note above.
// 6. If hasGroups is true, then
MutableHandle<> groups{runtime};
if (hasGroups) {
// a. Let groups be OrdinaryObjectCreate(null).
// The groups object is created with a null parent and with mappingObj's
// hidden class. The loop below reads from mappingObj and writes to groupsObj
// using the same desc.slot, which relies on both objects sharing that class.
auto mappingObjClazz = runtime.makeHandle(mappingObj->getClass(runtime));
auto groupsRes = JSObject::create(
runtime, Runtime::makeNullHandle<JSObject>(), mappingObjClazz);
auto groupsObj = runtime.makeHandle(groupsRes.get());
HiddenClass::forEachProperty(
mappingObjClazz,
runtime,
[&](SymbolID id, NamedPropertyDescriptor desc) {
// The slot value in mappingObj is the capture index for this group name.
auto groupIdx =
JSObject::getNamedSlotValueUnsafe(*mappingObj, runtime, desc.slot)
.getNumber(runtime);
// 9.e. If i > 0 and groupNames[i - 1] is not undefined, then
// ii. Perform ! CreateDataPropertyOrThrow(groups, groupNames[i-1],
// matchIndexPair).
// Reuse the element already stored in A for that capture (a pair or
// undefined) rather than building a second pair.
JSObject::setNamedSlotValueUnsafe(
*groupsObj, runtime, desc.slot, A->at(runtime, groupIdx));
});
groups = groupsObj.getHermesValue();
} else {
// 7. Else,
// a. Let groups be undefined.
groups = Runtime::getUndefinedValue();
}

// 8. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
if (LLVM_UNLIKELY(
JSObject::defineOwnProperty(
A,
runtime,
Predefined::getSymbolID(Predefined::groups),
DefinePropertyFlags::getDefaultNewPropertyFlags(),
groups) == ExecutionStatus::EXCEPTION)) {
return ExecutionStatus::EXCEPTION;
}

// 10. Return A.
return A;
}

// ES6 21.2.5.2.2
CallResult<Handle<JSArray>> directRegExpExec(
Handle<JSRegExp> regexp,
Expand Down Expand Up @@ -488,10 +588,13 @@ CallResult<Handle<JSArray>> directRegExpExec(

// If flags contains "g", let global be true, else let global be false
// If flags contains "y", let sticky be true, else let sticky be false.
// If flags contains "d", let hasIndices be true, else let hasIndices be
// false.
// If flags contains "u", let fullUnicode be true, else let fullUnicode be
// false.
const bool global = flags.global;
const bool sticky = flags.sticky;
const bool hasIndices = flags.hasIndices;
const bool fullUnicode = flags.unicode;

// If global is false and sticky is false, set lastIndex to 0.
Expand Down Expand Up @@ -595,6 +698,10 @@ CallResult<Handle<JSArray>> directRegExpExec(
auto inputSHV = SmallHermesValue::encodeStringValue(*S, runtime);
JSObject::setNamedSlotValueUnsafe(*A, runtime, inputDesc, inputSHV);

// If R contains any GroupName, then let hasGroups be true.
Handle<JSObject> groupNames = regexp->getGroupNameMappings(runtime);
bool hasGroups = (bool)groupNames;

// Set capture groups (including the initial full match)
size_t idx = 0;
auto marker = gcScope.createMarker();
Expand All @@ -618,7 +725,30 @@ CallResult<Handle<JSArray>> directRegExpExec(
}
idx++;
}
createGroupsObject(runtime, A, regexp->getGroupNameMappings(runtime));

// If hasIndices is true, then
if (hasIndices) {
// Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices,
// groupNames, hasGroups).
auto indicesArray = makeMatchIndicesIndexPairArray(
runtime, S, match, groupNames, hasGroups);
if (LLVM_UNLIKELY(indicesArray == ExecutionStatus::EXCEPTION)) {
return ExecutionStatus::EXCEPTION;
}

// Perform ! CreateDataPropertyOrThrow(A, "indices", indicesArray).
if (LLVM_UNLIKELY(
JSObject::defineOwnProperty(
A,
runtime,
Predefined::getSymbolID(Predefined::indices),
DefinePropertyFlags::getDefaultNewPropertyFlags(),
*indicesArray) == ExecutionStatus::EXCEPTION)) {
return ExecutionStatus::EXCEPTION;
}
}

createGroupsObject(runtime, A, groupNames);
return A;
}

Expand Down Expand Up @@ -833,6 +963,8 @@ regExpFlagPropertyGetter(void *ctx, Runtime &runtime, NativeArgs args) {
return HermesValue::encodeBoolValue(syntaxFlags.sticky);
case 's':
return HermesValue::encodeBoolValue(syntaxFlags.dotAll);
case 'd':
return HermesValue::encodeBoolValue(syntaxFlags.hasIndices);
default:
llvm_unreachable("Invalid flag passed to regExpFlagPropertyGetter");
return HermesValue::encodeEmptyValue();
Expand Down Expand Up @@ -2073,7 +2205,7 @@ regExpPrototypeSymbolSplit(void *, Runtime &runtime, NativeArgs args) {
return A.getHermesValue();
}

// ES9 21.2.5.4
// ES2022 22.2.5.4
// Note that we don't yet support unicode.
CallResult<HermesValue>
regExpFlagsGetter(void *ctx, Runtime &runtime, NativeArgs args) {
Expand All @@ -2085,11 +2217,12 @@ regExpFlagsGetter(void *ctx, Runtime &runtime, NativeArgs args) {
"RegExp.prototype.flags getter called on non-object");
}

llvh::SmallString<5> result;
llvh::SmallString<7> result;
static const struct FlagProp {
char flagChar;
Predefined::Str name;
} flagProps[] = {
{'d', Predefined::hasIndices},
{'g', Predefined::global},
{'i', Predefined::ignoreCase},
{'m', Predefined::multiline},
Expand Down
52 changes: 43 additions & 9 deletions test/hermes/regexp.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,14 @@ print(re.lastIndex);
(function() {
"use strict";
re = RegExp("abc", "")
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.lastIndex);
// CHECK-NEXT: false false false false 0
re = RegExp("abc", "igym")
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.lastIndex);
// CHECK-NEXT: true true true true 0
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.hasIndices, re.lastIndex);
// CHECK-NEXT: false false false false false 0
re = RegExp("abc", "igymd")
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.hasIndices, re.lastIndex);
// CHECK-NEXT: true true true true true 0
re = RegExp("abc", "gi")
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.lastIndex);
// CHECK-NEXT: true true false false 0
print(re.global, re.ignoreCase, re.multiline, re.sticky, re.hasIndices, re.lastIndex);
// CHECK-NEXT: true true false false false 0
try { re.global = false; } catch (err) { print(err.name); } // not writable
// CHECK-NEXT: TypeError
try { re.ignoreCase = false; } catch (err) { print(err.name); } // not writable
Expand All @@ -204,6 +204,8 @@ try { re.multiline = false; } catch (err) { print(err.name); } // not writable
// CHECK-NEXT: TypeError
try { re.sticky = false; } catch (err) { print(err.name); } // not writable
// CHECK-NEXT: TypeError
try { re.hasIndices = false; } catch (err) { print(err.name); } // not writable
// CHECK-NEXT: TypeError
re.lastIndex = 42; // yes writable
print(re.global, re.ignoreCase, re.multiline, re.lastIndex);
// CHECK-NEXT: true true false 42
Expand All @@ -216,6 +218,7 @@ var ignoreCaseGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'ignore
var multilineGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'multiline').get;
var stickyGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky').get;
var dotAllGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'dotAll').get;
var hasIndicesGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'hasIndices').get;
print(globalGetter.call(/abc/g), globalGetter.call(/abc/), globalGetter.call(RegExp.prototype));
// CHECK-NEXT: true false undefined
print(ignoreCaseGetter.call(/abc/i), ignoreCaseGetter.call(/abc/), ignoreCaseGetter.call(RegExp.prototype));
Expand All @@ -226,6 +229,8 @@ print(stickyGetter.call(/abc/y), stickyGetter.call(/abc/), stickyGetter.call(Reg
// CHECK-NEXT: true false undefined
print(dotAllGetter.call(/abc/s), dotAllGetter.call(/abc/), dotAllGetter.call(RegExp.prototype));
// CHECK-NEXT: true false undefined
print(hasIndicesGetter.call(/abc/d), hasIndicesGetter.call(/abc/), hasIndicesGetter.call(RegExp.prototype));
// CHECK-NEXT: true false undefined
try { multilineGetter.call({}); } catch (err) { print(err.name); }
// CHECK-NEXT: TypeError
try { multilineGetter.call(undefined); } catch (err) { print(err.name); }
Expand All @@ -237,8 +242,8 @@ print(/aaa/.flags.length);
// CHECK-NEXT: 0
print(/aaa/mi.flags, /aaa/im.flags, /aaa/ig.flags, /aaa/gi.flags, /aaa/gim.flags, /aaa/mgi.flags, /aaa/m.flags, /aaa/g.flags, /aaa/i.flags, /aaa/y.flags, /aaa/s.flags);
// CHECK-NEXT: im im gi gi gim gim m g i y s
print(/aaa/igsmyu.flags);
// CHECK-NEXT: gimsuy
print(/aaa/igsdmyu.flags);
// CHECK-NEXT: dgimsuy

var flagsGetter = Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags').get;
print(flagsGetter.call({multiline: 1, global: 0, ignoreCase: "yep"}));
Expand Down Expand Up @@ -389,6 +394,35 @@ print("X".match(/(A{9999999999}B|X)*/ ));



// hasIndices support
// Case 1: unnamed capture groups only. `indices` holds a [start, end) pair for
// the full match and for each capture; printing flattens the nested pairs.
// With no named groups, `indices.groups` is undefined.
var indicesRegexRes1 = /(a).(c)/d.exec("abcdef")
print(indicesRegexRes1.indices);
// CHECK-NEXT: 0,3,0,1,2,3
print(indicesRegexRes1.indices.groups);
// CHECK-NEXT: undefined

// Case 2: named capture groups that all match. `indices.groups` exposes one
// property per group name, each holding that capture's [start, end) pair.
var indicesRegexRes2 = /(?<a_letter>a).(?<c_letter>c)/d.exec("abcdef")
print(indicesRegexRes2.indices);
// CHECK-NEXT: 0,3,0,1,2,3
print(Object.getOwnPropertyNames(indicesRegexRes2.indices.groups));
// CHECK-NEXT: a_letter,c_letter
print(indicesRegexRes2.indices.groups.a_letter);
// CHECK-NEXT: 0,1
print(indicesRegexRes2.indices.groups.c_letter);
// CHECK-NEXT: 2,3

// Case 3: a named group that does not participate in the match (`h` never
// occurs). Its entry in `indices` is undefined, which prints as an empty
// trailing element, and the group name is still present on `indices.groups`
// with the value undefined.
var indicesRegexRes3 = /(?<a_letter>a)(?<h_letter>h)*/d.exec("abcdef")
print(indicesRegexRes3.indices);
// CHECK-NEXT: 0,1,0,1,
print(Object.getOwnPropertyNames(indicesRegexRes3.indices.groups));
// CHECK-NEXT: a_letter,h_letter
print(indicesRegexRes3.indices.groups.a_letter);
// CHECK-NEXT: 0,1
print(indicesRegexRes3.indices.groups.h_letter);
// CHECK-NEXT: undefined



// Source support
print(/abc/.source);
// CHECK-NEXT: abc
Expand Down
Loading

0 comments on commit 2afc7b0

Please sign in to comment.