Skip to content

Commit

Permalink
[GR-40008] Treat RegExp identifiers as if they are in Unicode mode.
Browse files Browse the repository at this point in the history
PullRequest: graal/12437
  • Loading branch information
jirkamarsik committed Aug 29, 2022
2 parents 0cf94ee + b3bdb52 commit 96387f0
Showing 1 changed file with 10 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ private Token parseGroupBegin() throws RegexSyntaxException {

private int parseCodePointInGroupName() throws RegexSyntaxException {
if (consumingLookahead("\\u")) {
final int unicodeEscape = parseUnicodeEscapeChar();
final int unicodeEscape = parseUnicodeEscapeChar(true);
if (unicodeEscape < 0) {
throw syntaxError(ErrorMessages.INVALID_UNICODE_ESCAPE);
} else {
Expand All @@ -427,7 +427,7 @@ private int parseCodePointInGroupName() throws RegexSyntaxException {
return -1;
}
final char c = consumeChar();
return flags.isUnicode() && Character.isHighSurrogate(c) ? finishSurrogatePair(c) : c;
return Character.isHighSurrogate(c) ? finishSurrogatePair(c) : c;
}

/**
Expand Down Expand Up @@ -658,19 +658,22 @@ private CodePointSet parseUnicodeCharacterProperty(boolean invert) throws RegexS
/**
* Parse a {@code RegExpUnicodeEscapeSequence}, assuming that the prefix '&#92;u' has already
* been read.
*
* @param unicodeMode whether we are in Unicode mode, which allows '&#92;u{...}' escapes and
* treats surrogate pairs as single code points
*
* @return the code point of the escaped character, or -1 if the escape was malformed
*/
private int parseUnicodeEscapeChar() throws RegexSyntaxException {
if (flags.isUnicode() && consumingLookahead("{")) {
private int parseUnicodeEscapeChar(boolean unicodeMode) throws RegexSyntaxException {
if (unicodeMode && consumingLookahead("{")) {
final int value = parseHex(1, Integer.MAX_VALUE, 0x10ffff, ErrorMessages.INVALID_UNICODE_ESCAPE);
if (!consumingLookahead("}")) {
throw syntaxError(ErrorMessages.INVALID_UNICODE_ESCAPE);
}
return value;
} else {
final int value = parseHex(4, 4, 0xffff, ErrorMessages.INVALID_UNICODE_ESCAPE);
if (flags.isUnicode() && Character.isHighSurrogate((char) value)) {
if (unicodeMode && Character.isHighSurrogate((char) value)) {
final int resetIndex = index;
if (consumingLookahead("\\u") && !lookahead("{")) {
final char lead = (char) value;
Expand Down Expand Up @@ -728,7 +731,7 @@ private int parseEscapeChar(char c, boolean inCharClass) throws RegexSyntaxExcep
advance();
return Character.toUpperCase(controlLetter) - ('A' - 1);
case 'u':
final int unicodeEscape = parseUnicodeEscapeChar();
final int unicodeEscape = parseUnicodeEscapeChar(flags.isUnicode());
return unicodeEscape < 0 ? c : unicodeEscape;
case 'x':
final int value = parseHex(2, 2, 0xff, ErrorMessages.INVALID_ESCAPE);
Expand All @@ -750,7 +753,7 @@ private int parseEscapeChar(char c, boolean inCharClass) throws RegexSyntaxExcep
}

private int finishSurrogatePair(char c) {
assert flags.isUnicode() && Character.isHighSurrogate(c);
assert Character.isHighSurrogate(c);
if (!atEnd() && Character.isLowSurrogate(curChar())) {
final char lead = c;
final char trail = consumeChar();
Expand Down

0 comments on commit 96387f0

Please sign in to comment.