Skip to content

Commit

Permalink
Bug 1362154: Part 1: Rewrite FlatStringReader r=mgaudet
Browse files Browse the repository at this point in the history
My initial shim of FlatStringReader used AutoCheckCannotGC, but the API we need to support doesn't actually require that. We only need to be able to get characters by index, which at most requires a HandleLinearString.

Getting rid of this AutoCheckCannotGC helps pave the way for allocating GC things while parsing regexps, which is a huge simplification for the FixedArray that irregexp uses to return named capture information.

Differential Revision: https://phabricator.services.mozilla.com/D76033
  • Loading branch information
iainireland committed May 20, 2020
1 parent ec9e227 commit f52b78c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 35 deletions.
16 changes: 8 additions & 8 deletions js/src/new-regexp/RegExpAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ static bool CheckPatternSyntaxImpl(JSContext* cx, FlatStringReader* pattern,
bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
const mozilla::Range<const char16_t> chars,
JS::RegExpFlags flags) {
FlatStringReader reader(chars.begin().get(), chars.length());
FlatStringReader reader(chars);
RegExpCompileData result;
if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
ReportSyntaxError(ts, result, chars.begin().get(), chars.length());
Expand All @@ -266,7 +266,7 @@ bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,

bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
HandleAtom pattern, JS::RegExpFlags flags) {
FlatStringReader reader(pattern);
FlatStringReader reader(cx, pattern);
RegExpCompileData result;
if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
ReportSyntaxError(ts, result, pattern);
Expand Down Expand Up @@ -324,18 +324,17 @@ static bool UseBoyerMoore(HandleAtom pattern, JS::AutoAssertNoGC& nogc) {
}

// Sample character frequency information for use in Boyer-Moore.
static void SampleCharacters(HandleLinearString input,
static void SampleCharacters(FlatStringReader* sample_subject,
RegExpCompiler& compiler) {
static const int kSampleSize = 128;
int chars_sampled = 0;

FlatStringReader sample_subject(input);
int length = sample_subject.length();
int length = sample_subject->length();

int half_way = (length - kSampleSize) / 2;
for (int i = std::max(0, half_way); i < length && chars_sampled < kSampleSize;
i++, chars_sampled++) {
compiler.frequency_collator()->CountCharacter(sample_subject.Get(i));
compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
}
}

Expand Down Expand Up @@ -463,7 +462,7 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,

RegExpCompileData data;
{
FlatStringReader patternBytes(pattern);
FlatStringReader patternBytes(cx, pattern);
if (!RegExpParser::ParseRegExp(cx->isolate, &zone, &patternBytes, flags,
&data)) {
JS::CompileOptions options(cx);
Expand Down Expand Up @@ -509,7 +508,8 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,

bool isLatin1 = input->hasLatin1Chars();

SampleCharacters(input, compiler);
FlatStringReader sample_subject(cx, input);
SampleCharacters(&sample_subject, compiler);
data.node = compiler.PreprocessRegExp(&data, flags, isLatin1);
data.error = AnalyzeRegExp(cx->isolate, isLatin1, data.node);
if (data.error != RegExpError::kNone) {
Expand Down
37 changes: 10 additions & 27 deletions js/src/new-regexp/regexp-shim.h
Original file line number Diff line number Diff line change
Expand Up @@ -860,46 +860,29 @@ inline Vector<const uc16> String::GetCharVector(
}

// A flat string reader provides random access to the contents of a
// string independent of the character width of the string. The handle
// must be valid as long as the reader is being used.
// Origin:
// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L807-L825
// string independent of the character width of the string.
class MOZ_STACK_CLASS FlatStringReader {
public:
FlatStringReader(JSLinearString* string)
: length_(string->length()),
is_latin1_(string->hasLatin1Chars()) {
FlatStringReader(JSContext* cx, js::HandleLinearString string)
: string_(string), length_(string->length()) {}

if (is_latin1_) {
latin1_chars_ = string->latin1Chars(nogc_);
} else {
two_byte_chars_ = string->twoByteChars(nogc_);
}
}
FlatStringReader(const char16_t* chars, size_t length)
: two_byte_chars_(chars),
length_(length),
is_latin1_(false) {}
FlatStringReader(const mozilla::Range<const char16_t> range)
: string_(nullptr), range_(range), length_(range.length()) {}

int length() { return length_; }

inline char16_t Get(size_t index) {
MOZ_ASSERT(index < length_);
if (is_latin1_) {
return latin1_chars_[index];
} else {
return two_byte_chars_[index];
if (string_) {
return string_->latin1OrTwoByteChar(index);
}
return range_[index];
}

private:
union {
const JS::Latin1Char *latin1_chars_;
const char16_t* two_byte_chars_;
};
js::HandleLinearString string_;
const mozilla::Range<const char16_t> range_;
size_t length_;
bool is_latin1_;
JS::AutoCheckCannotGC nogc_;
};

class JSRegExp : public HeapObject {
Expand Down

0 comments on commit f52b78c

Please sign in to comment.