Skip to content

Commit

Permalink
Improve performance for comment parsing (open-source-parsers#1052)
Browse files Browse the repository at this point in the history
* Improve performance for comment parsing

* Fix weird main.cpp issue

* Re-add newline

* Remove carriage return feed char

* Remove unnecessary checks
  • Loading branch information
baylesj authored Oct 17, 2019
1 parent aebc7fa commit a07b37e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 25 deletions.
67 changes: 43 additions & 24 deletions src/lib_json/json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <limits>
#include <memory>
Expand Down Expand Up @@ -942,7 +943,7 @@ class OurReader {
void skipSpaces();
bool match(const Char* pattern, int patternLength);
bool readComment();
bool readCStyleComment();
bool readCStyleComment(bool* containsNewLineResult);
bool readCppStyleComment();
bool readString();
bool readStringSingleQuote();
Expand Down Expand Up @@ -977,18 +978,20 @@ class OurReader {
static bool containsNewLine(Location begin, Location end);

using Nodes = std::stack<Value*>;
Nodes nodes_;
Errors errors_;
String document_;
Location begin_;
Location end_;
Location current_;
Location lastValueEnd_;
Value* lastValue_;
String commentsBefore_;

Nodes nodes_{};
Errors errors_{};
String document_{};
Location begin_ = nullptr;
Location end_ = nullptr;
Location current_ = nullptr;
Location lastValueEnd_ = nullptr;
Value* lastValue_ = nullptr;
bool lastValueHasAComment_ = false;
String commentsBefore_{};

OurFeatures const features_;
bool collectComments_;
bool collectComments_ = false;
}; // OurReader

// complete copy of Read impl, for OurReader
Expand All @@ -1001,9 +1004,7 @@ bool OurReader::containsNewLine(OurReader::Location begin,
return false;
}

OurReader::OurReader(OurFeatures const& features)
: begin_(), end_(), current_(), lastValueEnd_(), lastValue_(),
features_(features), collectComments_() {}
OurReader::OurReader(OurFeatures const& features) : features_(features) {}

bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
bool collectComments) {
Expand Down Expand Up @@ -1134,6 +1135,7 @@ bool OurReader::readValue() {

if (collectComments_) {
lastValueEnd_ = current_;
lastValueHasAComment_ = false;
lastValue_ = &currentValue();
}

Expand Down Expand Up @@ -1280,21 +1282,32 @@ bool OurReader::match(const Char* pattern, int patternLength) {
}

bool OurReader::readComment() {
Location commentBegin = current_ - 1;
Char c = getNextChar();
const Location commentBegin = current_ - 1;
const Char c = getNextChar();
bool successful = false;
if (c == '*')
successful = readCStyleComment();
else if (c == '/')
bool cStyleWithEmbeddedNewline = false;

const bool isCStyleComment = (c == '*');
const bool isCppStyleComment = (c == '/');
if (isCStyleComment) {
successful = readCStyleComment(&cStyleWithEmbeddedNewline);
} else if (isCppStyleComment) {
successful = readCppStyleComment();
}

if (!successful)
return false;

if (collectComments_) {
CommentPlacement placement = commentBefore;
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
if (c != '*' || !containsNewLine(commentBegin, current_))
placement = commentAfterOnSameLine;

if (!lastValueHasAComment_) {
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
placement = commentAfterOnSameLine;
lastValueHasAComment_ = true;
}
}
}

addComment(commentBegin, current_, placement);
Expand Down Expand Up @@ -1334,12 +1347,18 @@ void OurReader::addComment(Location begin, Location end,
}
}

bool OurReader::readCStyleComment() {
bool OurReader::readCStyleComment(bool* containsNewLineResult) {
*containsNewLineResult = false;

while ((current_ + 1) < end_) {
Char c = getNextChar();
if (c == '*' && *current_ == '/')
if (c == '*' && *current_ == '/') {
break;
} else if (c == '\n') {
*containsNewLineResult = true;
}
}

return getNextChar() == '/';
}

Expand Down
6 changes: 5 additions & 1 deletion src/test_lib_json/fuzz.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
return 0;
}

uint32_t hash_settings = *(const uint32_t*)data;
const uint32_t hash_settings = static_cast<uint32_t>(data[0]) |
(static_cast<uint32_t>(data[1]) << 8) |
(static_cast<uint32_t>(data[2]) << 16) |
(static_cast<uint32_t>(data[3]) << 24);
data += sizeof(uint32_t);
size -= sizeof(uint32_t);

Expand All @@ -36,6 +39,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
builder.settings_["failIfExtra_"] = hash_settings & (1 << 6);
builder.settings_["rejectDupKeys_"] = hash_settings & (1 << 7);
builder.settings_["allowSpecialFloats_"] = hash_settings & (1 << 8);
builder.settings_["collectComments"] = hash_settings & (1 << 9);

std::unique_ptr<Json::CharReader> reader(builder.newCharReader());

Expand Down

0 comments on commit a07b37e

Please sign in to comment.