Skip to content

Commit

Permalink
Implement wrappers for the more complete Hermes parser API
Browse files Browse the repository at this point in the history
Summary:
The new Hermes parser API wrappers expose all generated error messages
with their detailed data (previously it only returned the first error
formatted as a string).

A method to convert from SMLoc to line+column is also exposed.

Lastly, magic comments can be queried.

The new Rust wrappers are implemented, but for now the public module
API is preserved.

Reviewed By: avp

Differential Revision: D30444129

fbshipit-source-id: a09af9619bc1852d1d722ef5d8b3f7772a630488
  • Loading branch information
tmikov authored and facebook-github-bot committed Aug 21, 2021
1 parent 87f90b7 commit f3cc446
Show file tree
Hide file tree
Showing 9 changed files with 2,315 additions and 1,530 deletions.
223 changes: 191 additions & 32 deletions lib/Parser/rust-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,57 +335,216 @@ struct ReturnType<NodeList> {

namespace {

/// Severity of a diagnostic, as stored in DiagMessage.
/// The numeric values are spelled out because DiagMessage is handed to Rust
/// and the values therefore form part of the external layout.
enum class DiagKind : uint32_t {
  Error = 0,
  Warning = 1,
  Remark = 2,
  Note = 3,
};

/// Translate an llvh::SourceMgr diagnostic kind into the externally visible
/// DiagKind.
/// \param k the kind reported by the source manager.
/// \return the matching DiagKind. An unrecognized kind trips an assertion in
///   builds with assertions enabled and is otherwise reported as an error.
DiagKind toDiagKind(llvh::SourceMgr::DiagKind k) {
  switch (k) {
    case llvh::SourceMgr::DK_Error:
      return DiagKind::Error;
    case llvh::SourceMgr::DK_Warning:
      return DiagKind::Warning;
    case llvh::SourceMgr::DK_Remark:
      return DiagKind::Remark;
    case llvh::SourceMgr::DK_Note:
      return DiagKind::Note;
    default:
      break;
  }

  // Unknown kind: flag it while debugging, degrade to Error otherwise.
  assert(false);
  return DiagKind::Error;
}

/// A source position expressed as a line/column pair.
struct Coord {
  /// 1-based line number; -1 until a position has been assigned.
  int line{-1};
  /// 1-based column number; -1 until a position has been assigned.
  int column{-1};

  /// Construct an unset coordinate (-1, -1).
  Coord() = default;

  /// Construct a coordinate from an explicit line and column.
  Coord(int lineNo, int columnNo) : line{lineNo}, column{columnNo} {}
};

/// A temporary struct describing an error message, returned to Rust.
/// The two string members do not own their text: they refer to storage held by
/// the llvh::SMDiagnostic the message was built from, so that diagnostic has to
/// outlive every use of the message.
/// NOTE(review): since the struct is handed to Rust, the member order and types
/// presumably have to match a declaration on the Rust side - confirm before
/// changing the layout.
struct DiagMessage {
/// Location.
SMLoc loc{};
/// Source coordinate.
Coord coord{};
/// What kind of message.
DiagKind diagKind = DiagKind::Error;
/// Error message.
llvh::StringRef message{};
/// Contents of the error line.
llvh::StringRef lineContents{};

/// Construct an empty message: default location and coordinate, kind Error,
/// empty strings.
DiagMessage() = default;

/// Convert a diagnostic into the external layout by copying its location,
/// line/column, kind and string references.
/// Deliberately not `explicit`: ParserContext::addMessage() relies on the
/// implicit conversion when it pushes an SMDiagnostic into a vector of
/// DiagMessage.
DiagMessage(const llvh::SMDiagnostic &diag)
: loc(diag.getLoc()),
coord(diag.getLineNo(), diag.getColumnNo()),
diagKind(toDiagKind(diag.getKind())),
message(diag.getMessage()),
lineContents(diag.getLineContents()) {}
};

/// Selects which magic comment hermes_parser_get_magic_comment() returns.
/// The values are fixed explicitly; the enum is a parameter of an extern "C"
/// function.
enum class MagicCommentKind : uint32_t {
/// The value reported by SourceErrorManager::getSourceUrl().
SourceUrl = 0,
/// The value reported by SourceErrorManager::getSourceMappingUrl().
SourceMappingUrl = 1,
};

/// A non-owning (pointer, length) pair used to return contiguous data from the
/// extern "C" entry points.
struct DataRef {
  /// Address of the first element; the memory is not owned by the DataRef.
  const void *data;
  /// Value of the source object's size(): an element count, not a byte count.
  size_t length;
};

/// Describe any object exposing data() and size() as a DataRef.
/// \param ref the object to describe; it must outlive the returned DataRef.
/// \return a DataRef holding ref.data() and ref.size().
template <typename T>
inline DataRef toDataRef(const T &ref) {
  DataRef result;
  result.data = ref.data();
  result.length = ref.size();
  return result;
}

/// This object contains the entire parser state.
struct Parsed {
std::shared_ptr<Context> context;
std::string error;
ESTree::ProgramNode *ast = nullptr;

explicit Parsed(std::string const &error) : error(error) {}
explicit Parsed(
std::shared_ptr<Context> const &context,
ESTree::ProgramNode *ast)
: context(context), ast(ast) {}
struct ParserContext {
/// Parser context with allocators, string table, etc.
Context context_{};
/// Source buffer id, generated by SourceErrorManager.
unsigned bufId_ = ~0u;
/// Original error messages. We need them because they provide storage for
/// the strings.
std::deque<llvh::SMDiagnostic> ourMessages_{};
/// Messages converted to the external layout.
std::vector<DiagMessage> convertedMessages_{};

/// Index of the first error.
llvh::Optional<size_t> firstError_;
/// AST.
ESTree::ProgramNode *ast_ = nullptr;

explicit ParserContext() {
context_.getSourceErrorManager().setDiagHandler(
[](const llvh::SMDiagnostic &diag, void *ctx) {
static_cast<ParserContext *>(ctx)->addMessage(diag);
},
this);
}

void setInputBuffer(llvh::StringRef str) {
assert(!haveBufferId() && "input buffer has already been set");
assert(str.back() == 0 && "input source must be 0-terminated");
bufId_ = context_.getSourceErrorManager().addNewSourceBuffer(
llvh::MemoryBuffer::getMemBuffer(str.drop_back(), "JavaScript", true));
}

bool haveBufferId() const {
return bufId_ != ~0u;
}

unsigned getBufferId() const {
assert(haveBufferId() && "input buffer has not been set");
return bufId_;
}

void addMessage(const llvh::SMDiagnostic &diag) {
if (diag.getKind() <= llvh::SourceMgr::DK_Error && !firstError_)
firstError_ = ourMessages_.size();
ourMessages_.push_back(diag);
convertedMessages_.push_back(ourMessages_.back());
}

void addError(const char *msg) {
context_.getSourceErrorManager().error(
SMLoc{}, "Input is not zero terminated");
}
};

} // namespace

/// source is the zero terminated input. source[len-1] must be \0.
extern "C" Parsed *hermes_parser_parse(const char *source, size_t len) {
extern "C" ParserContext *hermes_parser_parse(const char *source, size_t len) {
std::unique_ptr<ParserContext> parserCtx(new ParserContext());

if (len == 0 || source[len - 1] != 0) {
return new Parsed("Input is not zero terminated");
parserCtx->addError("Input is not zero terminated");
return parserCtx.release();
}

CodeGenerationSettings codeGenOpts{};
OptimizationSettings optSettings;
auto context = std::make_shared<Context>(codeGenOpts, optSettings);
SimpleDiagHandlerRAII outputManager{context->getSourceErrorManager()};

parser::JSParser parser(*context, StringRef(source, len - 1));
parserCtx->setInputBuffer(StringRef(source, len));
parser::JSParser parser(
parserCtx->context_, parserCtx->bufId_, hermes::parser::FullParse);
auto ast = parser.parse();

if (outputManager.haveErrors()) {
return new Parsed(outputManager.getErrorString());
} else if (!ast) {
// Just in case.
return new Parsed("Internal error");
if (!parserCtx->firstError_) {
if (!ast) {
// Just in case.
parserCtx->addError("Internal error");
} else {
parserCtx->ast_ = *ast;
}
}
return parserCtx.release();
}

/// Destroy a parser context returned by hermes_parser_parse().
/// Data obtained through hermes_parser_get_messages() points into the context
/// and must not be used afterwards.
/// NOTE(review): the AST presumably lives in the context's allocators and
/// becomes invalid as well - confirm.
/// \param parserCtx the context to destroy; a null pointer is accepted and
///   ignored.
extern "C" void hermes_parser_free(ParserContext *parserCtx) {
delete parserCtx;
}

/// \return the index of the first error or -1 if no errors. The index refers
///   to the message array returned by hermes_parser_get_messages().
extern "C" ssize_t hermes_parser_get_first_error(
    const ParserContext *parserCtx) {
  if (!parserCtx->firstError_)
    return -1;
  // Convert directly to the return type; going through int would truncate
  // indices that do not fit in an int.
  return static_cast<ssize_t>(*parserCtx->firstError_);
}

return new Parsed(context, *ast);
/// \return a DataRef over the context's array of DiagMessage. The length is
///   the number of messages, and the data stays owned by \p parserCtx.
extern "C" DataRef hermes_parser_get_messages(const ParserContext *parserCtx) {
  const auto &messages = parserCtx->convertedMessages_;
  return toDataRef(messages);
}

extern "C" void hermes_parser_free(Parsed *parsed) {
delete parsed;
/// \return the root of the parsed program stored in the context, or null if
///   no AST was stored (the field defaults to null).
extern "C" ESTree::ProgramNode *hermes_parser_get_ast(
const ParserContext *parserCtx) {
return parserCtx->ast_;
}

extern "C" const char *hermes_parser_get_error(Parsed *parsed) {
return parsed->ast ? nullptr : parsed->error.c_str();
/// Convert a source location into a line/column coordinate.
/// \param parserCtx the context whose source error manager is queried.
/// \param loc the location to look up.
/// \param res receives the line and column, or (-1, -1) if the location could
///   not be resolved.
/// \return true if the location was resolved, false otherwise.
extern "C" bool
hermes_parser_find_location(ParserContext *parserCtx, SMLoc loc, Coord *res) {
  SourceErrorManager::SourceCoords found;
  if (parserCtx->context_.getSourceErrorManager().findBufferLineAndLoc(
          loc, found)) {
    res->line = found.line;
    res->column = found.col;
    return true;
  }

  // Lookup failed: hand back the "unset" coordinate.
  res->line = res->column = -1;
  return false;
}

extern "C" ESTree::ProgramNode *hermes_parser_get_ast(Parsed *parsed) {
return parsed->ast;
/// Look up one of the magic comments recorded for the parsed source buffer.
/// Note that we guarantee that the result is valid UTF-8 because we only
/// return it if there were no parse errors.
/// \param parserCtx the context to query.
/// \param kind which magic comment to return.
/// \return the comment's value, or {nullptr, 0} if the input was not parsed
///   successfully.
extern "C" DataRef hermes_parser_get_magic_comment(
    ParserContext *parserCtx,
    MagicCommentKind kind) {
  // Only answer after a successful parse. (The magic comments could be set
  // even after a failed parse, but in that case they are not guaranteed to be
  // valid UTF-8.)
  const bool parsedOk =
      parserCtx->haveBufferId() && parserCtx->ast_ != nullptr;
  if (!parsedOk)
    return DataRef{nullptr, 0};

  auto &sourceMgr = parserCtx->context_.getSourceErrorManager();
  const unsigned bufferId = parserCtx->getBufferId();

  // Stays empty if `kind` matches none of the cases below.
  StringRef value{};
  switch (kind) {
    case MagicCommentKind::SourceUrl:
      value = sourceMgr.getSourceUrl(bufferId);
      break;
    case MagicCommentKind::SourceMappingUrl:
      value = sourceMgr.getSourceMappingUrl(bufferId);
      break;
  }

  return toDataRef(value);
}

extern "C" const char *hermes_get_node_name(ESTree::Node *n) {
return n->getNodeName().data();
/// \return the name of node \p n, as reported by getNodeName(), expressed as
///   a (pointer, length) pair.
extern "C" DataRef hermes_get_node_name(ESTree::Node *n) {
return toDataRef(n->getNodeName());
}
7 changes: 7 additions & 0 deletions unsupported/juno/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions unsupported/juno/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ edition = "2018"
license = "MIT"

[dependencies]
libc = "0.2"
thiserror = "1.0"
Loading

0 comments on commit f3cc446

Please sign in to comment.