Simple recursive descent parser.
Example:
#include <vemaparse/lexer.h> #include <vemaparse/parser.h> #include <vemaparse/ast.h> using namespace ast; typedef lexer::LexerIterator<std::string::iterator> parser_iterator_type; typedef parser::Rule<parser_iterator_type>::Match Match; parser::Rule<parser_iterator_type> &r(const std::string ®ex, const std::string name = "") { auto &rule = parser::regex<parser_iterator_type>(regex); if (!name.empty()) rule.name = name; return rule; } parser::Rule<parser_iterator_type> &t(int id, const std::string name = "") { auto &rule = parser::terminal<parser_iterator_type>(id); if (!name.empty()) rule.name = name; return rule; } parser::Rule<parser_iterator_type> grammar() { // comment auto &open_comment = r("/\\*.*"); auto &close_comment = r("[^\\\\]*\\*/"); auto &anything = r(".*"); auto &comment = (open_comment >> (anything / close_comment)); comment.name = "comment"; auto &id = t(lexer::IDENTIFIER, "id"); auto literal_action = [](Match &m, Node &n) {ast::literal(m, n);}; auto &num = t(lexer::NUMBER_LITERAL, "num")[literal_action]; auto &string = t(lexer::STRING_LITERAL, "string"); auto &expression = *new parser::Rule<parser_iterator_type>(); auto &initializer = r("=") >> expression; initializer.name = "initializer"; // variable declaration auto &variable_declaration = r("var") >> id >> -initializer >> r(";"); variable_declaration.name = "variable_declaration"; // expressions auto expression_action = [](Match &m, Node &) {std::cout << "(" << parser::to_string(m.begin, m.end) << ")";}; auto &primary_expression = id | num | string | (r("\\(") >> expression >> r("\\)")); primary_expression.name = "primary_expression"; auto &unary_expression = ((r("\\+\\+") | r("--") | r("-") | r("\\~") | r("!")) >> primary_expression) | primary_expression; unary_expression.name = "unary_expression"; auto &multiplicative_expression = unary_expression >> *((r("\\*") | r("/") | r("%")) >> unary_expression); multiplicative_expression.name = "multiplicative_expression"; auto &additive_expression = multiplicative_expression >> *((r("\\+") | r("-")) >> multiplicative_expression); additive_expression.name = "additive_expression"; auto &comparative_expression = additive_expression >> *(r("==") | r("!=") | r("<") | r(">") | r("<=") | r(">=") >> additive_expression); comparative_expression.name = "comparative_expression"; expression = comparative_expression >> *((r("&&") | r("\\|\\|")) >> comparative_expression); expression.name = "expression"; expression[expression_action]; // block auto &statement = *new parser::Rule<parser_iterator_type>(); statement.name = "statement"; auto &block = r("\\{") >> *statement >> r("\\}"); block.name = "block"; statement = block | variable_declaration | comment; return *block; } void visit_match(Match &match, Node *parent) { static const std::bitset<lexer::NUM_TOKENS> literals((1 << lexer::IDENTIFIER) | (1 << lexer::STRING_LITERAL) | (1 << lexer::NUMBER_LITERAL)); Node *node = parent; const std::string match_string = parser::to_string(match.begin, match.end); if (match_string.empty()) { return; } if (match.action || parent->text != match_string) { node = new Node(); node->name = boost::xpressive::regex_replace(match.rule.name, boost::xpressive::sregex::compile(" |-|>"), std::string("_")); node->text = match_string; parent->children.push_back(node); } if (match.action) match.action(match, *node); for (auto c = match.children.begin(); c != match.children.end(); ++c) visit_match(**c, node); } int main(int argc, char *argv[]) { std::string input; lexer::Lexer<std::string::iterator> lexer; if (argc > 1) { std::ifstream file(argv[1]); input = std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); lexer = lexer::Lexer<std::string::iterator>(input.begin(), input.end()); #if 0 try { for (auto iter = lexer.begin(); iter != lexer.end(); ++iter) { if (iter.token != lexer::WHITESPACE) std::cout << std::setw(2) << iter.token << ": " << *iter << std::endl; } } catch (const lexer::LexerError<std::string::iterator> &error) { std::cerr << "ERROR: " << error.what() << std::endl; } #endif } else { std::getline(std::cin, input); if (!input.empty()) { lexer = lexer::Lexer<std::string::iterator>(input.begin(), input.end()); } } parser::Rule<parser_iterator_type> start = grammar(); Node *root = new Node(); parser::Rule<parser_iterator_type>::rule_result ret = start.match(lexer.begin(), lexer.end()); { root->name = "root"; std::ofstream ofs("ast.dot"); ofs << "digraph html {\n"; std::for_each(ret.match.children.begin(), ret.match.children.end(), [root](Match::match_ptr m) {visit_match(*m, root);}); std::cout << "\n\n"; root->debug(ofs); ofs << "}"; } if (ret.match.end != lexer.end()) { // Walk the partial parse tree std::shared_ptr<Match> p, m; p = m = ret.match.children.back(); while (!m->children.empty()) { p = m; m = m->children.back(); } lexer::Lexer<std::string::iterator>::iterator lex_iter = m->end; // get the line number std::string line_string; { int line_number = 1; auto i = input.begin(); while (i != lex_iter.begin) { if (*i++ == '\n') ++line_number; } std::ostringstream ss; ss << line_number; line_string = ss.str() + ": "; } // Grab the line std::string::iterator begin, end; begin = lex_iter.begin; end = lex_iter.end; if (begin == input.end()) --begin; while (begin != input.begin() && (*begin != '\n')) --begin; if (*begin == '\n') ++begin; while (end != input.end() && (*end != '\n')) ++end; std::cout << "Failed to parse: \n" << line_string; std::for_each(begin, end, [](char c) {std::cout << c;}); std::cout << "\n"; for (int i = 0; i < ((lex_iter.begin - begin) + line_string.size()); ++i) std::cout << " "; std::cout << "^\n"; std::cout << "Tried rules: " << p->rule.name; for (auto i = p->children.begin(); i != p->children.end(); ++i) std::cout << ", " << (*i)->rule.name; std::cout << "\n"; } }