diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..1d05149 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,28 @@ +Licensed Under BSD + +Copyright (c) 2013, Daniel Holden +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD Project. \ No newline at end of file diff --git a/README.md b/README.md index 3d382f1..d224f35 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,29 @@ Micro Parser Combinators ======================== -_mpc_ is a lightweight Parser Combinator library for C. 
+_mpc_ is a lightweight but powerful Parser Combinator library for C. + +Using _mpc_ might be of interest to you if you are... + +* Building a new programming language +* Building a new data format +* Parsing an existing data format +* Embedding a Domain Specific Language Features -------- -* Full Type Generic Parser Combinator +* Type-Generic Parser Combinators * Error Message Support * Regular Expression Support -* Packaged with AST generator -* Easy to including in source -* Written in clean ANSI C +* Abstract Syntax Tree Support +* Easy to Integrate (One Source File in ANSI C) + Alternatives ------------ -The current main alternative is a branch of (https://github.com/wbhart/Cesium3)[Cesium3]. +The current main alternative is a branch of [Cesium3](https://github.com/wbhart/Cesium3/tree/combinators). The main advantages of _mpc_ over this are: @@ -25,51 +32,534 @@ The main advantages of _mpc_ over this are: * Doesn't use `setjmp` and `longjmp` for errors * Doesn't pollute namespace -Example -------- -```c +View From the Top +----------------- +In this example I specify a grammar for a basic maths language and parse a string from it. +The output is an instance of the included `mpc_ast_t` type. 
+```c +#include "mpc.h" + +mpc_ast_t* parse_maths(const char* input) { + + mpc_parser_t* Expr = mpc_new("expression"); + mpc_parser_t* Prod = mpc_new("product"); + mpc_parser_t* Value = mpc_new("value"); + mpc_parser_t* Maths = mpc_new("maths"); + + mpc_define(Expr, mpca_grammar(" (('+' | '-') )* ", Prod)); + mpc_define(Prod, mpca_grammar(" (('*' | '/') )* ", Value)); + mpc_define(Value, mpca_grammar(" /[0-9]+/ | '(' ')' ", Expr)); + mpc_define(Maths, mpca_total(Expr)); + + mpc_result_t r; + if (!mpc_parse("parse_maths", input, Maths, &r)) { + mpc_err_print(r.error); + abort(); + } + + mpc_cleanup(4, Expr, Prod, Value, Maths); + + return r.output; +} +``` + +The output for an expression like `(4 * 2 * 11 + 2) + 5` would look something like this + +```xml + + + '(' + + + '4' + '*' + '2' + '*' + '11' + '+' + '2' + ')' + '+' + '5' ``` +View From the Bottom +-------------------- + +Parser Combinators are structures that encode how to parse a particular language. They can be combined using a number of intuitive operators to create new parsers of ever increasing complexity. With these, complex grammars and languages can be processed easily. + +The trick behind Parser Combinators is the observation that by structuring the library in a particular way, one can make building parser combinators, look like writing a grammar itself. Therefore instead of describing _how to parse a language_, a user must only specify _the language itself_, and the computer will work out how to parse it ... as if by magic! + Parsers ------- +The Parser Combinator type in _mpc_ is `mpc_parser_t`. This encodes a function that attempts to parse some string and, if successful, returns a pointer to some data, or otherwise returns some error. A parser can be run using `mpc_parse`. + +```c +bool mpc_parse(const char* filename, const char* s, mpc_parser_t* p, mpc_result_t* r); +``` + +This function returns `true` on success and `false` on failure. 
It takes as input some parser `p`, string `s`, and some `filename`. It outputs into `r` the result of the parse which is either a pointer to some data object, or an error. The type `mpc_result_t` is a union type defined as follows. + +```c +typedef union { + mpc_err_t* error; + mpc_val_t* output; +} mpc_result_t; +``` + +where `mpc_val_t` is synonymous with `void*` and simply represents some pointer to data - the exact type of which is dependent on the parser. + + +Basic Parsers +------------- + +All the following functions return parsers. All of those parsers return strings with the character(s) matched. They have the following functionality. + +* `mpc_parser_t* mpc_any(void);` - Matches any character +* `mpc_parser_t* mpc_char(char c);` - Matches a character `c` +* `mpc_parser_t* mpc_range(char s, char e);` - Matches any character in the range `s` to `e` +* `mpc_parser_t* mpc_oneof(const char* s);` - Matches any character in provided string +* `mpc_parser_t* mpc_noneof(const char* s);` - Matches any character not in provided string +* `mpc_parser_t* mpc_satisfy(bool(*f)(char));` - Matches any character satisfying function `f` +* `mpc_parser_t* mpc_string(const char* s);` - Matches string `s` + +Several other functions exist that return parsers with special functionality. + +* `mpc_parser_t* mpc_pass(void);` - Always succeeds and returns `NULL` +* `mpc_parser_t* mpc_fail(void);` - Always fails +* `mpc_parser_t* mpc_lift(mpc_lift_t f);` - Always succeeds and returns the result of function `f` +* `mpc_parser_t* mpc_lift_val(mpc_val_t* x);` - Always succeeds and returns `x` Combinators ----------- +Combinators are functions that take one or several parsers and return a new one. These combinators are type agnostic - meaning they can be used no matter what type the input parsers are meant to return. In languages such as Haskell ensuring you don't ferry one type of data into a parser requiring a different type of data is done by the compiler. 
But in C we don't have that luxury, so it is at the discretion of the programmer to ensure that they deal correctly with the output types of different parsers. -Regular Expressions -------------------- +A second annoyance in C is that of manual memory management. Some parsers might get half-way and then fail, meaning they need to clean up any partial data that has been collected in the parse. In Haskell this is handled by the Garbage Collector but in C these functions take _destructors_ - functions which clean up any partial data of a given type that has been collected. +Here are some common combinators and how to use them. -Combinator Grammars -------------------- +```c +mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); +``` +Returns a parser that attempts `a` and on failure reports that `expected` was expected. -Abstract Syntax Tree --------------------- +This is useful for improving the readability of error messages. For example: + +* `mpc_or(2, mpc_char('0'), mpc_char('1'))` + +might report `expected '0' or '1' at 'x'` while + +* `mpc_expect(mpc_or(2, mpc_char('0'), mpc_char('1')), "binary digit")` + +will report `expected binary digit at 'x'`. + +```c +mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f); +mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x); +``` + +Applies function `f` to the result of parser `a`. +Applies function `f`, taking extra input `x`, to the result of parser `a`. + +```c +mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da); +mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf); +``` + +Returns a parser with the following behaviour: + * If parser `a` succeeds, the output parser fails. + * If parser `a` fails, the output parser succeeds and returns `NULL` or the result of lift function `lf`. + +Destructor `da` is used to destroy the result of `a`. 
+ +```c +mpc_parser_t* mpc_maybe(mpc_parser_t* a); +mpc_parser_t* mpc_maybe_else(mpc_parser_t* a, mpc_lift_t lf); +``` + +Attempts to parse `a`. If this fails then succeeds and returns `NULL` or the result of `lf`. + +```c +mpc_parser_t* mpc_many(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_many_else(mpc_parser_t* a, mpc_fold_t f, mpc_lift_t lf); +``` + +Attempts to parse zero or more `a`. If zero instances are found then succeeds and returns `NULL` or the result of `lf`. + +If more than zero instances are found results of `a` are combined using fold function `f`. See the _Function Types_ section for more details. + +```c +mpc_parser_t* mpc_many1(mpc_parser_t* a, mpc_fold_t f); +``` + +Attempts to parse one or more `a`. Results are combined with fold function `f`. + +```c +mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n); +mpc_parser_t* mpc_count_else(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n, mpc_lift_t lf); +``` + +Attempts to parse exactly `n` of `a`. If it fails the result output by the fold function `f` is destructed with `da` and either it returns `NULL` or the result of lift function `lf`. + +Results of `a` are combined using fold function `f`. + +```c +mpc_parser_t* mpc_else(mpc_parser_t* a, mpc_parser_t* b); +``` + +Attempts to parse `a` and if fails attempts to parse `b`. If both fail, returns an error. + +```c +mpc_parser_t* mpc_also(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); +mpc_parser_t* mpc_bind(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); +``` + +Attempts to parse `a` and then attempts to parse `b`. If `b` fails it destructs the result of `a` using `da`. If both succeed it returns the result of `a` and `b` combined using the fold function `f`. + +```c +mpc_parser_t* mpc_or(int n, ...); +``` + +Attempts to parse `n` parsers in sequence, returning the first one that succeeds. If all fail, returns an error. 
+ +For example: `mpc_or(3, mpc_char('a'), mpc_char('b'), mpc_char('c'))` would attempt to match either an `'a'` or a `'b'` or a `'c'`. + +```c +mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...); +``` + +Attempts to parse `n` parsers in sequence, returning the fold of them using fold function `f`. Parsers must be specified in series, followed by all the destructors for the types they return minus the last. These are used in case of partial success. + +For example: `mpc_and(3, mpcf_astrfold, mpc_char('a'), mpc_char('b'), mpc_char('c'), free, free)` would attempt to match `'a'` followed by `'b'` followed by `'c'` and if successful would concatenate them using `mpcf_astrfold`. + + +Function Types +-------------- + +The combinator functions take a number of special function types as function pointers. Here is a short explanation of those types and how they are expected to behave. + +```c +typedef void(*mpc_dtor_t)(mpc_val_t*); +``` + +Destructor function. Given some pointer to a data value it will ensure the memory it points to is freed correctly. + +```c +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +``` -If you want to do all the data processing after the parsing stage _mpc_ comes packaged with a basic AST type which makes the grammar declaration much cleaner as you don't have to pass around destructors and fold functions. All these functions reside under `mpca_*`. +Application function. This takes in some pointer to data and outputs some new or modified pointer to data, ensuring to free any old data no longer required. The `apply_to` variation takes in an extra pointer to some data such as state of the system. -This also allows for the use of parser grammars that can be declared directly in C strings similarly to regular expressions. +```c +typedef mpc_val_t*(*mpc_fold_t)(mpc_val_t*,mpc_val_t*); +``` + +Fold function. 
This takes two pointers to data and must output some new combined pointer to data, ensuring to free any old data no longer required. When used with the `many`, `many1` and `count` functions this initially takes in `NULL` for its first argument and following that takes in for its first argument whatever was previously returned by the function itself. In this way users have a chance to build some initial data structure before populating it with whatever is passed as the second argument. + +```c +typedef mpc_val_t*(*mpc_afold_t)(int,mpc_val_t**); +``` + +AFold Function. Similar to the above but it is passed in a list of pointers to data values which must all be folded together and output as a new single data value. + +```c +typedef mpc_val_t*(*mpc_lift_t)(void); +``` + +Lift Function. This is a simple function that returns some data value when called. It can be used to create _empty_ versions of data types when certain combinators have no known default value to return. + +Example +------- + +Using the above we can already create a parser that matches a C identifier with relative ease. + +First we build a fold function that will concatenate two strings together. ```c +mpc_val_t* parse_fold_string(mpc_val_t* x, mpc_val_t* y) { + + if (x == NULL) { return y; } + if (y == NULL) { return x; } + + char* x = realloc(x, strlen(x) + strlen(y) + 1); + strcat(x, y); + + free(y); + return x; + +} +``` + +Then we can actually specify the parser. 
+``` +char* parse_ident(char* input) { + + mpc_parser_t* alpha = mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); + mpc_parser_t* digit = mpc_oneof("0123456789"); + mpc_parser_t* underscore = mpc_char('_'); + + mpc_parser_t* ident0 = mpc_else(alpha, underscore); + mpc_parser_t* ident1 = mpc_many(mpc_or(3, alpha, digit, underscore), parse_fold_string); + mpc_parser_t* ident = mpc_also(ident0, ident1, free, parse_fold_string); + + mpc_result_t r; + if (!mpc_parse("parse_ident", input, ident, &r)) { + mpc_err_print(r.error); + abort(); + } + + mpc_delete(ident); + + return r.output; +} +``` + +Self Reference +-------------- +Building parsers in the above way can have issues with self reference and left recursion. + +To overcome this we separate the construction of parsers into two different steps. First we allocate them... + +```c +mpc_parser_t* mpc_new(const char* name); ``` +This will construct a parser called `name` which can then be referenced by others including itself when defined. Any parser created using `mpc_new` is said to be _retained_. This means it will behave slightly differently to a normal parser. For example when deleting a parser that includes a _retained_ parser, the _retained_ parser will not be deleted. To delete a retained parser `mpc_delete` must be used on it directly. + +A _retained_ parser can then be defined using... + +```c +mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a); +``` + +This assigns the contents of parser `a` into `p`, and frees any memory involved in constructing `a`. Now parsers can reference each other and themselves without trouble. + +```c +mpc_parser_t* mpc_undefine(mpc_parser_t* p); +``` + +But now parsers that reference each other must all be undefined before they are deleted. It is important to do any undefining before deletion. The reason for this is that to delete a parser it must look at each sub-parser that is included in it. 
If any of these have already been deleted it will segfault. + + +```c +void mpc_cleanup(int n, ...); +``` + +To ease the task of undefining and then deleting parsers `mpc_cleanup` can be used. It takes `n` parsers as input and undefines them all, before deleting them all. + +Common Parsers +--------------- + +A number of common parsers have been included. + +* `mpc_parser_t* mpc_eoi(void);` - Matches only the end of input +* `mpc_parser_t* mpc_soi(void);` - Matches only the start of input + +* `mpc_parser_t* mpc_space(void);` - Matches some whitespace character +* `mpc_parser_t* mpc_spaces(void);` - Matches zero or more whitespace characters +* `mpc_parser_t* mpc_whitespace(void);` - Matches zero or more whitespace characters and frees the result + +* `mpc_parser_t* mpc_newline(void);` - Matches `'\n'` +* `mpc_parser_t* mpc_tab(void);` - Matches `'\t'` +* `mpc_parser_t* mpc_escape(void);` - Matches a backslash followed by any character + +* `mpc_parser_t* mpc_digit(void);` - Matches any character in the range `'0'` - `'9'` +* `mpc_parser_t* mpc_hexdigit(void);` - Matches any character in the range `'0'` - `'9'` as well as `'A'` - `'F'` or `'a'` - `'f'` +* `mpc_parser_t* mpc_octdigit(void);` - Matches any character in the range `'0'` - `'7'` +* `mpc_parser_t* mpc_digits(void);` - Matches one or more digit +* `mpc_parser_t* mpc_hexdigits(void);` - Matches one or more hexdigit +* `mpc_parser_t* mpc_octdigits(void);` - Matches one or more octdigit + +* `mpc_parser_t* mpc_lower(void);` - Matches any lower case character +* `mpc_parser_t* mpc_upper(void);` - Matches any upper case character +* `mpc_parser_t* mpc_alpha(void);` - Matches any alphabet character +* `mpc_parser_t* mpc_underscore(void);` - Matches `'_'` +* `mpc_parser_t* mpc_alphanum(void);` - Matches any alphabet character, underscore or digit + +* `mpc_parser_t* mpc_int(void);` - Matches digits and converts to `int*` +* `mpc_parser_t* mpc_hex(void);` - Matches hexdigits and converts to `int*` +* 
`mpc_parser_t* mpc_oct(void);` - Matches octdigits and converts to `int*` +* `mpc_parser_t* mpc_number(void);` - Matches `mpc_int`, `mpc_hex` or `mpc_oct` + +* `mpc_parser_t* mpc_real(void);` - Matches some floating point number +* `mpc_parser_t* mpc_float(void);` - Matches some floating point number and converts to `float*` +* `mpc_parser_t* mpc_semi(void);` - Matches `';'` +* `mpc_parser_t* mpc_comma(void);` - Matches `','` +* `mpc_parser_t* mpc_colon(void);` - Matches `':'` +* `mpc_parser_t* mpc_dot(void);` - Matches `'.'` +* `mpc_parser_t* mpc_char_lit(void);` - Matches some character literal +* `mpc_parser_t* mpc_string_lit(void);` - Matches some string literal +* `mpc_parser_t* mpc_regex_lit(void);` - Matches some regex literal -Reference ---------- +* `mpc_parser_t* mpc_ident(void);` - Matches a C identifier +Useful Parsers +-------------- +* `mpc_parser_t* mpc_start(mpc_parser_t* a);` +* `mpc_parser_t* mpc_end(mpc_parser_t* a, mpc_dtor_t da);` +* `mpc_parser_t* mpc_enclose(mpc_parser_t* a, mpc_dtor_t da);` +* `mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f);` +* `mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f);` + +* `mpc_parser_t* mpc_strip(mpc_parser_t* a);` +* `mpc_parser_t* mpc_tok(mpc_parser_t* a);` +* `mpc_parser_t* mpc_sym(const char* s);` +* `mpc_parser_t* mpc_total(mpc_parser_t* a, mpc_dtor_t da);` + +* `mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c);` +* `mpc_parser_t* mpc_parens(mpc_parser_t* a, mpc_dtor_t ad);` +* `mpc_parser_t* mpc_braces(mpc_parser_t* a, mpc_dtor_t ad);` +* `mpc_parser_t* mpc_brackets(mpc_parser_t* a, mpc_dtor_t ad);` +* `mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad);` + +* `mpc_parser_t* mpc_tok_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c);` +* `mpc_parser_t* mpc_tok_parens(mpc_parser_t* a, mpc_dtor_t ad);` +* `mpc_parser_t* mpc_tok_braces(mpc_parser_t* a, mpc_dtor_t ad);` +* `mpc_parser_t* mpc_tok_brackets(mpc_parser_t* a, 
mpc_dtor_t ad);` +* `mpc_parser_t* mpc_tok_squares(mpc_parser_t* a, mpc_dtor_t ad);` + + +Fold Functions +-------------- + +A number of common fold functions a user might want are included. They reside under the `mpcf_*` namespace. + +* `void mpcf_dtor_null(mpc_val_t* x);` - Empty destructor. Does nothing +* `mpc_val_t* mpcf_lift_null(void);` - Returns `NULL` +* `mpc_val_t* mpcf_lift_emptystr(void);` - Returns newly allocated empty string + +* `mpc_val_t* mpcf_free(mpc_val_t* x);` - Frees `x` and returns `NULL` +* `mpc_val_t* mpcf_int(mpc_val_t* x);` - Converts a decimal string `x` to an `int*` +* `mpc_val_t* mpcf_hex(mpc_val_t* x);` - Converts a hex string `x` to an `int*` +* `mpc_val_t* mpcf_oct(mpc_val_t* x);` - Converts a oct string `x` to an `int*` +* `mpc_val_t* mpcf_float(mpc_val_t* x);` - Converts a string `x` to a `float*` +* `mpc_val_t* mpcf_escape(mpc_val_t* x);` - Converts a string `x` to an escaped version +* `mpc_val_t* mpcf_unescape(mpc_val_t* x);` - Converts a string `x` to an unescaped version + +* `mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y);` - Returns `x` +* `mpc_val_t* mpcf_snd(mpc_val_t* x, mpc_val_t* y);` - Returns `y` + +* `mpc_val_t* mpcf_fst_free(mpc_val_t* x, mpc_val_t* y);` - Returns `x` and frees `y` +* `mpc_val_t* mpcf_snd_free(mpc_val_t* x, mpc_val_t* y);` - Returns `y` and frees `x` + +* `mpc_val_t* mpcf_freefold(mpc_val_t* t, mpc_val_t* x);` - Returns `NULL` and frees `x` +* `mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x);` - Concatenates `t` and `x` and returns result + +* `mpc_val_t* mpcf_afst(int n, mpc_val_t** xs);` - Returns first argument +* `mpc_val_t* mpcf_asnd(int n, mpc_val_t** xs);` - Returns second argument +* `mpc_val_t* mpcf_atrd(int n, mpc_val_t** xs);` - Returns third argument + +* `mpc_val_t* mpcf_astrfold(int n, mpc_val_t** xs);` - Concatenates and returns all input strings +* `mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs);` - Frees first and third argument and returns second +* `mpc_val_t* 
mpcf_maths(int n, mpc_val_t** xs);` - Examines second argument as string to see which operator it is, then operates on first and third argument as if they are `int*`. + + +Another Example +--------------- + +Here is another example to show off the stuff learnt so far. + +Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own syntax tree types as well as perform specific house-keeping or processing in the parsing phase. For example we can specify a simple maths grammar that computes the result of the expression as it goes. + +We start with a fold function that will fold `int*` types based on some `char*` operator. + +```c +mpc_val_t* mpcf_maths(int n, mpc_val_t** xs) { + + int** vs = (int**)xs; + + if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } + if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } + if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } + if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } + if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } + + free(xs[1]); free(xs[2]); + + return xs[0]; +} +``` + +And then we use this to specify how the grammar folds. + +```c +int parse_maths(char* input) { + + mpc_parser_t* Expr = mpc_new("expr"); + mpc_parser_t* Factor = mpc_new("factor"); + mpc_parser_t* Term = mpc_new("term"); + mpc_parser_t* Maths = mpc_new("maths"); + + mpc_define(Expr, mpc_else( + mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), + Factor + )); + + mpc_define(Factor, mpc_else( + mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free), + Term + )); + + mpc_define(Term, mpc_else(mpc_int(), mpc_parens(Expr, free))); + mpc_define(Maths, mpc_enclose(Expr, free)); + + mpc_result_t r; + if (!mpc_parse("parse_maths", input, Maths, &r)) { + mpc_err_print(r.error); + abort(); + } + + int result = *r.output; + free(r.output); + + return result; +} +``` + +Supplied with something like `(4*2)+5` this will output `13`. 
+ + +Regular Expressions +------------------- + +Even with all that has been explained above, specifying parts of text can be a tedious task requiring many lines of code. So _mpc_ provides a simple regular expression matcher. + +```c +mpc_parser_t* mpc_re(const char* re); +``` + +This returns a parser that will attempt to match the given regular expression pattern, and return the matched string on success. It does not have support for groups and match objects, but should be sufficient for simple tasks. + +A cute thing about this is that it uses previous parts of the library to parse the user input string - and because _mpc_ is type generic, the parser spits out a `mpc_parser_t` directly! It even uses many of the combinator functions as fold functions! This is a great case study in learning how to use _mpc_, so those curious are encouraged to find it in the source code. + + +Abstract Syntax Tree +-------------------- + +For those that really do not care what data they get out a basic abstract syntax tree type `mpc_ast_t` has been included. Along with this are included some combinator functions which work specifically on this type. They reside under `mpca_*` and you will notice they do not require fold functions or destructors to be specified. + +Doing things via this method means that all the data processing must take place after the parsing - but to many this will be preferable. It also allows for one more trick... + +If all the fold and destructor functions are implicit then the user can simply specify the grammar in some nice way and the system can try to build an AST for them from this alone. + +```c +mpc_parser_t* mpca_grammar(const char* grammar, ...); +``` +This can be used to do exactly that. It takes in some grammar, as well as a list of named parsers - and outputs a parser that does exactly what is specified. 
diff --git a/TODO.md b/TODO.md index 545dfc5..04d9ef0 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,3 @@ -- Special Start of Input -- Special End of Input -- Integrate ptest - Test All Regex Features - Test Regex Range Feature - Parser Naming diff --git a/mpc.c b/mpc.c index fbecf1d..a09a71d 100644 --- a/mpc.c +++ b/mpc.c @@ -313,7 +313,7 @@ static bool mpc_input_next(mpc_input_t* i, char** o) { static bool mpc_input_any(mpc_input_t* i, char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (i->str[i->state.pos] == '\0') { i->state.next = i->str[i->state.pos]; return false; @@ -325,7 +325,7 @@ static bool mpc_input_any(mpc_input_t* i, char** o) { static bool mpc_input_char(mpc_input_t* i, char c, char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (i->str[i->state.pos] != c) { i->state.next = i->str[i->state.pos]; return false; @@ -337,7 +337,7 @@ static bool mpc_input_char(mpc_input_t* i, char c, char** o) { static bool mpc_input_range(mpc_input_t* i, char c, char d, char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (i->str[i->state.pos] < c || i->str[i->state.pos] > d) { i->state.next = i->str[i->state.pos]; @@ -360,7 +360,7 @@ static bool char_in_string(char c, const char* x) { static bool mpc_input_oneof(mpc_input_t* i, const char* c, char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (!char_in_string(i->str[i->state.pos], c)) { i->state.next = i->str[i->state.pos]; return false; @@ -372,7 +372,7 @@ static bool mpc_input_oneof(mpc_input_t* i, const char* c, char** o) { static bool 
mpc_input_noneof(mpc_input_t* i, const char* c, char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (char_in_string(i->str[i->state.pos], c) || (i->str[i->state.pos] == '\0')) { i->state.next = i->str[i->state.pos]; return false; @@ -384,14 +384,22 @@ static bool mpc_input_noneof(mpc_input_t* i, const char* c, char** o) { static bool mpc_input_satisfy(mpc_input_t* i, bool(*cond)(char), char** o) { - if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return false; } if (!cond(i->str[i->state.pos])) { i->state.next = i->str[i->state.pos]; return false; } return mpc_input_next(i, o); } -bool mpc_input_string(mpc_input_t* i, const char* c, char** o) { +static bool mpc_input_eoi(mpc_input_t* i) { + return i->state.pos == strlen(i->str); +} + +static bool mpc_input_soi(mpc_input_t* i) { + return i->state.pos == 0; +} + +static bool mpc_input_string(mpc_input_t* i, const char* c, char** o) { mpc_input_mark(i); char* co = NULL; @@ -424,26 +432,28 @@ enum { MPC_TYPE_LIFT_VAL = 4, MPC_TYPE_EXPECT = 5, - MPC_TYPE_ANY = 6, - MPC_TYPE_SINGLE = 7, - MPC_TYPE_ONEOF = 8, - MPC_TYPE_NONEOF = 9, - MPC_TYPE_RANGE = 10, - MPC_TYPE_SATISFY = 11, - MPC_TYPE_STRING = 12, - - MPC_TYPE_APPLY = 13, - MPC_TYPE_APPLY_TO = 14, - MPC_TYPE_NOT = 15, - MPC_TYPE_MAYBE = 16, - MPC_TYPE_MANY = 17, - MPC_TYPE_MANY1 = 18, - MPC_TYPE_COUNT = 19, - - MPC_TYPE_ELSE = 20, - MPC_TYPE_ALSO = 21, - MPC_TYPE_OR = 22, - MPC_TYPE_AND = 23, + MPC_TYPE_SOI = 6, + MPC_TYPE_EOI = 7, + MPC_TYPE_ANY = 8, + MPC_TYPE_SINGLE = 9, + MPC_TYPE_ONEOF = 10, + MPC_TYPE_NONEOF = 11, + MPC_TYPE_RANGE = 12, + MPC_TYPE_SATISFY = 13, + MPC_TYPE_STRING = 14, + + MPC_TYPE_APPLY = 15, + MPC_TYPE_APPLY_TO = 16, + MPC_TYPE_NOT = 17, + MPC_TYPE_MAYBE = 18, + MPC_TYPE_MANY = 19, + MPC_TYPE_MANY1 = 20, + MPC_TYPE_COUNT = 21, + + 
MPC_TYPE_ELSE = 22, + MPC_TYPE_ALSO = 23, + MPC_TYPE_OR = 24, + MPC_TYPE_AND = 25, }; typedef struct { mpc_lift_t lf; void* x; } mpc_pdata_lift_t; @@ -480,6 +490,7 @@ typedef union { struct mpc_parser_t { bool retained; + char* name; uint8_t type; mpc_pdata_t data; }; @@ -514,8 +525,12 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { if (p->type == MPC_TYPE_LIFT_VAL) { MPC_SUCCESS(p->data.lift.x); } /* Basic Parsers */ - + char* s = NULL; + + if (p->type == MPC_TYPE_SOI) { if (mpc_input_soi(i)) { MPC_SUCCESS(NULL) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "start of input")); } } + if (p->type == MPC_TYPE_EOI) { if (mpc_input_eoi(i)) { MPC_SUCCESS(NULL) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "end of input")); } } + if (p->type == MPC_TYPE_ANY) { MPC_TRY(s, mpc_input_any(i, &s)); } if (p->type == MPC_TYPE_SINGLE) { MPC_TRY(s, mpc_input_char(i, p->data.single.x, &s)); } if (p->type == MPC_TYPE_RANGE) { MPC_TRY(s, mpc_input_range(i, p->data.range.x, p->data.range.y, &s)); } @@ -805,40 +820,42 @@ static void mpc_undefine_unretained(mpc_parser_t* p, bool force) { default: break; } - if (!force) { free(p); } + if (!force) { + free(p->name); + free(p); + } } void mpc_delete(mpc_parser_t* p) { - if (p->retained) { if (p->type != MPC_TYPE_UNDEFINED) { fprintf(stderr, "\nError: Parser still Defined! 
Use `mpc_undefine` before delete!\n"); abort(); } else { + free(p->name); free(p); } } else { - - mpc_undefine_unretained(p, false); - + mpc_undefine_unretained(p, false); } - - } static mpc_parser_t* mpc_undefined(void) { mpc_parser_t* p = calloc(1, sizeof(mpc_parser_t)); p->retained = false; p->type = MPC_TYPE_UNDEFINED; + p->name = NULL; return p; } -mpc_parser_t* mpc_new(void) { +mpc_parser_t* mpc_new(const char* name) { mpc_parser_t* p = mpc_undefined(); p->retained = true; + p->name = realloc(p->name, strlen(name) + 1); + strcpy(p->name, name); return p; } @@ -849,12 +866,54 @@ mpc_parser_t* mpc_undefine(mpc_parser_t* p) { } mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a) { + p->type = a->type; p->data = a->data; + free(a); return p; } +static void mpc_delete_all_va(int n, va_list va) { + int i; + for (i = 0; i < n; i++) { + mpc_delete(va_arg(va, mpc_parser_t*)); + } +} + +static void mpc_undefine_all_va(int n, va_list va) { + int i; + for (i = 0; i < n; i++) { + mpc_undefine(va_arg(va, mpc_parser_t*)); + } +} + +static void mpc_delete_all(int n, ...) { + va_list va; + va_start(va, n); + mpc_delete_all_va(n, va); + va_end(va); +} + +static void mpc_undefine_all(int n, ...) { + va_list va; + va_start(va, n); + mpc_undefine_all_va(n, va); + va_end(va); +} + +void mpc_cleanup(int n, ...) { + va_list va; + va_start(va, n); + mpc_cleanup_va(n, va); + va_end(va); +} + +void mpc_cleanup_va(int n, va_list va) { + mpc_undefine_all_va(n, va); + mpc_delete_all_va(n, va); +} + mpc_parser_t* mpc_pass(void) { mpc_parser_t* p = mpc_undefined(); p->type = MPC_TYPE_PASS; @@ -890,6 +949,7 @@ mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected) { return p; } + /* ** Basic Parsers */ @@ -1152,13 +1212,24 @@ mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) 
{ ** Common Parsers */ +mpc_parser_t* mpc_eoi(void) { + mpc_parser_t* p = mpc_undefined(); + p->type = MPC_TYPE_EOI; + return p; +} + +mpc_parser_t* mpc_soi(void) { + mpc_parser_t* p = mpc_undefined(); + p->type = MPC_TYPE_SOI; + return p; +} + mpc_parser_t* mpc_space(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "space"); } mpc_parser_t* mpc_spaces(void) { return mpc_expect(mpc_many(mpc_space(), mpcf_strfold), "spaces"); } mpc_parser_t* mpc_whitespace(void) { return mpc_expect(mpc_apply(mpc_spaces(), mpcf_free), "whitespace"); } mpc_parser_t* mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } mpc_parser_t* mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } -mpc_parser_t* mpc_eoi(void) { return mpc_expect(mpc_char('\0'), "end of input"); } mpc_parser_t* mpc_escape(void) { return mpc_also(mpc_char('\\'), mpc_any(), free, mpcf_strfold); } mpc_parser_t* mpc_digit(void) { return mpc_expect(mpc_oneof("012345689"), "digit"); } @@ -1170,7 +1241,7 @@ mpc_parser_t* mpc_octdigits(void) { return mpc_expect(mpc_many1(mpc_octdigit(), mpc_parser_t* mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } mpc_parser_t* mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } -mpc_parser_t* mpc_alpha(void) { return mpc_expect(mpc_else(mpc_lower(), mpc_upper()), "letter"); } +mpc_parser_t* mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } mpc_parser_t* mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } mpc_parser_t* mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } @@ -1219,10 +1290,8 @@ mpc_parser_t* mpc_regex_lit(void) { } mpc_parser_t* mpc_ident(void) { - mpc_parser_t* p0 = mpc_else(mpc_alpha(), mpc_underscore()); mpc_parser_t* p1 = mpc_many_else(mpc_alphanum(), mpcf_strfold, mpcf_lift_emptystr); - return 
mpc_also(p0, p1, free, mpcf_strfold); } @@ -1230,11 +1299,17 @@ mpc_parser_t* mpc_ident(void) { ** Useful Parsers */ -mpc_parser_t* mpc_ends(mpc_parser_t* a, mpc_dtor_t da) { return mpc_also(a, mpc_eoi(), da, mpcf_fst_free); } +mpc_parser_t* mpc_start(mpc_parser_t* a) { return mpc_also(mpc_soi(), a, mpcf_dtor_null, mpcf_snd); } +mpc_parser_t* mpc_end(mpc_parser_t* a, mpc_dtor_t da) { return mpc_also(a, mpc_eoi(), da, mpcf_fst); } +mpc_parser_t* mpc_enclose(mpc_parser_t* a, mpc_dtor_t da) { return mpc_and(3, mpcf_asnd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } + mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f) { return mpc_many(a, f); } mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f) { return mpc_many1(a, f); } + +mpc_parser_t* mpc_strip(mpc_parser_t* a) { return mpc_and(3, mpcf_asnd, mpc_whitespace(), a, mpc_whitespace(), mpcf_dtor_null, mpcf_dtor_null); } mpc_parser_t* mpc_tok(mpc_parser_t* a) { return mpc_also(a, mpc_whitespace(), mpcf_dtor_null, mpcf_fst); } mpc_parser_t* mpc_sym(const char* s) { return mpc_tok(mpc_string(s)); } +mpc_parser_t* mpc_total(mpc_parser_t* a, mpc_dtor_t da) { return mpc_enclose(mpc_strip(a), da); } mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c) { return mpc_and(3, mpcf_between_free, @@ -1247,6 +1322,17 @@ mpc_parser_t* mpc_braces(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between( mpc_parser_t* mpc_brackets(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } +mpc_parser_t* mpc_tok_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c) { + return mpc_and(3, mpcf_between_free, + mpc_sym(o), a, mpc_sym(c), + free, ad); +} + +mpc_parser_t* mpc_tok_parens(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } +mpc_parser_t* mpc_tok_braces(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } 
+mpc_parser_t* mpc_tok_brackets(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } +mpc_parser_t* mpc_tok_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } + /* ** Regular Expression Parsers */ @@ -1293,7 +1379,6 @@ mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between( ** ** : ** | "\" -** | "." ** | "(" ")" ** | "[" "]" */ @@ -1324,6 +1409,7 @@ static mpc_val_t* mpc_re_escape(mpc_val_t* x) { if (s[0] == '.') { free(x); return mpc_any(); } if (s[0] == '$') { free(x); return mpc_eoi(); } + if (s[0] == '^') { free(x); return mpc_soi(); } if (s[0] == '\\') { @@ -1344,20 +1430,6 @@ static mpc_val_t* mpc_re_escape(mpc_val_t* x) { } -static char* mpc_re_unescape(char c) { - - if (c == 'a') { return "\a"; } - else if (c == 'b') { return "\b"; } - else if (c == 'f') { return "\f"; } - else if (c == 'n') { return "\n"; } - else if (c == 'r') { return "\r"; } - else if (c == 't') { return "\t"; } - else if (c == 'v') { return "\v"; } - else if (c == '0') { return "\0"; } - else { return (char[]){ c, '\0' }; } - -} - static mpc_val_t* mpc_re_range(mpc_val_t* x) { char* s = x; @@ -1423,11 +1495,11 @@ static mpc_val_t* mpc_re_lift(void) { mpc_parser_t* mpc_re(const char* re) { - mpc_parser_t* Regex = mpc_new(); - mpc_parser_t* Term = mpc_new(); - mpc_parser_t* Factor = mpc_new(); - mpc_parser_t* Base = mpc_new(); - mpc_parser_t* Range = mpc_new(); + mpc_parser_t* Regex = mpc_new("regex"); + mpc_parser_t* Term = mpc_new("term"); + mpc_parser_t* Factor = mpc_new("factor"); + mpc_parser_t* Base = mpc_new("base"); + mpc_parser_t* Range = mpc_new("range"); mpc_define(Regex, mpc_else( mpc_and(3, mpc_re_afold_or, Term, mpc_char('|'), Regex, mpc_delete, free), @@ -1444,10 +1516,9 @@ mpc_parser_t* mpc_re(const char* re) { Base )); - mpc_define(Base, mpc_or(5, + mpc_define(Base, mpc_or(4, mpc_parens(Regex, (mpc_dtor_t)mpc_delete), mpc_squares(Range, (mpc_dtor_t)mpc_delete), - mpc_apply(mpc_oneof(".$"), 
mpc_re_escape), mpc_apply(mpc_escape(), mpc_re_escape), mpc_apply(mpc_noneof(")|"), mpc_re_escape) )); @@ -1457,34 +1528,26 @@ mpc_parser_t* mpc_re(const char* re) { mpc_re_range )); + mpc_parser_t* RegexEnclose = mpc_enclose(Regex, (mpc_dtor_t)mpc_delete); + mpc_result_t r; - bool res = mpc_parse("", re, mpc_ends(Regex, (mpc_dtor_t)mpc_delete), &r); - - mpc_undefine(Regex); - mpc_undefine(Term); - mpc_undefine(Factor); - mpc_undefine(Base); - mpc_undefine(Range); - - mpc_delete(Regex); - mpc_delete(Term); - mpc_delete(Factor); - mpc_delete(Base); - mpc_delete(Range); - - if (res) { - return r.output; - } else { + if(!mpc_parse("", re, RegexEnclose, &r)) { fprintf(stderr, "\nError Compiling Regex: '%s' ", re); mpc_err_print(r.error); - abort(); + abort(); } + mpc_delete(RegexEnclose); + mpc_cleanup(5, Regex, Term, Factor, Base, Range); + + return r.output; + } /* ** Common Fold Functions */ + void mpcf_dtor_null(mpc_val_t* x) { return; } @@ -1648,6 +1711,10 @@ mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x) { return t; } +mpc_val_t* mpcf_afst(int n, mpc_val_t** xs) { return xs[0]; } +mpc_val_t* mpcf_asnd(int n, mpc_val_t** xs) { return xs[1]; } +mpc_val_t* mpcf_atrd(int n, mpc_val_t** xs) { return xs[2]; } + mpc_val_t* mpcf_astrfold(int n, mpc_val_t** xs) { mpc_val_t* t = NULL; int i; @@ -1684,7 +1751,11 @@ mpc_val_t* mpcf_maths(int n, mpc_val_t** xs) { static void mpc_print_unretained(mpc_parser_t* p, bool force) { - if (p->retained && !force) { printf("

"); return; } + if (p->retained && !force) { + if (p->name) { printf("<%s>", p->name); } + else { printf(""); } + return; + } if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } if (p->type == MPC_TYPE_PASS) { printf(""); } @@ -1695,6 +1766,9 @@ static void mpc_print_unretained(mpc_parser_t* p, bool force) { /*mpc_print_unretained(p->data.expect.x, false);*/ } + if (p->type == MPC_TYPE_SOI) { printf(""); } + if (p->type == MPC_TYPE_EOI) { printf(""); } + if (p->type == MPC_TYPE_ANY) { printf(""); } if (p->type == MPC_TYPE_SATISFY) { printf("", p->data.satisfy.f); } @@ -1746,9 +1820,11 @@ static void mpc_print_unretained(mpc_parser_t* p, bool force) { } if (p->type == MPC_TYPE_ALSO) { + printf("("); mpc_print_unretained(p->data.also.x, false); printf(" "); mpc_print_unretained(p->data.also.y, false); + printf(")"); } if (p->type == MPC_TYPE_OR) { @@ -1763,12 +1839,14 @@ static void mpc_print_unretained(mpc_parser_t* p, bool force) { } if (p->type == MPC_TYPE_AND) { + printf("("); int i; for(i = 0; i < p->data.and.n-1; i++) { mpc_print_unretained(p->data.and.xs[i], false); printf(" "); } mpc_print_unretained(p->data.and.xs[p->data.and.n-1], false); + printf(")"); } } @@ -1819,7 +1897,6 @@ bool mpc_match(mpc_parser_t* p, const char* s, void* d, destructor(r.output); return true; } else { - printf("Failed!\n"); printf("Got "); printer(r.output); printf("\n"); printf("Expected "); printer(d); printf("\n"); destructor(r.output); @@ -1848,37 +1925,91 @@ void mpc_ast_delete(mpc_ast_t* a) { } free(a->children); + free(a->tag); free(a->contents); free(a); } -mpc_ast_t* mpc_ast_new(char* contents) { +static void mpc_ast_delete_no_children(mpc_ast_t* a) { + free(a->children); + free(a->tag); + free(a->contents); + free(a); +} + +mpc_ast_t* mpc_ast_new(const char* tag, const char* contents) { mpc_ast_t* a = malloc(sizeof(mpc_ast_t)); - a->tag = 0; - a->contents = calloc(1, 1); + + a->tag = malloc(strlen(tag) + 1); + strcpy(a->tag, tag); + + a->contents = malloc(strlen(contents) 
+ 1); + strcpy(a->contents, contents); + a->children_num = 0; a->children = NULL; return a; } -mpc_ast_t* mpc_ast_empty(void) { - return mpc_ast_new(""); +mpc_ast_t* mpc_ast_build(int n, const char* tag, ...) { + + mpc_ast_t* a = mpc_ast_new(tag, ""); + + va_list va; + va_start(va, tag); + + int i; + for (i = 0; i < n; i++) { + mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); + } + + va_end(va); + + return a; + +} + +mpc_ast_t* mpc_ast_insert_root(mpc_ast_t* a) { + + if (a == NULL) { return a; } + if (a->children_num == 0) { return a; } + if (a->children_num == 1) { return a; } + + mpc_ast_t* r = mpc_ast_new("root", ""); + mpc_ast_add_child(r, a); + return r; +} + +bool mpc_ast_eq(mpc_ast_t* a, mpc_ast_t* b) { + + if (strcmp(a->tag, b->tag) != 0) { return false; } + if (strcmp(a->contents, b->contents) != 0) { return false; } + if (a->children_num != b->children_num) { return false; } + + int i; + for (i = 0; i < a->children_num; i++) { + if (!mpc_ast_eq(a->children[i], b->children[i])) { return false; } + } + + return true; } void mpc_ast_add_child(mpc_ast_t* r, mpc_ast_t* a) { - a->children_num++; - a->children = realloc(a->children, sizeof(mpc_ast_t*) * a->children_num); - a->children[a->children_num-1] = a; + if (a == NULL || r == NULL) { return; } + + r->children_num++; + r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); + r->children[r->children_num-1] = a; } -mpc_ast_t* mpc_ast_tag(mpc_ast_t* a, int t) { - a->tag = t; - return a; +void mpc_ast_tag(mpc_ast_t* a, const char* t) { + a->tag = realloc(a->tag, strlen(t) + 1); + strcpy(a->tag, t); } static void mpc_ast_print_depth(mpc_ast_t* a, int d) { @@ -1886,7 +2017,12 @@ static void mpc_ast_print_depth(mpc_ast_t* a, int d) { int i; for (i = 0; i < d; i++) { printf("\t"); } - printf("-> %s", a->contents); + if (strlen(a->contents)) { + printf("<%s> '%s'\n", a->tag, a->contents); + } else { + printf("<%s>\n", a->tag); + } + for (i = 0; i < a->children_num; i++) { 
mpc_ast_print_depth(a->children[i], d+1); @@ -1900,45 +2036,60 @@ void mpc_ast_print(mpc_ast_t* a) { mpc_val_t* mpcf_fold_ast(mpc_val_t* a, mpc_val_t* b) { - if (a == NULL) { return b; } - if (b == NULL) { return a; } + int i; + mpc_ast_t* r = mpc_ast_new("", ""); + mpc_ast_t* x = a; + mpc_ast_t* y = b; + + if (x && x->children_num > 0) { + for (i = 0; i < x->children_num; i++) { + mpc_ast_add_child(r, x->children[i]); + } + mpc_ast_delete_no_children(x); + } else if (x && x->children_num == 0) { mpc_ast_add_child(r, x); } + + if (y && y->children_num > 0) { + for (i = 0; i < y->children_num; i++) { + mpc_ast_add_child(r, y->children[i]); + } + mpc_ast_delete_no_children(y); + } else if (y && y->children_num == 0) { mpc_ast_add_child(r, y); } - mpc_ast_t* r = mpc_ast_empty(); - mpc_ast_add_child(r, a); - mpc_ast_add_child(r, b); return r; } mpc_val_t* mpcf_afold_ast(int n, mpc_val_t** as) { mpc_val_t* t = NULL; + int i; for (i = 0; i < n; i++) { - t = mpcf_fold_ast(t, as[i]); + mpcf_fold_ast(t, as[i]); } return t; } mpc_val_t* mpcf_apply_str_ast(mpc_val_t* c) { - mpc_ast_t* a = mpc_ast_new(c); + mpc_ast_t* a = mpc_ast_new("", c); free(c); return a; } -mpc_val_t* mpcf_lift_ast(void) { - return mpc_ast_empty(); +static mpc_val_t* mpcf_apply_tag(mpc_val_t* x, void* d) { + mpc_ast_tag(x, d); + return x; } -mpc_parser_t* mpc_ast(mpc_parser_t* a) { - return mpc_apply(a, mpcf_apply_str_ast); +mpc_parser_t* mpca_tag(mpc_parser_t* a, const char* t) { + return mpc_apply_to(a, mpcf_apply_tag, (void*)t); } -mpc_parser_t* mpca_not(mpc_parser_t* a) { return mpc_not_else(a, (mpc_dtor_t)mpc_ast_delete, mpcf_lift_ast); } -mpc_parser_t* mpca_maybe(mpc_parser_t* a) { return mpc_maybe_else(a, mpcf_lift_ast); } -mpc_parser_t* mpca_many(mpc_parser_t* a) { return mpc_many_else(a, mpcf_fold_ast, mpcf_lift_ast); } +mpc_parser_t* mpca_not(mpc_parser_t* a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } +mpc_parser_t* mpca_maybe(mpc_parser_t* a) { return mpc_maybe(a); } +mpc_parser_t* 
mpca_many(mpc_parser_t* a) { return mpc_many(a, mpcf_fold_ast); } mpc_parser_t* mpca_many1(mpc_parser_t* a) { return mpc_many1(a, mpcf_fold_ast); } -mpc_parser_t* mpca_count(mpc_parser_t* a, int n) { return mpc_count_else(a, (mpc_dtor_t)mpc_ast_delete, mpcf_fold_ast, n, mpcf_lift_ast); } +mpc_parser_t* mpca_count(mpc_parser_t* a, int n) { return mpc_count(a, (mpc_dtor_t)mpc_ast_delete, mpcf_fold_ast, n); } mpc_parser_t* mpca_else(mpc_parser_t* a, mpc_parser_t* b) { return mpc_else(a, b); } mpc_parser_t* mpca_also(mpc_parser_t* a, mpc_parser_t* b) { return mpc_also(a, b, (mpc_dtor_t)mpc_ast_delete, mpcf_fold_ast); } mpc_parser_t* mpca_bind(mpc_parser_t* a, mpc_parser_t* b) { return mpca_also(a, b); } @@ -1946,7 +2097,7 @@ mpc_parser_t* mpca_bind(mpc_parser_t* a, mpc_parser_t* b) { return mpca_also(a, mpc_parser_t* mpca_or(int n, ...) { va_list va; va_start(va, n); - mpc_parser_t* p = mpc_ast(mpc_or_va(n, va)); + mpc_parser_t* p = mpc_or_va(n, va); va_end(va); return p; } @@ -1974,10 +2125,10 @@ mpc_parser_t* mpca_and(int n, ...) { va_end(va); - return mpc_ast(p); + return p; } -mpc_parser_t* mpca_ends(mpc_parser_t* a) { return mpc_ends(a, (mpc_dtor_t)mpc_ast_delete); } +mpc_parser_t* mpca_total(mpc_parser_t* a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } /* ** Grammar Parser @@ -2016,7 +2167,7 @@ mpc_parser_t* mpca_ends(mpc_parser_t* a) { return mpc_ends(a, (mpc_dtor_t)mpc_as ** | "?" 
** | "{" "}" ** -** : "<" ">" +** : "<" ( | ) ">" ** | ** | ** | @@ -2028,14 +2179,14 @@ static mpc_val_t* mpca_grammar_afold_or(int n, mpc_val_t** xs) { return mpca_else(xs[0], xs[2]); } -static mpc_val_t* mpc_grammar_fold_many(mpc_val_t* x, mpc_val_t* y) { +static mpc_val_t* mpca_grammar_fold_many(mpc_val_t* x, mpc_val_t* y) { if (x == NULL) { return y; } if (y == NULL) { return x; } return mpca_also(x, y); } static mpc_val_t* mpca_grammar_lift(void) { - return mpc_lift(mpcf_lift_ast); + return mpc_pass(); } static mpc_val_t* mpca_grammar_fold_repeat(mpc_val_t* x, mpc_val_t* y) { @@ -2047,19 +2198,25 @@ static mpc_val_t* mpca_grammar_fold_repeat(mpc_val_t* x, mpc_val_t* y) { return mpca_count(x, n); } -static mpc_val_t* mpc_grammar_apply_string(mpc_val_t* x) { - return mpc_ast(mpc_string(mpcf_unescape(x))); +static mpc_val_t* mpca_grammar_apply_string(mpc_val_t* x) { + char* y = mpcf_unescape(x); + mpc_parser_t* p = mpc_tok(mpc_string(y)); + free(y); + return mpca_tag(mpc_apply(p, mpcf_apply_str_ast), "string"); } -static mpc_val_t* mpc_grammar_apply_char(mpc_val_t* x) {; - return mpc_ast(mpc_char(*(char*)mpcf_unescape(x))); +static mpc_val_t* mpca_grammar_apply_char(mpc_val_t* x) { + char* y = mpcf_unescape(x); + mpc_parser_t* p = mpc_tok(mpc_char(*(char*)y)); + free(y); + return mpca_tag(mpc_apply(p, mpcf_apply_str_ast), "char"); } -static mpc_val_t* mpc_grammar_apply_regex(mpc_val_t* x) { +static mpc_val_t* mpca_grammar_apply_regex(mpc_val_t* x) { /* TODO: Unescape Regex */ - mpc_parser_t* p = mpc_ast(mpc_re(x)); + mpc_parser_t* p = mpc_tok(mpc_re(x)); free(x); - return p; + return mpca_tag(mpc_apply(p, mpcf_apply_str_ast), "regex"); } typedef struct { @@ -2068,81 +2225,119 @@ typedef struct { mpc_parser_t** parsers; } mpc_grammar_st_t; -static mpc_val_t* mpc_grammar_apply_id(mpc_val_t* x, void* y) { - int i = *((int*)x); - mpc_grammar_st_t* st = y; +static mpc_val_t* mpca_grammar_apply_id(mpc_val_t* x, void* y) { + + /* Case of Number */ + if 
(strstr("0123456789", x)) { + + int i = strtol(x, NULL, 10); + mpc_grammar_st_t* st = y; + + while (st->parsers_num <= i) { + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); + } + + return mpc_apply(st->parsers[i], (mpc_apply_t)mpc_ast_insert_root); + + /* Case of Identifier */ + } else { + + int i; + mpc_grammar_st_t* st = y; + + /* Search Existing Parsers */ + for (i = 0; i < st->parsers_num; i++) { + mpc_parser_t* p = st->parsers[i]; + if (p->name && strcmp(p->name, x) == 0) { + return mpc_apply(mpca_tag(p, p->name), (mpc_apply_t)mpc_ast_insert_root); + } + } + + /* Search New Parsers */ + while (true) { + + mpc_parser_t* p = va_arg(*st->va, mpc_parser_t*); + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = p; + + if (p == NULL) { + fprintf(stderr, "Error: Unable to find specified parser '%s' in arguments\n", (char*)x); + abort(); + } + + if (p->name && strcmp(p->name, x) == 0) { + return mpc_apply(mpca_tag(p, p->name), (mpc_apply_t)mpc_ast_insert_root); + } + + } - while (st->parsers_num <= i) { - st->parsers_num++; - st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); - st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); } - return st->parsers[i]; + } static void mpc_soft_delete(mpc_val_t* x) { mpc_undefine_unretained(x, false); } +static mpc_val_t* mpcf_make_root(mpc_val_t* x) { + return mpca_tag(x, "root"); +} + mpc_parser_t* mpca_grammar(const char* grammar, ...) 
{ - mpc_parser_t* Grammar = mpc_new(); - mpc_parser_t* Term = mpc_new(); - mpc_parser_t* Factor = mpc_new(); - mpc_parser_t* Base = mpc_new(); + mpc_parser_t* Grammar = mpc_new("grammar"); + mpc_parser_t* Term = mpc_new("term"); + mpc_parser_t* Factor = mpc_new("factor"); + mpc_parser_t* Base = mpc_new("base"); mpc_define(Grammar, mpc_else( mpc_and(3, mpca_grammar_afold_or, Term, mpc_sym("|"), Grammar, mpc_soft_delete, free), Term )); - mpc_define(Term, mpc_many_else(Factor, mpc_grammar_fold_many, mpca_grammar_lift)); + mpc_define(Term, mpc_many_else(Factor, mpca_grammar_fold_many, mpca_grammar_lift)); mpc_define(Factor, mpc_or(5, mpc_also(Base, mpc_sym("*"), mpc_soft_delete, mpca_grammar_fold_repeat), mpc_also(Base, mpc_sym("+"), mpc_soft_delete, mpca_grammar_fold_repeat), mpc_also(Base, mpc_sym("?"), mpc_soft_delete, mpca_grammar_fold_repeat), - mpc_also(Base, mpc_tok(mpc_brackets(mpc_int(), free)), mpc_soft_delete, mpca_grammar_fold_repeat), + mpc_also(Base, mpc_tok_brackets(mpc_tok(mpc_int()), free), mpc_soft_delete, mpca_grammar_fold_repeat), Base )); va_list va; va_start(va, grammar); - mpc_grammar_st_t st = { &va, 0, NULL }; mpc_define(Base, mpc_or(5, - mpc_apply(mpc_tok(mpc_string_lit()), mpc_grammar_apply_string), - mpc_apply(mpc_tok(mpc_char_lit()), mpc_grammar_apply_char), - mpc_apply(mpc_tok(mpc_regex_lit()), mpc_grammar_apply_regex), - mpc_apply_to(mpc_tok(mpc_braces(mpc_int(), free)), mpc_grammar_apply_id, &st), - mpc_tok(mpc_parens(Grammar, mpc_soft_delete)) + mpc_apply(mpc_tok(mpc_string_lit()), mpca_grammar_apply_string), + mpc_apply(mpc_tok(mpc_char_lit()), mpca_grammar_apply_char), + mpc_apply(mpc_tok(mpc_regex_lit()), mpca_grammar_apply_regex), + mpc_apply_to(mpc_tok_braces(mpc_tok(mpc_else(mpc_digits(), mpc_ident())), free), mpca_grammar_apply_id, &st), + mpc_tok_parens(Grammar, mpc_soft_delete) )); + mpc_parser_t* GrammarTotal = mpc_apply(mpc_total(Grammar, mpc_soft_delete), mpcf_make_root); + mpc_result_t r; - bool res = mpc_parse("", 
grammar, mpc_ends(Grammar, mpc_soft_delete), &r); + if(!mpc_parse("", grammar, GrammarTotal, &r)) { + fprintf(stderr, "\nError Compiling Grammar: '%s' ", grammar); + mpc_err_print(r.error); + abort(); + } + mpc_delete(GrammarTotal); + free(st.parsers); va_end(va); - mpc_undefine(Grammar); - mpc_undefine(Term); - mpc_undefine(Factor); - mpc_undefine(Base); + mpc_cleanup(4, Grammar, Term, Factor, Base); - mpc_delete(Grammar); - mpc_delete(Term); - mpc_delete(Factor); - mpc_delete(Base); - - if (res) { - return r.output; - } else { - fprintf(stderr, "\nError Compiling Grammar: '%s' ", grammar); - mpc_err_print(r.error); - abort(); - } + return r.output; } diff --git a/mpc.h b/mpc.h index 6265407..573c400 100644 --- a/mpc.h +++ b/mpc.h @@ -64,21 +64,23 @@ typedef mpc_val_t*(*mpc_lift_t)(void); */ void mpc_delete(mpc_parser_t* p); -mpc_parser_t* mpc_new(void); +mpc_parser_t* mpc_new(const char* name); mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a); mpc_parser_t* mpc_undefine(mpc_parser_t* p); -mpc_parser_t* mpc_pass(void); -mpc_parser_t* mpc_fail(void); -mpc_parser_t* mpc_lift(mpc_lift_t f); -mpc_parser_t* mpc_lift_val(mpc_val_t* x); -mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); +void mpc_cleanup(int n, ...); +void mpc_cleanup_va(int n, va_list va); /* ** Basic Parsers */ +mpc_parser_t* mpc_pass(void); +mpc_parser_t* mpc_fail(void); +mpc_parser_t* mpc_lift(mpc_lift_t f); +mpc_parser_t* mpc_lift_val(mpc_val_t* x); + mpc_parser_t* mpc_any(void); mpc_parser_t* mpc_char(char c); mpc_parser_t* mpc_range(char s, char e); @@ -91,6 +93,7 @@ mpc_parser_t* mpc_string(const char* s); ** Core Parsers */ +mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f); mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x); mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da); @@ -114,13 +117,15 @@ mpc_parser_t* mpc_and_va(int n, mpc_afold_t f, va_list va); ** Common Parsers */ 
+mpc_parser_t* mpc_eoi(void); +mpc_parser_t* mpc_soi(void); + mpc_parser_t* mpc_space(void); mpc_parser_t* mpc_spaces(void); mpc_parser_t* mpc_whitespace(void); mpc_parser_t* mpc_newline(void); mpc_parser_t* mpc_tab(void); -mpc_parser_t* mpc_eoi(void); mpc_parser_t* mpc_escape(void); mpc_parser_t* mpc_digit(void); @@ -151,6 +156,7 @@ mpc_parser_t* mpc_dot(void); mpc_parser_t* mpc_char_lit(void); mpc_parser_t* mpc_string_lit(void); +mpc_parser_t* mpc_regex_lit(void); mpc_parser_t* mpc_ident(void); @@ -158,17 +164,29 @@ mpc_parser_t* mpc_ident(void); ** Useful Parsers */ -mpc_parser_t* mpc_ends(mpc_parser_t* a, mpc_dtor_t da); +mpc_parser_t* mpc_start(mpc_parser_t* a); +mpc_parser_t* mpc_end(mpc_parser_t* a, mpc_dtor_t da); +mpc_parser_t* mpc_enclose(mpc_parser_t* a, mpc_dtor_t da); + mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f); mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f); + +mpc_parser_t* mpc_strip(mpc_parser_t* a); mpc_parser_t* mpc_tok(mpc_parser_t* a); mpc_parser_t* mpc_sym(const char* s); +mpc_parser_t* mpc_total(mpc_parser_t* a, mpc_dtor_t da); + mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c); mpc_parser_t* mpc_parens(mpc_parser_t* a, mpc_dtor_t ad); mpc_parser_t* mpc_braces(mpc_parser_t* a, mpc_dtor_t ad); mpc_parser_t* mpc_brackets(mpc_parser_t* a, mpc_dtor_t ad); mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_tok_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c); +mpc_parser_t* mpc_tok_parens(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_tok_braces(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_tok_brackets(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_tok_squares(mpc_parser_t* a, mpc_dtor_t ad); /* ** Regular Expression Parsers @@ -201,6 +219,10 @@ mpc_val_t* mpcf_snd_free(mpc_val_t* x, mpc_val_t* y); mpc_val_t* mpcf_freefold(mpc_val_t* t, mpc_val_t* x); mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x); +mpc_val_t* 
mpcf_afst(int n, mpc_val_t** xs); +mpc_val_t* mpcf_asnd(int n, mpc_val_t** xs); +mpc_val_t* mpcf_atrd(int n, mpc_val_t** xs); + mpc_val_t* mpcf_astrfold(int n, mpc_val_t** xs); mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs); mpc_val_t* mpcf_maths(int n, mpc_val_t** xs); @@ -212,48 +234,34 @@ mpc_val_t* mpcf_maths(int n, mpc_val_t** xs); void mpc_print(mpc_parser_t* p); - -/* -** Testing -*/ - -bool mpc_unmatch(mpc_parser_t* p, const char* s, void* d, - bool(*tester)(void*, void*), - mpc_dtor_t destructor, - void(*printer)(void*)); - -bool mpc_match(mpc_parser_t* p, const char* s, void* d, - bool(*tester)(void*, void*), - mpc_dtor_t destructor, - void(*printer)(void*)); - /* ** AST */ typedef struct mpc_ast_t { - int tag; + char* tag; char* contents; int children_num; struct mpc_ast_t** children; } mpc_ast_t; void mpc_ast_delete(mpc_ast_t* a); -mpc_ast_t* mpc_ast_empty(void); -mpc_ast_t* mpc_ast_new(char* contents); +mpc_ast_t* mpc_ast_new(const char* tag, const char* contents); +mpc_ast_t* mpc_ast_build(int n, const char* tag, ...); +mpc_ast_t* mpc_ast_insert_root(mpc_ast_t* a); void mpc_ast_add_child(mpc_ast_t* r, mpc_ast_t* a); -mpc_ast_t* mpc_ast_tag(mpc_ast_t* a, int t); +void mpc_ast_tag(mpc_ast_t* a, const char* t); void mpc_ast_print(mpc_ast_t* a); +bool mpc_ast_eq(mpc_ast_t* a, mpc_ast_t* b); mpc_val_t* mpcf_fold_ast(mpc_val_t* a, mpc_val_t* b); mpc_val_t* mpcf_afold_ast(int n, mpc_val_t** as); mpc_val_t* mpcf_apply_str_ast(mpc_val_t* c); -mpc_val_t* mpcf_lift_ast(void); - -mpc_parser_t* mpc_ast(mpc_parser_t* a); +mpc_parser_t* mpca_tag(mpc_parser_t* a, const char* t); +mpc_parser_t* mpca_total(mpc_parser_t* a); mpc_parser_t* mpca_not(mpc_parser_t* a); mpc_parser_t* mpca_maybe(mpc_parser_t* a); mpc_parser_t* mpca_many(mpc_parser_t* a); @@ -264,7 +272,21 @@ mpc_parser_t* mpca_also(mpc_parser_t* a, mpc_parser_t* b); mpc_parser_t* mpca_bind(mpc_parser_t* a, mpc_parser_t* b); mpc_parser_t* mpca_or(int n, ...); mpc_parser_t* mpca_and(int n, ...); 
-mpc_parser_t* mpca_ends(mpc_parser_t* a); mpc_parser_t* mpca_grammar(const char* grammar, ...); + +/* +** Testing +*/ + +bool mpc_unmatch(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, + void(*printer)(void*)); + +bool mpc_match(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, + void(*printer)(void*)); + #endif diff --git a/tests/core.c b/tests/core.c index 9fdf062..0e65dbe 100644 --- a/tests/core.c +++ b/tests/core.c @@ -13,7 +13,7 @@ void test_ident(void) { /* ^[a-zA-Z_][a-zA-Z0-9_]*$ */ - mpc_parser_t* Ident = mpc_ends( + mpc_parser_t* Ident = mpc_enclose( mpc_also( mpc_else(mpc_alpha(), mpc_underscore()), mpc_many1(mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit()), mpcf_strfold), @@ -35,10 +35,10 @@ void test_ident(void) { void test_maths(void) { - mpc_parser_t* Expr = mpc_new(); - mpc_parser_t* Factor = mpc_new(); - mpc_parser_t* Term = mpc_new(); - mpc_parser_t* Maths = mpc_new(); + mpc_parser_t* Expr = mpc_new("expr"); + mpc_parser_t* Factor = mpc_new("factor"); + mpc_parser_t* Term = mpc_new("term"); + mpc_parser_t* Maths = mpc_new("maths"); mpc_define(Expr, mpc_else( mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), @@ -55,7 +55,7 @@ void test_maths(void) { mpc_parens(Expr, free) )); - mpc_define(Maths, mpc_ends(Expr, free)); + mpc_define(Maths, mpc_enclose(Expr, free)); PT_ASSERT(mpc_match(Maths, "1", (int[]){ 1 }, int_eq, free, int_print)); PT_ASSERT(mpc_match(Maths, "(5)", (int[]){ 5 }, int_eq, free, int_print)); @@ -63,16 +63,7 @@ void test_maths(void) { PT_ASSERT(mpc_unmatch(Maths, "a", (int[]){ 0 }, int_eq, free, int_print)); PT_ASSERT(mpc_unmatch(Maths, "2b+4", (int[]){ 2 }, int_eq, free, int_print)); - mpc_undefine(Expr); - mpc_undefine(Factor); - mpc_undefine(Term); - mpc_undefine(Maths); - - mpc_delete(Expr); - mpc_delete(Factor); - mpc_delete(Term); - mpc_delete(Maths); - + mpc_cleanup(4, Expr, Factor, Term, Maths); } void 
suite_core(void) { diff --git a/tests/grammar.c b/tests/grammar.c index d49d733..0507ccb 100644 --- a/tests/grammar.c +++ b/tests/grammar.c @@ -1,54 +1,56 @@ #include "ptest.h" #include "../mpc.h" -bool ast_eq(void* x, void* y) { - return false; -} - void test_grammar(void) { - mpc_parser_t* Test = mpc_new(); - - mpc_define(Test, mpca_grammar("'c'*")); - - mpc_print(Test); - - mpc_undefine(Test); - - mpc_delete(Test); - - mpc_parser_t* Expression = mpc_new(); - mpc_parser_t* Product = mpc_new(); - mpc_parser_t* Value = mpc_new(); - - mpc_define(Expression, mpca_grammar("<0> (('+' | '-') <0>)*", Product)); - mpc_define(Product, mpca_grammar("<0> (('*' | '/') <0>)*", Value)); - mpc_define(Value, mpca_grammar("/[0-9]/ | '(' <0> ')'", Expression)); - - mpc_print(Expression); - mpc_print(Product); - mpc_print(Value); - - mpc_ast_t* empty = mpc_ast_empty(); - - /* - PT_ASSERT(mpc_match(Expression, "1", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); - PT_ASSERT(mpc_match(Expression, "(5)", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); - PT_ASSERT(mpc_match(Expression, "(4*2)+5", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); - PT_ASSERT(mpc_match(Expression, "a", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); - PT_ASSERT(mpc_match(Expression, "2b+4", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); - */ - - mpc_ast_delete(empty); - - mpc_undefine(Expression); - mpc_undefine(Product); - mpc_undefine(Value); - - mpc_delete(Expression); - mpc_delete(Product); - mpc_delete(Value); - + mpc_parser_t* Expr = mpc_new("expression"); + mpc_parser_t* Prod = mpc_new("product"); + mpc_parser_t* Value = mpc_new("value"); + mpc_parser_t* Maths = mpc_new("maths"); + + mpc_define(Expr, mpca_grammar(" (('+' | '-') )* ", Prod)); + mpc_define(Prod, mpca_grammar(" (('*' | '/') )* ", Value)); + mpc_define(Value, mpca_grammar(" /[0-9]+/ | '(' ')' ", 
Expr)); + mpc_define(Maths, mpca_total(Expr)); + + mpc_ast_t* t0 = mpc_ast_build(1, "root", mpc_ast_new("value", "24")); + mpc_ast_t* t1 = mpc_ast_build(1, "root", + mpc_ast_build(3, "value", + mpc_ast_new("char", "("), + mpc_ast_new("value", "5"), + mpc_ast_new("char", ")"))); + + mpc_ast_t* t2 = mpc_ast_build(3, "root", + + mpc_ast_build(3, "value", + mpc_ast_new("char", "("), + mpc_ast_build(3, "expression", + + mpc_ast_build(5, "product", + mpc_ast_new("value", "4"), + mpc_ast_new("char", "*"), + mpc_ast_new("value", "2"), + mpc_ast_new("char", "*"), + mpc_ast_new("value", "11")), + + mpc_ast_new("char", "+"), + mpc_ast_new("value", "2")), + mpc_ast_new("char", ")")), + + mpc_ast_new("char", "+"), + mpc_ast_new("value", "5")); + + PT_ASSERT(mpc_match(Maths, " 24 ", t0, (bool(*)(void*,void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_match(Maths, "(5)", t1, (bool(*)(void*,void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_match(Maths, "(4 * 2 * 11 + 2) + 5", t2, (bool(*)(void*,void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_unmatch(Maths, "a", t0, (bool(*)(void*,void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_unmatch(Maths, "2b+4", t0, (bool(*)(void*,void*))mpc_ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + + mpc_ast_delete(t0); + mpc_ast_delete(t1); + mpc_ast_delete(t2); + + mpc_cleanup(4, Expr, Prod, Value, Maths); } void suite_grammar(void) { diff --git a/tests/regex.c b/tests/regex.c index 8288e5b..710f826 100644 --- a/tests/regex.c +++ b/tests/regex.c @@ -39,12 +39,9 @@ void test_regex_range(void) { mpc_parser_t* re0 = mpc_re("abg[abcdef]"); mpc_parser_t* re1 = mpc_re("y*[a-z]"); mpc_parser_t* re2 = mpc_re("zz(p+)?[A-Z_0\\]123]*"); - mpc_parser_t* re3 = mpc_re("[^56hy].*$"); - - mpc_print(re0); - mpc_print(re1); - mpc_print(re2); - mpc_print(re3); + 
mpc_parser_t* re3 = mpc_re("^[^56hy].*$"); + + /* TODO: Testing */ mpc_delete(re0); mpc_delete(re1);