Added flags to language specification. Added optional expect string to language specification. Added some example grammars for testing and demos
This commit is contained in:
9
Makefile
9
Makefile
@@ -1,13 +1,18 @@
|
||||
CC = gcc
|
||||
CFLAGS = -ansi -pedantic -Wall -Werror -g
|
||||
CFLAGS = -ansi -pedantic -Wall -Wno-overlength-strings -Werror -g
|
||||
|
||||
TESTS = $(wildcard tests/*.c)
|
||||
EXAMPLES = $(wildcard examples/*.c)
|
||||
EXAMPLESEXE = $(EXAMPLES:.c=)
|
||||
|
||||
all: check
|
||||
all: $(EXAMPLESEXE) check
|
||||
|
||||
check: $(TESTS) mpc.c
|
||||
$(CC) $(CFLAGS) $^ -lm -o test
|
||||
./test
|
||||
|
||||
examples/%: examples/%.c mpc.c
|
||||
$(CC) $(CFLAGS) $^ -lm -o $@
|
||||
|
||||
clean:
|
||||
rm test
|
||||
|
393
README.md
393
README.md
@@ -19,7 +19,7 @@ Features
|
||||
* Type-Generic
|
||||
* Predictive, Recursive Descent
|
||||
* Easy to Integrate (One Source File in ANSI C)
|
||||
* Error Messages
|
||||
* Automatic Error Message Generation
|
||||
* Regular Expression Parser Generator
|
||||
* Language/Grammar Parser Generator
|
||||
|
||||
@@ -27,7 +27,7 @@ Features
|
||||
Alternatives
|
||||
------------
|
||||
|
||||
The current main alternative in C based parser combinators is a branch of [Cesium3](https://github.com/wbhart/Cesium3/tree/combinators).
|
||||
The current main alternative for a C based parser combinator library is a branch of [Cesium3](https://github.com/wbhart/Cesium3/tree/combinators).
|
||||
|
||||
_mpc_ provides a number of features that this project does not offer, and also overcomes a number of potential downsides:
|
||||
|
||||
@@ -37,10 +37,10 @@ _mpc_ provides a number of features that this project does not offer, and also o
|
||||
* _mpc_ Doesn't pollute the namespace
|
||||
|
||||
|
||||
View From the Top
|
||||
=================
|
||||
Demonstration
|
||||
=============
|
||||
|
||||
In this example I create a parser for a basic maths language. The function `parse_maths` takes as input some mathematical expression and outputs an instance of `mpc_ast_t`.
|
||||
In the below example I create a parser for basic mathematical expressions.
|
||||
|
||||
```c
|
||||
#include "mpc.h"
|
||||
@@ -75,24 +75,27 @@ void parse_maths(const char *input) {
|
||||
}
|
||||
```
|
||||
|
||||
If you were to input `"(4 * 2 * 11 + 2) - 5"` into this function, the `mpc_ast_t` output would look something like this:
|
||||
If you were to input `"(4 * 2 * 11 + 2) - 5"` into this function, the output would look something like this:
|
||||
|
||||
```python
|
||||
```
|
||||
>:
|
||||
value|>:
|
||||
char: '('
|
||||
expression|>:
|
||||
product|>:
|
||||
value|regex: '4'
|
||||
char: '*'
|
||||
value|regex: '2'
|
||||
char: '*'
|
||||
value|regex: '11'
|
||||
char: '+'
|
||||
product|value|regex: '2'
|
||||
char: ')'
|
||||
char: '+'
|
||||
product|value|regex: '5'
|
||||
regex:
|
||||
expression|>:
|
||||
value|>:
|
||||
char: '('
|
||||
expression|>:
|
||||
product|>:
|
||||
value|regex: '4'
|
||||
char: '*'
|
||||
value|regex: '2'
|
||||
char: '*'
|
||||
value|regex: '11'
|
||||
char: '+'
|
||||
product|value|regex: '2'
|
||||
char: ')'
|
||||
char: '-'
|
||||
product|value|regex: '5'
|
||||
regex:
|
||||
```
|
||||
|
||||
Getting Started
|
||||
@@ -105,54 +108,14 @@ Parser Combinators are structures that encode how to parse a particular language
|
||||
|
||||
The trick behind Parser Combinators is the observation that by structuring the library in a particular way, one can make building parser combinators look like writing a grammar itself. Therefore instead of describing _how to parse a language_, a user must only specify _the language itself_, and the computer will work out how to parse it ... as if by magic!
|
||||
|
||||
As is shown in the above example _mpc_ takes this one step further, and actually allows you to specify the grammar directly, or to build up parsers using library functions.
|
||||
|
||||
|
||||
Parsing
|
||||
-------
|
||||
|
||||
The Parser Combinator type in _mpc_ is `mpc_parser_t`. This encodes a function that attempts to parse some string and, if successful, returns a pointer to some data. Otherwise it returns some error. A parser can be run using `mpc_parse`.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r);
|
||||
```
|
||||
|
||||
This function returns `1` on success and `0` on failure. It takes as input some parser `p`, some input `string`, and some `filename`. It outputs into `r` the result of the parse - which is either a pointer to some data object, or an error. The type `mpc_result_t` is a union type defined as follows.
|
||||
|
||||
```c
|
||||
typedef union {
|
||||
mpc_err_t *error;
|
||||
mpc_val_t *output;
|
||||
} mpc_result_t;
|
||||
```
|
||||
|
||||
where `mpc_val_t` is synonymous with `void *` and simply represents some pointer to data - the exact type of which is dependent on the parser. Some variations on the above also exist. For almost all of the built-in and basic parsers the return type for a successful parser will be `char *`.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r);
|
||||
```
|
||||
|
||||
Parses the contents of `file` with parser `p` and returns the result in `r`. Returns `1` on success and `0` on failure. This is also the correct method to parse input from pipes or streams.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse_contents(const char* filename, mpc_parser_t *p, mpc_result_t* r);
|
||||
```
|
||||
|
||||
Opens file `filename` and parses its contents with `p`. Returns result in `r`. Returns `1` on success and `0` on failure.
|
||||
|
||||
_mpc_ can be used in this mode, or, as shown in the above example, it can allow you to specify the grammar directly as a string or in a file.
|
||||
|
||||
Basic Parsers
|
||||
-------------
|
||||
|
||||
### String Parsers
|
||||
|
||||
All the following functions return basic parsers. All of those parsers return a newly allocated `char*` with the character(s) they manage to match or an error on failure. They have the following functionality.
|
||||
All the following functions construct new basic parsers of the type `mpc_parser_t *`. All of those parsers return a newly allocated `char *` with the character(s) they manage to match. If unsuccessful they will return an error. They have the following functionality.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -212,7 +175,7 @@ Matches exactly the string `s`
|
||||
|
||||
### Trivial Parsers
|
||||
|
||||
Several other functions exist that return basic parsers with some other special functionality.
|
||||
Several other functions exist that construct parsers with some other special functionality.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -256,10 +219,60 @@ mpc_parser_t *mpc_lift_val(mpc_val_t* x);
|
||||
Consumes no input, always successful, returns `x`
|
||||
|
||||
|
||||
Parsing
|
||||
-------
|
||||
|
||||
Once you've built a parser, you can run it on some input using one of the following functions. These functions return `1` on success and `0` on failure. They output either the result, or an error to a `mpc_result_t` variable. This type is defined as follows.
|
||||
|
||||
```c
|
||||
typedef union {
|
||||
mpc_err_t *error;
|
||||
mpc_val_t *output;
|
||||
} mpc_result_t;
|
||||
```
|
||||
|
||||
where `mpc_val_t *` is synonymous with `void *` and simply represents some pointer to data - the exact type of which is dependent on the parser. For almost all of the basic parsers the return type for a successful parser will be `char *`.
|
||||
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r);
|
||||
```
|
||||
|
||||
Run a parser on some string.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r);
|
||||
```
|
||||
|
||||
Run a parser on some file.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r);
|
||||
```
|
||||
|
||||
Run a parser on some pipe (such as `stdin`).
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
int mpc_parse_contents(const char* filename, mpc_parser_t *p, mpc_result_t* r);
|
||||
```
|
||||
|
||||
Run a parser on the contents of some file.
|
||||
|
||||
|
||||
Combinators
|
||||
-----------
|
||||
|
||||
Combinators are functions that take one or more parsers and return a new parser of some given functionality. These combinators work independent of exactly what data type those input parsers return on success. In languages such as Haskell ensuring you don't input one type of data into a parser requiring a different type of data is done by the compiler. But in C we don't have that luxury. So it is at the discretion of the programmer to ensure that he or she deals correctly with the outputs of different parser types.
|
||||
Combinators are functions that take one or more parsers and return a new parser of some given functionality.
|
||||
|
||||
These combinators work independently of exactly what data type the parsers given as input return on success. In languages such as Haskell ensuring you don't input one type of data into a parser requiring a different type of data is done by the compiler. But in C we don't have that luxury. So it is at the discretion of the programmer to ensure that he or she deals correctly with the outputs of different parser types.
|
||||
|
||||
A second annoyance in C is that of manual memory management. Some parsers might get half-way and then fail. This means they need to clean up any partial data that has been collected in the parse. In Haskell this is handled by the Garbage Collector, but in C these combinators will need to take _destructor_ functions as input, which say how to clean up any partial data that has been collected.
|
||||
|
||||
@@ -352,11 +365,10 @@ Returns a parser that runs `a` with backtracking disabled. This means if `a` con
|
||||
Another way to think of `mpc_predictive` is that it can be applied to a parser (for a performance improvement) if either successfully parsing the first character will result in a completely successful parse, or all of the referenced sub-parsers are also `LL(1)`.
|
||||
|
||||
|
||||
|
||||
Function Types
|
||||
--------------
|
||||
|
||||
The combinator functions take a number of special function types as function pointers. Here is a short explanation of those types and how they are expected to behave. It is important that these behave correctly otherwise it is exceedingly easy to introduce memory leaks or crashes into the system.
|
||||
The combinator functions take a number of special function types as function pointers. Here is a short explanation of those types and how they are expected to behave. It is important that these behave correctly otherwise it is easy to introduce memory leaks or crashes into the system.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -381,7 +393,7 @@ typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*);
|
||||
typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*);
|
||||
```
|
||||
|
||||
This takes in some pointer to data and outputs some new or modified pointer to data, ensuring to free any old data no longer used. The `apply_to` variation takes in an extra pointer to some data such as state of the system.
|
||||
This takes in some pointer to data and outputs some new or modified pointer to data, ensuring to free the input data if it is no longer used. The `apply_to` variation takes in an extra pointer to some data such as state of the system.
|
||||
|
||||
* * *
|
||||
|
||||
@@ -389,19 +401,24 @@ This takes in some pointer to data and outputs some new or modified pointer to d
|
||||
typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**);
|
||||
```
|
||||
|
||||
This takes a list of pointers to data values and must return some combined or folded version of these data values. It must ensure to free any old data that is no longer used once combination has taken place. This will ensure no memory is leaked.
|
||||
This takes a list of pointers to data values and must return some combined or folded version of these data values. It must ensure to free any input data that is no longer used once combination has taken place.
|
||||
|
||||
|
||||
First Example
|
||||
-------------
|
||||
Case Study - C Identifier
|
||||
=========================
|
||||
|
||||
Using the above we can create a parser that matches a C identifier with relative ease.
|
||||
Combinator Method
|
||||
-----------------
|
||||
|
||||
First we build a fold function that will concatenate two strings together - freeing any data we no longer need. For the sake of this tutorial we will write it by hand, but this (as well as many other useful fold functions) is actually included in _mpc_ as `mpcf_strfold`.
|
||||
Using the above combinators we can create a parser that matches a C identifier.
|
||||
|
||||
If we match two parts of a string separately with a function such as `mpc_and` we'll need to combine them together into a long string to represent the final identifier.
|
||||
|
||||
For this we build a fold function that will concatenate zero or more strings together. For the sake of this tutorial we will write it by hand, but this function (as well as many other useful fold functions) is actually included in _mpc_ under the `mpcf_*` namespace, as `mpcf_strfold`.
|
||||
|
||||
```c
|
||||
mpc_val_t* strfold(mpc_val_t* x, mpc_val_t* y) {
|
||||
char* x = calloc(1, 1);
|
||||
mpc_val_t *strfold(int n, mpc_val_t **xs) {
|
||||
char *x = calloc(1, 1);
|
||||
int i;
|
||||
for (i = 0; i < n; i++) {
|
||||
x = realloc(x, strlen(x) + strlen(xs[i]) + 1);
|
||||
@@ -412,38 +429,58 @@ mpc_val_t* strfold(mpc_val_t* x, mpc_val_t* y) {
|
||||
}
|
||||
```
|
||||
|
||||
Then we can actually specify the grammar using combinators to say how the basic parsers are combined.
|
||||
We can use this to specify a C identifier, making use of some combinators to say how the basic parsers are combined.
|
||||
|
||||
```c
|
||||
char* parse_ident(char* input) {
|
||||
|
||||
mpc_parser_t *alpha = mpc_or(2, mpc_range('a', 'z'), mpc_range('A', 'Z'));
|
||||
mpc_parser_t *digit = mpc_range('0', '9');
|
||||
mpc_parser_t *underscore = mpc_char('_');
|
||||
|
||||
mpc_parser_t *ident0 = mpc_or(2, alpha, underscore);
|
||||
mpc_parser_t *ident1 = mpc_many(strfold, mpc_or(3, alpha, digit, underscore));
|
||||
mpc_parser_t *ident = mpc_and(2, strfold, ident0, ident1, free);
|
||||
|
||||
mpc_result_t r;
|
||||
if (!mpc_parse("<parse_ident>", input, ident, &r)) {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
mpc_delete(ident);
|
||||
|
||||
return r.output;
|
||||
}
|
||||
mpc_parser_t *alpha = mpc_or(2, mpc_range('a', 'z'), mpc_range('A', 'Z'));
|
||||
mpc_parser_t *digit = mpc_range('0', '9');
|
||||
mpc_parser_t *underscore = mpc_char('_');
|
||||
|
||||
mpc_parser_t *ident = mpc_and(2, strfold,
|
||||
mpc_or(2, alpha, underscore),
|
||||
mpc_many(strfold, mpc_or(3, alpha, digit, underscore)),
|
||||
free);
|
||||
|
||||
/* Do Some Parsing... */
|
||||
|
||||
mpc_delete(ident);
|
||||
```
|
||||
|
||||
Note that only `ident` must be deleted. When we input a parser into a combinator we should consider it to be part of that combinator now. This means we shouldn't create a parser and input it into multiple places or it will be doubly freed.
|
||||
Notice that previous parsers are used as input to new parsers we construct from the combinators. Note that only the final parser `ident` must be deleted. When we input a parser into a combinator we should consider it to be part of the output of that combinator.
|
||||
|
||||
Because of this we shouldn't create a parser and input it into multiple places, or it will be doubly freed.
|
||||
|
||||
|
||||
Self Reference
|
||||
Regex Method
|
||||
------------
|
||||
|
||||
There is an easier way to do this than the above method. _mpc_ comes with a handy regex function for constructing parsers using regex syntax. We can specify an identifier using a regex pattern as shown below.
|
||||
|
||||
```c
|
||||
mpc_parser_t *ident = mpc_re("[a-zA-Z_][a-zA-Z_0-9]*");
|
||||
|
||||
/* Do Some Parsing... */
|
||||
|
||||
mpc_delete(ident);
|
||||
```
|
||||
|
||||
|
||||
Library Method
|
||||
--------------
|
||||
|
||||
Although if we really wanted to create a parser for C identifiers, a function for creating this parser comes included in _mpc_ along with many other common parsers.
|
||||
|
||||
```c
|
||||
mpc_parser_t *ident = mpc_ident();
|
||||
|
||||
/* Do Some Parsing... */
|
||||
|
||||
mpc_delete(ident);
|
||||
```
|
||||
|
||||
Parser References
|
||||
=================
|
||||
|
||||
Building parsers in the above way can have issues with self-reference or cyclic-reference. To overcome this we can separate the construction of parsers into two different steps. Construction and Definition.
|
||||
|
||||
* * *
|
||||
@@ -452,7 +489,7 @@ Building parsers in the above way can have issues with self-reference or cyclic-
|
||||
mpc_parser_t *mpc_new(const char* name);
|
||||
```
|
||||
|
||||
This will construct a parser called `name` which can then be used by others, including itself, without ownership being transferred. Any parser created using `mpc_new` is said to be _retained_. This means it will behave differently to a normal parser when referenced. When deleting a parser that includes a _retained_ parser, the _retained_ parser will not be deleted along with it. To delete a retained parser `mpc_delete` must be used on it directly.
|
||||
This will construct a parser called `name` which can then be used as input to others, including itself, without fear of being deleted. Any parser created using `mpc_new` is said to be _retained_. This means it will behave differently to a normal parser when referenced. When deleting a parser that includes a _retained_ parser, the _retained_ parser will not be deleted along with it. To delete a retained parser `mpc_delete` must be used on it directly.
|
||||
|
||||
A _retained_ parser can then be defined using...
|
||||
|
||||
@@ -480,18 +517,20 @@ void mpc_cleanup(int n, ...);
|
||||
|
||||
To ease the task of undefining and then deleting parsers `mpc_cleanup` can be used. It takes `n` parsers as input, and undefines them all, before deleting them all.
|
||||
|
||||
Note: _mpc_ may have separate stages for construction and definition, but it does not detect [left-recursive grammars](http://en.wikipedia.org/wiki/Left_recursion). These will go into an infinite loop when they attempt to parse input, and so should instead be specified in right-recursive form.
|
||||
Note: _mpc_ may have separate stages for construction and definition, but it still does not detect [left-recursive grammars](http://en.wikipedia.org/wiki/Left_recursion). These will go into an infinite loop when they attempt to parse input, and so should instead be specified in right-recursive form.
|
||||
|
||||
|
||||
Library Reference
|
||||
=================
|
||||
|
||||
Common Parsers
|
||||
---------------
|
||||
|
||||
A number of common parsers are included.
|
||||
--------------
|
||||
|
||||
* `mpc_soi(void);` Matches only the start of input, returns `NULL`
|
||||
* `mpc_eoi(void);` Matches only the end of input, returns `NULL`
|
||||
* `mpc_space(void);` Matches any whitespace character (" \f\n\r\t\v")
|
||||
* `mpc_spaces(void);` Matches zero or more whitespace characters
|
||||
* `mpc_whitespace(void);` Matches spaces and frees the result, returns `NULL`
|
||||
* `mpc_whitespace(void);` Matches any whitespace character `" \f\n\r\t\v"`
|
||||
* `mpc_whitespaces(void);` Matches zero or more whitespace characters
|
||||
* `mpc_blank(void);` Matches whitespaces and frees the result, returns `NULL`
|
||||
* `mpc_newline(void);` Matches `'\n'`
|
||||
* `mpc_tab(void);` Matches `'\t'`
|
||||
* `mpc_escape(void);` Matches a backslash followed by any character
|
||||
@@ -521,9 +560,11 @@ A number of common parsers are included.
|
||||
Useful Parsers
|
||||
--------------
|
||||
|
||||
* `mpc_start(mpc_parser_t *a);` Matches the start of input followed by `a`
|
||||
* `mpc_end(mpc_parser_t *a, mpc_dtor_t da);` Matches `a` followed by the end of input
|
||||
* `mpc_enclose(mpc_parser_t *a, mpc_dtor_t da);` Matches the start of input, `a`, and the end of input
|
||||
* `mpc_startswith(mpc_parser_t *a);` Matches the start of input followed by `a`
|
||||
* `mpc_endswith(mpc_parser_t *a, mpc_dtor_t da);` Matches `a` followed by the end of input
|
||||
* `mpc_whole(mpc_parser_t *a, mpc_dtor_t da);` Matches the start of input, `a`, and the end of input
|
||||
* `mpc_stripl(mpc_parser_t *a);` Matches `a` stripping any whitespace to the left
|
||||
* `mpc_stripr(mpc_parser_t *a);` Matches `a` stripping any whitespace to the right
|
||||
* `mpc_strip(mpc_parser_t *a);` Matches `a` stripping any surrounding whitespace
|
||||
* `mpc_tok(mpc_parser_t *a);` Matches `a` and strips any trailing whitespace
|
||||
* `mpc_sym(const char* s);` Matches string `s` and strips any trailing whitespace
|
||||
@@ -540,10 +581,8 @@ Useful Parsers
|
||||
* `mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad);` Matches `a` between trailing whitespace stripped `"["` and `"]"`
|
||||
|
||||
|
||||
Fold Functions
|
||||
--------------
|
||||
|
||||
A number of common fold functions a user might want are included. They reside under the `mpcf_*` namespace.
|
||||
Apply Functions
|
||||
---------------
|
||||
|
||||
* `void mpcf_dtor_null(mpc_val_t* x);` Empty destructor. Does nothing
|
||||
* `mpc_val_t* mpcf_ctor_null(void);` Returns `NULL`
|
||||
@@ -554,29 +593,42 @@ A number of common fold functions a user might want are included. They reside un
|
||||
* `mpc_val_t* mpcf_oct(mpc_val_t* x);` Converts a oct string `x` to an `int*`
|
||||
* `mpc_val_t* mpcf_float(mpc_val_t* x);` Converts a string `x` to a `float*`
|
||||
* `mpc_val_t* mpcf_escape(mpc_val_t* x);` Converts a string `x` to an escaped version
|
||||
* `mpc_val_t* mpcf_escape_regex(mpc_val_t* x);` Converts a regex `x` to an escaped version
|
||||
* `mpc_val_t* mpcf_escape_string_raw(mpc_val_t* x);` Converts a raw string `x` to an escaped version
|
||||
* `mpc_val_t* mpcf_escape_char_raw(mpc_val_t* x);` Converts a raw character `x` to an escaped version
|
||||
* `mpc_val_t* mpcf_unescape(mpc_val_t* x);` Converts a string `x` to an unescaped version
|
||||
* `mpc_val_t* mpcf_unescape(mpc_val_t* x);` Converts a string `x` to an unescaped version unescaping `\\/`
|
||||
* `mpc_val_t* mpcf_unescape_regex(mpc_val_t* x);` Converts a regex `x` to an unescaped version
|
||||
* `mpc_val_t* mpcf_unescape_string_raw(mpc_val_t* x);` Converts a raw string `x` to an unescaped version
|
||||
* `mpc_val_t* mpcf_unescape_char_raw(mpc_val_t* x);` Converts a raw character `x` to an unescaped version
|
||||
|
||||
Fold Functions
|
||||
--------------
|
||||
|
||||
* `mpc_val_t* mpcf_null(int n, mpc_val_t** xs);` Returns `NULL`
|
||||
* `mpc_val_t* mpcf_fst(int n, mpc_val_t** xs);` Returns first element of `xs`
|
||||
* `mpc_val_t* mpcf_snd(int n, mpc_val_t** xs);` Returns second element of `xs`
|
||||
* `mpc_val_t* mpcf_trd(int n, mpc_val_t** xs);` Returns third element of `xs`
|
||||
* `mpc_val_t* mpcf_fst_free(int n, mpc_val_t** xs);` Returns first element of `xs` and frees others
|
||||
* `mpc_val_t* mpcf_snd_free(int n, mpc_val_t** xs);` Returns second element of `xs` and frees others
|
||||
* `mpc_val_t* mpcf_trd_free(int n, mpc_val_t** xs);` Returns third element of `xs` and frees others
|
||||
* `mpc_val_t* mpcf_fst_free(int n, mpc_val_t** xs);` Returns first element of `xs` and calls `free` on others
|
||||
* `mpc_val_t* mpcf_snd_free(int n, mpc_val_t** xs);` Returns second element of `xs` and calls `free` on others
|
||||
* `mpc_val_t* mpcf_trd_free(int n, mpc_val_t** xs);` Returns third element of `xs` and calls `free` on others
|
||||
* `mpc_val_t* mpcf_strfold(int n, mpc_val_t** xs);` Concatenates all `xs` together as strings and returns result
|
||||
* `mpc_val_t* mpcf_maths(int n, mpc_val_t** xs);` Examines second argument as string to see which operator it is, then operates on first and third argument as if they are `int*`.
|
||||
|
||||
|
||||
Second Example
|
||||
--------------
|
||||
Case Study - Maths Language
|
||||
===========================
|
||||
|
||||
Combinator Approach
|
||||
-------------------
|
||||
|
||||
Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own syntax tree types, as well as allows them to perform specific house-keeping or data processing in the parsing phase. For example we can specify a simple maths grammar that computes the result of the expression as it goes along.
|
||||
|
||||
We start with a fold function that will fold two `int*` into a new `int*` based on some `char*` operator.
|
||||
|
||||
```c
|
||||
mpc_val_t* mpcf_maths(int n, mpc_val_t** xs) {
|
||||
mpc_val_t* fold_maths(int n, mpc_val_t **xs) {
|
||||
|
||||
int** vs = (int**)xs;
|
||||
int **vs = (int**)xs;
|
||||
|
||||
if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; }
|
||||
if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; }
|
||||
@@ -601,12 +653,12 @@ int parse_maths(char* input) {
|
||||
mpc_parser_t *Maths = mpc_new("maths");
|
||||
|
||||
mpc_define(Expr, mpc_or(2,
|
||||
mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free),
|
||||
mpc_and(3, fold_maths, Factor, mpc_oneof("*/"), Factor, free, free),
|
||||
Factor
|
||||
));
|
||||
|
||||
mpc_define(Factor, mpc_or(2,
|
||||
mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free),
|
||||
mpc_and(3, fold_maths, Term, mpc_oneof("+-"), Term, free, free),
|
||||
Term
|
||||
));
|
||||
|
||||
@@ -620,6 +672,7 @@ int parse_maths(char* input) {
|
||||
}
|
||||
|
||||
int result = *r.output;
|
||||
printf("Result: %i\n", result);
|
||||
free(r.output);
|
||||
|
||||
return result;
|
||||
@@ -629,69 +682,55 @@ int parse_maths(char* input) {
|
||||
If we supply this function with something like `(4*2)+5`, we can expect it to output `13`.
|
||||
|
||||
|
||||
Regular Expressions
|
||||
-------------------
|
||||
Language Approach
|
||||
-----------------
|
||||
|
||||
Even with all that has been shown above, specifying parts of text can be a tedious task requiring many lines of code. So _mpc_ provides a simple regular expression matcher.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
mpc_parser_t *mpc_re(const char* re);
|
||||
```
|
||||
|
||||
This returns a parser that will attempt to match the given regular expression pattern, and return the matched string on success. It does not have support for groups and match objects, but should be sufficient for simple tasks.
|
||||
|
||||
A cute thing about this is that it uses previous parts of the library to parse the user input string - and because _mpc_ is type generic, the parser spits out a new `mpc_parser_t` that matches the regular expression directly! It even uses many of the combinator functions indirectly as fold functions! This is a great case study in learning how to use _mpc_, so those curious are encouraged to find it in the source code.
|
||||
|
||||
|
||||
Abstract Syntax Tree
|
||||
--------------------
|
||||
|
||||
One can avoid passing in and around all those clumsy function pointers if they don't care what type is output by _mpc_. For this a generic Abstract Syntax Tree type `mpc_ast_t` is included in _mpc_. The combinator functions which act on this don't need information on how to destruct or fold instances of the result as they know it will be a `mpc_ast_t`. So there are a number of combinator functions which work specifically (and only) on parsers that return this type. They reside under `mpca_*`.
|
||||
It is possible to avoid passing in and around all those function pointers, if you don't care what type is output by _mpc_. For this, a generic Abstract Syntax Tree type `mpc_ast_t` is included in _mpc_. The combinator functions which act on this don't need information on how to destruct or fold instances of the result as they know it will be a `mpc_ast_t`. So there are a number of combinator functions which work specifically (and only) on parsers that return this type. They reside under `mpca_*`.
|
||||
|
||||
Doing things via this method means that all the data processing must take place after the parsing. In many instances this is no problem or even preferable.
|
||||
|
||||
It also allows for one more trick. As all the fold and destructor functions are implicit, the user can simply specify the grammar of the language in some nice way and the system can try to build a parser for the AST type from this alone. For this there are two functions supplied which take in a string and output a parser. The format for these grammars is simple and familar to those who have used parser generators before. It looks something like this.
|
||||
It also allows for one more trick. As all the fold and destructor functions are implicit, the user can simply specify the grammar of the language in some nice way and the system can try to build a parser for the AST type from this alone. For this there are two functions supplied which take in a string and output a parser. The format for these grammars is simple and familiar to those who have used parser generators before. It looks something like this.
|
||||
|
||||
```
|
||||
expression : <product> (('+' | '-') <product>)*;
|
||||
number "number" : /[0-9]+/ ;
|
||||
|
||||
product : <value> (('*' | '/') <value>)*;
|
||||
expression : <product> (('+' | '-') <product>)* ;
|
||||
|
||||
value : /[0-9]+/ | '(' <expression> ')';
|
||||
product : <value> (('*' | '/') <value>)* ;
|
||||
|
||||
maths : /^/ <expression> /$/;
|
||||
value : <number> | '(' <expression> ')' ;
|
||||
|
||||
maths : /^/ <expression> /$/ ;
|
||||
```
|
||||
|
||||
String literals are surrounded in double quotes `"`. Character literals in single quotes `'` and regex literals in slashes `/`. References to other parsers are surrounded in angle brackets `<>` and referred to by name.
|
||||
|
||||
Parts specified one after another are parsed in order (like `mpc_and`), while parts separated by a pipe `|` are alternatives (like `mpc_or`). Parenthesis `()` are used to specify precidence. `*` can be used to mean zero or more of. `+` for one or more of. `?` for zero or one of. `!` for negation. And a number inside braces `{5}` to mean N counts of.
|
||||
Parts specified one after another are parsed in order (like `mpc_and`), while parts separated by a pipe `|` are alternatives (like `mpc_or`). Parentheses `()` are used to specify precedence. `*` can be used to mean zero or more of. `+` for one or more of. `?` for zero or one of. `!` for negation. And a number inside braces `{5}` to mean that many counts of.
|
||||
|
||||
Rules are specified by rule name followed by a colon `:`, followed by the definition, and ending in a semicolon `;`.
|
||||
Rules are specified by rule name, optionally followed by an _expected_ string, followed by a colon `:`, followed by the definition, and ending in a semicolon `;`.
|
||||
|
||||
Like with the regular expressions, this user input is parsed by existing parts of the _mpc_ library. It provides one of the more powerful features of the library.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
mpc_parser_t *mpca_grammar(const char* grammar, ...);
|
||||
mpc_parser_t *mpca_grammar(int flags, const char* grammar, ...);
|
||||
```
|
||||
|
||||
This takes in some single right hand side of a rule, as well as a list of any of the parsers it refers to, and outputs a parser that does exactly what is specified by the rule.
|
||||
This takes in some single right hand side of a rule, as well as a list of any of the parsers it refers to, and outputs a parser that does exactly what is specified by the rule. The `flags` argument is a set of flags - `MPC_LANG_DEFAULT`, `MPC_LANG_PREDICTIVE`, or `MPC_LANG_WHITESPACE_SENSITIVE` - for specifying whether the language is predictive or whitespace sensitive.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
mpc_err_t* mpca_lang(const char* lang, ...);
|
||||
mpc_err_t* mpca_lang(int flags, const char* lang, ...);
|
||||
```
|
||||
|
||||
This takes in a full language (one or more rules) as well as any parsers referred to by either the right or left hand sides. Any parsers specified on the left hand side of any rule will be assigned a parser equivalent to what is specified on the right. On valid user input this returns `NULL`, while if there are any errors in the user input it will return an instance of `mpc_err_t` describing the issues.
|
||||
This takes in a full language (zero or more rules) as well as any parsers referred to by either the right or left hand sides. Any parsers specified on the left hand side of any rule will be assigned a parser equivalent to what is specified on the right. On valid user input this returns `NULL`, while if there are any errors in the user input it will return an instance of `mpc_err_t` describing the issues.
|
||||
|
||||
* * *
|
||||
|
||||
```c
|
||||
mpc_err_t* mpca_lang_file(FILE* f, ...);
|
||||
mpc_err_t* mpca_lang_file(int flags, FILE* f, ...);
|
||||
```
|
||||
|
||||
This reads in the contents of file `f` and inputs it into `mpca_lang`.
|
||||
@@ -699,14 +738,14 @@ This reads in the contents of file `f` and inputs it into `mpca_lang`.
|
||||
* * *
|
||||
|
||||
```c
|
||||
mpc_err_t* mpca_lang_contents(const char* filename, ...);
|
||||
mpc_err_t* mpca_lang_contents(int flags, const char* filename, ...);
|
||||
```
|
||||
|
||||
This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`.
|
||||
|
||||
|
||||
Error Reporting
|
||||
---------------
|
||||
===============
|
||||
|
||||
_mpc_ provides some automatic generation of error messages. These can be enhanced by the user, with use of `mpc_expect`, but many of the defaults should prove both useful and readable. An example of an error message might look something like this:
|
||||
|
||||
@@ -714,8 +753,44 @@ _mpc_ provides some automatic generation of error messages. These can be enhance
|
||||
<test>:0:3: error: expected one or more of 'a' or 'd' at 'k'
|
||||
```
|
||||
|
||||
|
||||
|
||||
Limitations & FAQ
|
||||
=================
|
||||
|
||||
### ASCII
|
||||
|
||||
Only supports ASCII. Sorry!
|
||||
|
||||
|
||||
### Backtracking and Left Recursion
|
||||
|
||||
MPC supports backtracking, but will not completely backtrack up a parse tree if it encounters some success on the path it is going. To demonstrate this behaviour examine the following erroneous grammar, intended to parse either a C style identifier, or a C style function call.
|
||||
|
||||
```
|
||||
factor : <ident>
|
||||
| <ident> '(' <expr>? (',' <expr>)* ')' ;
|
||||
```
|
||||
|
||||
This grammar will never correctly parse a function call because it will always first succeed parsing the initial identifier. At this point it will encounter the parenthesis of the function call, give up, and throw an error. It will not backtrack far enough to attempt the next potential option, which would have succeeded.
|
||||
|
||||
The solution to this is to always structure grammars with the most specific clause first, and more general clauses afterwards. This is the natural technique used for avoiding left-recursive grammars, so is a good habit to get into anyway.
|
||||
|
||||
```
|
||||
factor : <ident> '(' <expr>? (',' <expr>)* ')'
|
||||
| <ident> ;
|
||||
```
|
||||
|
||||
An alternative, and better, option is to remove the ambiguity by factoring out the first identifier completely. This is better because it removes any need for backtracking at all!
|
||||
|
||||
```
|
||||
factor : <ident> ('(' <expr>? (',' <expr>)* ')')? ;
|
||||
```
|
||||
|
||||
|
||||
### Max String Length
|
||||
|
||||
Some compilers limit the maximum length of string literals. If you have a huge language string in the source file to be passed into `mpca_lang` you might encounter this. The ANSI standard says that 509 is the maximum length allowed for a string literal. Most compilers support greater than this. Visual Studio supports up to 2048 characters, while gcc allocates memory dynamically and so has no real limit.
|
||||
|
||||
There are a couple of ways to overcome this issue if it arises. You could instead use `mpca_lang_contents` and load the language from a file, or you could use a string literal for each line and let the preprocessor automatically concatenate them together, avoiding the limit. The final option is to upgrade your compiler. In C99 this limit has been increased to 4095.
|
||||
|
||||
|
||||
|
||||
|
11
TODO.md
11
TODO.md
@@ -1,11 +0,0 @@
|
||||
- Escape/Crop String Literals
|
||||
- Escape/Crop Char Literals
|
||||
- Escape/Crop Regex Literals
|
||||
- Combinator that scans input then returns cursor.
|
||||
|
||||
- Test All Regex Features
|
||||
- Test Regex Range Feature
|
||||
- Add proper tests for everything in general
|
||||
|
||||
- Find some good demo grammars to show
|
||||
|
1
examples/book.doge
Normal file
1
examples/book.doge
Normal file
@@ -0,0 +1 @@
|
||||
wow c so language such book
|
47
examples/doge.c
Normal file
47
examples/doge.c
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "../mpc.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
mpc_parser_t* Adjective = mpc_new("adjective");
|
||||
mpc_parser_t* Noun = mpc_new("noun");
|
||||
mpc_parser_t* Phrase = mpc_new("phrase");
|
||||
mpc_parser_t* Doge = mpc_new("doge");
|
||||
|
||||
mpca_lang(MPC_LANG_DEFAULT,
|
||||
" \
|
||||
adjective : \"wow\" | \"many\" | \"so\" | \"such\"; \
|
||||
noun : \"lisp\" | \"language\" | \"c\" | \"book\" | \"build\"; \
|
||||
phrase : <adjective> <noun>; \
|
||||
doge : /^/ <phrase>* /$/; \
|
||||
",
|
||||
Adjective, Noun, Phrase, Doge);
|
||||
|
||||
if (argc > 1) {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_contents(argv[1], Doge, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_pipe("<stdin>", stdin, Doge, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mpc_cleanup(4, Adjective, Noun, Phrase, Doge);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
21
examples/fib.smallc
Normal file
21
examples/fib.smallc
Normal file
@@ -0,0 +1,21 @@
|
||||
#include "stdio.h"
|
||||
|
||||
int fib(int n) {
|
||||
if (n == 0) { return 0; }
|
||||
if (n == 1) { return 1; }
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
|
||||
main() {
|
||||
|
||||
int n;
|
||||
int i;
|
||||
|
||||
while (i < 10) {
|
||||
n = fib(10);
|
||||
print(n);
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
56
examples/lispy.c
Normal file
56
examples/lispy.c
Normal file
@@ -0,0 +1,56 @@
|
||||
#include "../mpc.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
mpc_parser_t* Number = mpc_new("number");
|
||||
mpc_parser_t* Symbol = mpc_new("symbol");
|
||||
mpc_parser_t* String = mpc_new("string");
|
||||
mpc_parser_t* Comment = mpc_new("comment");
|
||||
mpc_parser_t* Sexpr = mpc_new("sexpr");
|
||||
mpc_parser_t* Qexpr = mpc_new("qexpr");
|
||||
mpc_parser_t* Expr = mpc_new("expr");
|
||||
mpc_parser_t* Lispy = mpc_new("lispy");
|
||||
|
||||
mpca_lang(MPC_LANG_PREDICTIVE,
|
||||
" \
|
||||
number \"number\" : /[0-9]+/ ; \
|
||||
symbol \"symbol\" : /[a-zA-Z0-9_+\\-*\\/\\\\=<>!&]+/ ; \
|
||||
string \"string\" : /\"(\\\\.|[^\"])*\"/ ; \
|
||||
comment : /;[^\\r\\n]*/ ; \
|
||||
sexpr : '(' <expr>* ')' ; \
|
||||
qexpr : '{' <expr>* '}' ; \
|
||||
expr : <number> | <symbol> | <string> \
|
||||
| <comment> | <sexpr> | <qexpr> ; \
|
||||
lispy : /^/ <expr>* /$/ ; \
|
||||
",
|
||||
Number, Symbol, String, Comment, Sexpr, Qexpr, Expr, Lispy);
|
||||
|
||||
if (argc > 1) {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_contents(argv[1], Lispy, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_pipe("<stdin>", stdin, Lispy, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mpc_cleanup(8, Number, Symbol, String, Comment, Sexpr, Qexpr, Expr, Lispy);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
47
examples/maths.c
Normal file
47
examples/maths.c
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "../mpc.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
mpc_parser_t *Expr = mpc_new("expression");
|
||||
mpc_parser_t *Prod = mpc_new("product");
|
||||
mpc_parser_t *Value = mpc_new("value");
|
||||
mpc_parser_t *Maths = mpc_new("maths");
|
||||
|
||||
mpca_lang(MPC_LANG_PREDICTIVE,
|
||||
" \
|
||||
expression : <product> (('+' | '-') <product>)*; \
|
||||
product : <value> (('*' | '/') <value>)*; \
|
||||
value : /[0-9]+/ | '(' <expression> ')'; \
|
||||
maths : /^/ <expression> /$/; \
|
||||
",
|
||||
Expr, Prod, Value, Maths);
|
||||
|
||||
if (argc > 1) {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_contents(argv[1], Maths, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_pipe("<stdin>", stdin, Maths, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mpc_cleanup(4, Expr, Prod, Value, Maths);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
11
examples/minimal.smallc
Normal file
11
examples/minimal.smallc
Normal file
@@ -0,0 +1,11 @@
|
||||
#include "stdio.h"
|
||||
|
||||
main() {
|
||||
|
||||
int i;
|
||||
int j;
|
||||
|
||||
j = 10;
|
||||
|
||||
return 0;
|
||||
}
|
239
examples/prelude.lspy
Normal file
239
examples/prelude.lspy
Normal file
@@ -0,0 +1,239 @@
|
||||
;;;
|
||||
;;; Lispy Standard Prelude
|
||||
;;;
|
||||
|
||||
;;; Atoms
|
||||
(def {nil} {})
|
||||
(def {true} 1)
|
||||
(def {false} 0)
|
||||
|
||||
;;; Functional Functions
|
||||
|
||||
; Function Definitions
|
||||
(def {fun} (\ {f b} {
|
||||
def (head f) (\ (tail f) b)
|
||||
}))
|
||||
|
||||
; Open new scope
|
||||
(fun {let b} {
|
||||
((\ {_} b) ())
|
||||
})
|
||||
|
||||
; Unpack List to Function
|
||||
(fun {unpack f l} {
|
||||
eval (join (list f) l)
|
||||
})
|
||||
|
||||
; Unapply List to Function
|
||||
(fun {pack f & xs} {f xs})
|
||||
|
||||
; Curried and Uncurried calling
|
||||
(def {curry} {unpack})
|
||||
(def {uncurry} {pack})
|
||||
|
||||
; Perform Several things in Sequence
|
||||
(fun {do & l} {
|
||||
if (== l {})
|
||||
{{}}
|
||||
{last l}
|
||||
})
|
||||
|
||||
;;; Logical Functions
|
||||
|
||||
; Logical Functions
|
||||
(fun {not x} {- 1 x})
|
||||
(fun {or x y} {+ x y})
|
||||
(fun {and x y} {* x y})
|
||||
|
||||
|
||||
;;; Numeric Functions
|
||||
|
||||
; Minimum of Arguments
|
||||
(fun {min & xs} {
|
||||
if (== (tail xs) {}) {fst xs}
|
||||
{do
|
||||
(= {rest} (unpack min (tail xs)))
|
||||
(= {item} (fst xs))
|
||||
(if (< item rest) {item} {rest})
|
||||
}
|
||||
})
|
||||
|
||||
; Maximum of Arguments
|
||||
(fun {max & xs} {
|
||||
if (== (tail xs) {}) {fst xs}
|
||||
{do
|
||||
(= {rest} (unpack max (tail xs)))
|
||||
(= {item} (fst xs))
|
||||
(if (> item rest) {item} {rest})
|
||||
}
|
||||
})
|
||||
|
||||
;;; Conditional Functions
|
||||
|
||||
(fun {select & cs} {
|
||||
if (== cs {})
|
||||
{error "No Selection Found"}
|
||||
{if (fst (fst cs)) {snd (fst cs)} {unpack select (tail cs)}}
|
||||
})
|
||||
|
||||
(fun {case x & cs} {
|
||||
if (== cs {})
|
||||
{error "No Case Found"}
|
||||
{if (== x (fst (fst cs))) {snd (fst cs)} {unpack case (join (list x) (tail cs))}}
|
||||
})
|
||||
|
||||
(def {otherwise} true)
|
||||
|
||||
|
||||
;;; Misc Functions
|
||||
|
||||
(fun {flip f a b} {f b a})
|
||||
(fun {ghost & xs} {eval xs})
|
||||
(fun {comp f g x} {f (g x)})
|
||||
|
||||
;;; List Functions
|
||||
|
||||
; First, Second, or Third Item in List
|
||||
(fun {fst l} { eval (head l) })
|
||||
(fun {snd l} { eval (head (tail l)) })
|
||||
(fun {trd l} { eval (head (tail (tail l))) })
|
||||
|
||||
; List Length
|
||||
(fun {len l} {
|
||||
if (== l {})
|
||||
{0}
|
||||
{+ 1 (len (tail l))}
|
||||
})
|
||||
|
||||
; Nth item in List
|
||||
(fun {nth n l} {
|
||||
if (== n 0)
|
||||
{fst l}
|
||||
{nth (- n 1) (tail l)}
|
||||
})
|
||||
|
||||
; Last item in List
|
||||
(fun {last l} {nth (- (len l) 1) l})
|
||||
|
||||
; Apply Function to List
|
||||
(fun {map f l} {
|
||||
if (== l {})
|
||||
{{}}
|
||||
{join (list (f (fst l))) (map f (tail l))}
|
||||
})
|
||||
|
||||
; Apply Filter to List
|
||||
(fun {filter f l} {
|
||||
if (== l {})
|
||||
{{}}
|
||||
{join (if (f (fst l)) {head l} {{}}) (filter f (tail l))}
|
||||
})
|
||||
|
||||
; Return all of list but last element
|
||||
(fun {init l} {
|
||||
if (== (tail l) {})
|
||||
{{}}
|
||||
{join (head l) (init (tail l))}
|
||||
})
|
||||
|
||||
; Reverse List
|
||||
(fun {reverse l} {
|
||||
if (== l {})
|
||||
{{}}
|
||||
{join (reverse (tail l)) (head l)}
|
||||
})
|
||||
|
||||
; Fold Left
|
||||
(fun {foldl f z l} {
|
||||
if (== l {})
|
||||
{z}
|
||||
{foldl f (f z (fst l)) (tail l)}
|
||||
})
|
||||
|
||||
; Fold Right
|
||||
(fun {foldr f z l} {
|
||||
if (== l {})
|
||||
{z}
|
||||
{f (fst l) (foldr f z (tail l))}
|
||||
})
|
||||
|
||||
(fun {sum l} {foldl + 0 l})
|
||||
(fun {product l} {foldl * 1 l})
|
||||
|
||||
; Take N items
|
||||
(fun {take n l} {
|
||||
if (== n 0)
|
||||
{{}}
|
||||
{join (head l) (take (- n 1) (tail l))}
|
||||
})
|
||||
|
||||
; Drop N items
|
||||
(fun {drop n l} {
|
||||
if (== n 0)
|
||||
{l}
|
||||
{drop (- n 1) (tail l)}
|
||||
})
|
||||
|
||||
; Split at N
|
||||
(fun {split n l} {list (take n l) (drop n l)})
|
||||
|
||||
; Take While
|
||||
(fun {take-while f l} {
|
||||
if (not (unpack f (head l)))
|
||||
{{}}
|
||||
{join (head l) (take-while f (tail l))}
|
||||
})
|
||||
|
||||
; Drop While
|
||||
(fun {drop-while f l} {
|
||||
if (not (unpack f (head l)))
|
||||
{l}
|
||||
{drop-while f (tail l)}
|
||||
})
|
||||
|
||||
; Element of List
|
||||
(fun {elem x l} {
|
||||
if (== l {})
|
||||
{false}
|
||||
{if (== x (fst l)) {true} {elem x (tail l)}}
|
||||
})
|
||||
|
||||
; Find element in list of pairs
|
||||
(fun {lookup x l} {
|
||||
if (== l {})
|
||||
{error "No Element Found"}
|
||||
{do
|
||||
(= {key} (fst (fst l)))
|
||||
(= {val} (snd (fst l)))
|
||||
(if (== key x) {val} {lookup x (tail l)})
|
||||
}
|
||||
})
|
||||
|
||||
; Zip two lists together into a list of pairs
|
||||
(fun {zip x y} {
|
||||
if (or (== x {}) (== y {}))
|
||||
{{}}
|
||||
{join (list (join (head x) (head y))) (zip (tail x) (tail y))}
|
||||
})
|
||||
|
||||
; Unzip a list of pairs into two lists
|
||||
(fun {unzip l} {
|
||||
if (== l {})
|
||||
{{{} {}}}
|
||||
{do
|
||||
(= {x} (fst l))
|
||||
(= {xs} (unzip (tail l)))
|
||||
(list (join (head x) (fst xs)) (join (tail x) (snd xs)))
|
||||
}
|
||||
})
|
||||
|
||||
;;; Other Fun
|
||||
|
||||
; Fibonacci
|
||||
(fun {fib n} {
|
||||
select
|
||||
{ (== n 0) 0 }
|
||||
{ (== n 1) 1 }
|
||||
{ otherwise (+ (fib (- n 1)) (fib (- n 2))) }
|
||||
})
|
||||
|
1
examples/simple.maths
Normal file
1
examples/simple.maths
Normal file
@@ -0,0 +1 @@
|
||||
29 + 2 * 3 - 99 - (5 + 5 + 2) / 100
|
102
examples/smallc.c
Normal file
102
examples/smallc.c
Normal file
@@ -0,0 +1,102 @@
|
||||
#include "../mpc.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
mpc_parser_t* Ident = mpc_new("ident");
|
||||
mpc_parser_t* Number = mpc_new("number");
|
||||
mpc_parser_t* Character = mpc_new("character");
|
||||
mpc_parser_t* String = mpc_new("string");
|
||||
mpc_parser_t* Factor = mpc_new("factor");
|
||||
mpc_parser_t* Term = mpc_new("term");
|
||||
mpc_parser_t* Lexp = mpc_new("lexp");
|
||||
mpc_parser_t* Stmt = mpc_new("stmt");
|
||||
mpc_parser_t* Exp = mpc_new("exp");
|
||||
mpc_parser_t* Typeident = mpc_new("typeident");
|
||||
mpc_parser_t* Decls = mpc_new("decls");
|
||||
mpc_parser_t* Args = mpc_new("args");
|
||||
mpc_parser_t* Body = mpc_new("body");
|
||||
mpc_parser_t* Procedure = mpc_new("procedure");
|
||||
mpc_parser_t* Main = mpc_new("main");
|
||||
mpc_parser_t* Includes = mpc_new("includes");
|
||||
mpc_parser_t* Smallc = mpc_new("smallc");
|
||||
|
||||
mpc_err_t* err = mpca_lang(MPC_LANG_DEFAULT,
|
||||
" \n\
|
||||
ident : /[a-zA-Z_][a-zA-Z0-9_]*/ ; \n\
|
||||
number : /[0-9]+/ ; \n\
|
||||
character : /'.'/ ; \n\
|
||||
string : /\"(\\\\.|[^\"])*\"/ ; \n\
|
||||
\n\
|
||||
factor : '(' <lexp> ')' \n\
|
||||
| <number> \n\
|
||||
| <character> \n\
|
||||
| <string> \n\
|
||||
| <ident> '(' <lexp>? (',' <lexp>)* ')' \n\
|
||||
| <ident> ; \n\
|
||||
\n\
|
||||
term : <factor> (('*' | '/' | '%') <factor>)* ; \n\
|
||||
lexp : <term> (('+' | '-') <term>)* ; \n\
|
||||
\n\
|
||||
stmt : '{' <stmt>* '}' \n\
|
||||
| \"while\" '(' <exp> ')' <stmt> \n\
|
||||
| \"if\" '(' <exp> ')' <stmt> \n\
|
||||
| <ident> '=' <lexp> ';' \n\
|
||||
| \"print\" '(' <lexp>? ')' ';' \n\
|
||||
| \"return\" <lexp>? ';' \n\
|
||||
| <ident> '(' <ident>? (',' <ident>)* ')' ';' ; \n\
|
||||
\n\
|
||||
exp : <lexp> '>' <lexp> \n\
|
||||
| <lexp> '<' <lexp> \n\
|
||||
| <lexp> \">=\" <lexp> \n\
|
||||
| <lexp> \"<=\" <lexp> \n\
|
||||
| <lexp> \"!=\" <lexp> \n\
|
||||
| <lexp> \"==\" <lexp> ; \n\
|
||||
\n\
|
||||
typeident : (\"int\" | \"char\") <ident> ; \n\
|
||||
decls : (<typeident> ';')* ; \n\
|
||||
args : <typeident>? (',' <typeident>)* ; \n\
|
||||
body : '{' <decls> <stmt>* '}' ; \n\
|
||||
procedure : (\"int\" | \"char\") <ident> '(' <args> ')' <body> ; \n\
|
||||
main : \"main\" '(' ')' <body> ; \n\
|
||||
includes : (\"#include\" <string>)* ; \n\
|
||||
smallc : /^/ <includes> <decls> <procedure>* <main> /$/ ; \n\
|
||||
",
|
||||
Ident, Number, Character, String, Factor, Term, Lexp, Stmt, Exp,
|
||||
Typeident, Decls, Args, Body, Procedure, Main, Includes, Smallc);
|
||||
|
||||
if (err != NULL) {
|
||||
mpc_err_print(err);
|
||||
mpc_err_delete(err);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (argc > 1) {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_contents(argv[1], Smallc, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
mpc_result_t r;
|
||||
if (mpc_parse_pipe("<stdin>", stdin, Smallc, &r)) {
|
||||
mpc_ast_print(r.output);
|
||||
mpc_ast_delete(r.output);
|
||||
} else {
|
||||
mpc_err_print(r.error);
|
||||
mpc_err_delete(r.error);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mpc_cleanup(17, Ident, Number, Character, String, Factor, Term, Lexp, Stmt, Exp,
|
||||
Typeident, Decls, Args, Body, Procedure, Main, Includes, Smallc);
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
215
mpc.c
215
mpc.c
@@ -152,13 +152,13 @@ char *mpc_err_string(mpc_err_t *x) {
|
||||
if (x->failure) {
|
||||
mpc_err_string_cat(buffer, &pos, &max,
|
||||
"error: %s\n",
|
||||
x->filename, x->state.row,
|
||||
x->state.col, x->failure);
|
||||
x->filename, x->state.row+1,
|
||||
x->state.col+1, x->failure);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
mpc_err_string_cat(buffer, &pos, &max,
|
||||
"%s:%i:%i: error: expected ", x->filename, x->state.row, x->state.col);
|
||||
"%s:%i:%i: error: expected ", x->filename, x->state.row+1, x->state.col+1);
|
||||
|
||||
if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); }
|
||||
if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); }
|
||||
@@ -344,19 +344,35 @@ static mpc_input_t *mpc_input_new_string(const char *filename, const char *strin
|
||||
return i;
|
||||
}
|
||||
|
||||
static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) {
|
||||
|
||||
mpc_input_t *i = malloc(sizeof(mpc_input_t));
|
||||
|
||||
i->filename = malloc(strlen(filename) + 1);
|
||||
strcpy(i->filename, filename);
|
||||
|
||||
i->type = MPC_INPUT_PIPE;
|
||||
i->state = mpc_state_new();
|
||||
|
||||
i->string = NULL;
|
||||
i->buffer = NULL;
|
||||
i->file = pipe;
|
||||
|
||||
i->backtrack = 1;
|
||||
i->marks_num = 0;
|
||||
i->marks = NULL;
|
||||
|
||||
return i;
|
||||
|
||||
}
|
||||
|
||||
static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) {
|
||||
|
||||
mpc_input_t *i = malloc(sizeof(mpc_input_t));
|
||||
|
||||
i->filename = malloc(strlen(filename) + 1);
|
||||
strcpy(i->filename, filename);
|
||||
|
||||
if (fseek(file, 0, SEEK_CUR) != 0) {
|
||||
i->type = MPC_INPUT_PIPE;
|
||||
} else {
|
||||
i->type = MPC_INPUT_FILE;
|
||||
}
|
||||
|
||||
i->type = MPC_INPUT_FILE;
|
||||
i->state = mpc_state_new();
|
||||
|
||||
i->string = NULL;
|
||||
@@ -381,12 +397,12 @@ static void mpc_input_delete(mpc_input_t *i) {
|
||||
free(i);
|
||||
}
|
||||
|
||||
static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack = 0; }
|
||||
static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack = 1; }
|
||||
static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; }
|
||||
static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; }
|
||||
|
||||
static void mpc_input_mark(mpc_input_t *i) {
|
||||
|
||||
if (!i->backtrack) { return; }
|
||||
if (i->backtrack < 1) { return; }
|
||||
|
||||
i->marks_num++;
|
||||
i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num);
|
||||
@@ -400,7 +416,7 @@ static void mpc_input_mark(mpc_input_t *i) {
|
||||
|
||||
static void mpc_input_unmark(mpc_input_t *i) {
|
||||
|
||||
if (!i->backtrack) { return; }
|
||||
if (i->backtrack < 1) { return; }
|
||||
|
||||
i->marks_num--;
|
||||
i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num);
|
||||
@@ -414,7 +430,7 @@ static void mpc_input_unmark(mpc_input_t *i) {
|
||||
|
||||
static void mpc_input_rewind(mpc_input_t *i) {
|
||||
|
||||
if (!i->backtrack) { return; }
|
||||
if (i->backtrack < 1) { return; }
|
||||
|
||||
i->state = i->marks[i->marks_num-1];
|
||||
|
||||
@@ -449,7 +465,7 @@ static char mpc_input_getc(mpc_input_t *i) {
|
||||
case MPC_INPUT_FILE: c = fgetc(i->file); break;
|
||||
case MPC_INPUT_PIPE:
|
||||
|
||||
if (!i->buffer) { c = getc(i->file); }
|
||||
if (!i->buffer) { c = getc(i->file); break; }
|
||||
|
||||
if (i->buffer && mpc_input_buffer_in_range(i)) {
|
||||
c = mpc_input_buffer_get(i);
|
||||
@@ -469,7 +485,16 @@ static int mpc_input_failure(mpc_input_t *i, char c) {
|
||||
switch (i->type) {
|
||||
case MPC_INPUT_STRING: break;
|
||||
case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); break;
|
||||
case MPC_INPUT_PIPE: ungetc(c, i->file); break;
|
||||
case MPC_INPUT_PIPE:
|
||||
|
||||
if (!i->buffer) { ungetc(c, i->file); break; }
|
||||
|
||||
if (i->buffer && mpc_input_buffer_in_range(i)) {
|
||||
break;
|
||||
} else {
|
||||
ungetc(c, i->file);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
i->state.next = c;
|
||||
@@ -1107,6 +1132,14 @@ int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result
|
||||
return x;
|
||||
}
|
||||
|
||||
int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) {
|
||||
int x;
|
||||
mpc_input_t *i = mpc_input_new_pipe(filename, pipe);
|
||||
x = mpc_parse_input(i, p, r);
|
||||
mpc_input_delete(i);
|
||||
return x;
|
||||
}
|
||||
|
||||
int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) {
|
||||
|
||||
FILE *f = fopen(filename, "rb");
|
||||
@@ -1579,9 +1612,9 @@ mpc_parser_t *mpc_soi(void) {
|
||||
return mpc_expect(p, "start of input");
|
||||
}
|
||||
|
||||
mpc_parser_t *mpc_space(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); }
|
||||
mpc_parser_t *mpc_spaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_space()), "spaces"); }
|
||||
mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_apply(mpc_spaces(), mpcf_free), "whitespace"); }
|
||||
mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); }
|
||||
mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); }
|
||||
mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); }
|
||||
|
||||
mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); }
|
||||
mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); }
|
||||
@@ -1652,15 +1685,17 @@ mpc_parser_t *mpc_ident(void) {
|
||||
** Useful Parsers
|
||||
*/
|
||||
|
||||
mpc_parser_t *mpc_start(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_end(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); }
|
||||
mpc_parser_t *mpc_enclose(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); }
|
||||
mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); }
|
||||
mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); }
|
||||
|
||||
mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_whitespace(), a, mpc_whitespace(), mpcf_dtor_null, mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_whitespace(), mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); }
|
||||
mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); }
|
||||
|
||||
mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_enclose(mpc_strip(a), da); }
|
||||
mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); }
|
||||
|
||||
mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) {
|
||||
return mpc_and(3, mpcf_snd_free,
|
||||
@@ -1774,8 +1809,8 @@ static mpc_parser_t *mpc_re_escape_char(char c) {
|
||||
case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free);
|
||||
case 'd': return mpc_digit();
|
||||
case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str);
|
||||
case 's': return mpc_space();
|
||||
case 'S': return mpc_not_lift(mpc_space(), free, mpcf_ctor_str);
|
||||
case 's': return mpc_whitespace();
|
||||
case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str);
|
||||
case 'w': return mpc_alphanum();
|
||||
case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str);
|
||||
default: return NULL;
|
||||
@@ -1932,7 +1967,7 @@ mpc_parser_t *mpc_re(const char *re) {
|
||||
mpcf_re_range
|
||||
));
|
||||
|
||||
RegexEnclose = mpc_enclose(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete);
|
||||
RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete);
|
||||
|
||||
if(!mpc_parse("<mpc_re_compiler>", re, RegexEnclose, &r)) {
|
||||
err_msg = mpc_err_string(r.error);
|
||||
@@ -2122,7 +2157,7 @@ mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) {
|
||||
return y;
|
||||
}
|
||||
|
||||
|
||||
mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { return NULL; }
|
||||
mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { return xs[0]; }
|
||||
mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { return xs[1]; }
|
||||
mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { return xs[2]; }
|
||||
@@ -2442,6 +2477,7 @@ mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) {
|
||||
}
|
||||
|
||||
mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) {
|
||||
if (a == NULL) { return a; }
|
||||
a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1);
|
||||
memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1);
|
||||
memmove(a->tag, t, strlen(t));
|
||||
@@ -2631,6 +2667,13 @@ mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_
|
||||
** | "(" <grammar> ")"
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
va_list *va;
|
||||
int parsers_num;
|
||||
mpc_parser_t **parsers;
|
||||
int flags;
|
||||
} mpca_grammar_st_t;
|
||||
|
||||
static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) {
|
||||
if (xs[1] == NULL) { return xs[0]; }
|
||||
else { return mpca_or(2, xs[0], xs[1]); }
|
||||
@@ -2658,32 +2701,35 @@ static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) {
|
||||
return mpca_count(num, xs[0]);
|
||||
}
|
||||
|
||||
static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x) {
|
||||
static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) {
|
||||
mpca_grammar_st_t *st = s;
|
||||
char *y = mpcf_unescape(x);
|
||||
mpc_parser_t *p = mpc_tok(mpc_string(y));
|
||||
mpc_parser_t *p = (st->flags & MPC_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y));
|
||||
free(y);
|
||||
return mpca_tag(mpc_apply(p, mpcf_str_ast), "string");
|
||||
}
|
||||
|
||||
static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x) {
|
||||
static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) {
|
||||
mpca_grammar_st_t *st = s;
|
||||
char *y = mpcf_unescape(x);
|
||||
mpc_parser_t *p = mpc_tok(mpc_char(y[0]));
|
||||
mpc_parser_t *p = (st->flags & MPC_LANG_WHITESPACE_SENSITIVE) ? mpc_char(y[0]) : mpc_tok(mpc_char(y[0]));
|
||||
free(y);
|
||||
return mpca_tag(mpc_apply(p, mpcf_str_ast), "char");
|
||||
}
|
||||
|
||||
static mpc_val_t *mpcaf_grammar_regex(mpc_val_t *x) {
|
||||
static mpc_val_t *mpcaf_grammar_regex(mpc_val_t *x, void *s) {
|
||||
mpca_grammar_st_t *st = s;
|
||||
char *y = mpcf_unescape_regex(x);
|
||||
mpc_parser_t *p = mpc_tok(mpc_re(y));
|
||||
mpc_parser_t *p = (st->flags & MPC_LANG_WHITESPACE_SENSITIVE) ? mpc_re(y) : mpc_tok(mpc_re(y));
|
||||
free(y);
|
||||
return mpca_tag(mpc_apply(p, mpcf_str_ast), "regex");
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
va_list *va;
|
||||
int parsers_num;
|
||||
mpc_parser_t **parsers;
|
||||
} mpca_grammar_st_t;
|
||||
/*
** Returns 1 when every character of `s` is a decimal digit, 0 otherwise.
** The empty string contains no non-digit character and so also yields 1.
*/
static int is_number(const char* s) {
  /* Single pass over the string; avoids calling strlen on every iteration */
  for (; *s != '\0'; s++) {
    /* '0'..'9' are guaranteed contiguous by the C standard */
    if (*s < '0' || *s > '9') { return 0; }
  }
  return 1;
}
|
||||
|
||||
static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) {
|
||||
|
||||
@@ -2691,7 +2737,7 @@ static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) {
|
||||
mpc_parser_t *p;
|
||||
|
||||
/* Case of Number */
|
||||
if (strstr("0123456789", x)) {
|
||||
if (is_number(x)) {
|
||||
|
||||
i = strtol(x, NULL, 10);
|
||||
|
||||
@@ -2736,17 +2782,17 @@ static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) {
|
||||
|
||||
}
|
||||
|
||||
static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *y) {
|
||||
static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) {
|
||||
|
||||
mpc_parser_t *p = mpca_grammar_find_parser(x, y);
|
||||
mpca_grammar_st_t *st = s;
|
||||
mpc_parser_t *p = mpca_grammar_find_parser(x, st);
|
||||
free(x);
|
||||
|
||||
|
||||
if (p->name) {
|
||||
return mpca_root(mpca_add_tag(p, p->name));
|
||||
} else {
|
||||
return mpca_root(p);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) {
|
||||
@@ -2787,9 +2833,9 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) {
|
||||
));
|
||||
|
||||
mpc_define(Base, mpc_or(5,
|
||||
mpc_apply(mpc_tok(mpc_string_lit()), mpcaf_grammar_string),
|
||||
mpc_apply(mpc_tok(mpc_char_lit()), mpcaf_grammar_char),
|
||||
mpc_apply(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex),
|
||||
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
||||
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
||||
mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st),
|
||||
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
||||
mpc_tok_parens(Grammar, mpc_soft_delete)
|
||||
));
|
||||
@@ -2804,11 +2850,11 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) {
|
||||
|
||||
mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base);
|
||||
|
||||
return r.output;
|
||||
return (st->flags & MPC_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output;
|
||||
|
||||
}
|
||||
|
||||
mpc_parser_t *mpca_grammar(const char *grammar, ...) {
|
||||
mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) {
|
||||
mpca_grammar_st_t st;
|
||||
mpc_parser_t *res;
|
||||
va_list va;
|
||||
@@ -2817,6 +2863,7 @@ mpc_parser_t *mpca_grammar(const char *grammar, ...) {
|
||||
st.va = &va;
|
||||
st.parsers_num = 0;
|
||||
st.parsers = NULL;
|
||||
st.flags = flags;
|
||||
|
||||
res = mpca_grammar_st(grammar, &st);
|
||||
free(st.parsers);
|
||||
@@ -2826,6 +2873,7 @@ mpc_parser_t *mpca_grammar(const char *grammar, ...) {
|
||||
|
||||
typedef struct {
|
||||
char *ident;
|
||||
char *name;
|
||||
mpc_parser_t *grammar;
|
||||
} mpca_stmt_t;
|
||||
|
||||
@@ -2833,10 +2881,11 @@ static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) {
|
||||
|
||||
mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t));
|
||||
stmt->ident = ((char**)xs)[0];
|
||||
stmt->grammar = ((mpc_parser_t**)xs)[2];
|
||||
stmt->name = ((char**)xs)[1];
|
||||
stmt->grammar = ((mpc_parser_t**)xs)[3];
|
||||
|
||||
free(((char**)xs)[1]);
|
||||
free(((char**)xs)[3]);
|
||||
free(((char**)xs)[2]);
|
||||
free(((char**)xs)[4]);
|
||||
|
||||
return stmt;
|
||||
}
|
||||
@@ -2861,6 +2910,7 @@ static void mpca_stmt_list_delete(mpc_val_t *x) {
|
||||
while(*stmts) {
|
||||
mpca_stmt_t *stmt = *stmts;
|
||||
free(stmt->ident);
|
||||
free(stmt->name);
|
||||
mpc_soft_delete(stmt->grammar);
|
||||
free(stmt);
|
||||
stmts++;
|
||||
@@ -2869,8 +2919,9 @@ static void mpca_stmt_list_delete(mpc_val_t *x) {
|
||||
|
||||
}
|
||||
|
||||
static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *st) {
|
||||
static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) {
|
||||
|
||||
mpca_grammar_st_t *st = s;
|
||||
mpca_stmt_t *stmt;
|
||||
mpca_stmt_t **stmts = x;
|
||||
mpc_parser_t *left;
|
||||
@@ -2878,8 +2929,11 @@ static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *st) {
|
||||
while(*stmts) {
|
||||
stmt = *stmts;
|
||||
left = mpca_grammar_find_parser(stmt->ident, st);
|
||||
if (st->flags & MPC_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); }
|
||||
if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); }
|
||||
mpc_define(left, stmt->grammar);
|
||||
free(stmt->ident);
|
||||
free(stmt->name);
|
||||
free(stmt);
|
||||
stmts++;
|
||||
}
|
||||
@@ -2894,20 +2948,20 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) {
|
||||
mpc_err_t *e;
|
||||
mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base;
|
||||
|
||||
Lang = mpc_new("lang");
|
||||
Stmt = mpc_new("stmt");
|
||||
Lang = mpc_new("lang");
|
||||
Stmt = mpc_new("stmt");
|
||||
Grammar = mpc_new("grammar");
|
||||
Term = mpc_new("term");
|
||||
Factor = mpc_new("factor");
|
||||
Base = mpc_new("base");
|
||||
Term = mpc_new("term");
|
||||
Factor = mpc_new("factor");
|
||||
Base = mpc_new("base");
|
||||
|
||||
mpc_define(Lang, mpc_apply_to(
|
||||
mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete),
|
||||
mpca_stmt_list_apply_to, st
|
||||
));
|
||||
|
||||
mpc_define(Stmt, mpc_and(4, mpca_stmt_afold,
|
||||
mpc_tok(mpc_ident()), mpc_sym(":"), Grammar, mpc_sym(";"),
|
||||
mpc_define(Stmt, mpc_and(5, mpca_stmt_afold,
|
||||
mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"),
|
||||
free, free, mpc_soft_delete
|
||||
));
|
||||
|
||||
@@ -2932,9 +2986,9 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) {
|
||||
));
|
||||
|
||||
mpc_define(Base, mpc_or(5,
|
||||
mpc_apply(mpc_tok(mpc_string_lit()), mpcaf_grammar_string),
|
||||
mpc_apply(mpc_tok(mpc_char_lit()), mpcaf_grammar_char),
|
||||
mpc_apply(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex),
|
||||
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
||||
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
||||
mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st),
|
||||
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
||||
mpc_tok_parens(Grammar, mpc_soft_delete)
|
||||
));
|
||||
@@ -2951,7 +3005,7 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) {
|
||||
return e;
|
||||
}
|
||||
|
||||
mpc_err_t *mpca_lang_file(FILE *f, ...) {
|
||||
mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) {
|
||||
mpca_grammar_st_t st;
|
||||
mpc_input_t *i;
|
||||
mpc_err_t *err;
|
||||
@@ -2962,6 +3016,7 @@ mpc_err_t *mpca_lang_file(FILE *f, ...) {
|
||||
st.va = &va;
|
||||
st.parsers_num = 0;
|
||||
st.parsers = NULL;
|
||||
st.flags = flags;
|
||||
|
||||
i = mpc_input_new_file("<mpca_lang_file>", f);
|
||||
err = mpca_lang_st(i, &st);
|
||||
@@ -2972,7 +3027,29 @@ mpc_err_t *mpca_lang_file(FILE *f, ...) {
|
||||
return err;
|
||||
}
|
||||
|
||||
mpc_err_t *mpca_lang(const char *language, ...) {
|
||||
mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) {
|
||||
mpca_grammar_st_t st;
|
||||
mpc_input_t *i;
|
||||
mpc_err_t *err;
|
||||
|
||||
va_list va;
|
||||
va_start(va, p);
|
||||
|
||||
st.va = &va;
|
||||
st.parsers_num = 0;
|
||||
st.parsers = NULL;
|
||||
st.flags = flags;
|
||||
|
||||
i = mpc_input_new_pipe("<mpca_lang_pipe>", p);
|
||||
err = mpca_lang_st(i, &st);
|
||||
mpc_input_delete(i);
|
||||
|
||||
free(st.parsers);
|
||||
va_end(va);
|
||||
return err;
|
||||
}
|
||||
|
||||
mpc_err_t *mpca_lang(int flags, const char *language, ...) {
|
||||
|
||||
mpca_grammar_st_t st;
|
||||
mpc_input_t *i;
|
||||
@@ -2984,6 +3061,7 @@ mpc_err_t *mpca_lang(const char *language, ...) {
|
||||
st.va = &va;
|
||||
st.parsers_num = 0;
|
||||
st.parsers = NULL;
|
||||
st.flags = flags;
|
||||
|
||||
i = mpc_input_new_string("<mpca_lang>", language);
|
||||
err = mpca_lang_st(i, &st);
|
||||
@@ -2994,7 +3072,7 @@ mpc_err_t *mpca_lang(const char *language, ...) {
|
||||
return err;
|
||||
}
|
||||
|
||||
mpc_err_t *mpca_lang_contents(const char *filename, ...) {
|
||||
mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) {
|
||||
|
||||
mpca_grammar_st_t st;
|
||||
mpc_input_t *i;
|
||||
@@ -3013,6 +3091,7 @@ mpc_err_t *mpca_lang_contents(const char *filename, ...) {
|
||||
st.va = &va;
|
||||
st.parsers_num = 0;
|
||||
st.parsers = NULL;
|
||||
st.flags = flags;
|
||||
|
||||
i = mpc_input_new_file(filename, f);
|
||||
err = mpca_lang_st(i, &st);
|
||||
|
32
mpc.h
32
mpc.h
@@ -56,7 +56,8 @@ struct mpc_parser_t;
|
||||
typedef struct mpc_parser_t mpc_parser_t;
|
||||
|
||||
int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r);
|
||||
int mpc_parse_file(const char *filename, FILE* file, mpc_parser_t *p, mpc_result_t *r);
|
||||
int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r);
|
||||
int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r);
|
||||
int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r);
|
||||
|
||||
/*
|
||||
@@ -129,9 +130,9 @@ mpc_parser_t *mpc_predictive(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_eoi(void);
|
||||
mpc_parser_t *mpc_soi(void);
|
||||
|
||||
mpc_parser_t *mpc_space(void);
|
||||
mpc_parser_t *mpc_spaces(void);
|
||||
mpc_parser_t *mpc_whitespace(void);
|
||||
mpc_parser_t *mpc_whitespaces(void);
|
||||
mpc_parser_t *mpc_blank(void);
|
||||
|
||||
mpc_parser_t *mpc_newline(void);
|
||||
mpc_parser_t *mpc_tab(void);
|
||||
@@ -168,10 +169,12 @@ mpc_parser_t *mpc_ident(void);
|
||||
** Useful Parsers
|
||||
*/
|
||||
|
||||
mpc_parser_t *mpc_start(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_end(mpc_parser_t *a, mpc_dtor_t da);
|
||||
mpc_parser_t *mpc_enclose(mpc_parser_t *a, mpc_dtor_t da);
|
||||
mpc_parser_t *mpc_startwith(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da);
|
||||
mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da);
|
||||
|
||||
mpc_parser_t *mpc_stripl(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_stripr(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_strip(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_tok(mpc_parser_t *a);
|
||||
mpc_parser_t *mpc_sym(const char *s);
|
||||
@@ -208,11 +211,13 @@ mpc_val_t *mpcf_escape(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_escape_regex(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x);
|
||||
|
||||
mpc_val_t *mpcf_unescape(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_unescape_regex(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x);
|
||||
mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x);
|
||||
|
||||
mpc_val_t *mpcf_null(int n, mpc_val_t** xs);
|
||||
mpc_val_t *mpcf_fst(int n, mpc_val_t** xs);
|
||||
mpc_val_t *mpcf_snd(int n, mpc_val_t** xs);
|
||||
mpc_val_t *mpcf_trd(int n, mpc_val_t** xs);
|
||||
@@ -271,11 +276,18 @@ mpc_parser_t *mpca_count(int n, mpc_parser_t *a);
|
||||
mpc_parser_t *mpca_or(int n, ...);
|
||||
mpc_parser_t *mpca_and(int n, ...);
|
||||
|
||||
mpc_parser_t *mpca_grammar(const char *grammar, ...);
|
||||
enum {
|
||||
MPC_LANG_DEFAULT = 0,
|
||||
MPC_LANG_PREDICTIVE = 1,
|
||||
MPC_LANG_WHITESPACE_SENSITIVE = 2
|
||||
};
|
||||
|
||||
mpc_err_t *mpca_lang(const char *language, ...);
|
||||
mpc_err_t *mpca_lang_file(FILE *f, ...);
|
||||
mpc_err_t *mpca_lang_contents(const char *filename, ...);
|
||||
mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...);
|
||||
|
||||
mpc_err_t *mpca_lang(int flags, const char *language, ...);
|
||||
mpc_err_t *mpca_lang_file(int flags, FILE *f, ...);
|
||||
mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...);
|
||||
mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...);
|
||||
|
||||
/*
|
||||
** Debug & Testing
|
||||
|
@@ -1,12 +0,0 @@
|
||||
/*
|
||||
** Just Some Ideas:
|
||||
**
|
||||
** - Predictive Optimisation. Check all first character of all possible roots. If no conflict then predictive.
|
||||
** - Or Optimisation. Check if any terminal parsers are _ored_ together. If so condence into single large range.
|
||||
** - And Optimisation. Check if any terminal parsers are _anded_ together. If so condence into single large string.
|
||||
** - Not Optimisation. Similar to the above. Convert _nots_ into positive cases by inverting full range of characters.
|
||||
** - Also Optimisation. Two Character parsers together can be condensed to a single string parser.
|
||||
** - Lookup Optimisation. Finite State Machine Parser.
|
||||
** - Or Fail Removal.
|
||||
**
|
||||
*/
|
@@ -13,7 +13,7 @@ void test_ident(void) {
|
||||
|
||||
/* ^[a-zA-Z_][a-zA-Z0-9_]*$ */
|
||||
|
||||
mpc_parser_t* Ident = mpc_enclose(
|
||||
mpc_parser_t* Ident = mpc_whole(
|
||||
mpc_and(2, mpcf_strfold,
|
||||
mpc_or(2, mpc_alpha(), mpc_underscore()),
|
||||
mpc_many1(mpcf_strfold, mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit())),
|
||||
@@ -62,7 +62,7 @@ void test_maths(void) {
|
||||
mpc_parens(Expr, free)
|
||||
));
|
||||
|
||||
mpc_define(Maths, mpc_enclose(Expr, free));
|
||||
mpc_define(Maths, mpc_whole(Expr, free));
|
||||
|
||||
PT_ASSERT(mpc_match(Maths, "1", &r0, int_eq, free, int_print));
|
||||
PT_ASSERT(mpc_match(Maths, "(5)", &r1, int_eq, free, int_print));
|
||||
|
@@ -11,9 +11,9 @@ void test_grammar(void) {
|
||||
Value = mpc_new("value");
|
||||
Maths = mpc_new("maths");
|
||||
|
||||
mpc_define(Expr, mpca_grammar(" <product> (('+' | '-') <product>)* ", Prod));
|
||||
mpc_define(Prod, mpca_grammar(" <value> (('*' | '/') <value>)* ", Value));
|
||||
mpc_define(Value, mpca_grammar(" /[0-9]+/ | '(' <expression> ')' ", Expr));
|
||||
mpc_define(Expr, mpca_grammar(MPC_LANG_DEFAULT, " <product> (('+' | '-') <product>)* ", Prod));
|
||||
mpc_define(Prod, mpca_grammar(MPC_LANG_DEFAULT, " <value> (('*' | '/') <value>)* ", Value));
|
||||
mpc_define(Value, mpca_grammar(MPC_LANG_DEFAULT, " /[0-9]+/ | '(' <expression> ')' ", Expr));
|
||||
mpc_define(Maths, mpca_total(Expr));
|
||||
|
||||
t0 = mpc_ast_new("product|value|regex", "24");
|
||||
@@ -66,7 +66,7 @@ void test_language(void) {
|
||||
Value = mpc_new("value");
|
||||
Maths = mpc_new("maths");
|
||||
|
||||
mpca_lang(
|
||||
mpca_lang(MPC_LANG_DEFAULT,
|
||||
" \
|
||||
expression : <product> (('+' | '-') <product>)*; \
|
||||
product : <value> (('*' | '/') <value>)*; \
|
||||
@@ -87,7 +87,7 @@ void test_language_file(void) {
|
||||
Value = mpc_new("value");
|
||||
Maths = mpc_new("maths");
|
||||
|
||||
mpca_lang_contents("./tests/maths.grammar", Expr, Prod, Value, Maths);
|
||||
mpca_lang_contents(MPC_LANG_DEFAULT,"./tests/maths.grammar", Expr, Prod, Value, Maths);
|
||||
|
||||
mpc_cleanup(4, Expr, Prod, Value, Maths);
|
||||
|
||||
|
Reference in New Issue
Block a user