WIP prediction stuff

This commit is contained in:
Daniel Holden
2013-09-30 17:58:52 +01:00
parent 5a50398629
commit 3933a3a1bd
4 changed files with 336 additions and 133 deletions

View File

@@ -9,13 +9,14 @@ Using _mpc_ might be of interest to you if you are...
* Building a new data format
* Parsing an existing data format
* Embedding a Domain Specific Language
* [Adding an ad-hoc embedded Lisp to your application](http://en.wikipedia.org/wiki/Greenspun%27s_tenth_rule)
* Implementing [Greenspun's Tenth Rule](http://en.wikipedia.org/wiki/Greenspun%27s_tenth_rule)
Features
--------
* Type-Generic Parser Combinators
* Lookahead Recursive Descent
* Error Message Support
* Regular Expression Support
* Abstract Syntax Tree Support
@@ -38,7 +39,7 @@ The main advantages of _mpc_ over this are:
View From the Top
-----------------
In this example I specify a parse for a basic maths language. This function takes as input some mathematical expression and outputs an instance of `mpc_ast_t`.
In this example I create a parser for a basic maths language. This function takes as input some mathematical expression and outputs an instance of `mpc_ast_t`.
```c
#include "mpc.h"
@@ -51,18 +52,18 @@ mpc_ast_t* parse_maths(const char* input) {
mpc_parser_t* Maths = mpc_new("maths");
mpca_lang(
" \
expression : <product> (('+' | '-') <product>)*; \
product : <value> (('*' | '/') <value>)*; \
value : /[0-9]+/ | '(' <expression> ')'; \
maths : /^/ <expression> /$/; \
" \
expression : <product> (('+' | '-') <product>)*; \
product : <value> (('*' | '/') <value> )*; \
value : /[0-9]+/ | '(' <expression> ')'; \
maths : /^/ <expression> /$/; \
",
Expr, Prod, Value, Maths);
mpc_result_t r;
if (!mpc_parse("parse_maths", input, Maths, &r)) {
mpc_err_print(r.error);
abort();
exit(EXIT_FAILURE);
}
mpc_cleanup(4, Expr, Prod, Value, Maths);
@@ -73,29 +74,29 @@ mpc_ast_t* parse_maths(const char* input) {
The output for some input like `(4 * 2 * 11 + 2) - 5` might look something like this
```xml
<root>
<value>
<char> '('
<expression>
<product>
<value> '4'
<char> '*'
<value> '2'
<char> '*'
<value> '11'
<char> '+'
<value> '2'
<char> ')'
<char> '-'
<value> '5'
```c
root:
value:
char: '('
expression:
product:
value: '4'
char: '*'
value: '2'
char: '*'
value: '11'
char: '+'
value: '2'
char: ')'
char: '-'
value: '5'
```
View From the Bottom
--------------------
Parser Combinators are structures that encode how to parse a particular language. They can be combined using a number of intuitive operators to create new parsers of ever increasing complexity. With these, detailed grammars and languages can be processed easily.
Parser Combinators are structures that encode how to parse a particular language. They can be combined using intuitive operators to create new parsers of increasing complexity. With these - detailed grammars and languages can be parsed and processed easily.
The trick behind Parser Combinators is the observation that by structuring the library in a particular way, one can make building parser combinators look like writing a grammar itself. Therefore instead of describing _how to parse a language_, a user must only specify _the language itself_, and the computer will work out how to parse it ... as if by magic!
@@ -201,10 +202,10 @@ Consumes no input, always successful, always returns `NULL`
* * *
```c
mpc_parser_t* mpc_fail(void);
mpc_parser_t* mpc_fail(const char* m);
```
Consumes no input, always fails
Consumes no input, always fails with message `m`.
* * *
@@ -251,6 +252,14 @@ Returns a parser that applies function `f` (optionality taking extra input `x`)
* * *
```c
mpc_parser_t* mpc_predict(mpc_parser_t* a);
```
Returns a parser that runs `a` with backtracking disabled. This means if `a` consumes any input, it will not be reverted. Turning backtracking off has good performance benefits for grammars which are `LL(1)`. These are grammars where the first character completely determines the parse result - such as the decision of parsing either a C identifier, number, or string literal.
* * *
```c
mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da);
mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf);
@@ -551,8 +560,8 @@ A number of common fold functions a user might want are included. They reside un
* `mpc_val_t* mpcf_maths(int n, mpc_val_t** xs);` Examines second argument as string to see which operator it is, then operators on first and third argument as if they are `int*`.
Secod Example
-------------
Second Example
--------------
Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own syntax tree types, as well as allows them perform specific house-keeping or data processing in the parsing phase. For example we can specify a simple maths grammar that computes the result of the expression as it goes along.

13
TODO.md
View File

@@ -1,8 +1,11 @@
- Test All Regex Features
- Test Regex Range Feature
- Parser Naming
- Test Grammar Trees
- Change Grammar Many Fold Operator
- Add proper tests for everything in general
- Find some good demo grammars to show
- Add Tutorial Teaching from scratch
- Add Reference
- Write about how to use `mpca_lang` and `mpca_grammar`
- Write about error reporting
- Use while loop instead of recursion in parse function
- Support for stream based input

378
mpc.c
View File

@@ -16,6 +16,16 @@ typedef struct {
int col;
} mpc_state_t;
static mpc_state_t mpc_state_null(void) {
mpc_state_t s;
s.last = '\0';
s.next = '\0';
s.pos = 0;
s.row = 0;
s.col = 0;
return s;
}
/*
** Error Type
*/
@@ -25,9 +35,10 @@ struct mpc_err_t {
mpc_state_t state;
int expected_num;
char** expected;
char* failure;
};
static mpc_err_t* mpc_err_new(char* filename, mpc_state_t s, char* expected) {
static mpc_err_t* mpc_err_new(const char* filename, mpc_state_t s, const char* expected) {
mpc_err_t* x = malloc(sizeof(mpc_err_t));
x->filename = malloc(strlen(filename) + 1);
strcpy(x->filename, filename);
@@ -36,6 +47,19 @@ static mpc_err_t* mpc_err_new(char* filename, mpc_state_t s, char* expected) {
x->expected = malloc(sizeof(char*));
x->expected[0] = malloc(strlen(expected) + 1);
strcpy(x->expected[0], expected);
x->failure = NULL;
return x;
}
static mpc_err_t* mpc_err_new_fail(const char* filename, mpc_state_t s, const char* failure) {
mpc_err_t* x = malloc(sizeof(mpc_err_t));
x->filename = malloc(strlen(filename) + 1);
strcpy(x->filename, filename);
x->state = s;
x->expected_num = 0;
x->expected = NULL;
x->failure = malloc(strlen(failure) + 1);
strcpy(x->failure, failure);
return x;
}
@@ -48,6 +72,7 @@ void mpc_err_delete(mpc_err_t* x) {
free(x->expected);
free(x->filename);
free(x->failure);
free(x);
}
@@ -90,6 +115,13 @@ void mpc_err_print(mpc_err_t* x) {
void mpc_err_print_to(mpc_err_t* x, FILE* f) {
if (x->failure) {
fprintf(f, "%s:%i:%i: error: %s\n",
x->filename, x->state.row,
x->state.col, x->failure);
return;
}
fprintf(f, "%s:%i:%i: error: expected ", x->filename, x->state.row, x->state.col);
if (x->expected_num == 0) {
@@ -127,8 +159,57 @@ void mpc_err_print_to(mpc_err_t* x, FILE* f) {
}
void mpc_err_msg(mpc_err_t* x, char* out, int* outn, int outmax) {
/* TODO: Implement */
char* mpc_err_string_new(mpc_err_t* x) {
char* buffer = malloc(1024);
if (x->failure) {
snprintf(buffer, 1023, "%s:%i:%i: error: %s\n",
x->filename, x->state.row,
x->state.col, x->failure);
return buffer;
}
int left = 1023;
left -= snprintf(buffer, left, "%s:%i:%i: error: expected ", x->filename, x->state.row, x->state.col);
if (x->expected_num == 0) {
left -= snprintf(buffer, left, "ERROR: NOTHING EXPECTED");
} else if (x->expected_num == 1) {
left -= snprintf(buffer, left, "%s", x->expected[0]);
} else {
int i;
for (i = 0; i < x->expected_num-2; i++) {
left -= snprintf(buffer, left, "%s, ", x->expected[i]);
}
left -= snprintf(buffer, left, "%s or %s",
x->expected[x->expected_num-2],
x->expected[x->expected_num-1]);
}
left -= snprintf(buffer, left, " at ");
if (x->state.next == '\a') { left -= snprintf(buffer, left, "bell"); }
else if (x->state.next == '\b') { left -= snprintf(buffer, left, "backspace"); }
else if (x->state.next == '\f') { left -= snprintf(buffer, left, "formfeed"); }
else if (x->state.next == '\r') { left -= snprintf(buffer, left, "carriage return"); }
else if (x->state.next == '\v') { left -= snprintf(buffer, left, "vertical tab"); }
else if (x->state.next == '\0') { left -= snprintf(buffer, left, "end of input"); }
else if (x->state.next == '\n') { left -= snprintf(buffer, left, "newline"); }
else if (x->state.next == '\t') { left -= snprintf(buffer, left, "tab"); }
else { left -= snprintf(buffer, left, "'%c'", x->state.next); }
left -= snprintf(buffer, left, "\n");
buffer = realloc(buffer, strlen(buffer) + 1);
return buffer;
}
static mpc_err_t* mpc_err_either(mpc_err_t* x, mpc_err_t* y) {
@@ -244,6 +325,7 @@ typedef struct {
char* str;
mpc_state_t state;
bool backtrack;
int marks_num;
mpc_state_t* marks;
@@ -264,6 +346,7 @@ static mpc_input_t* mpc_input_new(const char* filename, const char* str) {
i->state.row = 0;
i->state.col = 0;
i->backtrack = true;
i->marks_num = 0;
i->marks = NULL;
@@ -277,18 +360,29 @@ static void mpc_input_delete(mpc_input_t* i) {
free(i);
}
static void mpc_input_backtrack_disable(mpc_input_t* i) {
i->backtrack = false;
}
static void mpc_input_backtrack_enable(mpc_input_t* i) {
i->backtrack = true;
}
static void mpc_input_mark(mpc_input_t* i) {
if (!i->backtrack) { return; }
i->marks_num++;
i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num);
i->marks[i->marks_num-1] = i->state;
}
static void mpc_input_unmark(mpc_input_t* i) {
if (!i->backtrack) { return; }
i->marks_num--;
i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num);
}
static void mpc_input_rewind(mpc_input_t* i) {
if (!i->backtrack) { return; }
i->state = i->marks[i->marks_num-1];
mpc_input_unmark(i);
}
@@ -444,18 +538,20 @@ enum {
MPC_TYPE_APPLY = 15,
MPC_TYPE_APPLY_TO = 16,
MPC_TYPE_NOT = 17,
MPC_TYPE_MAYBE = 18,
MPC_TYPE_MANY = 19,
MPC_TYPE_MANY1 = 20,
MPC_TYPE_COUNT = 21,
MPC_TYPE_PREDICT = 17,
MPC_TYPE_NOT = 18,
MPC_TYPE_MAYBE = 19,
MPC_TYPE_MANY = 20,
MPC_TYPE_MANY1 = 21,
MPC_TYPE_COUNT = 22,
MPC_TYPE_ELSE = 22,
MPC_TYPE_ALSO = 23,
MPC_TYPE_OR = 24,
MPC_TYPE_AND = 25,
MPC_TYPE_ELSE = 23,
MPC_TYPE_ALSO = 24,
MPC_TYPE_OR = 25,
MPC_TYPE_AND = 26,
};
typedef struct { char* m; } mpc_pdata_fail_t;
typedef struct { mpc_lift_t lf; void* x; } mpc_pdata_lift_t;
typedef struct { mpc_parser_t* x; char* m; } mpc_pdata_expect_t;
typedef struct { char x; } mpc_pdata_single_t;
@@ -464,6 +560,7 @@ typedef struct { bool(*f)(char); } mpc_pdata_satisfy_t;
typedef struct { char* x; } mpc_pdata_string_t;
typedef struct { mpc_parser_t* x; mpc_apply_t f; } mpc_pdata_apply_t;
typedef struct { mpc_parser_t* x; mpc_apply_to_t f; void* d; } mpc_pdata_apply_to_t;
typedef struct { mpc_parser_t* x; } mpc_pdata_predict_t;
typedef struct { mpc_parser_t* x; mpc_dtor_t dx; mpc_lift_t lf; } mpc_pdata_not_t;
typedef struct { mpc_parser_t* x; mpc_fold_t f; int n; mpc_dtor_t dx; mpc_lift_t lf; } mpc_pdata_repeat_t;
typedef struct { mpc_parser_t* x; mpc_parser_t* y; } mpc_pdata_else_t;
@@ -472,6 +569,7 @@ typedef struct { int n; mpc_parser_t** xs; } mpc_pdata_or_t;
typedef struct { int n; mpc_parser_t** xs; mpc_dtor_t* dxs; mpc_afold_t f; } mpc_pdata_and_t;
typedef union {
mpc_pdata_fail_t fail;
mpc_pdata_lift_t lift;
mpc_pdata_expect_t expect;
mpc_pdata_single_t single;
@@ -480,6 +578,7 @@ typedef union {
mpc_pdata_string_t string;
mpc_pdata_apply_t apply;
mpc_pdata_apply_to_t apply_to;
mpc_pdata_predict_t predict;
mpc_pdata_not_t not;
mpc_pdata_repeat_t repeat;
mpc_pdata_else_t orelse;
@@ -509,28 +608,28 @@ struct mpc_parser_t {
#define MPC_SUCCESS(x) r->output = x; return true;
#define MPC_FAILURE(x) r->error = x; return false;
#define MPC_TRY(x, f) if (f) { MPC_SUCCESS(x) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character")); }
#define MPC_TRY(x, f) if (f) { MPC_SUCCESS(x) } else { MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, "Incorrect Input")); }
bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) {
memset(r, 0, sizeof(mpc_result_t));
if (p->type == MPC_TYPE_UNDEFINED) { fprintf(stderr, "\nError: Parser Undefined!\n"); abort(); }
if (p->type == MPC_TYPE_UNDEFINED) { MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, "Parser Undefined!")); }
/* Trivial Parsers */
if (p->type == MPC_TYPE_PASS) { MPC_SUCCESS(NULL); }
if (p->type == MPC_TYPE_FAIL) { MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character")); }
if (p->type == MPC_TYPE_FAIL) { MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, p->data.fail.m)); }
if (p->type == MPC_TYPE_LIFT) { MPC_SUCCESS(p->data.lift.lf()); }
if (p->type == MPC_TYPE_LIFT_VAL) { MPC_SUCCESS(p->data.lift.x); }
/* Basic Parsers */
char* s = NULL;
if (p->type == MPC_TYPE_SOI) { if (mpc_input_soi(i)) { MPC_SUCCESS(NULL) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "start of input")); } }
if (p->type == MPC_TYPE_EOI) { if (mpc_input_eoi(i)) { MPC_SUCCESS(NULL) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "end of input")); } }
char* s = NULL;
if (p->type == MPC_TYPE_ANY) { MPC_TRY(s, mpc_input_any(i, &s)); }
if (p->type == MPC_TYPE_SINGLE) { MPC_TRY(s, mpc_input_char(i, p->data.single.x, &s)); }
if (p->type == MPC_TYPE_RANGE) { MPC_TRY(s, mpc_input_range(i, p->data.range.x, p->data.range.y, &s)); }
@@ -572,12 +671,19 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) {
}
}
if (p->type == MPC_TYPE_PREDICT) {
mpc_input_backtrack_disable(i);
bool res = mpc_parse_input(i, p->data.predict.x, r);
mpc_input_backtrack_enable(i);
return res;
}
if (p->type == MPC_TYPE_NOT) {
mpc_input_mark(i);
if (mpc_parse_input(i, p->data.not.x, &x)) {
mpc_input_rewind(i);
p->data.not.dx(x.output);
MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character"));
MPC_FAILURE(mpc_err_new(i->filename, i->state, "opposite"));
} else {
mpc_input_unmark(i);
mpc_err_delete(x.error);
@@ -710,8 +816,7 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) {
}
fprintf(stderr, "\nError: Unknown Parser Type Id %i!\n", p->type);
abort();
MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, "Unknown Parser Type Id!"));
}
@@ -743,9 +848,11 @@ bool mpc_parse_file(const char* filename, FILE* f, mpc_parser_t* p, mpc_result_t
bool mpc_parse_filename(const char* filename, mpc_parser_t* p, mpc_result_t* r) {
FILE* f = fopen(filename, "r");
if (f == NULL) {
fprintf(stderr, "Error: Unable to open file '%s'\n", filename);
abort();
r->output = NULL;
r->error = mpc_err_new_fail(filename, mpc_state_null(), "Unable to open file!");
return false;
}
bool res = mpc_parse_file(filename, f, p, r);
@@ -786,6 +893,10 @@ static void mpc_undefine_unretained(mpc_parser_t* p, bool force) {
switch (p->type) {
case MPC_TYPE_FAIL:
free(p->data.fail.m);
break;
case MPC_TYPE_ONEOF:
case MPC_TYPE_NONEOF:
case MPC_TYPE_STRING:
@@ -800,6 +911,10 @@ static void mpc_undefine_unretained(mpc_parser_t* p, bool force) {
mpc_undefine_unretained(p->data.apply_to.x, false);
break;
case MPC_TYPE_PREDICT:
mpc_undefine_unretained(p->data.predict.x, false);
break;
case MPC_TYPE_NOT:
mpc_undefine_unretained(p->data.not.x, false);
break;
@@ -848,12 +963,11 @@ void mpc_delete(mpc_parser_t* p) {
if (p->retained) {
if (p->type != MPC_TYPE_UNDEFINED) {
fprintf(stderr, "\nError: Parser still Defined! Use `mpc_undefine` before delete!\n");
abort();
} else {
free(p->name);
free(p);
}
mpc_undefine_unretained(p, false);
}
free(p->name);
free(p);
} else {
mpc_undefine_unretained(p, false);
@@ -888,8 +1002,15 @@ mpc_parser_t* mpc_undefine(mpc_parser_t* p) {
mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a) {
p->type = a->type;
p->data = a->data;
if (p->retained) {
p->type = a->type;
p->data = a->data;
} else {
mpc_parser_t* a2 = mpc_failf("Attempt to assign to Unretained Parser!");
p->type = a2->type;
p->data = a2->data;
free(a2);
}
free(a);
return p;
@@ -928,12 +1049,30 @@ mpc_parser_t* mpc_pass(void) {
return p;
}
mpc_parser_t* mpc_fail(void) {
mpc_parser_t* mpc_fail(const char* m) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_FAIL;
p->data.fail.m = malloc(strlen(m) + 1);
strcpy(p->data.fail.m, m);
return p;
}
mpc_parser_t* mpc_failf(const char* fmt, ...) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_FAIL;
va_list va;
va_start(va, fmt);
char* buffer = malloc(1024);
vsnprintf(buffer, 1023, fmt, va);
va_end(va);
buffer = realloc(buffer, strlen(buffer) + 1);
return p;
}
mpc_parser_t* mpc_lift_val(mpc_val_t* x) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_LIFT_VAL;
@@ -1080,6 +1219,13 @@ mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x) {
return p;
}
mpc_parser_t* mpc_predict(mpc_parser_t* a) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_PREDICT;
p->data.predict.x = a;
return p;
}
mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_NOT;
@@ -1425,14 +1571,14 @@ static mpc_val_t* mpc_re_range(mpc_val_t* x) {
char* s = x;
bool comp = false;
if (*s == '\0') { free(x); return mpc_fail(); }
if (*s == '\0') { free(x); return mpc_failf("Invalid Regex Range Specifier '%s'", x); }
if (*s == '^') {
comp = true;
s++;
}
if (*s == '\0') { free(x); return mpc_fail(); }
if (*s == '\0') { free(x); return mpc_failf("Invalid Regex Range Specifier '%s'", x); }
char* range = calloc(1, 1);
@@ -1522,9 +1668,10 @@ mpc_parser_t* mpc_re(const char* re) {
mpc_result_t r;
if(!mpc_parse("<mpc_re_compiler>", re, RegexEnclose, &r)) {
fprintf(stderr, "\nError Compiling Regex: '%s' ", re);
mpc_err_print(r.error);
abort();
char* err_msg = mpc_err_string_new(r.error);
r.output = mpc_failf("Invalid Regex: %s", err_msg);
free(err_msg);
mpc_err_delete(r.error);
}
mpc_delete(RegexEnclose);
@@ -1583,30 +1730,43 @@ mpc_val_t* mpcf_float(mpc_val_t* x) {
return y;
}
static char* mpc_escape_input = (char[]){
'\a', '\b', '\f', '\n', '\r',
'\t', '\v', '\\', '\'', '\"', '\0'};
static char* mpc_escape_input_c = (char[]) {
'\a', '\b', '\f', '\n', '\r',
'\t', '\v', '\\', '\'', '\"', '\0'};
static char** mpc_escape_output = (char*[]){
"\\a", "\\b", "\\f", "\\n", "\\r",
"\\t", "\\v", "\\\\", "\\'", "\\\"", "\\0"};
static char** mpc_escape_output_c = (char*[]) {
"\\a", "\\b", "\\f", "\\n", "\\r", "\\t",
"\\v", "\\\\", "\\'", "\\\"", "\\0", NULL};
static mpc_val_t* mpcf_escape_new(mpc_val_t* x) {
static char* mpc_escape_input_raw_re = (char[]) { '/' };
static char** mpc_escape_output_raw_re = (char*[]) { "\\/", NULL };
static char* mpc_escape_input_raw_cstr = (char[]) { '"' };
static char** mpc_escape_output_raw_cstr = (char*[]) { "\\\"", NULL };
static char* mpc_escape_input_raw_cchar = (char[]) { '\'' };
static char** mpc_escape_output_raw_cchar = (char*[]) { "\\'", NULL };
static mpc_val_t* mpcf_escape_new(mpc_val_t* x, char* input, char** output) {
int i;
bool found;
char* s = x;
char* y = calloc(1, 1);
while (*s) {
bool found = false;
for (i = 0; i < 11; i++) {
if (*s == mpc_escape_input[i]) {
y = realloc(y, strlen(y) + strlen(mpc_escape_output[i]) + 1);
strcat(y, mpc_escape_output[i]);
i = 0;
found = false;
while (output[i]) {
if (*s == input[i]) {
y = realloc(y, strlen(y) + strlen(output[i]) + 1);
strcat(y, output[i]);
found = true;
break;
}
i++;
}
if (!found) {
@@ -1621,28 +1781,29 @@ static mpc_val_t* mpcf_escape_new(mpc_val_t* x) {
return y;
}
static mpc_val_t* mpcf_unescape_new(mpc_val_t* x) {
static mpc_val_t* mpcf_unescape_new(mpc_val_t* x, char* input, char** output) {
int i;
bool found = false;
char* s = x;
char* y = calloc(1, 1);
while (*s) {
if (*s == '\\') {
s++;
if (*s == '\0') { break; }
for (i = 0; i < 11; i++) {
if (*s == mpc_escape_output[i][1]) {
y = realloc(y, strlen(y) + 2);
strcat(y, (char[]){ mpc_escape_input[i], '\0' });
break;
}
i = 0;
found = false;
while (output[i]) {
if (((*s) == output[i][0]) && (*(s+1) == output[i][1])) {
y = realloc(y, strlen(y) + 2);
strcat(y, (char[]){ input[i], '\0' });
found = true;
break;
}
i++;
}
} else {
if (!found) {
y = realloc(y, strlen(y) + 2);
strcat(y, (char[]){ *s, '\0' });
}
@@ -1655,17 +1816,23 @@ static mpc_val_t* mpcf_unescape_new(mpc_val_t* x) {
}
mpc_val_t* mpcf_escape(mpc_val_t* x) {
mpc_val_t* y = mpcf_escape_new(x);
mpc_val_t* y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c);
free(x);
return y;
}
mpc_val_t* mpcf_unescape(mpc_val_t* x) {
mpc_val_t* y = mpcf_unescape_new(x);
mpc_val_t* y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c);
free(x);
return y;
}
mpc_val_t* mpcf_unescape_regex(mpc_val_t* x) {
mpc_val_t* y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re);
free(x);
return y;
}
mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y) {
return x;
}
@@ -1763,39 +1930,58 @@ static void mpc_print_unretained(mpc_parser_t* p, bool force) {
if (p->type == MPC_TYPE_SATISFY) { printf("<satisfy %p>", p->data.satisfy.f); }
if (p->type == MPC_TYPE_SINGLE) {
char* s = mpcf_escape_new((char[]){ p->data.single.x, '\0' });
char* s = mpcf_escape_new(
(char[]){ p->data.single.x, '\0' },
mpc_escape_input_c,
mpc_escape_output_c);
printf("'%s'", s);
free(s);
}
if (p->type == MPC_TYPE_RANGE) {
char* s = mpcf_escape_new((char[]){ p->data.range.x, '\0' });
char* e = mpcf_escape_new((char[]){ p->data.range.y, '\0' });
char* s = mpcf_escape_new(
(char[]){ p->data.range.x, '\0' },
mpc_escape_input_c,
mpc_escape_output_c);
char* e = mpcf_escape_new(
(char[]){ p->data.range.y, '\0' },
mpc_escape_input_c,
mpc_escape_output_c);
printf("[%s-%s]", s, e);
free(s);
free(e);
}
if (p->type == MPC_TYPE_ONEOF) {
char* s = mpcf_escape_new(p->data.string.x);
char* s = mpcf_escape_new(
p->data.string.x,
mpc_escape_input_c,
mpc_escape_output_c);
printf("[%s]", s);
free(s);
}
if (p->type == MPC_TYPE_NONEOF) {
char* s = mpcf_escape_new(p->data.string.x);
char* s = mpcf_escape_new(
p->data.string.x,
mpc_escape_input_c,
mpc_escape_output_c);
printf("[^%s]", s);
free(s);
}
if (p->type == MPC_TYPE_STRING) {
char* s = mpcf_escape_new(p->data.string.x);
char* s = mpcf_escape_new(
p->data.string.x,
mpc_escape_input_c,
mpc_escape_output_c);
printf("\"%s\"", s);
free(s);
}
if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, false); }
if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, false); }
if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, false); }
if (p->type == MPC_TYPE_NOT) { printf("!"); mpc_print_unretained(p->data.not.x, false); }
if (p->type == MPC_TYPE_MAYBE) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf(")?"); }
if (p->type == MPC_TYPE_MANY) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf(")*"); }
@@ -2008,9 +2194,9 @@ static void mpc_ast_print_depth(mpc_ast_t* a, int d) {
for (i = 0; i < d; i++) { printf("\t"); }
if (strlen(a->contents)) {
printf("<%s> '%s'\n", a->tag, a->contents);
printf("%s: '%s'\n", a->tag, a->contents);
} else {
printf("<%s>\n", a->tag);
printf("%s:\n", a->tag);
}
@@ -2215,9 +2401,9 @@ static mpc_val_t* mpca_grammar_apply_char(mpc_val_t* x) {
}
static mpc_val_t* mpca_grammar_apply_regex(mpc_val_t* x) {
/* TODO: Unescape Regex */
mpc_parser_t* p = mpc_tok(mpc_re(x));
free(x);
char* y = mpcf_unescape_regex(x);
mpc_parser_t* p = mpc_tok(mpc_re(y));
free(y);
return mpca_tag(mpc_apply(p, mpcf_apply_str_ast), "regex");
}
@@ -2238,6 +2424,9 @@ static mpc_parser_t* mpca_grammar_find_parser(char* x, mpca_grammar_st_t* st) {
st->parsers_num++;
st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num);
st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*);
if (st->parsers[st->parsers_num-1] == NULL) {
return mpc_failf("No Parser in position %i! Only supplied %i Parsers!", i, st->parsers_num);
}
}
return st->parsers[st->parsers_num-1];
@@ -2263,8 +2452,7 @@ static mpc_parser_t* mpca_grammar_find_parser(char* x, mpca_grammar_st_t* st) {
st->parsers[st->parsers_num-1] = p;
if (p == NULL) {
fprintf(stderr, "Error: Unable to find specified parser '%s' in arguments\n", (char*)x);
abort();
return mpc_failf("Unknown Parser '%s'!", x);
}
if (p->name && strcmp(p->name, x) == 0) { return p; }
@@ -2327,9 +2515,10 @@ mpc_parser_t* mpca_grammar_st(const char* grammar, mpca_grammar_st_t* st) {
mpc_result_t r;
if(!mpc_parse("<mpc_grammar_compiler>", grammar, GrammarTotal, &r)) {
fprintf(stderr, "\nError Compiling Grammar: '%s' ", grammar);
mpc_err_print(r.error);
abort();
char* err_msg = mpc_err_string_new(r.error);
r.output = mpc_failf("Invalid Grammar: %s", err_msg);
free(err_msg);
mpc_err_delete(r.error);
}
mpc_delete(GrammarTotal);
@@ -2422,7 +2611,7 @@ static mpc_val_t* mpca_stmt_list_apply_to(mpc_val_t* x, void* st) {
return NULL;
}
static void mpca_lang_st(const char* language, mpca_grammar_st_t* st) {
static mpc_err_t* mpca_lang_st(const char* language, mpca_grammar_st_t* st) {
mpc_parser_t* Lang = mpc_new("lang");
mpc_parser_t* Stmt = mpc_new("stmt");
@@ -2446,13 +2635,13 @@ static void mpca_lang_st(const char* language, mpca_grammar_st_t* st) {
Base
));
mpc_define(Base, mpc_or(5,
mpc_define(Base, mpc_predict(mpc_or(5,
mpc_apply(mpc_tok(mpc_string_lit()), mpca_grammar_apply_string),
mpc_apply(mpc_tok(mpc_char_lit()), mpca_grammar_apply_char),
mpc_apply(mpc_tok(mpc_regex_lit()), mpca_grammar_apply_regex),
mpc_apply_to(mpc_tok_braces(mpc_tok(mpc_else(mpc_digits(), mpc_ident())), free), mpca_grammar_apply_id, st),
mpc_tok_parens(Grammar, mpc_soft_delete)
));
)));
mpc_define(Lang, mpc_apply_to(
mpc_total(mpc_many(Stmt, mpca_stmt_fold), mpca_stmt_list_delete),
@@ -2466,33 +2655,30 @@ static void mpca_lang_st(const char* language, mpca_grammar_st_t* st) {
free, free, mpc_soft_delete
));
mpc_result_t r;
if (!mpc_parse("<mpc_lang_compiler>", language, Lang, &r)) {
fprintf(stderr, "\nError Compiling Language");
mpc_err_print(r.error);
abort();
}
mpc_result_t r = {0};
mpc_parse("<mpc_lang_compiler>", language, Lang, &r);
mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base);
return r.error;
}
void mpca_lang(const char* language, ...) {
mpc_err_t* mpca_lang(const char* language, ...) {
va_list va;
va_start(va, language);
mpca_grammar_st_t st = { (va_list*)&va, 0, NULL };
mpca_lang_st(language, &st);
mpc_err_t* err = mpca_lang_st(language, &st);
free(st.parsers);
va_end(va);
va_end(va);
return err;
}
void mpca_lang_file(const char* filename, ...) {
mpc_err_t* mpca_lang_file(const char* filename, ...) {
FILE* f = fopen(filename, "r");
if (f == NULL) {
fprintf(stderr, "Error: Unable to open file '%s'\n", filename);
abort();
return mpc_err_new_fail(filename, mpc_state_null(), "Unable to open file!");
}
fseek(f, 0, SEEK_END);
@@ -2506,9 +2692,11 @@ void mpca_lang_file(const char* filename, ...) {
va_list va;
va_start(va, filename);
mpca_grammar_st_t st = { (va_list*)&va, 0, NULL };
mpca_lang_st(buff, &st);
mpc_err_t* err = mpca_lang_st(buff, &st);
free(st.parsers);
va_end(va);
free(buff);
return err;
}

11
mpc.h
View File

@@ -24,11 +24,12 @@ int mpc_err_column(mpc_err_t* x);
char mpc_err_unexpected(mpc_err_t* x);
char** mpc_err_expected(mpc_err_t* x, int* num);
char* mpc_err_filename(mpc_err_t* x);
char* mpc_err_failure(mpc_err_t* x);
void mpc_err_delete(mpc_err_t* x);
void mpc_err_print(mpc_err_t* x);
void mpc_err_print_to(mpc_err_t* x, FILE* f);
void mpc_err_msg(mpc_err_t* x, char* out, int* outn, int outmax);
char* mpc_err_string_new(mpc_err_t* x);
/*
** Parsing
@@ -77,7 +78,8 @@ void mpc_cleanup_va(int n, va_list va);
*/
mpc_parser_t* mpc_pass(void);
mpc_parser_t* mpc_fail(void);
mpc_parser_t* mpc_fail(const char* m);
mpc_parser_t* mpc_failf(const char* fmt, ...);
mpc_parser_t* mpc_lift(mpc_lift_t f);
mpc_parser_t* mpc_lift_val(mpc_val_t* x);
@@ -96,6 +98,7 @@ mpc_parser_t* mpc_string(const char* s);
mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected);
mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f);
mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x);
mpc_parser_t* mpc_predict(mpc_parser_t* a);
mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da);
mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf);
mpc_parser_t* mpc_maybe(mpc_parser_t* a);
@@ -264,8 +267,8 @@ mpc_parser_t* mpca_or(int n, ...);
mpc_parser_t* mpca_and(int n, ...);
mpc_parser_t* mpca_grammar(const char* grammar, ...);
void mpca_lang(const char* language, ...);
void mpca_lang_file(const char* filename, ...);
mpc_err_t* mpca_lang(const char* language, ...);
mpc_err_t* mpca_lang_file(const char* filename, ...);
/*
** Testing