diff --git a/README.md b/README.md index 58fbdd2..3d382f1 100644 --- a/README.md +++ b/README.md @@ -3,18 +3,24 @@ Micro Parser Combinators _mpc_ is a lightweight Parser Combinator library for C. -The current main alternative is a branch of (https://github.com/wbhart/Cesium3)[Cesium3]. - -Features & Advantages ---------------------- +Features +-------- +* Full Type Generic Parser Combinator * Error Message Support * Regular Expression Support -* Parser Grammar Support -* Works for Generic Types -* AST Extension -* Single source & header files +* Packaged with AST generator +* Easy to include in source * Written in clean ANSI C + +Alternatives +------------ + +The current main alternative is a branch of [Cesium3](https://github.com/wbhart/Cesium3). + +The main advantages of _mpc_ over this are: + +* Works for Generic Types * Doesn't rely on Boehm-Demers-Weiser Garbage Collection * Doesn't use `setjmp` and `longjmp` for errors * Doesn't pollute namespace @@ -24,51 +30,8 @@ Example ```c -mpc_val_t* combine_maths(int n, mpc_val_t** xs) { - - int** vs = (int**)xs; - - if (*vs[1] == '*') { *vs[0] *= *vs[2]; } - if (*vs[1] == '/') { *vs[0] /= *vs[2]; } - if (*vs[1] == '+') { *vs[0] += *vs[2]; } - if (*vs[1] == '-') { *vs[0] -= *vs[2]; } - - free(vs[1]); - free(vs[2]); - - return vs[0]; -} -int main(int argc, char** argv) { - mpc_parser_t* Expr = mpc_new(); - mpc_parser_t* Factor = mpc_new(); - mpc_parser_t* Term = mpc_new(); - mpc_parser_t* Maths = mpc_new(); - - mpc_define(Expr, - mpc_pc("cmaths ( fact ['*' | '/'] fact ) | fact", - combine_maths, Factor, free, Factor, free, Factor), - ); - - mpc_define(Factor, - mpc_pc("cmaths ( term ['+' | '-'] term ) | term", - combine_maths, Term, free, Term, free, Term), - ); - - mpc_define(Term, - mpc_pc("num | snd ('(' expr ')')", - mpc_int(), mpcf_asnd_free, Expr, free) - ); - - mpc_define(Maths, mpc_ends(Expr, free)); - - mpc_delete(Expr); - mpc_delete(Factor); - mpc_delete(Term); - mpc_delete(Maths); - -} ``` Parsers @@ -90,6 
+53,17 @@ Combinator Grammars Abstract Syntax Tree -------------------- +If you want to do all the data processing after the parsing stage _mpc_ comes packaged with a basic AST type which makes the grammar declaration much cleaner as you don't have to pass around destructors and fold functions. All these functions reside under `mpca_*`. + +This also allows for the use of parser grammars that can be declared directly in C strings similarly to regular expressions. + +```c + + +``` + + + Reference --------- diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..545dfc5 --- /dev/null +++ b/TODO.md @@ -0,0 +1,11 @@ +- Special Start of Input +- Special End of Input +- Integrate ptest +- Test All Regex Features +- Test Regex Range Feature +- Parser Naming +- Test Grammar Trees +- Change Grammar Many Fold Operator +- Find some good demo grammars to show +- Add Tutorial Teaching from scratch +- Add Reference \ No newline at end of file diff --git a/mpc.c b/mpc.c index 7b0bd9a..8b34cf4 100644 --- a/mpc.c +++ b/mpc.c @@ -21,13 +21,16 @@ typedef struct { */ struct mpc_err_t { + char* filename; mpc_state_t state; int expected_num; char** expected; }; -static mpc_err_t* mpc_err_new(mpc_state_t s, char* expected) { +static mpc_err_t* mpc_err_new(char* filename, mpc_state_t s, char* expected) { mpc_err_t* x = malloc(sizeof(mpc_err_t)); + x->filename = malloc(strlen(filename) + 1); + strcpy(x->filename, filename); x->state = s; x->expected_num = 1; x->expected = malloc(sizeof(char*)); @@ -42,8 +45,9 @@ void mpc_err_delete(mpc_err_t* x) { for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } - + free(x->expected); + free(x->filename); free(x); } @@ -67,13 +71,26 @@ static void mpc_err_add_expected(mpc_err_t* x, char* expected) { } +static void mpc_err_clear_expected(mpc_err_t* x, char* expected) { + + int i; + for (i = 0; i < x->expected_num; i++) { + free(x->expected[i]); + } + x->expected_num = 1; + x->expected = realloc(x->expected, sizeof(char*) * 
x->expected_num); + x->expected[0] = malloc(strlen(expected) + 1); + strcpy(x->expected[0], expected); + +} + void mpc_err_print(mpc_err_t* x) { mpc_err_print_to(x, stdout); } void mpc_err_print_to(mpc_err_t* x, FILE* f) { - fprintf(f, ":%i:%i: error: expected ", x->state.row, x->state.col); + fprintf(f, "%s:%i:%i: error: expected ", x->filename, x->state.row, x->state.col); if (x->expected_num == 0) { @@ -96,7 +113,17 @@ void mpc_err_print_to(mpc_err_t* x, FILE* f) { } - printf(" at '%c'\n", x->state.next); + printf(" at "); + if (x->state.next == '\a') { printf("bell"); } + else if (x->state.next == '\b') { printf("backspace"); } + else if (x->state.next == '\f') { printf("formfeed"); } + else if (x->state.next == '\r') { printf("carriage return"); } + else if (x->state.next == '\v') { printf("vertical tab"); } + else if (x->state.next == '\0') { printf("end of input"); } + else if (x->state.next == '\n') { printf("newline"); } + else if (x->state.next == '\t') { printf("tab"); } + else { printf("'%c'", x->state.next); } + printf("\n"); } @@ -144,26 +171,76 @@ static mpc_err_t* mpc_err_or(mpc_err_t** x, int n) { } static mpc_err_t* mpc_err_many1(mpc_err_t* x) { - /* TODO: Collapse expected list and add `one or more` prefix */ + + char* expect = malloc(strlen("one or more of ") + 1); + strcpy(expect, "one or more of "); + + int i; + for (i = 0; i < x->expected_num - 1; i++) { + expect = realloc(expect, strlen(expect) + strlen(x->expected[i]) + strlen(", ") + 1); + strcat(expect, x->expected[i]); + strcat(expect, ", "); + } + + expect = realloc(expect, strlen(expect) + strlen(x->expected[x->expected_num-1]) + 1); + strcat(expect, x->expected[x->expected_num-1]); + + mpc_err_clear_expected(x, expect); + free(expect); + return x; } static mpc_err_t* mpc_err_count(mpc_err_t* x, int n) { - /* TODO: Collapse expected list and add `N of` prefix */ + + int digits = n/10 + 1; + char* expect = malloc(digits + strlen(" of ") + 1); + sprintf(expect, "%i of ", n); + + int i; + 
for (i = 0; i < x->expected_num - 1; i++) { + expect = realloc(expect, strlen(expect) + strlen(x->expected[i]) + strlen(", ") + 1); + strcat(expect, x->expected[i]); + strcat(expect, ", "); + } + + expect = realloc(expect, strlen(expect) + strlen(x->expected[x->expected_num-1]) + 1); + strcat(expect, x->expected[x->expected_num-1]); + + mpc_err_clear_expected(x, expect); + free(expect); + return x; } +char* mpc_err_filename(mpc_err_t* x) { + return x->filename; +} + char** mpc_err_expected(mpc_err_t* x, int* num) { *num = x->expected_num; return x->expected; } +int mpc_err_line(mpc_err_t* x) { + return x->state.row; +} + +int mpc_err_column(mpc_err_t* x) { + return x->state.col; +} + +char mpc_err_unexpected(mpc_err_t* x) { + return x->state.next; +} + /* ** Input Type */ typedef struct { + char* filename; char* str; mpc_state_t state; @@ -172,12 +249,15 @@ typedef struct { } mpc_input_t; -static mpc_input_t* mpc_input_new(const char* str) { +static mpc_input_t* mpc_input_new(const char* filename, const char* str) { mpc_input_t* i = malloc(sizeof(mpc_input_t)); i->str = malloc(strlen(str) + 1); strcpy(i->str, str); + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->state.next = i->str[0]; i->state.last = '\0'; i->state.pos = 0; @@ -191,6 +271,7 @@ static mpc_input_t* mpc_input_new(const char* str) { } static void mpc_input_delete(mpc_input_t* i) { + free(i->filename); free(i->str); free(i->marks); free(i); @@ -340,38 +421,42 @@ enum { MPC_TYPE_PASS = 1, MPC_TYPE_FAIL = 2, MPC_TYPE_LIFT = 3, - MPC_TYPE_EXPECT = 4, + MPC_TYPE_LIFT_VAL = 4, + MPC_TYPE_EXPECT = 5, - MPC_TYPE_ANY = 5, - MPC_TYPE_SINGLE = 6, - MPC_TYPE_ONEOF = 7, - MPC_TYPE_NONEOF = 8, - MPC_TYPE_RANGE = 9, - MPC_TYPE_SATISFY = 10, - MPC_TYPE_STRING = 11, + MPC_TYPE_ANY = 6, + MPC_TYPE_SINGLE = 7, + MPC_TYPE_ONEOF = 8, + MPC_TYPE_NONEOF = 9, + MPC_TYPE_RANGE = 10, + MPC_TYPE_SATISFY = 11, + MPC_TYPE_STRING = 12, - MPC_TYPE_APPLY = 12, + MPC_TYPE_APPLY = 13, + 
MPC_TYPE_APPLY_TO = 14, + MPC_TYPE_NOT = 15, + MPC_TYPE_MAYBE = 16, + MPC_TYPE_MANY = 17, + MPC_TYPE_MANY1 = 18, + MPC_TYPE_COUNT = 19, - MPC_TYPE_MAYBE = 13, - MPC_TYPE_MANY = 14, - MPC_TYPE_MANY1 = 15, - MPC_TYPE_COUNT = 16, - - MPC_TYPE_EITHER = 17, - MPC_TYPE_ALSO = 18, - MPC_TYPE_OR = 19, - MPC_TYPE_AND = 20, + MPC_TYPE_ELSE = 20, + MPC_TYPE_ALSO = 21, + MPC_TYPE_OR = 22, + MPC_TYPE_AND = 23, }; -typedef struct { void* x; } mpc_pdata_lift_t; +typedef struct { mpc_lift_t lf; void* x; } mpc_pdata_lift_t; typedef struct { mpc_parser_t* x; char* m; } mpc_pdata_expect_t; typedef struct { char x; } mpc_pdata_single_t; typedef struct { char x; char y; } mpc_pdata_range_t; typedef struct { bool(*f)(char); } mpc_pdata_satisfy_t; typedef struct { char* x; } mpc_pdata_string_t; typedef struct { mpc_parser_t* x; mpc_apply_t f; } mpc_pdata_apply_t; -typedef struct { mpc_parser_t* x; mpc_fold_t f; int n; mpc_dtor_t dx; } mpc_pdata_repeat_t; -typedef struct { mpc_parser_t* x; mpc_parser_t* y; } mpc_pdata_either_t; +typedef struct { mpc_parser_t* x; mpc_apply_to_t f; void* d; } mpc_pdata_apply_to_t; +typedef struct { mpc_parser_t* x; mpc_dtor_t dx; mpc_lift_t lf; } mpc_pdata_not_t; +typedef struct { mpc_parser_t* x; mpc_fold_t f; int n; mpc_dtor_t dx; mpc_lift_t lf; } mpc_pdata_repeat_t; +typedef struct { mpc_parser_t* x; mpc_parser_t* y; } mpc_pdata_else_t; typedef struct { mpc_parser_t* x; mpc_parser_t* y; mpc_dtor_t dx; mpc_fold_t f; } mpc_pdata_also_t; typedef struct { int n; mpc_parser_t** xs; } mpc_pdata_or_t; typedef struct { int n; mpc_parser_t** xs; mpc_dtor_t* dxs; mpc_afold_t f; } mpc_pdata_and_t; @@ -384,8 +469,10 @@ typedef union { mpc_pdata_satisfy_t satisfy; mpc_pdata_string_t string; mpc_pdata_apply_t apply; + mpc_pdata_apply_to_t apply_to; + mpc_pdata_not_t not; mpc_pdata_repeat_t repeat; - mpc_pdata_either_t either; + mpc_pdata_else_t orelse; mpc_pdata_also_t also; mpc_pdata_and_t and; mpc_pdata_or_t or; @@ -411,19 +498,20 @@ struct mpc_parser_t { #define 
MPC_SUCCESS(x) r->output = x; return true; #define MPC_FAILURE(x) r->error = x; return false; -#define MPC_TRY(x, f) if (f) { MPC_SUCCESS(x) } else { MPC_FAILURE(mpc_err_new(i->state, "different character")); } +#define MPC_TRY(x, f) if (f) { MPC_SUCCESS(x) } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character")); } bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { memset(r, 0, sizeof(mpc_result_t)); - if (p->type == MPC_TYPE_UNDEFINED) { fprintf(stderr, "Error: Parser Undefined!\n"); abort(); } + if (p->type == MPC_TYPE_UNDEFINED) { fprintf(stderr, "\nError: Parser Undefined!\n"); abort(); } /* Trivial Parsers */ if (p->type == MPC_TYPE_PASS) { MPC_SUCCESS(NULL); } - if (p->type == MPC_TYPE_FAIL) { MPC_FAILURE(mpc_err_new(i->state, "different character")); } - if (p->type == MPC_TYPE_LIFT) { MPC_SUCCESS(p->data.lift.x); } + if (p->type == MPC_TYPE_FAIL) { MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character")); } + if (p->type == MPC_TYPE_LIFT) { MPC_SUCCESS(p->data.lift.lf()); } + if (p->type == MPC_TYPE_LIFT_VAL) { MPC_SUCCESS(p->data.lift.x); } /* Basic Parsers */ @@ -449,7 +537,7 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { MPC_SUCCESS(x.output); } else { mpc_err_delete(x.error); - MPC_FAILURE(mpc_err_new(i->state, p->data.expect.m)); + MPC_FAILURE(mpc_err_new(i->filename, i->state, p->data.expect.m)); } } @@ -461,16 +549,34 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { } } + if (p->type == MPC_TYPE_APPLY_TO) { + if (mpc_parse_input(i, p->data.apply_to.x, &x)) { + MPC_SUCCESS(p->data.apply_to.f(x.output, p->data.apply_to.d)); + } else { + MPC_FAILURE(x.error); + } + } + + if (p->type == MPC_TYPE_NOT) { + if (mpc_parse_input(i, p->data.not.x, &x)) { + p->data.not.dx(x.output); + MPC_FAILURE(mpc_err_new(i->filename, i->state, "different character")); + } else { + mpc_err_delete(x.error); + MPC_SUCCESS(p->data.not.lf()); + } + } + if (p->type 
== MPC_TYPE_MAYBE) { if (mpc_parse_input(i, p->data.repeat.x, &x)) { MPC_SUCCESS(x.output); } mpc_err_delete(x.error); - MPC_SUCCESS(NULL); + MPC_SUCCESS(p->data.repeat.lf()); } if (p->type == MPC_TYPE_MANY) { while (mpc_parse_input(i, p->data.repeat.x, &x)) { t = p->data.repeat.f(t, x.output); } mpc_err_delete(x.error); - MPC_SUCCESS(t); + MPC_SUCCESS(t ? t : p->data.repeat.lf()); } if (p->type == MPC_TYPE_MANY1) { @@ -497,7 +603,7 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { if (c == p->data.repeat.n) { mpc_input_unmark(i); - MPC_SUCCESS(t); + MPC_SUCCESS(t ? t : p->data.repeat.lf()); } else { p->data.repeat.dx(t); mpc_input_rewind(i); @@ -508,9 +614,9 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { /* Combinatory Parsers */ - if (p->type == MPC_TYPE_EITHER) { - if (mpc_parse_input(i, p->data.either.x, &x)) { MPC_SUCCESS(x.output); } - if (mpc_parse_input(i, p->data.either.y, &y)) { mpc_err_delete(x.error); MPC_SUCCESS(y.output); } + if (p->type == MPC_TYPE_ELSE) { + if (mpc_parse_input(i, p->data.orelse.x, &x)) { MPC_SUCCESS(x.output); } + if (mpc_parse_input(i, p->data.orelse.y, &y)) { mpc_err_delete(x.error); MPC_SUCCESS(y.output); } MPC_FAILURE(mpc_err_either(x.error, y.error)); } @@ -586,7 +692,7 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { } - fprintf(stderr, "Unknown Parser Type Id %i!\n", p->type); + fprintf(stderr, "\nError: Unknown Parser Type Id %i!\n", p->type); abort(); } @@ -595,14 +701,14 @@ bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { #undef MPC_FAILURE #undef MPC_TRY -bool mpc_parse(const char* s, mpc_parser_t* p, mpc_result_t* r) { - mpc_input_t* i = mpc_input_new(s); +bool mpc_parse(const char* filename, const char* s, mpc_parser_t* p, mpc_result_t* r) { + mpc_input_t* i = mpc_input_new(filename, s); bool x = mpc_parse_input(i, p, r); mpc_input_delete(i); return x; } -bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* 
r) { +bool mpc_parse_file(const char* filename, FILE* f, mpc_parser_t* p, mpc_result_t* r) { fseek(f, 0, SEEK_END); int len = ftell(f); fseek(f, 0, SEEK_SET); @@ -610,7 +716,7 @@ bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* r) { fread(buff, 1, len, f); buff[len] = '\0'; - bool x = mpc_parse(buff, p, r); + bool x = mpc_parse(filename, buff, p, r); free(buff); return x; @@ -618,7 +724,7 @@ bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* r) { bool mpc_parse_filename(const char* filename, mpc_parser_t* p, mpc_result_t* r) { FILE* f = fopen(filename, "r"); - bool res = mpc_parse_file(f, p, r); + bool res = mpc_parse_file(filename, f, p, r); fclose(f); return res; } @@ -678,9 +784,9 @@ static void mpc_undefine_unretained(mpc_parser_t* p, bool force) { mpc_undefine_unretained(p->data.repeat.x, false); break; - case MPC_TYPE_EITHER: - mpc_undefine_unretained(p->data.either.x, false); - mpc_undefine_unretained(p->data.either.y, false); + case MPC_TYPE_ELSE: + mpc_undefine_unretained(p->data.orelse.x, false); + mpc_undefine_unretained(p->data.orelse.y, false); break; case MPC_TYPE_ALSO: @@ -708,7 +814,7 @@ void mpc_delete(mpc_parser_t* p) { if (p->retained) { if (p->type != MPC_TYPE_UNDEFINED) { - fprintf(stderr, "Error: Parser still Defined! Use `mpc_undefine` before delete!\n"); + fprintf(stderr, "\nError: Parser still Defined! 
Use `mpc_undefine` before delete!\n"); abort(); } else { free(p); @@ -761,13 +867,19 @@ mpc_parser_t* mpc_fail(void) { return p; } -mpc_parser_t* mpc_lift(mpc_val_t* x) { +mpc_parser_t* mpc_lift_val(mpc_val_t* x) { mpc_parser_t* p = mpc_undefined(); - p->type = MPC_TYPE_LIFT; + p->type = MPC_TYPE_LIFT_VAL; p->data.lift.x = x; return p; } +mpc_parser_t* mpc_lift(mpc_lift_t lf) { + mpc_parser_t* p = mpc_undefined(); + p->type = MPC_TYPE_LIFT; + p->data.lift.lf = lf; + return p; +} mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected) { mpc_parser_t* p = mpc_undefined(); @@ -879,8 +991,6 @@ mpc_parser_t* mpc_string(const char* s) { } -void mpc_dtor_null(mpc_val_t* x) { return; } - /* ** Core Parsers */ @@ -893,18 +1003,50 @@ mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f) { return p; } -mpc_parser_t* mpc_maybe(mpc_parser_t* a) { +mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x) { mpc_parser_t* p = mpc_undefined(); - p->type = MPC_TYPE_MAYBE; - p->data.repeat.x = a; + p->type = MPC_TYPE_APPLY_TO; + p->data.apply_to.x = a; + p->data.apply_to.f = f; + p->data.apply_to.d = x; return p; } +mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf) { + mpc_parser_t* p = mpc_undefined(); + p->type = MPC_TYPE_NOT; + p->data.not.x = a; + p->data.not.dx = da; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da) { + return mpc_not_else(a, da, mpcf_lift_null); +} + +mpc_parser_t* mpc_maybe_else(mpc_parser_t* a, mpc_lift_t lf) { + mpc_parser_t* p = mpc_undefined(); + p->type = MPC_TYPE_MAYBE; + p->data.repeat.x = a; + p->data.repeat.lf = lf; + return p; +} + +mpc_parser_t* mpc_maybe(mpc_parser_t* a) { + return mpc_maybe_else(a, mpcf_lift_null); +} + mpc_parser_t* mpc_many(mpc_parser_t* a, mpc_fold_t f) { + return mpc_many_else(a, f, mpcf_lift_null); +} + +mpc_parser_t* mpc_many_else(mpc_parser_t* a, mpc_fold_t f, mpc_lift_t lf) { mpc_parser_t* p = mpc_undefined(); p->type = 
MPC_TYPE_MANY; p->data.repeat.x = a; p->data.repeat.f = f; + p->data.repeat.lf = lf; return p; } @@ -916,21 +1058,26 @@ mpc_parser_t* mpc_many1(mpc_parser_t* a, mpc_fold_t f) { return p; } -mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n) { +mpc_parser_t* mpc_count_else(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n, mpc_lift_t lf) { mpc_parser_t* p = mpc_undefined(); p->type = MPC_TYPE_COUNT; p->data.repeat.x = a; p->data.repeat.dx = da; p->data.repeat.f = f; p->data.repeat.n = n; + p->data.repeat.lf = lf; return p; } -mpc_parser_t* mpc_either(mpc_parser_t* a, mpc_parser_t* b) { +mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n) { + return mpc_count_else(a, da, f, n, mpcf_lift_null); +} + +mpc_parser_t* mpc_else(mpc_parser_t* a, mpc_parser_t* b) { mpc_parser_t* p = mpc_undefined(); - p->type = MPC_TYPE_EITHER; - p->data.either.x = a; - p->data.either.y = b; + p->type = MPC_TYPE_ELSE; + p->data.orelse.x = a; + p->data.orelse.y = b; return p; } @@ -948,7 +1095,7 @@ mpc_parser_t* mpc_bind(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold return mpc_also(a, b, da, f); } -mpc_parser_t* mpc_or(int n, ...) { +mpc_parser_t* mpc_or_va(int n, va_list va) { mpc_parser_t* p = mpc_undefined(); @@ -956,19 +1103,16 @@ mpc_parser_t* mpc_or(int n, ...) { p->data.or.n = n; p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - va_list va; - va_start(va, n); int i; for (i = 0; i < n; i++) { p->data.or.xs[i] = va_arg(va, mpc_parser_t*); } - va_end(va); return p; + } -mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) { - +mpc_parser_t* mpc_and_va(int n, mpc_afold_t f, va_list va) { mpc_parser_t* p = mpc_undefined(); p->type = MPC_TYPE_AND; @@ -977,8 +1121,6 @@ mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) 
{ p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - va_list va; - va_start(va, f); int i; for (i = 0; i < n; i++) { p->data.and.xs[i] = va_arg(va, mpc_parser_t*); @@ -986,10 +1128,24 @@ mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) { for (i = 0; i < (n-1); i++) { p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); } - va_end(va); return p; +} +mpc_parser_t* mpc_or(int n, ...) { + va_list va; + va_start(va, n); + mpc_parser_t* p = mpc_or_va(n, va); + va_end(va); + return p; +} + +mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) { + va_list va; + va_start(va, f); + mpc_parser_t* p = mpc_and_va(n, f, va); + va_end(va); + return p; } /* @@ -1014,25 +1170,61 @@ mpc_parser_t* mpc_octdigits(void) { return mpc_expect(mpc_many1(mpc_octdigit(), mpc_parser_t* mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } mpc_parser_t* mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } -mpc_parser_t* mpc_alpha(void) { return mpc_expect(mpc_either(mpc_lower(), mpc_upper()), "letter"); } +mpc_parser_t* mpc_alpha(void) { return mpc_expect(mpc_else(mpc_lower(), mpc_upper()), "letter"); } mpc_parser_t* mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } +mpc_parser_t* mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } mpc_parser_t* mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } mpc_parser_t* mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } mpc_parser_t* mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } mpc_parser_t* mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } -mpc_parser_t* mpc_float(void); +mpc_parser_t* mpc_real(void) { + + /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? 
*/ + + mpc_parser_t* p0 = mpc_maybe_else(mpc_oneof("+-"), mpcf_lift_emptystr); + mpc_parser_t* p1 = mpc_digits(); + mpc_parser_t* p2 = mpc_maybe_else(mpc_also(mpc_char('.'), mpc_digits(), free, mpcf_strfold), mpcf_lift_emptystr); + mpc_parser_t* p30 = mpc_oneof("eE"); + mpc_parser_t* p31 = mpc_maybe_else(mpc_oneof("+-"), mpcf_lift_emptystr); + mpc_parser_t* p32 = mpc_digits(); + mpc_parser_t* p3 = mpc_maybe_else(mpc_and(3, mpcf_astrfold, p30, p31, p32, free, free), mpcf_lift_emptystr); + + return mpc_expect(mpc_and(4, mpcf_astrfold, p0, p1, p2, p3, free, free, free), "real"); + +} + +mpc_parser_t* mpc_float(void) { + return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); +} mpc_parser_t* mpc_semi(void) { return mpc_char(';'); } mpc_parser_t* mpc_comma(void) { return mpc_char(','); } mpc_parser_t* mpc_colon(void) { return mpc_char(':'); } mpc_parser_t* mpc_dot(void) { return mpc_char('.'); } -mpc_parser_t* mpc_char_lit(void); -mpc_parser_t* mpc_string_lit(void); +mpc_parser_t* mpc_char_lit(void) { + return mpc_expect(mpc_between(mpc_else(mpc_escape(), mpc_any()), free, "'", "'"), "char"); +} -mpc_parser_t* mpc_ident(void); +mpc_parser_t* mpc_string_lit(void) { + mpc_parser_t* strchar = mpc_else(mpc_escape(), mpc_noneof("\"")); + return mpc_expect(mpc_between(mpc_many_else(strchar, mpcf_strfold, mpcf_lift_emptystr), free, "\"", "\""), "string"); +} + +mpc_parser_t* mpc_regex_lit(void) { + mpc_parser_t* regexchar = mpc_else(mpc_escape(), mpc_noneof("/")); + return mpc_expect(mpc_between(mpc_many_else(regexchar, mpcf_strfold, mpcf_lift_emptystr), free, "/", "/"), "regex"); +} + +mpc_parser_t* mpc_ident(void) { + + mpc_parser_t* p0 = mpc_else(mpc_alpha(), mpc_underscore()); + mpc_parser_t* p1 = mpc_many_else(mpc_alphanum(), mpcf_strfold, mpcf_lift_emptystr); + + return mpc_also(p0, p1, free, mpcf_strfold); +} /* ** Useful Parsers @@ -1041,7 +1233,7 @@ mpc_parser_t* mpc_ident(void); mpc_parser_t* mpc_ends(mpc_parser_t* a, mpc_dtor_t da) { return mpc_also(a, 
mpc_eoi(), da, mpcf_fst_free); } mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f) { return mpc_many(a, f); } mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f) { return mpc_many1(a, f); } -mpc_parser_t* mpc_tok(mpc_parser_t* a) { return mpc_also(a, mpc_whitespace(), mpc_dtor_null, mpcf_fst); } +mpc_parser_t* mpc_tok(mpc_parser_t* a) { return mpc_also(a, mpc_whitespace(), mpcf_dtor_null, mpcf_fst); } mpc_parser_t* mpc_sym(const char* s) { return mpc_tok(mpc_string(s)); } mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c) { @@ -1071,18 +1263,18 @@ mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between( ** mpc functions look a lot like `fold` ** functions and so can be used indirectly ** by many of the parsing functions to build -** a parser directly - as we parse. +** a parser directly - as we are parsing. ** ** This is certainly something that ** would be less elegant/interesting ** in a two-phase parser which first -** built an AST and then traversed it +** builds an AST and then traverses it ** to generate the object. ** ** This whole thing acts as a great ** case study for how trivial it can be ** to write a great parser in a few -** lines of code using this library. +** lines of code using mpc. 
*/ /* @@ -1106,22 +1298,23 @@ mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between( ** | "[" "]" */ -static mpc_val_t* mpc_re_fold_or(int n, mpc_val_t** xs) { +static mpc_val_t* mpc_re_afold_or(int n, mpc_val_t** xs) { free(xs[1]); - return mpc_either(xs[0], xs[2]); + return mpc_else(xs[0], xs[2]); } static mpc_val_t* mpc_re_fold_repeat(mpc_val_t* x, mpc_val_t* y) { - if (strcmp(y, "*") == 0) { free(y); return mpc_many(x, mpcf_strfold); } + if (strcmp(y, "*") == 0) { free(y); return mpc_many_else(x, mpcf_strfold, mpcf_lift_emptystr); } if (strcmp(y, "+") == 0) { free(y); return mpc_many1(x, mpcf_strfold); } - if (strcmp(y, "?") == 0) { free(y); return mpc_maybe(x); } - int n = strtol(y, NULL, 10); + if (strcmp(y, "?") == 0) { free(y); return mpc_maybe_else(x, mpcf_lift_emptystr); } + int n = *(int*)y; free(y); - return mpc_count(x, free, mpcf_strfold, n); + return mpc_count_else(x, free, mpcf_strfold, n, mpcf_lift_emptystr); } static mpc_val_t* mpc_re_fold_many(mpc_val_t* t, mpc_val_t* x) { if (t == NULL) { return x; } + if (x == NULL) { return t; } return mpc_also(t, x, free, mpcf_strfold); } @@ -1133,20 +1326,99 @@ static mpc_val_t* mpc_re_escape(mpc_val_t* x) { if (s[0] == '$') { free(x); return mpc_eoi(); } if (s[0] == '\\') { + + if (s[1] == 'd') { free(x); return mpc_digit(); } + if (s[1] == 'D') { free(x); return mpc_not_else(mpc_digit(), free, mpcf_lift_emptystr); } + if (s[1] == 's') { free(x); return mpc_space(); } + if (s[1] == 'S') { free(x); return mpc_not_else(mpc_space(), free, mpcf_lift_emptystr); } + if (s[1] == 'w') { free(x); return mpc_alphanum(); } + if (s[1] == 'W') { free(x); return mpc_not_else(mpc_alphanum(), free, mpcf_lift_emptystr); } + if (s[1] == 'Z') { free(x); return mpc_eoi(); } + mpc_parser_t* p = mpc_char(s[1]); - free(s); - return p; + free(x); return p; } else { mpc_parser_t* p = mpc_char(s[0]); - free(s); - return p; + free(x); return p; } } +static char* mpc_re_unescape(char c) { + + if (c == 'a') { 
return "\a"; } + else if (c == 'b') { return "\b"; } + else if (c == 'f') { return "\f"; } + else if (c == 'n') { return "\n"; } + else if (c == 'r') { return "\r"; } + else if (c == 't') { return "\t"; } + else if (c == 'v') { return "\v"; } + else if (c == '0') { return "\0"; } + else { return (char[]){ c, '\0' }; } + +} + static mpc_val_t* mpc_re_range(mpc_val_t* x) { - /* TODO: Implement proper range scanning */ - return mpc_oneof(x); + + char* s = x; + bool comp = false; + + if (*s == '\0') { free(x); return mpc_fail(); } + + if (*s == '^') { + comp = true; + s++; + } + + if (*s == '\0') { free(x); return mpc_fail(); } + + char* range = calloc(1, 1); + + while (*s) { + + /* TODO: Deal Properly with Escape characters */ + if (*s == '\\') { + if (*(s+1) == '\0') { break; } + range = realloc(range, strlen(range) + 2); + strcat(range, (char[]){ *(s+1), '\0' }); + s++; + } + + else if (*s == '-') { + + char start = *(s-1); + char end = *(s+1); + + if (end == '\0') { break; } + if (end < start) { s++; continue; } + + range = realloc(range, strlen(range) + 1 + (end-start)); + + int i; + for (i = 0; i < (end-start); i++) { + strcat(range, (char[]){start+i+1, '\0'}); + } + + s++; + } + + else { + range = realloc(range, strlen(range) + 2); + strcat(range, (char[]){*s, '\0'}); + } + + s++; + } + + mpc_parser_t* p = (comp ? 
mpc_noneof(range) : mpc_oneof(range)); + + free(range); + free(x); + return p; +} + +static mpc_val_t* mpc_re_lift(void) { + return mpc_pass(); } mpc_parser_t* mpc_re(const char* re) { @@ -1157,18 +1429,18 @@ mpc_parser_t* mpc_re(const char* re) { mpc_parser_t* Base = mpc_new(); mpc_parser_t* Range = mpc_new(); - mpc_define(Regex, mpc_either( - mpc_and(3, mpc_re_fold_or, Term, mpc_char('|'), Regex, mpc_delete, free), + mpc_define(Regex, mpc_else( + mpc_and(3, mpc_re_afold_or, Term, mpc_char('|'), Regex, mpc_delete, free), Term )); - mpc_define(Term, mpc_many(Factor, mpc_re_fold_many)); + mpc_define(Term, mpc_many_else(Factor, mpc_re_fold_many, mpc_re_lift)); mpc_define(Factor, mpc_or(5, mpc_also(Base, mpc_char('*'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), mpc_also(Base, mpc_char('+'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), mpc_also(Base, mpc_char('?'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), - mpc_also(Base, mpc_braces(mpc_digits(), free), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), + mpc_also(Base, mpc_brackets(mpc_int(), free), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), Base )); @@ -1181,12 +1453,12 @@ mpc_parser_t* mpc_re(const char* re) { )); mpc_define(Range, mpc_apply( - mpc_many(mpc_either(mpc_escape(), mpc_noneof("]")), mpcf_strfold), + mpc_many_else(mpc_else(mpc_escape(), mpc_noneof("]")), mpcf_strfold, mpcf_lift_emptystr), mpc_re_range )); mpc_result_t r; - bool res = mpc_parse(re, Regex, &r); + bool res = mpc_parse("", re, mpc_ends(Regex, (mpc_dtor_t)mpc_delete), &r); mpc_undefine(Regex); mpc_undefine(Term); @@ -1203,8 +1475,9 @@ mpc_parser_t* mpc_re(const char* re) { if (res) { return r.output; } else { - mpc_err_delete(r.error); - return NULL; + fprintf(stderr, "\nError Compiling Regex: '%s' ", re); + mpc_err_print(r.error); + abort(); } } @@ -1212,6 +1485,17 @@ mpc_parser_t* mpc_re(const char* re) { /* ** Common Fold Functions */ +void mpcf_dtor_null(mpc_val_t* x) { + return; +} + +mpc_val_t* mpcf_lift_null(void) { + return NULL; +} 
+ +mpc_val_t* mpcf_lift_emptystr(void) { + return calloc(1, 1); +} mpc_val_t* mpcf_free(mpc_val_t* x) { free(x); @@ -1239,15 +1523,22 @@ mpc_val_t* mpcf_oct(mpc_val_t* x) { return y; } -static mpc_val_t* mpcf_escape_new(mpc_val_t* x) { - - char* input = (char[]){ +mpc_val_t* mpcf_float(mpc_val_t* x) { + float* y = malloc(sizeof(float)); + *y = strtod(x, NULL); + free(x); + return y; +} + +static char* mpc_escape_input = (char[]){ '\a', '\b', '\f', '\n', '\r', '\t', '\v', '\\', '\'', '\"', '\0'}; - char** output = (char*[]){ +static char** mpc_escape_output = (char*[]){ "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v", "\\\\", "\\'", "\\\"", "\\0"}; + +static mpc_val_t* mpcf_escape_new(mpc_val_t* x) { int i; char* s = x; @@ -1257,9 +1548,9 @@ static mpc_val_t* mpcf_escape_new(mpc_val_t* x) { bool found = false; for (i = 0; i < 11; i++) { - if (*s == input[i]) { - y = realloc(y, strlen(y) + strlen(output[i]) + 1); - strcat(y, output[i]); + if (*s == mpc_escape_input[i]) { + y = realloc(y, strlen(y) + strlen(mpc_escape_output[i]) + 1); + strcat(y, mpc_escape_output[i]); found = true; break; } @@ -1277,12 +1568,51 @@ static mpc_val_t* mpcf_escape_new(mpc_val_t* x) { return y; } +static mpc_val_t* mpcf_unescape_new(mpc_val_t* x) { + + int i; + char* s = x; + char* y = calloc(1, 1); + + while (*s) { + + if (*s == '\\') { + + s++; + if (*s == '\0') { break; } + + for (i = 0; i < 11; i++) { + if (*s == mpc_escape_output[i][1]) { + y = realloc(y, strlen(y) + 2); + strcat(y, (char[]){ mpc_escape_input[i], '\0' }); + break; + } + } + + } else { + y = realloc(y, strlen(y) + 2); + strcat(y, (char[]){ *s, '\0' }); + } + + s++; + } + + return y; + +} + mpc_val_t* mpcf_escape(mpc_val_t* x) { mpc_val_t* y = mpcf_escape_new(x); free(x); return y; } +mpc_val_t* mpcf_unescape(mpc_val_t* x) { + mpc_val_t* y = mpcf_unescape_new(x); + free(x); + return y; +} + mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y) { return x; } @@ -1309,6 +1639,7 @@ mpc_val_t* mpcf_freefold(mpc_val_t* t, 
mpc_val_t* x) { mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x) { if (t == NULL) { return x; } + if (x == NULL) { return t; } t = realloc(t, strlen(t) + strlen(x) + 1); strcat(t, x); @@ -1317,6 +1648,15 @@ mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x) { return t; } +mpc_val_t* mpcf_astrfold(int n, mpc_val_t** xs) { + mpc_val_t* t = NULL; + int i; + for (i = 0; i < n; i++) { + t = mpcf_strfold(t, xs[i]); + } + return t; +} + mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs) { free(xs[0]); free(xs[2]); @@ -1344,13 +1684,16 @@ mpc_val_t* mpcf_maths(int n, mpc_val_t** xs) { static void mpc_print_unretained(mpc_parser_t* p, bool force) { - if (p->retained && !force) { printf(""); return; } + if (p->retained && !force) { printf("

"); return; } if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } if (p->type == MPC_TYPE_PASS) { printf(""); } if (p->type == MPC_TYPE_FAIL) { printf(""); } if (p->type == MPC_TYPE_LIFT) { printf(""); } - if (p->type == MPC_TYPE_EXPECT) { mpc_print_unretained(p->data.expect.x, false); } + if (p->type == MPC_TYPE_EXPECT) { + printf(p->data.expect.m); + /*mpc_print_unretained(p->data.expect.x, false);*/ + } if (p->type == MPC_TYPE_ANY) { printf(""); } if (p->type == MPC_TYPE_SATISFY) { printf("", p->data.satisfy.f); } @@ -1388,16 +1731,17 @@ static void mpc_print_unretained(mpc_parser_t* p, bool force) { } if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, false); } + if (p->type == MPC_TYPE_NOT) { printf("!"); mpc_print_unretained(p->data.not.x, false); } if (p->type == MPC_TYPE_MAYBE) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf(")?"); } if (p->type == MPC_TYPE_MANY) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf(")*"); } if (p->type == MPC_TYPE_MANY1) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf(")+"); } if (p->type == MPC_TYPE_COUNT) { printf("("); mpc_print_unretained(p->data.repeat.x, false); printf("){%i}", p->data.repeat.n); } - if (p->type == MPC_TYPE_EITHER) { + if (p->type == MPC_TYPE_ELSE) { printf("("); - mpc_print_unretained(p->data.either.x, false); + mpc_print_unretained(p->data.orelse.x, false); printf(" | "); - mpc_print_unretained(p->data.either.y, false); + mpc_print_unretained(p->data.orelse.y, false); printf(")"); } @@ -1438,31 +1782,51 @@ void mpc_print(mpc_parser_t* p) { ** Testing */ -bool mpc_test(mpc_parser_t* p, const char* s, void* data, - bool(*tester)(void*, void*), - void(*destructor)(void*), + +bool mpc_unmatch(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, void(*printer)(void*)) { - - printf("== Test ==: '%s'\n", s); - + mpc_result_t r; - if (mpc_parse(s, p, &r)) { + if (mpc_parse("", s, p, &r)) 
{ + + if (tester(r.output, d)) { + destructor(r.output); + return false; + } else { + destructor(r.output); + return true; + } + + } else { + mpc_err_delete(r.error); + return true; + } + +} + +bool mpc_match(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, + void(*printer)(void*)) { + + mpc_result_t r; + if (mpc_parse("", s, p, &r)) { - if (tester(r.output, data)) { - printf("Passed with "); printer(r.output); printf("\n"); + if (tester(r.output, d)) { + /*printf("Passed with "); printer(r.output); printf("\n");*/ destructor(r.output); return true; } else { printf("Failed!\n"); printf("Got "); printer(r.output); printf("\n"); - printf("Expected "); printer(data); printf("\n"); + printf("Expected "); printer(d); printf("\n"); destructor(r.output); return false; } - } else { - - printf("Failed!\n"); + } else { mpc_err_print(r.error); mpc_err_delete(r.error); return false; @@ -1471,3 +1835,321 @@ bool mpc_test(mpc_parser_t* p, const char* s, void* data, } + +/* +** AST +*/ + +void mpc_ast_delete(mpc_ast_t* a) { + + int i; + for (i = 0; i < a->children_num; i++) { + mpc_ast_delete(a->children[i]); + } + + free(a->children); + free(a->contents); + free(a); + +} + +mpc_ast_t* mpc_ast_new(char* contents) { + + mpc_ast_t* a = malloc(sizeof(mpc_ast_t)); + a->tag = 0; + a->contents = calloc(1, 1); + a->children_num = 0; + a->children = NULL; + return a; + +} + +mpc_ast_t* mpc_ast_empty(void) { + return mpc_ast_new(""); +} + +void mpc_ast_add_child(mpc_ast_t* r, mpc_ast_t* a) { + + a->children_num++; + a->children = realloc(a->children, sizeof(mpc_ast_t*) * a->children_num); + a->children[a->children_num-1] = a; + +} + +mpc_ast_t* mpc_ast_tag(mpc_ast_t* a, int t) { + a->tag = t; + return a; +} + +static void mpc_ast_print_depth(mpc_ast_t* a, int d) { + + int i; + for (i = 0; i < d; i++) { printf("\t"); } + + printf("-> %s", a->contents); + + for (i = 0; i < a->children_num; i++) { + mpc_ast_print_depth(a->children[i], d+1); + } 
+ +} + +void mpc_ast_print(mpc_ast_t* a) { + mpc_ast_print_depth(a, 0); +} + +mpc_val_t* mpcf_fold_ast(mpc_val_t* a, mpc_val_t* b) { + + if (a == NULL) { return b; } + if (b == NULL) { return a; } + + mpc_ast_t* r = mpc_ast_empty(); + mpc_ast_add_child(r, a); + mpc_ast_add_child(r, b); + return r; +} + +mpc_val_t* mpcf_afold_ast(int n, mpc_val_t** as) { + + mpc_val_t* t = NULL; + int i; + for (i = 0; i < n; i++) { + t = mpcf_fold_ast(t, as[i]); + } + + return t; +} + +mpc_val_t* mpcf_apply_str_ast(mpc_val_t* c) { + mpc_ast_t* a = mpc_ast_new(c); + free(c); + return a; +} + +mpc_val_t* mpcf_lift_ast(void) { + return mpc_ast_empty(); +} + +mpc_parser_t* mpc_ast(mpc_parser_t* a) { + return mpc_apply(a, mpcf_apply_str_ast); +} + +mpc_parser_t* mpca_not(mpc_parser_t* a) { return mpc_not_else(a, (mpc_dtor_t)mpc_ast_delete, mpcf_lift_ast); } +mpc_parser_t* mpca_maybe(mpc_parser_t* a) { return mpc_maybe_else(a, mpcf_lift_ast); } +mpc_parser_t* mpca_many(mpc_parser_t* a) { return mpc_many_else(a, mpcf_fold_ast, mpcf_lift_ast); } +mpc_parser_t* mpca_many1(mpc_parser_t* a) { return mpc_many1(a, mpcf_fold_ast); } +mpc_parser_t* mpca_count(mpc_parser_t* a, int n) { return mpc_count_else(a, (mpc_dtor_t)mpc_ast_delete, mpcf_fold_ast, n, mpcf_lift_ast); } +mpc_parser_t* mpca_else(mpc_parser_t* a, mpc_parser_t* b) { return mpc_else(a, b); } +mpc_parser_t* mpca_also(mpc_parser_t* a, mpc_parser_t* b) { return mpc_also(a, b, (mpc_dtor_t)mpc_ast_delete, mpcf_fold_ast); } +mpc_parser_t* mpca_bind(mpc_parser_t* a, mpc_parser_t* b) { return mpca_also(a, b); } + +mpc_parser_t* mpca_or(int n, ...) { + va_list va; + va_start(va, n); + mpc_parser_t* p = mpc_ast(mpc_or_va(n, va)); + va_end(va); + return p; +} + +mpc_parser_t* mpca_and(int n, ...) 
{ + + va_list va; + va_start(va, n); + + mpc_parser_t* p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = mpcf_afold_ast; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + int i; + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; + } + + va_end(va); + + return mpc_ast(p); +} + +mpc_parser_t* mpca_ends(mpc_parser_t* a) { return mpc_ends(a, (mpc_dtor_t)mpc_ast_delete); } + +/* +** Grammar Parser +*/ + +/* +** This is another interesting bootstrapping. +** +** Having a general purpose AST type allows +** users to specify the grammar alone and +** let all fold rules be automatically taken +** care of by existing functions. +** +** You don't get to control the type spat +** out but this means you can make a nice +** parser to take in some grammar in nice +** syntax and spit out a parser that works. +** +** The grammar for this looks surprisingly +** like regex but the main difference is that +** it is now whitespace insensitive and the +** base type takes literals of some form. +*/ + +/* +** +** ### Grammar Grammar +** +** : ( "|" ) | +** +** : * +** +** : +** | "*" +** | "+" +** | "?" 
+** | "{" "}" +** +** : "<" ">" +** | +** | +** | +** | "(" ")" +*/ + +static mpc_val_t* mpca_grammar_afold_or(int n, mpc_val_t** xs) { + free(xs[1]); + return mpca_else(xs[0], xs[2]); +} + +static mpc_val_t* mpc_grammar_fold_many(mpc_val_t* x, mpc_val_t* y) { + if (x == NULL) { return y; } + if (y == NULL) { return x; } + return mpca_also(x, y); +} + +static mpc_val_t* mpca_grammar_lift(void) { + return mpc_lift(mpcf_lift_ast); +} + +static mpc_val_t* mpca_grammar_fold_repeat(mpc_val_t* x, mpc_val_t* y) { + + printf("Got Repeat '%s'\n", (char*)y); + + if (strcmp(y, "*") == 0) { free(y); return mpca_many(x); } + if (strcmp(y, "+") == 0) { free(y); return mpca_many1(x); } + if (strcmp(y, "?") == 0) { free(y); return mpca_maybe(x); } + int n = *((int*)y); + free(y); + return mpca_count(x, n); +} + +static mpc_val_t* mpc_grammar_apply_string(mpc_val_t* x) { + mpc_parser_t* p = mpc_ast(mpc_string(mpcf_unescape(x))); + free(x); + return p; +} + +static mpc_val_t* mpc_grammar_apply_char(mpc_val_t* x) { + mpc_parser_t* p = mpc_ast(mpc_char(*(char*)mpcf_unescape(x))); + free(x); + return p; +} + +static mpc_val_t* mpc_grammar_apply_regex(mpc_val_t* x) { + /* TODO: Unescape Regex */ + mpc_parser_t* p = mpc_ast(mpc_re(x)); + free(x); + return p; +} + +typedef struct { + va_list* va; + int parsers_num; + mpc_parser_t** parsers; +} mpc_grammar_st_t; + +static mpc_val_t* mpc_grammar_apply_id(mpc_val_t* x, void* y) { + int i = *((int*)x); + mpc_grammar_st_t* st = y; + + while (st->parsers_num <= i) { + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); + } + + return st->parsers[i]; +} + +static void mpc_soft_delete(mpc_val_t* x) { + mpc_undefine_unretained(x, false); +} + +mpc_parser_t* mpca_grammar(const char* grammar, ...) 
{ + + mpc_parser_t* Grammar = mpc_new(); + mpc_parser_t* Term = mpc_new(); + mpc_parser_t* Factor = mpc_new(); + mpc_parser_t* Base = mpc_new(); + + mpc_define(Grammar, mpc_else( + mpc_and(3, mpca_grammar_afold_or, Term, mpc_sym("|"), Grammar, mpc_soft_delete, free), + Term + )); + + mpc_define(Term, mpc_many_else(Factor, mpc_grammar_fold_many, mpca_grammar_lift)); + + mpc_define(Factor, mpc_or(5, + mpc_also(Base, mpc_sym("*"), mpc_soft_delete, mpca_grammar_fold_repeat), + mpc_also(Base, mpc_sym("+"), mpc_soft_delete, mpca_grammar_fold_repeat), + mpc_also(Base, mpc_sym("?"), mpc_soft_delete, mpca_grammar_fold_repeat), + mpc_also(Base, mpc_tok(mpc_brackets(mpc_int(), free)), mpc_soft_delete, mpca_grammar_fold_repeat), + Base + )); + + va_list va; + va_start(va, grammar); + + mpc_grammar_st_t st = { &va, 0, NULL }; + + mpc_define(Base, mpc_or(5, + mpc_apply(mpc_tok(mpc_string_lit()), mpc_grammar_apply_string), + mpc_apply(mpc_tok(mpc_char_lit()), mpc_grammar_apply_char), + mpc_apply(mpc_tok(mpc_regex_lit()), mpc_grammar_apply_regex), + mpc_apply_to(mpc_tok(mpc_braces(mpc_int(), free)), mpc_grammar_apply_id, &st), + mpc_tok(mpc_parens(Grammar, mpc_soft_delete)) + )); + + mpc_result_t r; + bool res = mpc_parse("", grammar, mpc_ends(Grammar, mpc_soft_delete), &r); + + free(st.parsers); + va_end(va); + + mpc_undefine(Grammar); + mpc_undefine(Term); + mpc_undefine(Factor); + mpc_undefine(Base); + + mpc_delete(Grammar); + mpc_delete(Term); + mpc_delete(Factor); + mpc_delete(Base); + + if (res) { + return r.output; + } else { + fprintf(stderr, "\nError Compiling Grammar: '%s' ", grammar); + mpc_err_print(r.error); + abort(); + } + +} + diff --git a/mpc.h b/mpc.h index a37b6a6..13d3bdc 100644 --- a/mpc.h +++ b/mpc.h @@ -22,6 +22,7 @@ int mpc_err_line(mpc_err_t* x); int mpc_err_column(mpc_err_t* x); char mpc_err_unexpected(mpc_err_t* x); char** mpc_err_expected(mpc_err_t* x, int* num); +char* mpc_err_filename(mpc_err_t* x); void mpc_err_delete(mpc_err_t* x); void 
mpc_err_print(mpc_err_t* x); @@ -42,10 +43,21 @@ typedef union { struct mpc_parser_t; typedef struct mpc_parser_t mpc_parser_t; -bool mpc_parse(const char* s, mpc_parser_t* p, mpc_result_t* r); -bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* r); +bool mpc_parse(const char* filename, const char* s, mpc_parser_t* p, mpc_result_t* r); +bool mpc_parse_file(const char* filename, FILE* f, mpc_parser_t* p, mpc_result_t* r); bool mpc_parse_filename(const char* filename, mpc_parser_t* p, mpc_result_t* r); +/* +** Function Types +*/ + +typedef void(*mpc_dtor_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +typedef mpc_val_t*(*mpc_fold_t)(mpc_val_t*,mpc_val_t*); +typedef mpc_val_t*(*mpc_afold_t)(int,mpc_val_t**); +typedef mpc_val_t*(*mpc_lift_t)(void); + /* ** Building a Parser */ @@ -56,11 +68,11 @@ mpc_parser_t* mpc_new(void); mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a); mpc_parser_t* mpc_undefine(mpc_parser_t* p); -mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); - mpc_parser_t* mpc_pass(void); mpc_parser_t* mpc_fail(void); -mpc_parser_t* mpc_lift(mpc_val_t* x); +mpc_parser_t* mpc_lift(mpc_lift_t f); +mpc_parser_t* mpc_lift_val(mpc_val_t* x); +mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); /* ** Basic Parsers @@ -74,31 +86,28 @@ mpc_parser_t* mpc_noneof(const char* s); mpc_parser_t* mpc_satisfy(bool(*f)(char)); mpc_parser_t* mpc_string(const char* s); -/* -** Function Types -*/ - -typedef void (*mpc_dtor_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_fold_t)(mpc_val_t*,mpc_val_t*); -typedef mpc_val_t*(*mpc_afold_t)(int,mpc_val_t**); - -void mpc_dtor_null(mpc_val_t* x); - /* ** Core Parsers */ mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f); +mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x); +mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da); +mpc_parser_t* 
mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf); mpc_parser_t* mpc_maybe(mpc_parser_t* a); +mpc_parser_t* mpc_maybe_else(mpc_parser_t* a, mpc_lift_t lf); mpc_parser_t* mpc_many(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_many_else(mpc_parser_t* a, mpc_fold_t f, mpc_lift_t lf); mpc_parser_t* mpc_many1(mpc_parser_t* a, mpc_fold_t f); mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n); -mpc_parser_t* mpc_either(mpc_parser_t* a, mpc_parser_t* b); +mpc_parser_t* mpc_count_else(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n, mpc_lift_t lf); +mpc_parser_t* mpc_else(mpc_parser_t* a, mpc_parser_t* b); mpc_parser_t* mpc_also(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); mpc_parser_t* mpc_bind(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); mpc_parser_t* mpc_or(int n, ...); mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...); +mpc_parser_t* mpc_or_va(int n, va_list va); +mpc_parser_t* mpc_and_va(int n, mpc_afold_t f, va_list va); /* ** Common Parsers @@ -124,12 +133,14 @@ mpc_parser_t* mpc_lower(void); mpc_parser_t* mpc_upper(void); mpc_parser_t* mpc_alpha(void); mpc_parser_t* mpc_underscore(void); +mpc_parser_t* mpc_alphanum(void); mpc_parser_t* mpc_int(void); mpc_parser_t* mpc_hex(void); mpc_parser_t* mpc_oct(void); mpc_parser_t* mpc_number(void); +mpc_parser_t* mpc_real(void); mpc_parser_t* mpc_float(void); mpc_parser_t* mpc_semi(void); @@ -168,11 +179,17 @@ mpc_parser_t* mpc_re(const char* re); ** Common Fold Functions */ +void mpcf_dtor_null(mpc_val_t* x); +mpc_val_t* mpcf_lift_null(void); +mpc_val_t* mpcf_lift_emptystr(void); + mpc_val_t* mpcf_free(mpc_val_t* x); mpc_val_t* mpcf_int(mpc_val_t* x); mpc_val_t* mpcf_hex(mpc_val_t* x); mpc_val_t* mpcf_oct(mpc_val_t* x); +mpc_val_t* mpcf_float(mpc_val_t* x); mpc_val_t* mpcf_escape(mpc_val_t* x); +mpc_val_t* mpcf_unescape(mpc_val_t* x); mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y); mpc_val_t* mpcf_snd(mpc_val_t* x, mpc_val_t* y); @@ -183,6 
+200,7 @@ mpc_val_t* mpcf_snd_free(mpc_val_t* x, mpc_val_t* y); mpc_val_t* mpcf_freefold(mpc_val_t* t, mpc_val_t* x); mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x); +mpc_val_t* mpcf_astrfold(int n, mpc_val_t** xs); mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs); mpc_val_t* mpcf_maths(int n, mpc_val_t** xs); @@ -193,13 +211,59 @@ mpc_val_t* mpcf_maths(int n, mpc_val_t** xs); void mpc_print(mpc_parser_t* p); + /* ** Testing */ -bool mpc_test(mpc_parser_t* p, const char* input, void* data, - bool(*tester)(void*, void*), - void(*destructor)(void*), +bool mpc_unmatch(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, void(*printer)(void*)); +bool mpc_match(mpc_parser_t* p, const char* s, void* d, + bool(*tester)(void*, void*), + mpc_dtor_t destructor, + void(*printer)(void*)); + + +/* +** AST +*/ + +typedef struct mpc_ast_t { + int tag; + char* contents; + int children_num; + struct mpc_ast_t** children; +} mpc_ast_t; + +void mpc_ast_delete(mpc_ast_t* a); +mpc_ast_t* mpc_ast_empty(void); +mpc_ast_t* mpc_ast_new(char* contents); + +void mpc_ast_add_child(mpc_ast_t* r, mpc_ast_t* a); +mpc_ast_t* mpc_ast_tag(mpc_ast_t* a, int t); +void mpc_ast_print(mpc_ast_t* a); + +mpc_val_t* mpcf_fold_ast(mpc_val_t* a, mpc_val_t* b); +mpc_val_t* mpcf_afold_ast(int n, mpc_val_t** as); +mpc_val_t* mpcf_apply_str_ast(mpc_val_t* c); +mpc_val_t* mpcf_lift_ast(void); + +mpc_parser_t* mpc_ast(mpc_parser_t* a); + +mpc_parser_t* mpca_not(mpc_parser_t* a); +mpc_parser_t* mpca_maybe(mpc_parser_t* a); +mpc_parser_t* mpca_many(mpc_parser_t* a); +mpc_parser_t* mpca_many1(mpc_parser_t* a); +mpc_parser_t* mpca_count(mpc_parser_t* a, int n); +mpc_parser_t* mpca_else(mpc_parser_t* a, mpc_parser_t* b); +mpc_parser_t* mpca_also(mpc_parser_t* a, mpc_parser_t* b); +mpc_parser_t* mpca_bind(mpc_parser_t* a, mpc_parser_t* b); +mpc_parser_t* mpca_or(int n, ...); +mpc_parser_t* mpca_and(int n, ...); +mpc_parser_t* mpca_ends(mpc_parser_t* a); +mpc_parser_t* 
mpca_grammar(const char* grammar, ...); + #endif \ No newline at end of file diff --git a/tests/core.c b/tests/core.c new file mode 100644 index 0000000..9fdf062 --- /dev/null +++ b/tests/core.c @@ -0,0 +1,81 @@ +#include "ptest.h" +#include "../mpc.h" + +#include +#include + +static bool int_eq(void* x, void* y) { return (*(int*)x == *(int*)y); } +static void int_print(void* x) { printf("'%i'", *((int*)x)); } +static bool string_eq(void* x, void* y) { return (strcmp(x, y) == 0); } +static void string_print(void* x) { printf("'%s'", (char*)x); } + +void test_ident(void) { + + /* ^[a-zA-Z_][a-zA-Z0-9_]*$ */ + + mpc_parser_t* Ident = mpc_ends( + mpc_also( + mpc_else(mpc_alpha(), mpc_underscore()), + mpc_many1(mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit()), mpcf_strfold), + free, mpcf_strfold), + free + ); + + PT_ASSERT(mpc_match(Ident, "test", "test", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(Ident, " blah", "", string_eq, free, string_print)); + PT_ASSERT(mpc_match(Ident, "anoth21er", "anoth21er", string_eq, free, string_print)); + PT_ASSERT(mpc_match(Ident, "du__de", "du__de", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(Ident, "some spaces", "", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(Ident, "", "", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(Ident, "18nums", "", string_eq, free, string_print)); + + mpc_delete(Ident); + +} + +void test_maths(void) { + + mpc_parser_t* Expr = mpc_new(); + mpc_parser_t* Factor = mpc_new(); + mpc_parser_t* Term = mpc_new(); + mpc_parser_t* Maths = mpc_new(); + + mpc_define(Expr, mpc_else( + mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), + Factor + )); + + mpc_define(Factor, mpc_else( + mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free), + Term + )); + + mpc_define(Term, mpc_else( + mpc_int(), + mpc_parens(Expr, free) + )); + + mpc_define(Maths, mpc_ends(Expr, free)); + + PT_ASSERT(mpc_match(Maths, "1", (int[]){ 1 }, int_eq, free, 
int_print)); + PT_ASSERT(mpc_match(Maths, "(5)", (int[]){ 5 }, int_eq, free, int_print)); + PT_ASSERT(mpc_match(Maths, "(4*2)+5", (int[]){ 13 }, int_eq, free, int_print)); + PT_ASSERT(mpc_unmatch(Maths, "a", (int[]){ 0 }, int_eq, free, int_print)); + PT_ASSERT(mpc_unmatch(Maths, "2b+4", (int[]){ 2 }, int_eq, free, int_print)); + + mpc_undefine(Expr); + mpc_undefine(Factor); + mpc_undefine(Term); + mpc_undefine(Maths); + + mpc_delete(Expr); + mpc_delete(Factor); + mpc_delete(Term); + mpc_delete(Maths); + +} + +void suite_core(void) { + pt_add_test(test_ident, "Test Ident", "Suite Core"); + pt_add_test(test_maths, "Test Maths", "Suite Core"); +} diff --git a/tests/grammar.c b/tests/grammar.c new file mode 100644 index 0000000..d49d733 --- /dev/null +++ b/tests/grammar.c @@ -0,0 +1,56 @@ +#include "ptest.h" +#include "../mpc.h" + +bool ast_eq(void* x, void* y) { + return false; +} + +void test_grammar(void) { + + mpc_parser_t* Test = mpc_new(); + + mpc_define(Test, mpca_grammar("'c'*")); + + mpc_print(Test); + + mpc_undefine(Test); + + mpc_delete(Test); + + mpc_parser_t* Expression = mpc_new(); + mpc_parser_t* Product = mpc_new(); + mpc_parser_t* Value = mpc_new(); + + mpc_define(Expression, mpca_grammar("<0> (('+' | '-') <0>)*", Product)); + mpc_define(Product, mpca_grammar("<0> (('*' | '/') <0>)*", Value)); + mpc_define(Value, mpca_grammar("/[0-9]/ | '(' <0> ')'", Expression)); + + mpc_print(Expression); + mpc_print(Product); + mpc_print(Value); + + mpc_ast_t* empty = mpc_ast_empty(); + + /* + PT_ASSERT(mpc_match(Expression, "1", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_match(Expression, "(5)", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_match(Expression, "(4*2)+5", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + PT_ASSERT(mpc_match(Expression, "a", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + 
PT_ASSERT(mpc_match(Expression, "2b+4", empty, ast_eq, (mpc_dtor_t)mpc_ast_delete, (void(*)(void*))mpc_ast_print)); + */ + + mpc_ast_delete(empty); + + mpc_undefine(Expression); + mpc_undefine(Product); + mpc_undefine(Value); + + mpc_delete(Expression); + mpc_delete(Product); + mpc_delete(Value); + +} + +void suite_grammar(void) { + pt_add_test(test_grammar, "Test Grammar", "Suite Grammar"); +} \ No newline at end of file diff --git a/tests/ptest.c b/tests/ptest.c new file mode 100644 index 0000000..61d2d8a --- /dev/null +++ b/tests/ptest.c @@ -0,0 +1,301 @@ +#include "ptest.h" + +#include +#include +#include +#include +#include + +/* Globals */ + +#define MAX_NAME 512 +#define MAX_ERROR 2048 +#define MAX_TESTS 2048 + +static bool test_passing = false; +static bool suite_passing = false; + +/* Colors */ + +enum { + BLACK = 0x0, + BLUE = 0x1, + GREEN = 0x2, + AQUA = 0x3, + RED = 0x4, + PURPLE = 0x5, + YELLOW = 0x6, + WHITE = 0x7, + GRAY = 0x8, + LIGHT_BLUE = 0x9, + LIGHT_GREEN = 0xA, + LIGHT_AQUA = 0xB, + LIGHT_RED = 0xC, + LIGHT_PURPLE = 0xD, + LIGHT_YELLOW = 0xE, + LIGHT_WHITE = 0xF, +}; + +#ifdef _WIN32 + + #include + + static void pt_color(int color) { + HANDLE hCon = GetStdHandle(STD_OUTPUT_HANDLE); + SetConsoleTextAttribute(hCon, color); + } + +#else + +static const char* colors[] = { + "\x1B[0m", + "\x1B[34m", + "\x1B[32m", + "\x1B[36m", + "\x1B[31m", + "\x1B[35m", + "\x1B[33m", + "\x1B[37m", + "", + "\x1B[34m", + "\x1B[32m", + "\x1B[36m", + "\x1B[31m", + "\x1B[35m", + "\x1B[33m", + "\x1B[37m" +}; + + static void pt_color(int color) { + + printf("%s", colors[color]); + + } + +#endif + +/* Asserts */ + +static int num_asserts = 0; +static int num_assert_passes = 0; +static int num_assert_fails = 0; + +static char assert_err[MAX_ERROR]; +static char assert_err_buff[MAX_ERROR]; +static int assert_err_num = 0; + +void pt_assert_run(bool result, const char* expr, const char* func, const char* file, int line) { + + num_asserts++; + test_passing = test_passing && 
result; + + if (result) { + num_assert_passes++; + } else { + sprintf(assert_err_buff, " %i. Assert [ %s ] (%s:%i)\n", assert_err_num+1, expr, file, line ); + strcat(assert_err, assert_err_buff); + assert_err_num++; + num_assert_fails++; + } + +} + +static void ptest_signal(int sig) { + + test_passing = false; + + switch( sig ) { + case SIGFPE: sprintf(assert_err_buff, " %i. Division by Zero\n", assert_err_num+1); break; + case SIGILL: sprintf(assert_err_buff, " %i. Illegal Instruction\n", assert_err_num+1); break; + case SIGSEGV: sprintf(assert_err_buff, " %i. Segmentation Fault\n", assert_err_num+1); break; + } + + assert_err_num++; + strcat(assert_err, assert_err_buff); + + pt_color(RED); printf("Failed! \n\n%s\n", assert_err); pt_color(WHITE); + + printf(" | Stopping Execution.\n"); + fflush(stdout); + exit(0); + +} + +/* Tests */ + +static void pt_title_case(char* output, const char* input) { + + bool space = true; + + strcpy(output, input); + + unsigned int i; + for(i = 0; i < strlen(output); i++) { + + if (output[i] == '_' || output[i] == ' ') { + space = true; + output[i] = ' '; + continue; + } + + if (space && output[i] >= 'a' && output[i] <= 'z') { + output[i] = output[i] - 32; + continue; + } + + space = false; + } + +} + +typedef struct { + void (*func)(void); + char name[MAX_NAME]; + char suite[MAX_NAME]; +} test_t; + +static test_t tests[MAX_TESTS]; + +static int num_tests = 0; +static int num_tests_passes = 0; +static int num_tests_fails = 0; + +void pt_add_test(void (*func)(void), const char* name, const char* suite) { + + if (num_tests == MAX_TESTS) { + printf("ERROR: Exceeded maximum test count of %i!\n", MAX_TESTS); abort(); + } + + if (strlen(name) >= MAX_NAME) { + printf("ERROR: Test name '%s' too long (Maximum is %i characters)\n", name, MAX_NAME); abort(); + } + + if (strlen(suite) >= MAX_NAME) { + printf("ERROR: Test suite '%s' too long (Maximum is %i characters)\n", suite, MAX_NAME); abort(); + } + + test_t test; + test.func = func; + 
pt_title_case(test.name, name); + pt_title_case(test.suite, suite); + + tests[num_tests] = test; + num_tests++; + +} + +/* Suites */ + +static int num_suites = 0; +static int num_suites_passes = 0; +static int num_suites_fails = 0; + +void pt_add_suite(void (*func)(void)) { + num_suites++; + func(); +} + +/* Running */ + +static clock_t start, end; +static char current_suite[MAX_NAME]; + +int pt_run(void) { + + printf(" \n"); + printf(" +-------------------------------------------+\n"); + printf(" | ptest MicroTesting Magic for C |\n"); + printf(" | |\n"); + printf(" | http://github.com/orangeduck/ptest |\n"); + printf(" | |\n"); + printf(" | Daniel Holden (contact@theorangeduck.com) |\n"); + printf(" +-------------------------------------------+\n"); + + signal(SIGFPE, ptest_signal); + signal(SIGILL, ptest_signal); + signal(SIGSEGV, ptest_signal); + + start = clock(); + strcpy(current_suite, ""); + + unsigned int i; + for(i = 0; i < num_tests; i++) { + + test_t test = tests[i]; + + /* Check for transition to a new suite */ + if (strcmp(test.suite, current_suite)) { + + /* Don't increment any counter for first entrance */ + if (strcmp(current_suite, "")) { + if (suite_passing) { + num_suites_passes++; + } else { + num_suites_fails++; + } + } + + suite_passing = true; + strcpy(current_suite, test.suite); + printf("\n\n ===== %s =====\n\n", current_suite); + } + + /* Run Test */ + + test_passing = true; + strcpy(assert_err, ""); + strcpy(assert_err_buff, ""); + assert_err_num = 0; + printf(" | %s ... ", test.name); + + test.func(); + + suite_passing = suite_passing && test_passing; + + if (test_passing) { + num_tests_passes++; + pt_color(GREEN); printf("Passed! \n"); pt_color(WHITE); + } else { + num_tests_fails++; + pt_color(RED); printf("Failed! 
\n\n%s\n", assert_err); pt_color(WHITE); + } + + } + + if (suite_passing) { + num_suites_passes++; + } else { + num_suites_fails++; + } + + end = clock(); + + printf(" \n"); + printf(" +---------------------------------------------------+\n"); + printf(" | Summary |\n"); + printf(" +---------++------------+-------------+-------------+\n"); + + printf(" | Suites ||"); + pt_color(YELLOW); printf(" Total %4d ", num_suites); pt_color(WHITE); printf("|"); + pt_color(GREEN); printf(" Passed %4d ", num_suites_passes); pt_color(WHITE); printf("|"); + pt_color(RED); printf(" Failed %4d ", num_suites_fails); pt_color(WHITE); printf("|\n"); + + printf(" | Tests ||"); + pt_color(YELLOW); printf(" Total %4d ", num_tests); pt_color(WHITE); printf("|"); + pt_color(GREEN); printf(" Passed %4d ", num_tests_passes); pt_color(WHITE); printf("|"); + pt_color(RED); printf(" Failed %4d ", num_tests_fails); pt_color(WHITE); printf("|\n"); + + printf(" | Asserts ||"); + pt_color(YELLOW); printf(" Total %4d ", num_asserts); pt_color(WHITE); printf("|"); + pt_color(GREEN); printf(" Passed %4d ", num_assert_passes); pt_color(WHITE); printf("|"); + pt_color(RED); printf(" Failed %4d ", num_assert_fails); pt_color(WHITE); printf("|\n"); + + printf(" +---------++------------+-------------+-------------+\n"); + printf(" \n"); + + double total = (double)(end - start) / CLOCKS_PER_SEC; + + printf(" Total Running Time: %0.3fs\n\n", total); + + if (num_suites_fails > 0) { return 1; } else { return 0; } +} diff --git a/tests/ptest.h b/tests/ptest.h new file mode 100644 index 0000000..27fde9c --- /dev/null +++ b/tests/ptest.h @@ -0,0 +1,19 @@ +#ifndef ptest_h +#define ptest_h + +#include +#include + +#define PT_SUITE(name) void name(void) +#define PT_TEST(name) auto void name(void); pt_add_test(name, #name, __func__); void name(void) + +#define PT_ASSERT(expr) pt_assert_run((bool)(expr), #expr, __func__, __FILE__, __LINE__) +#define PT_ASSERT_STR_EQ(fst, snd) pt_assert_run(strcmp(fst, snd) == 0, 
"strcmp( " #fst ", " #snd " ) == 0", __func__, __FILE__, __LINE__) + +void pt_assert_run(bool result, const char* expr, const char* func, const char* file, int line); + +void pt_add_test(void (*func)(void), const char* name, const char* suite); +void pt_add_suite(void (*func)(void)); +int pt_run(void); + +#endif \ No newline at end of file diff --git a/tests/regex.c b/tests/regex.c index a700099..8288e5b 100644 --- a/tests/regex.c +++ b/tests/regex.c @@ -1,27 +1,29 @@ +#include "ptest.h" #include "../mpc.h" #include +#include -/* static bool string_eq(void* x, void* y) { return (strcmp(x, y) == 0); } static void string_print(void* x) { printf("'%s'", (char*)x); } -*/ -bool suite_regex(void) { - +void test_regex_basic(void) { + mpc_parser_t* re0 = mpc_re("abc|bcd"); mpc_parser_t* re1 = mpc_re("abc|bcd|e"); - mpc_parser_t* re2 = mpc_re("abc(ab)*"); + mpc_parser_t* re2 = mpc_re("ab()c(ab)*"); mpc_parser_t* re3 = mpc_re("abc(abdd)?"); mpc_parser_t* re4 = mpc_re("ab|c(abdd)?"); mpc_parser_t* re5 = mpc_re("abc(ab|dd)+g$"); - mpc_print(re0); - mpc_print(re1); - mpc_print(re2); - mpc_print(re3); - mpc_print(re4); - mpc_print(re5); + PT_ASSERT(mpc_match(re0, "abc", "abc", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re0, "bcd", "bcd", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(re0, "bc", "bc", string_eq, free, string_print)); + PT_ASSERT(mpc_unmatch(re0, "ab", "ab", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re1, "e", "e", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re2, "abc", "abc", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re2, "abcabab", "abcabab", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re2, "abcababd", "abcabab", string_eq, free, string_print)); mpc_delete(re0); mpc_delete(re1); @@ -29,6 +31,29 @@ bool suite_regex(void) { mpc_delete(re3); mpc_delete(re4); mpc_delete(re5); + +} + +void test_regex_range(void) { + + mpc_parser_t* re0 = mpc_re("abg[abcdef]"); + mpc_parser_t* re1 = 
mpc_re("y*[a-z]"); + mpc_parser_t* re2 = mpc_re("zz(p+)?[A-Z_0\\]123]*"); + mpc_parser_t* re3 = mpc_re("[^56hy].*$"); + + mpc_print(re0); + mpc_print(re1); + mpc_print(re2); + mpc_print(re3); - return true; + mpc_delete(re0); + mpc_delete(re1); + mpc_delete(re2); + mpc_delete(re3); + +} + +void suite_regex(void) { + pt_add_test(test_regex_basic, "Test Regex Basic", "Suite Regex"); + pt_add_test(test_regex_range, "Test Regex Range", "Suite Regex"); } \ No newline at end of file diff --git a/tests/test.c b/tests/test.c index 3346f7f..39015ba 100644 --- a/tests/test.c +++ b/tests/test.c @@ -1,15 +1,12 @@ -#include +#include "ptest.h" -bool suite_ident(void); -bool suite_math(void); -bool suite_regex(void); +void suite_core(void); +void suite_regex(void); +void suite_grammar(void); int main(int argc, char** argv) { - - suite_ident(); - suite_math(); - suite_regex(); - - return 0; - + pt_add_suite(suite_core); + pt_add_suite(suite_regex); + pt_add_suite(suite_grammar); + return pt_run(); } \ No newline at end of file