commit c2f936bcb7e61bd1605c4b2e3ef3bcd5124b181b Author: Daniel Holden Date: Thu Sep 19 20:57:40 2013 +0100 First commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9bb4877 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*~ +*.exe diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0517255 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +CC = gcc +CFLAGS = -ansi -Wall -Werror -Wno-unused -g + +TESTS = $(wildcard tests/*.c) + +all: check + +check: $(TESTS) mpc.c + $(CC) $(CFLAGS) $^ -o test + ./test + +clean: + rm test.exe \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..58fbdd2 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +Micro Parser Combinators +======================== + +_mpc_ is a lightweight Parser Combinator library for C. + +The current main alternative is a branch of (https://github.com/wbhart/Cesium3)[Cesium3]. + +Features & Advantages +--------------------- + +* Error Message Support +* Regular Expression Support +* Parser Grammar Support +* Works for Generic Types +* AST Extension +* Single source & header files +* Written in clean ANSI C +* Doesn't rely on Boehm-Demers-Weiser Garbage Collection +* Doesn't use `setjmp` and `longjmp` for errors +* Doesn't pollute namespace + +Example +------- + +```c + +mpc_val_t* combine_maths(int n, mpc_val_t** xs) { + + int** vs = (int**)xs; + + if (*vs[1] == '*') { *vs[0] *= *vs[2]; } + if (*vs[1] == '/') { *vs[0] /= *vs[2]; } + if (*vs[1] == '+') { *vs[0] += *vs[2]; } + if (*vs[1] == '-') { *vs[0] -= *vs[2]; } + + free(vs[1]); + free(vs[2]); + + return vs[0]; +} + +int main(int argc, char** argv) { + + mpc_parser_t* Expr = mpc_new(); + mpc_parser_t* Factor = mpc_new(); + mpc_parser_t* Term = mpc_new(); + mpc_parser_t* Maths = mpc_new(); + + mpc_define(Expr, + mpc_pc("cmaths ( fact ['*' | '/'] fact ) | fact", + combine_maths, Factor, free, Factor, free, Factor), + ); + + mpc_define(Factor, + mpc_pc("cmaths ( term ['+' | '-'] term ) | term", + combine_maths, Term, free, Term, free, Term), + ); + + mpc_define(Term, + mpc_pc("num | snd ('(' expr ')')", + mpc_int(), mpcf_asnd_free, Expr, free) + ); + + mpc_define(Maths, mpc_ends(Expr, free)); + + mpc_delete(Expr); + mpc_delete(Factor); + mpc_delete(Term); + mpc_delete(Maths); + +} +``` + +Parsers +------- + + +Combinators +----------- + + +Regular Expressions +------------------- + + +Combinator Grammars +------------------- + + +Abstract Syntax Tree +-------------------- + + +Reference +--------- + + + + + + diff --git a/mpc.c b/mpc.c new file mode 100644 index 0000000..dea16dd --- /dev/null +++ b/mpc.c @@ -0,0 +1,1422 @@ +#include "mpc.h" + +#include +#include +#include + +/* +** State Type +*/ + +typedef struct { + char last; + char next; + int pos; + int row; + int col; +} mpc_state_t; + +/* +** Error Type +*/ + +struct mpc_err_t { + mpc_state_t state; + int expected_num; + char** expected; +}; + +static mpc_err_t* mpc_err_new(mpc_state_t s, char* expected) { + mpc_err_t* x = malloc(sizeof(mpc_err_t)); + x->state = s; + x->expected_num = 1; + x->expected = malloc(sizeof(char*)); + x->expected[0] = malloc(strlen(expected) + 1); + strcpy(x->expected[0], expected); + return x; +} + +static bool mpc_err_contains_expected(mpc_err_t* x, char* expected) { + + int i; + for (i = 0; i < x->expected_num; i++) { + if (strcmp(x->expected[i], expected) == 0) { return true; } + } + + return false; + +} + +static void mpc_err_add_expected(mpc_err_t* x, char* expected) { + + x->expected_num++; + x->expected = realloc(x->expected, sizeof(char*) * x->expected_num); + x->expected[x->expected_num-1] = malloc(strlen(expected) + 1); + strcpy(x->expected[x->expected_num-1], expected); + +} + +void mpc_err_delete(mpc_err_t* x) { + + int i; + for (i = 0; i < x->expected_num; i++) { + free(x->expected[i]); + } + + free(x->expected); + free(x); +} + +void mpc_err_print(mpc_err_t* x) { + mpc_err_print_to(x, stdout); +} + +void mpc_err_print_to(mpc_err_t* x, FILE* f) { + + fprintf(f, ":%i:%i: error: expected ", x->state.row, x->state.col); + + if (x->expected_num == 0) { + + fprintf(f, "ERROR: NOTHING EXPECTED"); + + } else if (x->expected_num == 1) { + + fprintf(f, "%s", x->expected[0]); + + } else { + + int i; + for (i = 0; i < x->expected_num-2; i++) { + fprintf(f, "%s, ", x->expected[i]); + } + + fprintf(f, "%s or %s", + x->expected[x->expected_num-2], + x->expected[x->expected_num-1]); + + } + + printf(" at '%c'\n", x->state.next); + +} + +void mpc_err_msg(mpc_err_t* x, char* out, int* outn, int outmax) { + /* TODO: Implement */ +} + +static mpc_err_t* mpc_err_either(mpc_err_t* x, mpc_err_t* y) { + + if (x->state.pos > y->state.pos) { + mpc_err_delete(y); + return x; + } + + if (x->state.pos < y->state.pos) { + mpc_err_delete(x); + return y; + } + + if (x->state.pos == y->state.pos) { + + int i; + for (i = 0; i < y->expected_num; i++) { + if (mpc_err_contains_expected(x, y->expected[i])) { continue; } + else { mpc_err_add_expected(x, y->expected[i]); } + } + + mpc_err_delete(y); + return x; + } + + return NULL; + +} + +static mpc_err_t* mpc_err_many1(mpc_err_t* x) { + /* TODO: Collapse expected list and add `one or more` prefix */ + return x; +} + +static mpc_err_t* mpc_err_count(mpc_err_t* x, int n) { + /* TODO: Collapse expected list and add `N of` prefix */ + return x; +} + +char** mpc_err_expected(mpc_err_t* x, int* num) { + *num = x->expected_num; + return x->expected; +} + +/* +** Input Type +*/ + +typedef struct { + + char* str; + mpc_state_t state; + + int marks_num; + mpc_state_t* marks; + +} mpc_input_t; + +static mpc_input_t* mpc_input_new(const char* str) { + + mpc_input_t* i = malloc(sizeof(mpc_input_t)); + i->str = malloc(strlen(str) + 1); + strcpy(i->str, str); + + i->state.next = i->str[0]; + i->state.last = '\0'; + i->state.pos = 0; + i->state.row = 0; + i->state.col = 0; + + i->marks_num = 0; + i->marks = NULL; + + return i; +} + +static void mpc_input_delete(mpc_input_t* i) { + free(i->str); + free(i->marks); + free(i); +} + +static void mpc_input_mark(mpc_input_t* i) { + i->marks_num++; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num); + i->marks[i->marks_num-1] = i->state; +} + +static void mpc_input_unmark(mpc_input_t* i) { + i->marks_num--; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num); +} + +static void mpc_input_rewind(mpc_input_t* i) { + i->state = i->marks[i->marks_num-1]; + mpc_input_unmark(i); +} + +static bool mpc_input_next(mpc_input_t* i, char** o) { + + i->state.last = i->str[i->state.pos]; + i->state.pos++; + i->state.col++; + + if (i->state.last == '\n') { + i->state.col = 0; + i->state.row++; + } + + (*o) = malloc(2); + (*o)[0] = i->state.last; + (*o)[1] = '\0'; + return true; + +} + +static bool mpc_input_any(mpc_input_t* i, char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->str[i->state.pos] == '\0') { + i->state.next = i->str[i->state.pos]; + return false; + } + + return mpc_input_next(i, o); + +} + +static bool mpc_input_char(mpc_input_t* i, char c, char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->str[i->state.pos] != c) { + i->state.next = i->str[i->state.pos]; + return false; + } + + return mpc_input_next(i, o); + +} + +static bool mpc_input_range(mpc_input_t* i, char c, char d, char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (i->str[i->state.pos] < c || + i->str[i->state.pos] > d) { + i->state.next = i->str[i->state.pos]; + return false; + } + + return mpc_input_next(i, o); + +} + +static bool char_in_string(char c, const char* x) { + + while (*x) { + if (*x == c) { return true; } + x++; + } + + return false; +} + +static bool mpc_input_oneof(mpc_input_t* i, const char* c, char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (!char_in_string(i->str[i->state.pos], c)) { + i->state.next = i->str[i->state.pos]; + return false; + } + + return mpc_input_next(i, o); + +} + +static bool mpc_input_noneof(mpc_input_t* i, const char* c, char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (char_in_string(i->str[i->state.pos], c) || (i->str[i->state.pos] == '\0')) { + i->state.next = i->str[i->state.pos]; + return false; + } + + return mpc_input_next(i, o); + +} + +static bool mpc_input_satisfy(mpc_input_t* i, bool(*cond)(char), char** o) { + + if (i->state.pos > strlen(i->str)) { i->state.next = '\0'; return false; } + if (!cond(i->str[i->state.pos])) { i->state.next = i->str[i->state.pos]; return false; } + + return mpc_input_next(i, o); + +} + +bool mpc_input_string(mpc_input_t* i, const char* c, char** o) { + + mpc_input_mark(i); + char* co = NULL; + const char* x = c; + while (*x) { + if (mpc_input_char(i, *x, &co)) { + free(co); + } else { + mpc_input_rewind(i); + return false; + } + x++; + } + mpc_input_unmark(i); + + *o = malloc(strlen(c) + 1); + strcpy(*o, c); + return true; +} + +/* +** Parser Type +*/ + +enum { + MPC_TYPE_PASS = 0, + MPC_TYPE_FAIL = 1, + MPC_TYPE_LIFT = 2, + MPC_TYPE_EXPECT = 3, + + MPC_TYPE_ANY = 4, + MPC_TYPE_SINGLE = 5, + MPC_TYPE_ONEOF = 6, + MPC_TYPE_NONEOF = 7, + MPC_TYPE_RANGE = 8, + MPC_TYPE_SATISFY = 9, + MPC_TYPE_STRING = 10, + + MPC_TYPE_APPLY = 11, + + MPC_TYPE_MAYBE = 12, + MPC_TYPE_MANY = 13, + MPC_TYPE_MANY1 = 14, + MPC_TYPE_COUNT = 15, + + MPC_TYPE_EITHER = 16, + MPC_TYPE_ALSO = 17, + MPC_TYPE_OR = 18, + MPC_TYPE_AND = 19, +}; + +typedef struct { void* x; } mpc_pdata_lift_t; +typedef struct { mpc_parser_t* x; char* m; } mpc_pdata_expect_t; +typedef struct { char x; } mpc_pdata_single_t; +typedef struct { char x; char y; } mpc_pdata_range_t; +typedef struct { bool(*f)(char); } mpc_pdata_satisfy_t; +typedef struct { char* x; } mpc_pdata_string_t; +typedef struct { mpc_parser_t* x; mpc_apply_t f; } mpc_pdata_apply_t; +typedef struct { mpc_parser_t* x; mpc_fold_t f; int n; mpc_dtor_t dx; } mpc_pdata_repeat_t; +typedef struct { mpc_parser_t* x; mpc_parser_t* y; } mpc_pdata_either_t; +typedef struct { mpc_parser_t* x; mpc_parser_t* y; mpc_dtor_t dx; mpc_fold_t f; } mpc_pdata_also_t; +typedef struct { int n; mpc_parser_t** xs; } mpc_pdata_or_t; +typedef struct { int n; mpc_parser_t** xs; mpc_dtor_t* dxs; mpc_afold_t f; } mpc_pdata_and_t; + +typedef union { + mpc_pdata_lift_t lift; + mpc_pdata_expect_t expect; + mpc_pdata_single_t single; + mpc_pdata_range_t range; + mpc_pdata_satisfy_t satisfy; + mpc_pdata_string_t string; + mpc_pdata_apply_t apply; + mpc_pdata_repeat_t repeat; + mpc_pdata_either_t either; + mpc_pdata_also_t also; + mpc_pdata_and_t and; + mpc_pdata_or_t or; +} mpc_pdata_t; + +struct mpc_parser_t { + bool retained; + uint8_t type; + mpc_pdata_t data; +}; + +/* +** This is rather pleasant. The core parsing routine +** is written in about 500 lines of C. +** +** I also love the way in which each parsing type +** concisely matches some construct or idiom in C. +** Particularly nice is are the `either` and `also` +** types which have a broken but mirrored structure +** with return value and error reflected. +** +*/ + +#define MPC_SUCCESS(x) r->output = x; return true; +#define MPC_FAILURE(x) r->error = x; return false; +#define MPC_TRY(x, f) if (f) { MPC_SUCCESS(x) } else { MPC_FAILURE(mpc_err_new(i->state, "different character")); } + +bool mpc_parse_input(mpc_input_t* i, mpc_parser_t* p, mpc_result_t* r) { + + memset(r, 0, sizeof(mpc_result_t)); + + /* Trivial Parsers */ + + if (p->type == MPC_TYPE_PASS) { MPC_SUCCESS(NULL); } + if (p->type == MPC_TYPE_FAIL) { MPC_FAILURE(mpc_err_new(i->state, "different character")); } + if (p->type == MPC_TYPE_LIFT) { MPC_SUCCESS(p->data.lift.x); } + + /* Basic Parsers */ + + char* s = NULL; + if (p->type == MPC_TYPE_ANY) { MPC_TRY(s, mpc_input_any(i, &s)); } + if (p->type == MPC_TYPE_SINGLE) { MPC_TRY(s, mpc_input_char(i, p->data.single.x, &s)); } + if (p->type == MPC_TYPE_RANGE) { MPC_TRY(s, mpc_input_range(i, p->data.range.x, p->data.range.y, &s)); } + if (p->type == MPC_TYPE_ONEOF) { MPC_TRY(s, mpc_input_oneof(i, p->data.string.x, &s)); } + if (p->type == MPC_TYPE_NONEOF) { MPC_TRY(s, mpc_input_noneof(i, p->data.string.x, &s)); } + if (p->type == MPC_TYPE_SATISFY) { MPC_TRY(s, mpc_input_satisfy(i, p->data.satisfy.f, &s)); } + if (p->type == MPC_TYPE_STRING) { MPC_TRY(s, mpc_input_string(i, p->data.string.x, &s)); } + + /* Advanced Parsers */ + + int c = 0; + mpc_val_t* t = NULL; + mpc_result_t x, y; + memset(&x, 0, sizeof(mpc_result_t)); + memset(&y, 0, sizeof(mpc_result_t)); + + if (p->type == MPC_TYPE_EXPECT) { + if (mpc_parse_input(i, p->data.expect.x, &x)) { + MPC_SUCCESS(x.output); + } else { + mpc_err_delete(x.error); + MPC_FAILURE(mpc_err_new(i->state, p->data.expect.m)); + } + } + + if (p->type == MPC_TYPE_APPLY) { + if (mpc_parse_input(i, p->data.apply.x, &x)) { + MPC_SUCCESS(p->data.apply.f(x.output)); + } else { + MPC_FAILURE(x.error); + } + } + + if (p->type == MPC_TYPE_MAYBE) { + if (mpc_parse_input(i, p->data.repeat.x, &x)) { MPC_SUCCESS(x.output); } + mpc_err_delete(x.error); + MPC_SUCCESS(NULL); + } + + if (p->type == MPC_TYPE_MANY) { + while (mpc_parse_input(i, p->data.repeat.x, &x)) { t = p->data.repeat.f(t, x.output); } + mpc_err_delete(x.error); + MPC_SUCCESS(t); + } + + if (p->type == MPC_TYPE_MANY1) { + + while (mpc_parse_input(i, p->data.repeat.x, &x)) { t = p->data.repeat.f(t, x.output); c++; } + + if (c >= 1) { + mpc_err_delete(x.error); + MPC_SUCCESS(t); + } else { + MPC_FAILURE(mpc_err_many1(x.error)); + } + + } + + if (p->type == MPC_TYPE_COUNT) { + + mpc_input_mark(i); + while (mpc_parse_input(i, p->data.repeat.x, &x)) { + t = p->data.repeat.f(t, x.output); + c++; + if (c == p->data.repeat.n) { break; } + } + + if (c == p->data.repeat.n) { + mpc_input_unmark(i); + MPC_SUCCESS(t); + } else { + p->data.repeat.dx(t); + mpc_input_rewind(i); + MPC_FAILURE(mpc_err_count(x.error, p->data.repeat.n)); + } + + } + + /* Combinatory Parsers */ + + if (p->type == MPC_TYPE_EITHER) { + if (mpc_parse_input(i, p->data.either.x, &x)) { MPC_SUCCESS(x.output); } + if (mpc_parse_input(i, p->data.either.y, &y)) { mpc_err_delete(x.error); MPC_SUCCESS(y.output); } + MPC_FAILURE(mpc_err_either(x.error, y.error)); + } + + if (p->type == MPC_TYPE_ALSO) { + mpc_input_mark(i); + if (!mpc_parse_input(i, p->data.also.x, &x)) { mpc_input_rewind(i); MPC_FAILURE(x.error); } + if (!mpc_parse_input(i, p->data.also.y, &y)) { mpc_input_rewind(i); p->data.also.dx(x.output); MPC_FAILURE(y.error); } + mpc_input_unmark(i); + MPC_SUCCESS(p->data.also.f(x.output, y.output)); + } + + if (p->type == MPC_TYPE_OR) { + + mpc_result_t* rs = malloc(sizeof(mpc_result_t) * p->data.or.n); + int ri, pri; + + for (ri = 0; ri < p->data.or.n; ri++) { + if (mpc_parse_input(i, p->data.or.xs[ri], &rs[ri])) { + + for (pri = 0; pri < ri; pri++) { mpc_err_delete(rs[pri].error); } + r->output = rs[ri].output; + free(rs); + return true; + + } + } + + /* TODO: Implement */ + r->error = rs[0].error; + free(rs); + return false; + + } + + if (p->type == MPC_TYPE_AND) { + + mpc_input_mark(i); + + mpc_result_t* rs = malloc(sizeof(mpc_result_t) * p->data.and.n); + int ri, pri; + + for (ri = 0; ri < p->data.and.n; ri++) { + if (!mpc_parse_input(i, p->data.and.xs[ri], &rs[ri])) { + + for (pri = 0; pri < ri; pri++) { p->data.and.dxs[pri](rs[pri].output); } + r->error = rs[ri].error; + free(rs); + mpc_input_rewind(i); + return false; + + } + } + + mpc_val_t** vals = malloc(sizeof(mpc_val_t*) * p->data.and.n); + + for (ri = 0; ri < p->data.and.n; ri++) { + vals[ri] = rs[ri].output; + } + + r->output = p->data.and.f(p->data.and.n, vals); + + free(rs); + free(vals); + mpc_input_unmark(i); + return true; + + } + + fprintf(stderr, "Unknown Parser Type Id %i!\n", p->type); + abort(); + +} + +#undef MPC_SUCCESS +#undef MPC_FAILURE +#undef MPC_TRY + +bool mpc_parse(const char* s, mpc_parser_t* p, mpc_result_t* r) { + mpc_input_t* i = mpc_input_new(s); + bool x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* r) { + fseek(f, 0, SEEK_END); + int len = ftell(f); + fseek(f, 0, SEEK_SET); + char* buff = malloc(len + 1); + fread(buff, 1, len, f); + buff[len] = '\0'; + + bool x = mpc_parse(buff, p, r); + + free(buff); + return x; +} + +bool mpc_parse_filename(const char* filename, mpc_parser_t* p, mpc_result_t* r) { + FILE* f = fopen(filename, "r"); + bool res = mpc_parse_file(f, p, r); + fclose(f); + return res; +} + +/* +** Building a Parser +*/ + +static void mpc_delete_unretained(mpc_parser_t* p, bool force); + +static void mpc_delete_or(mpc_parser_t* p) { + + int i; + for (i = 0; i < p->data.or.n; i++) { + mpc_delete_unretained(p->data.or.xs[i], false); + } + free(p->data.or.xs); + +} + +static void mpc_delete_and(mpc_parser_t* p) { + + int i; + for (i = 0; i < p->data.and.n; i++) { + mpc_delete_unretained(p->data.and.xs[i], false); + } + free(p->data.and.xs); + free(p->data.and.dxs); + +} + +static void mpc_delete_unretained(mpc_parser_t* p, bool force) { + + if (p->retained && !force) { return; } + + switch (p->type) { + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + free(p->data.string.x); + break; + + case MPC_TYPE_APPLY: + mpc_delete_unretained(p->data.apply.x, false); + break; + + case MPC_TYPE_EXPECT: + mpc_delete_unretained(p->data.expect.x, false); + free(p->data.expect.m); + break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + mpc_delete_unretained(p->data.repeat.x, false); + break; + + case MPC_TYPE_EITHER: + mpc_delete_unretained(p->data.either.x, false); + mpc_delete_unretained(p->data.either.y, false); + break; + + case MPC_TYPE_ALSO: + mpc_delete_unretained(p->data.also.x, false); + mpc_delete_unretained(p->data.also.y, false); + break; + + case MPC_TYPE_OR: + mpc_delete_or(p); + break; + + case MPC_TYPE_AND: + mpc_delete_and(p); + break; + + default: break; + } + + free(p); + +} + +void mpc_delete(mpc_parser_t* p) { + mpc_delete_unretained(p, true); +} + +mpc_parser_t* mpc_new(void) { + return mpc_pass(); +} + +mpc_parser_t* mpc_assign(mpc_parser_t* p, mpc_parser_t* a) { + memcpy(p, a, sizeof(mpc_parser_t)); + free(a); + return p; +} + +mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a) { + return mpc_retain(mpc_assign(p, a)); +} + +mpc_parser_t* mpc_retain(mpc_parser_t* p) { + p->retained = true; + return p; +} + +mpc_parser_t* mpc_pass(void) { + mpc_parser_t* p = calloc(1, sizeof(mpc_parser_t)); + p->retained = false; + p->type = MPC_TYPE_PASS; + return p; +} + +mpc_parser_t* mpc_fail(void) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_FAIL; + return p; +} + +mpc_parser_t* mpc_lift(mpc_val_t* x) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_LIFT; + p->data.lift.x = x; + return p; +} + + +mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_EXPECT; + p->data.expect.x = a; + p->data.expect.m = malloc(strlen(expected) + 1); + strcpy(p->data.expect.m, expected); + return p; +} + +/* +** Basic Parsers +*/ + +mpc_parser_t* mpc_any(void) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_ANY; + return mpc_expect(p, "any character"); +} + +mpc_parser_t* mpc_char(char c) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_SINGLE; + p->data.single.x = c; + return mpc_expect(p, (char[]){ '\'', c, '\'', '\0' } ); +} + +mpc_parser_t* mpc_range(char s, char e) { + + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_RANGE; + p->data.range.x = s; + p->data.range.y = e; + + char expected[30]; + strcpy(expected, "character between '"); + strcat(expected, (char[]){ s, '\0' }); + strcat(expected, "' and '"); + strcat(expected, (char[]){ e, '\0' }); + strcat(expected, "'"); + + return mpc_expect(p, expected); +} + +mpc_parser_t* mpc_oneof(const char* s) { + + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_ONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + + char* expected = malloc(strlen(s) + 10); + strcpy(expected, "one of '"); + strcat(expected, s); + strcat(expected, "'"); + + p = mpc_expect(p, expected); + + free(expected); + + return p; +} + +mpc_parser_t* mpc_noneof(const char* s) { + + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_NONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + + char* expected = malloc(strlen(s) + 11); + strcpy(expected, "none of '"); + strcat(expected, s); + strcat(expected, "'"); + + p = mpc_expect(p, expected); + + free(expected); + + return p; + +} + +mpc_parser_t* mpc_satisfy(bool(*f)(char)) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_SATISFY; + p->data.satisfy.f = f; + + return p; +} + +mpc_parser_t* mpc_string(const char* s) { + + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_STRING; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + + char* expected = malloc(strlen(s) + 3); + strcpy(expected, "\""); + strcat(expected, s); + strcat(expected, "\""); + + p = mpc_expect(p, expected); + + free(expected); + + return p; + +} + +void mpc_dtor_null(mpc_val_t* x) { return; } + +/* +** Core Parsers +*/ + +mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_APPLY; + p->data.apply.x = a; + p->data.apply.f = f; + return p; +} + +mpc_parser_t* mpc_maybe(mpc_parser_t* a) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_MAYBE; + p->data.repeat.x = a; + return p; +} + +mpc_parser_t* mpc_many(mpc_parser_t* a, mpc_fold_t f) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_MANY; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t* mpc_many1(mpc_parser_t* a, mpc_fold_t f) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_MANY1; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_COUNT; + p->data.repeat.x = a; + p->data.repeat.dx = da; + p->data.repeat.f = f; + p->data.repeat.n = n; + return p; +} + +mpc_parser_t* mpc_either(mpc_parser_t* a, mpc_parser_t* b) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_EITHER; + p->data.either.x = a; + p->data.either.y = b; + return p; +} + +mpc_parser_t* mpc_also(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f) { + mpc_parser_t* p = mpc_pass(); + p->type = MPC_TYPE_ALSO; + p->data.also.x = a; + p->data.also.y = b; + p->data.also.dx = da; + p->data.also.f = f; + return p; +} + +mpc_parser_t* mpc_bind(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f) { + return mpc_also(a, b, da, f); +} + +mpc_parser_t* mpc_or(int n, ...) { + + mpc_parser_t* p = mpc_pass(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_list va; + va_start(va, n); + int i; + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; +} + +mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...) { + + mpc_parser_t* p = mpc_pass(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = f; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_list va; + va_start(va, f); + int i; + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); + } + va_end(va); + + return p; + +} + +/* +** Common Parsers +*/ + +mpc_parser_t* mpc_space(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "space"); } +mpc_parser_t* mpc_spaces(void) { return mpc_expect(mpc_many(mpc_space(), mpcf_strfold), "spaces"); } +mpc_parser_t* mpc_whitespace(void) { return mpc_expect(mpc_apply(mpc_spaces(), mpcf_free), "whitespace"); } + +mpc_parser_t* mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } +mpc_parser_t* mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } +mpc_parser_t* mpc_eoi(void) { return mpc_expect(mpc_char('\0'), "end of input"); } +mpc_parser_t* mpc_escape(void) { return mpc_also(mpc_char('\\'), mpc_any(), free, mpcf_strfold); } + +mpc_parser_t* mpc_digit(void) { return mpc_expect(mpc_oneof("012345689"), "digit"); } +mpc_parser_t* mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } +mpc_parser_t* mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } +mpc_parser_t* mpc_digits(void) { return mpc_expect(mpc_many1(mpc_digit(), mpcf_strfold), "digits"); } +mpc_parser_t* mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpc_hexdigit(), mpcf_strfold), "hex digits"); } +mpc_parser_t* mpc_octdigits(void) { return mpc_expect(mpc_many1(mpc_octdigit(), mpcf_strfold), "oct digits"); } + +mpc_parser_t* mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } +mpc_parser_t* mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } +mpc_parser_t* mpc_alpha(void) { return mpc_expect(mpc_either(mpc_lower(), mpc_upper()), "letter"); } +mpc_parser_t* mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } + +mpc_parser_t* mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } +mpc_parser_t* mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } +mpc_parser_t* mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } +mpc_parser_t* mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } + +mpc_parser_t* mpc_float(void); + +mpc_parser_t* mpc_semi(void) { return mpc_char(';'); } +mpc_parser_t* mpc_comma(void) { return mpc_char(','); } +mpc_parser_t* mpc_colon(void) { return mpc_char(':'); } +mpc_parser_t* mpc_dot(void) { return mpc_char('.'); } + +mpc_parser_t* mpc_char_lit(void); +mpc_parser_t* mpc_string_lit(void); + +mpc_parser_t* mpc_ident(void); + +/* +** Useful Parsers +*/ + +mpc_parser_t* mpc_ends(mpc_parser_t* a, mpc_dtor_t da) { return mpc_also(a, mpc_eoi(), da, mpcf_fst_free); } +mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f) { return mpc_many(a, f); } +mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f) { return mpc_many1(a, f); } +mpc_parser_t* mpc_tok(mpc_parser_t* a) { return mpc_also(a, mpc_whitespace(), mpc_dtor_null, mpcf_fst); } +mpc_parser_t* mpc_sym(const char* s) { return mpc_tok(mpc_string(s)); } + +mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c) { + return mpc_and(3, mpcf_between_free, + mpc_string(o), a, mpc_string(c), + free, ad); +} + +mpc_parser_t* mpc_parens(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } +mpc_parser_t* mpc_braces(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } +mpc_parser_t* mpc_brackets(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } +mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } + +/* +** Regular Expression Parsers +*/ + +/* +** So here is a cute bootstrapping. +** +** I'm using the previously defined +** mpc constructs and functions to +** parse the user regex string and +** construct a parser from it. +** +** As it turns out lots of the standard +** mpc functions look a lot like `fold` +** functions and so can be used indirectly +** by many of the parsing functions to build +** a parser directly - as we parse. +** +** This is certainly something that +** would be less elegant/interesting +** in a two-phase parser which first +** built an AST and then traversed it +** to generate the object. +** +** This whole thing acts as a great +** case study for how trivial it can be +** to write a great parser in a few +** lines of code using this library. +*/ + +/* +** +** ### Regular Expression Grammar +** +** : ( "|" ) | +** +** : * +** +** : +** | "*" +** | "+" +** | "?" +** | "{" "}" +** +** : +** | "\" +** | "." +** | "(" ")" +** | "[" "]" +*/ + +static mpc_val_t* mpc_re_fold_or(int n, mpc_val_t** xs) { + free(xs[1]); + return mpc_either(xs[0], xs[2]); +} + +static mpc_val_t* mpc_re_fold_repeat(mpc_val_t* x, mpc_val_t* y) { + if (strcmp(y, "*") == 0) { free(y); return mpc_many(x, mpcf_strfold); } + if (strcmp(y, "+") == 0) { free(y); return mpc_many1(x, mpcf_strfold); } + if (strcmp(y, "?") == 0) { free(y); return mpc_maybe(x); } + int n = strtol(y, NULL, 10); + free(y); + return mpc_count(x, free, mpcf_strfold, n); +} + +static mpc_val_t* mpc_re_fold_many(mpc_val_t* t, mpc_val_t* x) { + if (t == NULL) { return x; } + return mpc_also(t, x, free, mpcf_strfold); +} + +static mpc_val_t* mpc_re_escape(mpc_val_t* x) { + + char* s = x; + + if (s[0] == '.') { free(x); return mpc_any(); } + if (s[0] == '$') { free(x); return mpc_eoi(); } + + if (s[0] == '\\') { + mpc_parser_t* p = mpc_char(s[1]); + free(s); + return p; + } else { + mpc_parser_t* p = mpc_char(s[0]); + free(s); + return p; + } + +} + +static mpc_val_t* mpc_re_range(mpc_val_t* x) { + /* TODO: Implement proper range scanning */ + return mpc_oneof(x); +} + +mpc_parser_t* mpc_re(const char* re) { + + mpc_parser_t* Regex = mpc_new(); + mpc_parser_t* Term = mpc_new(); + mpc_parser_t* Factor = mpc_new(); + mpc_parser_t* Base = mpc_new(); + mpc_parser_t* Range = mpc_new(); + + mpc_define(Regex, mpc_either( + mpc_and(3, mpc_re_fold_or, Term, mpc_char('|'), Regex, mpc_delete, free), + Term + )); + + mpc_define(Term, mpc_many(Factor, mpc_re_fold_many)); + + mpc_define(Factor, mpc_or(5, + mpc_also(Base, mpc_char('*'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), + mpc_also(Base, mpc_char('+'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), + mpc_also(Base, mpc_char('?'), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), + mpc_also(Base, mpc_braces(mpc_digits(), free), (mpc_dtor_t)mpc_delete, mpc_re_fold_repeat), + Base + )); + + mpc_define(Base, mpc_or(5, + mpc_parens(Regex, (mpc_dtor_t)mpc_delete), + mpc_squares(Range, (mpc_dtor_t)mpc_delete), + mpc_apply(mpc_oneof(".$"), mpc_re_escape), + mpc_apply(mpc_escape(), mpc_re_escape), + mpc_apply(mpc_noneof(")|"), mpc_re_escape) + )); + + mpc_define(Range, mpc_apply( + mpc_many(mpc_either(mpc_escape(), mpc_noneof("]")), mpcf_strfold), + mpc_re_range + )); + + mpc_result_t r; + if (mpc_parse(re, Regex, &r)) { + return r.output; + } else { + /* TODO: Proper Error Reporting */ + return NULL; + } + + mpc_delete(Regex); + mpc_delete(Term); + mpc_delete(Factor); + mpc_delete(Base); + mpc_delete(Range); + +} + +/* +** Common Fold Functions +*/ + +mpc_val_t* mpcf_free(mpc_val_t* x) { + free(x); + return NULL; +} + +mpc_val_t* mpcf_int(mpc_val_t* x) { + int* y = malloc(sizeof(int)); + *y = strtol(x, NULL, 10); + free(x); + return y; +} + +mpc_val_t* mpcf_hex(mpc_val_t* x) { + int* y = malloc(sizeof(int)); + *y = strtol(x, NULL, 16); + free(x); + return y; +} + +mpc_val_t* mpcf_oct(mpc_val_t* x) { + int* y = malloc(sizeof(int)); + *y = strtol(x, NULL, 8); + free(x); + return y; +} + +static mpc_val_t* mpcf_escape_new(mpc_val_t* x) { + + char* input = (char[]){ + '\a', '\b', '\f', '\n', '\r', + '\t', '\v', '\\', '\'', '\"', '\0'}; + + char** output = (char*[]){ + "\\a", "\\b", "\\f", "\\n", "\\r", + "\\t", "\\v", "\\\\", "\\'", "\\\"", "\\0"}; + + int i; + char* s = x; + char* y = calloc(1, 1); + + while (*s) { + + bool found = false; + for (i = 0; i < 11; i++) { + if (*s == input[i]) { + y = realloc(y, strlen(y) + strlen(output[i]) + 1); + strcat(y, output[i]); + found = true; + break; + } + } + + if (!found) { + y = realloc(y, strlen(y) + 1); + strcat(y, (char[]){*s, '\0'}); + } + + s++; + } + + + return y; +} + +mpc_val_t* mpcf_escape(mpc_val_t* x) { + mpc_val_t* y = mpcf_escape_new(x); + free(x); + return y; +} + +mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y) { + return x; +} + +mpc_val_t* mpcf_snd(mpc_val_t* x, mpc_val_t* y) { + return y; +} + +mpc_val_t* mpcf_fst_free(mpc_val_t* x, mpc_val_t* y) { + free(y); + return x; +} + +mpc_val_t* mpcf_snd_free(mpc_val_t* x, mpc_val_t* y) { + free(x); + return y; +} + +mpc_val_t* mpcf_freefold(mpc_val_t* t, mpc_val_t* x) { + free(x); + return NULL; +} + +mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x) { + + if (t == NULL) { t = calloc(1, 1); } + + t = realloc(t, strlen(t) + strlen(x) + 1); + strcat(t, x); + free(x); + + return t; +} + +mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs) { + free(xs[0]); + free(xs[2]); + return xs[1]; +} + +mpc_val_t* mpcf_maths(int n, mpc_val_t** xs) { + + int** vs = (int**)xs; + + if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } + if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } + if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } + if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } + if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } + + free(xs[1]); free(xs[2]); + + return xs[0]; +} + +/* +** Printing +*/ + + + +static void mpc_print_rt(mpc_parser_t* p, bool force) { + + if (p->retained && !force) { printf(""); return; } + + if (p->type == MPC_TYPE_PASS) { printf(""); } + if (p->type == MPC_TYPE_FAIL) { printf(""); } + if (p->type == MPC_TYPE_LIFT) { printf(""); } + if (p->type == MPC_TYPE_EXPECT) { mpc_print_rt(p->data.expect.x, false); } + + if (p->type == MPC_TYPE_ANY) { printf(""); } + if (p->type == MPC_TYPE_SATISFY) { printf("", p->data.satisfy.f); } + + if (p->type == MPC_TYPE_SINGLE) { + char* s = mpcf_escape_new((char[]){ p->data.single.x, '\0' }); + printf("'%s'", s); + free(s); + } + + if (p->type == MPC_TYPE_RANGE) { + char* s = mpcf_escape_new((char[]){ p->data.range.x, '\0' }); + char* e = mpcf_escape_new((char[]){ p->data.range.y, '\0' }); + printf("[%s-%s]", s, e); + free(s); + free(e); + } + + if (p->type == MPC_TYPE_ONEOF) { + char* s = mpcf_escape_new(p->data.string.x); + printf("[%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_NONEOF) { + char* s = mpcf_escape_new(p->data.string.x); + printf("[^%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_STRING) { + char* s = mpcf_escape_new(p->data.string.x); + printf("\"%s\"", s); + free(s); + } + + if (p->type == MPC_TYPE_APPLY) { mpc_print_rt(p->data.apply.x, false); } + if (p->type == MPC_TYPE_MAYBE) { printf("("); mpc_print_rt(p->data.repeat.x, false); printf(")?"); } + if (p->type == MPC_TYPE_MANY) { printf("("); mpc_print_rt(p->data.repeat.x, false); printf(")*"); } + if (p->type == MPC_TYPE_MANY1) { printf("("); mpc_print_rt(p->data.repeat.x, false); printf(")+"); } + if (p->type == MPC_TYPE_COUNT) { printf("("); mpc_print_rt(p->data.repeat.x, false); printf("){%i}", p->data.repeat.n); } + + if (p->type == MPC_TYPE_EITHER) { + printf("("); + mpc_print_rt(p->data.either.x, false); + printf(" | "); + mpc_print_rt(p->data.either.y, false); + printf(")"); + } + + if (p->type == MPC_TYPE_ALSO) { + mpc_print_rt(p->data.also.x, false); + printf(" "); + mpc_print_rt(p->data.also.y, false); + } + + if (p->type == MPC_TYPE_OR) { + printf("("); + int i; + for(i = 0; i < p->data.or.n-1; i++) { + mpc_print_rt(p->data.or.xs[i], false); + printf(" | "); + } + mpc_print_rt(p->data.or.xs[p->data.or.n-1], false); + printf(")"); + } + + if (p->type == MPC_TYPE_AND) { + int i; + for(i = 0; i < p->data.and.n-1; i++) { + mpc_print_rt(p->data.and.xs[i], false); + printf(" "); + } + mpc_print_rt(p->data.and.xs[p->data.and.n-1], false); + } + +} + +void mpc_print(mpc_parser_t* p) { + mpc_print_rt(p, true); + printf("\n"); +} + +/* +** Testing +*/ + +bool mpc_test(mpc_parser_t* p, const char* s, void* data, + bool(*tester)(void*, void*), + void(*destructor)(void*), + void(*printer)(void*)) { + + printf("== Test ==: '%s'\n", s); + + mpc_result_t r; + if (mpc_parse(s, p, &r)) { + + if (tester(r.output, data)) { + printf("Passed with "); printer(r.output); printf("\n"); + destructor(r.output); + return true; + } else { + printf("Failed!\n"); + printf("Got "); printer(r.output); printf("\n"); + printf("Expected "); printer(data); printf("\n"); + return false; + } + + } else { + + printf("Failed!\n"); + mpc_err_print(r.error); + mpc_err_delete(r.error); + return false; + + } + +} + diff --git a/mpc.h b/mpc.h new file mode 100644 index 0000000..bee8225 --- /dev/null +++ b/mpc.h @@ -0,0 +1,204 @@ +/* +** mpc - Micro Parser Combinator library for C +** https://github.com/orangeduck/mpc +** Daniel Holden - contact@daniel-holden.com +** Licensed under BSD3 +*/ +#ifndef mpc_h +#define mpc_h + +#include +#include +#include + +/* +** Error Type +*/ + +struct mpc_err_t; +typedef struct mpc_err_t mpc_err_t; + +int mpc_err_line(mpc_err_t* x); +int mpc_err_column(mpc_err_t* x); +char mpc_err_unexpected(mpc_err_t* x); +char** mpc_err_expected(mpc_err_t* x, int* num); + +void mpc_err_delete(mpc_err_t* x); +void mpc_err_print(mpc_err_t* x); +void mpc_err_print_to(mpc_err_t* x, FILE* f); +void mpc_err_msg(mpc_err_t* x, char* out, int* outn, int outmax); + +/* +** Parsing +*/ + +typedef void mpc_val_t; + +typedef union { + mpc_err_t* error; + mpc_val_t* output; +} mpc_result_t; + +struct mpc_parser_t; +typedef struct mpc_parser_t mpc_parser_t; + +bool mpc_parse(const char* s, mpc_parser_t* p, mpc_result_t* r); +bool mpc_parse_file(FILE* f, mpc_parser_t* p, mpc_result_t* r); +bool mpc_parse_filename(const char* filename, mpc_parser_t* p, mpc_result_t* r); + +/* +** Building a Parser +*/ + +void mpc_delete(mpc_parser_t* p); +mpc_parser_t* mpc_new(void); +mpc_parser_t* mpc_assign(mpc_parser_t* p, mpc_parser_t* a); +mpc_parser_t* mpc_define(mpc_parser_t* p, mpc_parser_t* a); +mpc_parser_t* mpc_retain(mpc_parser_t* p); +mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected); + +mpc_parser_t* mpc_pass(void); +mpc_parser_t* mpc_fail(void); +mpc_parser_t* mpc_lift(mpc_val_t* x); + +/* +** Basic Parsers +*/ + +mpc_parser_t* mpc_any(void); +mpc_parser_t* mpc_char(char c); +mpc_parser_t* mpc_range(char s, char e); +mpc_parser_t* mpc_oneof(const char* s); +mpc_parser_t* mpc_noneof(const char* s); +mpc_parser_t* mpc_satisfy(bool(*f)(char)); +mpc_parser_t* mpc_string(const char* s); + +/* +** Function Types +*/ + +typedef void (*mpc_dtor_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_fold_t)(mpc_val_t*,mpc_val_t*); +typedef mpc_val_t*(*mpc_afold_t)(int,mpc_val_t**); + +void mpc_dtor_null(mpc_val_t* x); + +/* +** Core Parsers +*/ + +mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f); +mpc_parser_t* mpc_maybe(mpc_parser_t* a); +mpc_parser_t* mpc_many(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_many1(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_count(mpc_parser_t* a, mpc_dtor_t da, mpc_fold_t f, int n); +mpc_parser_t* mpc_either(mpc_parser_t* a, mpc_parser_t* b); +mpc_parser_t* mpc_also(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); +mpc_parser_t* mpc_bind(mpc_parser_t* a, mpc_parser_t* b, mpc_dtor_t da, mpc_fold_t f); +mpc_parser_t* mpc_or(int n, ...); +mpc_parser_t* mpc_and(int n, mpc_afold_t f, ...); + +/* +** Common Parsers +*/ + +mpc_parser_t* mpc_space(void); +mpc_parser_t* mpc_spaces(void); +mpc_parser_t* mpc_whitespace(void); + +mpc_parser_t* mpc_newline(void); +mpc_parser_t* mpc_tab(void); +mpc_parser_t* mpc_eoi(void); +mpc_parser_t* mpc_escape(void); + +mpc_parser_t* mpc_digit(void); +mpc_parser_t* mpc_hexdigit(void); +mpc_parser_t* mpc_octdigit(void); +mpc_parser_t* mpc_digits(void); +mpc_parser_t* mpc_hexdigits(void); +mpc_parser_t* mpc_octdigits(void); + +mpc_parser_t* mpc_lower(void); +mpc_parser_t* mpc_upper(void); +mpc_parser_t* mpc_alpha(void); +mpc_parser_t* mpc_underscore(void); + +mpc_parser_t* mpc_int(void); +mpc_parser_t* mpc_hex(void); +mpc_parser_t* mpc_oct(void); +mpc_parser_t* mpc_number(void); + +mpc_parser_t* mpc_float(void); + +mpc_parser_t* mpc_semi(void); +mpc_parser_t* mpc_comma(void); +mpc_parser_t* mpc_colon(void); +mpc_parser_t* mpc_dot(void); + +mpc_parser_t* mpc_char_lit(void); +mpc_parser_t* mpc_string_lit(void); + +mpc_parser_t* mpc_ident(void); + +/* +** Useful Parsers +*/ + +mpc_parser_t* mpc_ends(mpc_parser_t* a, mpc_dtor_t da); +mpc_parser_t* mpc_skip_many(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_skip_many1(mpc_parser_t* a, mpc_fold_t f); +mpc_parser_t* mpc_tok(mpc_parser_t* a); +mpc_parser_t* mpc_sym(const char* s); +mpc_parser_t* mpc_between(mpc_parser_t* a, mpc_dtor_t ad, const char* o, const char* c); +mpc_parser_t* mpc_parens(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_braces(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_brackets(mpc_parser_t* a, mpc_dtor_t ad); +mpc_parser_t* mpc_squares(mpc_parser_t* a, mpc_dtor_t ad); + + +/* +** Regular Expression Parsers +*/ + +mpc_parser_t* mpc_re(const char* re); + +/* +** Common Fold Functions +*/ + +mpc_val_t* mpcf_free(mpc_val_t* x); +mpc_val_t* mpcf_int(mpc_val_t* x); +mpc_val_t* mpcf_hex(mpc_val_t* x); +mpc_val_t* mpcf_oct(mpc_val_t* x); +mpc_val_t* mpcf_escape(mpc_val_t* x); + +mpc_val_t* mpcf_fst(mpc_val_t* x, mpc_val_t* y); +mpc_val_t* mpcf_snd(mpc_val_t* x, mpc_val_t* y); + +mpc_val_t* mpcf_fst_free(mpc_val_t* x, mpc_val_t* y); +mpc_val_t* mpcf_snd_free(mpc_val_t* x, mpc_val_t* y); + +mpc_val_t* mpcf_freefold(mpc_val_t* t, mpc_val_t* x); +mpc_val_t* mpcf_strfold(mpc_val_t* t, mpc_val_t* x); + +mpc_val_t* mpcf_between_free(int n, mpc_val_t** xs); +mpc_val_t* mpcf_maths(int n, mpc_val_t** xs); + + +/* +** Printing +*/ + +void mpc_print(mpc_parser_t* p); + +/* +** Testing +*/ + +bool mpc_test(mpc_parser_t* p, const char* input, void* data, + bool(*tester)(void*, void*), + void(*destructor)(void*), + void(*printer)(void*)); + +#endif \ No newline at end of file diff --git a/tests/ident.c b/tests/ident.c new file mode 100644 index 0000000..3b4677e --- /dev/null +++ b/tests/ident.c @@ -0,0 +1,37 @@ +#include "../mpc.h" + +#include +#include + +static bool string_eq(void* x, void* y) { return (strcmp(x, y) == 0); } +static void string_print(void* x) { printf("'%s'", (char*)x); } + +bool suite_ident(void) { + + /* ^[a-zA-Z_][a-zA-Z0-9_]*$ */ + + mpc_parser_t* Ident = mpc_new(); + + mpc_define(Ident, mpc_ends( + mpc_also( + mpc_either(mpc_alpha(), mpc_underscore()), + mpc_many1(mpc_or(3, mpc_alpha(), mpc_underscore(), mpc_digit()), mpcf_strfold), + free, mpcf_strfold + ), + free) + ); + + mpc_print(Ident); + + mpc_test(Ident, "test", "test", string_eq, free, string_print); + mpc_test(Ident, " blah", "", string_eq, free, string_print); + mpc_test(Ident, "anoth21er", "anoth21er", string_eq, free, string_print); + mpc_test(Ident, "du__de", "du__de", string_eq, free, string_print); + mpc_test(Ident, "some spaces", "", string_eq, free, string_print); + mpc_test(Ident, "", "", string_eq, free, string_print); + mpc_test(Ident, "18nums", "", string_eq, free, string_print); + + mpc_delete(Ident); + + return true; +} diff --git a/tests/math.c b/tests/math.c new file mode 100644 index 0000000..d13194e --- /dev/null +++ b/tests/math.c @@ -0,0 +1,56 @@ +#include "../mpc.h" + +#include +#include + +static bool int_eq(void* x, void* y) { + return (*(int*)x == *(int*)y); +} + +static void int_print(void* x) { + printf("'%i'", *((int*)x)); +} + +bool suite_math(void) { + + mpc_parser_t* Expr = mpc_new(); + mpc_parser_t* Factor = mpc_new(); + mpc_parser_t* Term = mpc_new(); + mpc_parser_t* Maths = mpc_new(); + + mpc_define(Expr, mpc_either( + mpc_and(3, mpcf_maths, Factor, mpc_oneof("*/"), Factor, free, free), + Factor + )); + + mpc_define(Factor, mpc_either( + mpc_and(3, mpcf_maths, Term, mpc_oneof("+-"), Term, free, free), + Term + )); + + mpc_define(Term, mpc_either( + mpc_int(), + mpc_parens(Expr, free) + )); + + mpc_define(Maths, mpc_ends(Expr, free)); + + mpc_print(Expr); + mpc_print(Factor); + mpc_print(Term); + mpc_print(Maths); + + mpc_test(Maths, "1", (int[]){ 1 }, int_eq, free, int_print); + mpc_test(Maths, "(5)", (int[]){ 5 }, int_eq, free, int_print); + mpc_test(Maths, "(4*2)+5", (int[]){ 13 }, int_eq, free, int_print); + mpc_test(Maths, "a", (int[]){ 0 }, int_eq, free, int_print); + mpc_test(Maths, "2b+4", (int[]){ 2 }, int_eq, free, int_print); + + mpc_delete(Expr); + mpc_delete(Factor); + mpc_delete(Term); + mpc_delete(Maths); + + return true; + +} diff --git a/tests/regex.c b/tests/regex.c new file mode 100644 index 0000000..a700099 --- /dev/null +++ b/tests/regex.c @@ -0,0 +1,34 @@ +#include "../mpc.h" + +#include + +/* +static bool string_eq(void* x, void* y) { return (strcmp(x, y) == 0); } +static void string_print(void* x) { printf("'%s'", (char*)x); } +*/ + +bool suite_regex(void) { + + mpc_parser_t* re0 = mpc_re("abc|bcd"); + mpc_parser_t* re1 = mpc_re("abc|bcd|e"); + mpc_parser_t* re2 = mpc_re("abc(ab)*"); + mpc_parser_t* re3 = mpc_re("abc(abdd)?"); + mpc_parser_t* re4 = mpc_re("ab|c(abdd)?"); + mpc_parser_t* re5 = mpc_re("abc(ab|dd)+g$"); + + mpc_print(re0); + mpc_print(re1); + mpc_print(re2); + mpc_print(re3); + mpc_print(re4); + mpc_print(re5); + + mpc_delete(re0); + mpc_delete(re1); + mpc_delete(re2); + mpc_delete(re3); + mpc_delete(re4); + mpc_delete(re5); + + return true; +} \ No newline at end of file diff --git a/tests/test.c b/tests/test.c new file mode 100644 index 0000000..3346f7f --- /dev/null +++ b/tests/test.c @@ -0,0 +1,15 @@ +#include + +bool suite_ident(void); +bool suite_math(void); +bool suite_regex(void); + +int main(int argc, char** argv) { + + suite_ident(); + suite_math(); + suite_regex(); + + return 0; + +} \ No newline at end of file