WIP input stuff

This commit is contained in:
Daniel Holden
2013-10-06 15:46:23 +01:00
parent 57064c1089
commit 2325c2bbe9
4 changed files with 208 additions and 62 deletions

View File

@@ -270,12 +270,12 @@ Returns a parser that applies function `f` (optionally taking extra input `x`)
* * *
```c
mpc_parser_t* mpc_predict(mpc_parser_t* a);
mpc_parser_t* mpc_predictive(mpc_parser_t* a);
```
Returns a parser that runs `a` with backtracking disabled. This means if `a` consumes any input, it will not be reverted, even on failure. Turning backtracking off has good performance benefits for grammars which are `LL(1)`. These are grammars where the first character completely determines the parse result - such as the decision of parsing either a C identifier, number, or string literal. This option should not be used for non `LL(1)` grammars or it will produce incorrect results or crash the parser.
Another way to think of `mpc_predict` is that it can be applied to a parser (for a performance improvement) if either successfully parsing the first character will result in a completely successful parse, or all of the referenced sub-parsers are also `LL(1)`.
Another way to think of `mpc_predictive` is that it can be applied to a parser (for a performance improvement) if either successfully parsing the first character will result in a completely successful parse, or all of the referenced sub-parsers are also `LL(1)`.
* * *

View File

@@ -4,7 +4,4 @@
- Find some good demo grammars to show
- Use while loop instead of recursion in parse function
- Support for stream based input
- Make RE stuff predictive where possible
- Support for stream based input

259
mpc.c
View File

@@ -1,7 +1,7 @@
#include "mpc.h"
#ifndef snprintf
int snprintf(char* str, size_t size, const char* fmt, ...) {
#ifndef _WIN32
static int snprintf(char* str, size_t size, const char* fmt, ...) {
int x;
va_list va;
va_start(va, fmt);
@@ -9,10 +9,8 @@ int snprintf(char* str, size_t size, const char* fmt, ...) {
va_end(va);
return x;
}
#endif
#ifndef vsnprintf
int vsnprintf(char* str, size_t size, const char* fmt, va_list args) {
static int vsnprintf(char* str, size_t size, const char* fmt, va_list args) {
return vsprintf(str, fmt, args);
}
#endif
@@ -339,77 +337,236 @@ char mpc_err_unexpected(mpc_err_t* x) {
** Input Type
*/
/*
** In mpc the input type has three modes of
** operation: String, File and Pipe.
**
** String is easy. The whole contents are
** loaded into a buffer and scanned through.
** The cursor can jump around at will making
** backtracking easy.
**
** The second is a File which is also somewhat
** easy. The contents are never loaded into
** memory but backtracking can still be achieved
** by seeking in the file at different positions.
**
** The final mode is Pipe. This is the difficult
** one. We assume pipes cannot be seeked and only
** support a single character of lookahead, so at
** any point where the input is marked for potential
** backtracking we start buffering the input.
**
** This means that if we are requested to seek
** back we can simply start reading from the
** buffer instead of the input.
**
** Of course using `mpc_predictive` will disable
** backtracking and make LL(1) grammars easy
** to parse for all input methods.
**
*/
/* Kinds of input source; the value is stored in the `type` field of mpc_input_t. */
enum {
/* Input is an in-memory copy of a string. */
MPC_INPUT_STRING = 0,
/* Input is a FILE stream on which fseek succeeds. */
MPC_INPUT_FILE = 1,
/* Input is a FILE stream on which fseek fails. */
MPC_INPUT_PIPE = 2
};
typedef struct {
char* filename;
char* str;
int type;
mpc_state_t state;
char* str;
FILE* file;
int backtrack;
int marks_num;
mpc_state_t* marks;
mpc_state_t* marks_state;
char** marks_buff;
} mpc_input_t;
static mpc_input_t* mpc_input_new(const char* filename, const char* str) {
static mpc_input_t* mpc_input_new_string(const char* filename, const char* str) {
mpc_input_t* i = malloc(sizeof(mpc_input_t));
i->filename = malloc(strlen(filename) + 1);
strcpy(i->filename, filename);
i->type = MPC_INPUT_STRING;
i->state = mpc_state_null();
i->str = malloc(strlen(str) + 1);
strcpy(i->str, str);
i->buffer = NULL;
i->file = NULL;
i->backtrack = 1;
i->marks_num = 0;
i->marks_state = NULL;
i->marks_buff = NULL;
return i;
}
static mpc_input_t* mpc_input_new_file(const char* filename, FILE* f) {
mpc_input_t* i = malloc(sizeof(mpc_input_t));
i->filename = malloc(strlen(filename) + 1);
strcpy(i->filename, filename);
i->state.next = i->str[0];
i->state.last = '\0';
i->state.pos = 0;
i->state.row = 0;
i->state.col = 0;
if (fseek(f, 0, SEEK_CUR) != 0) {
i->type = MPC_INPUT_PIPE;
} else {
i->type = MPC_INPUT_FILE;
}
i->state = mpc_state_null();
i->str = NULL;
i->buffer = NULL;
i->file = f;
i->backtrack = 1;
i->marks_num = 0;
i->marks = NULL;
return i;
}
static void mpc_input_delete(mpc_input_t* i) {
int j;
free(i->filename);
free(i->str);
free(i->marks);
if (i->type == MPC_INPUT_STRING) { free(i->str); }
if (i->type == MPC_INPUT_PIPE) { free(i->buffer); }
free(i->marks)
free(i);
}
static void mpc_input_backtrack_disable(mpc_input_t* i) {
i->backtrack = 0;
}
static void mpc_input_backtrack_enable(mpc_input_t* i) {
i->backtrack = 1;
}
static void mpc_input_backtrack_disable(mpc_input_t* i) { i->backtrack = 0; }
static void mpc_input_backtrack_enable(mpc_input_t* i) { i->backtrack = 1; }
/*
** Record the current state on the mark stack so that a later
** rewind can return to it. Does nothing while backtracking is
** disabled. For pipe input the first mark also creates an empty
** buffer.
*/
static void mpc_input_mark(mpc_input_t* i) {

  int depth;

  if (i->backtrack == 0) { return; }

  depth = i->marks_num + 1;
  i->marks_num = depth;
  i->marks = realloc(i->marks, sizeof(mpc_state_t) * depth);
  i->marks[depth - 1] = i->state;

  if (depth == 1 && i->type == MPC_INPUT_PIPE) {
    i->buffer = calloc(1, 1);
  }

}
/*
** Drop the most recent mark without restoring its state.
** Does nothing while backtracking is disabled. For pipe input
** the buffer is released once the last mark has gone.
*/
static void mpc_input_unmark(mpc_input_t* i) {

  if (!i->backtrack) { return; }

  i->marks_num--;

  if (i->marks_num == 0) {
    /*
    ** A zero-size realloc is implementation-defined, so release
    ** the stack explicitly. A later mark reallocs from NULL,
    ** which behaves like malloc.
    */
    free(i->marks);
    i->marks = NULL;
  } else {
    i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num);
  }

  if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) {
    free(i->buffer);
    i->buffer = NULL;
  }

}
/*
** Restore the state saved by the most recent mark and then pop
** that mark. Does nothing while backtracking is disabled.
** For seekable file input the stream position is moved back to
** the restored offset as well.
*/
static void mpc_input_rewind(mpc_input_t* i) {

  if (!i->backtrack) { return; }

  i->state = i->marks[i->marks_num-1];

  if (i->type == MPC_INPUT_FILE) {
    /* the stream lives in the input struct; there is no local `f` here */
    fseek(i->file, i->state.pos, SEEK_SET);
  }

  mpc_input_unmark(i);
}
static int mpc_input_next(mpc_input_t* i, char** o) {
/*
** Returns non-zero when the current position falls inside the
** buffered text. The buffer is offset by the position saved in
** the oldest mark (marks[0]).
*/
static int mpc_input_buffer_in_range(mpc_input_t* i) {
  /* marks holds mpc_state_t values directly, so the offset is `.pos` */
  return i->state.pos < (strlen(i->buffer) + i->marks[0].pos);
}
i->state.last = i->str[i->state.pos];
/*
** Fetch the buffered character for the current position.
** The index is taken relative to the position saved in the
** oldest mark (marks[0]). No bounds check is made here.
*/
static char mpc_input_buffer_get(mpc_input_t* i) {
  /* marks holds mpc_state_t values directly, so the offset is `.pos` */
  return i->buffer[i->state.pos - i->marks[0].pos];
}
/*
** End-of-input test. Returns 1 when a string input's position
** equals the string length, or when feof reports true for a
** file or pipe input. Returns 0 otherwise.
*/
static int mpc_input_eoi(mpc_input_t* i) {
  switch (i->type) {
    case MPC_INPUT_STRING:
      return (i->state.pos == strlen(i->str)) ? 1 : 0;
    case MPC_INPUT_FILE:
    case MPC_INPUT_PIPE:
      return feof(i->file) ? 1 : 0;
    default:
      return 0;
  }
}
/*
** Start-of-input test. Returns 1 when the position is 0,
** otherwise 0.
*/
static int mpc_input_soi(mpc_input_t* i) {
  return (i->state.pos == 0) ? 1 : 0;
}
/*
** Read the character at the current position from whichever
** source backs the input.
**
** String: indexes the stored string at the current position.
** File:   reads one character from the stream.
** Pipe:   takes the character from the buffer when one exists
**         and the position is inside it, otherwise reads one
**         character from the stream.
**
** Returns the character read, or '\0' for an unknown input type.
*/
static char mpc_input_getc(mpc_input_t* i) {

  char c = '\0';

  switch (i->type) {

    case MPC_INPUT_STRING: c = i->str[i->state.pos]; break;
    case MPC_INPUT_FILE: c = fgetc(i->file); break;
    case MPC_INPUT_PIPE:

      /*
      ** Exactly one read per call: with no buffer the stream is
      ** read once, rather than once here and again in a fallback.
      */
      if (i->buffer && mpc_input_buffer_in_range(i)) {
        c = mpc_input_buffer_get(i);
      } else {
        c = getc(i->file);
      }

      break;

    default: break;
  }

  return c;
}
/*
** Undo the most recent character read on the underlying source.
**
** NOTE(review): the pipe case cannot compile as written. No `c`
** is declared in this function, and the standard prototype is
** ungetc(int c, FILE* stream), so the arguments are also in the
** wrong order. Fixing it probably needs the character passed in
** as a parameter - confirm against the callers.
*/
static void mpc_input_ungetc(mpc_input_t* i) {
switch (i->type) {
/* nothing to undo: reading from the string does not move any cursor */
case MPC_INPUT_STRING: break;
/* step the stream back by one character */
case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); break;
case MPC_INPUT_PIPE: ungetc(i->file, c); break;
}
}
/*
** Report a failed match: clears the output pointer, stores `c`
** as the next character in the input state, and returns 0.
*/
static int mpc_input_failure(mpc_input_t* i, char c, char** o) {
  (*o) = NULL;
  (i->state).next = c;
  return 0;
}
static int mpc_input_success(mpc_input_t* i, char c, char** o) {
if (i->type == MPC_INPUT_PIPE &&
i->buffer &&
!mpc_input_buffer_in_range(i)) {
i->buffer = realloc(strlen(i->buffer) + 2);
i->buffer[strlen(i->buffer) + 1] = '\0';
i->buffer[strlen(i->buffer) + 0] = c;
}
i->state.last = c;
i->state.pos++;
i->state.col++;
@@ -419,33 +576,26 @@ static int mpc_input_next(mpc_input_t* i, char** o) {
}
(*o) = malloc(2);
(*o)[0] = i->state.last;
(*o)[0] = c;
(*o)[1] = '\0';
return 1;
}
static int mpc_input_any(mpc_input_t* i, char** o) {
if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return 0; }
if (i->str[i->state.pos] == '\0') {
i->state.next = i->str[i->state.pos];
return 0;
}
return mpc_input_next(i, o);
if (mpc_input_eoi(i)) { return mpc_input_failure(i, '\0', o); }
return mpc_input_update(i, mpc_input_getc(i), o);
}
static int mpc_input_char(mpc_input_t* i, char c, char** o) {
if (i->state.pos >= strlen(i->str)) { i->state.next = '\0'; return 0; }
if (i->str[i->state.pos] != c) {
i->state.next = i->str[i->state.pos];
return 0;
}
char x;
if (mpc_input_eoi(i)) { return mpc_input_failure(i, '\0', o); }
return mpc_input_next(i, o);
x = mpc_input_getc(i);
if (x != c) { mpc_input_ungetc(i); return mpc_input_failure(i, x, o); }
return mpc_input_success(i, x, o);
}
@@ -659,8 +809,8 @@ static void mpc_stack_delete(mpc_stack_t* s) {
static int mpc_stack_terminate(mpc_stack_t* s, mpc_result_t* r) {
int ret;
if (s->parsers_num != 0) { fprintf(stderr, "Still Parsers on stack!\n"); abort(); }
if (s->results_num != 1) { fprintf(stderr, "Still Results on stack!\n"); abort(); }
if (s->parsers_num != 0) { fprintf(stderr, "Fatal Error: Still Parsers on stack!\n"); abort(); }
if (s->results_num != 1) { fprintf(stderr, "Fatal Error: Still Results on stack!\n"); abort(); }
*r = s->results[0];
ret = s->returns[0];
mpc_stack_delete(s);
@@ -844,9 +994,10 @@ static mpc_err_t* mpc_stack_merger_err(mpc_stack_t* s, int n) {
** When this function was written in recursive form
** it looked pretty nice. But I've since switched
** it around to an awkward while loop. It was an
** unfortunate change but in the name of performance
** (and not smashing the stack).
** unfortunate change but it was a noble attempt
** in the name of performance (and not smashing the stack).
**
** But it is now a pretty ugly beast...
*/
#define MPC_RETURN(st, x) mpc_stack_set_state(stk, st); mpc_stack_pushp(stk, x); continue
@@ -875,10 +1026,9 @@ int mpc_parse_input(mpc_input_t* i, mpc_parser_t* init, mpc_result_t* final) {
switch (p->type) {
case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, "Parser Undefined!"));
/* Trivial Parsers */
case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, "Parser Undefined!"));
case MPC_TYPE_PASS: MPC_SUCCESS(NULL);
case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_new_fail(i->filename, i->state, p->data.fail.m));
case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf());
@@ -888,7 +1038,6 @@ int mpc_parse_input(mpc_input_t* i, mpc_parser_t* init, mpc_result_t* final) {
case MPC_TYPE_SOI: MPC_FUNCTION(NULL, mpc_input_soi(i));
case MPC_TYPE_EOI: MPC_FUNCTION(NULL, mpc_input_eoi(i));
case MPC_TYPE_ANY: MPC_FUNCTION(s, mpc_input_any(i, &s));
case MPC_TYPE_SINGLE: MPC_FUNCTION(s, mpc_input_char(i, p->data.single.x, &s));
case MPC_TYPE_RANGE: MPC_FUNCTION(s, mpc_input_range(i, p->data.range.x, p->data.range.y, &s));
@@ -1510,7 +1659,7 @@ mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x) {
return p;
}
mpc_parser_t* mpc_predict(mpc_parser_t* a) {
mpc_parser_t* mpc_predictive(mpc_parser_t* a) {
mpc_parser_t* p = mpc_undefined();
p->type = MPC_TYPE_PREDICT;
p->data.predict.x = a;
@@ -1800,7 +1949,7 @@ mpc_parser_t* mpc_tok_squares(mpc_parser_t* a, mpc_dtor_t ad) { return mpc_tok_
**
** ### Regular Expression Grammar
**
** <regex> : (<term> "|" <regex>) | <term>
** <regex> : <term> | (<term> "|" <regex>)
**
** <term> : <factor>*
**
@@ -1988,7 +2137,7 @@ mpc_parser_t* mpc_re(const char* re) {
mpc_re_range
));
RegexEnclose = mpc_enclose(mpc_predict(Regex), (mpc_dtor_t)mpc_delete);
RegexEnclose = mpc_enclose(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete);
if(!mpc_parse("<mpc_re_compiler>", re, RegexEnclose, &r)) {
err_msg = mpc_err_string_new(r.error);
@@ -2839,7 +2988,7 @@ mpc_parser_t* mpca_grammar_st(const char* grammar, mpca_grammar_st_t* st) {
Base = mpc_new("base");
mpc_define(GrammarTotal,
mpc_apply(mpc_predict(mpc_total(Grammar, mpc_soft_delete)), mpcf_make_root)
mpc_apply(mpc_predictive(mpc_total(Grammar, mpc_soft_delete)), mpcf_make_root)
);
mpc_define(Grammar, mpc_also(
@@ -2990,7 +3139,7 @@ static mpc_err_t* mpca_lang_st(const char* language, mpca_grammar_st_t* st) {
Base = mpc_new("base");
mpc_define(Lang, mpc_apply_to(
mpc_total(mpc_predict(mpc_many(Stmt, mpca_stmt_fold)), mpca_stmt_list_delete),
mpc_total(mpc_predictive(mpc_many(Stmt, mpca_stmt_fold)), mpca_stmt_list_delete),
mpca_stmt_list_apply_to, st
));

2
mpc.h
View File

@@ -102,7 +102,7 @@ mpc_parser_t* mpc_string(const char* s);
mpc_parser_t* mpc_expect(mpc_parser_t* a, const char* expected);
mpc_parser_t* mpc_apply(mpc_parser_t* a, mpc_apply_t f);
mpc_parser_t* mpc_apply_to(mpc_parser_t* a, mpc_apply_to_t f, void* x);
mpc_parser_t* mpc_predict(mpc_parser_t* a);
mpc_parser_t* mpc_predictive(mpc_parser_t* a);
mpc_parser_t* mpc_not(mpc_parser_t* a, mpc_dtor_t da);
mpc_parser_t* mpc_not_else(mpc_parser_t* a, mpc_dtor_t da, mpc_lift_t lf);
mpc_parser_t* mpc_maybe(mpc_parser_t* a);