From fbc5d8f9ae7fe85c44fadf9987f93a69b60a718a Mon Sep 17 00:00:00 2001 From: Daniel Holden Date: Sat, 13 Oct 2018 18:27:42 -0400 Subject: [PATCH] Added line-reader example. Changed behaviour of eof on regex to parse either eof or a newline followed by eof (better matches other regex engines). --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++-- examples/line_reader.c | 34 +++++++++++++++++++++++ mpc.c | 7 ++++- tests/core.c | 12 ++++++++ 4 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 examples/line_reader.c diff --git a/README.md b/README.md index e5f387d..8a7cd93 100644 --- a/README.md +++ b/README.md @@ -684,9 +684,9 @@ Case Study - Maths Language Combinator Approach ------------------- -Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own ouput types for parsers. For example we could design our own syntax tree type to use. We can also use this method to do some specific house-keeping or data processing in the parsing phase. +Passing around all these function pointers might seem clumsy, but having parsers be type-generic is important as it lets users define their own output types for parsers. For example we could design our own syntax tree type to use. We can also use this method to do some specific house-keeping or data processing in the parsing phase. -As an example of this power, we can specify a simple maths grammar, that ouputs `int *`, and computes the result of the expression as it goes along. +As an example of this power, we can specify a simple maths grammar, that outputs `int *`, and computes the result of the expression as it goes along. We start with a fold function that will fold two `int *` into a new `int *` based on some `char *` operator. @@ -807,6 +807,64 @@ mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`. 
+Case Study - Line Reader +======================== + +Another common task we might be interested in doing is parsing a file line by line and doing something on each line we encounter. For this we can set up something like the following: + +First, we can build a regular expression which parses a single line: `mpc_re("[^\\n]*(\\n|$)")`. Next, we can add a callback function using `mpc_apply` which gets called every time a line is parsed successfully: `mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)`. Finally, we can surround all of this in `mpc_many` to parse zero or more lines. The final thing might look something like this: + +```c +static void* read_line(void* line) { + printf("Reading Line: %s", (char*)line); + return line; +} + +int main(int argc, char **argv) { + + const char *input = + "abcHVwufvyuevuy3y436782\n" + "\n" + "\n" + "rehre\n" + "rew\n" + "-ql.;qa\n" + "eg"; + + mpc_parser_t* Line = mpc_many( + mpcf_strfold, + mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); + + mpc_result_t r; + + mpc_parse("input", input, Line, &r); + printf("\nParsed String: %s", (char*)r.output); + free(r.output); + + mpc_delete(Line); + + return 0; +} +``` + +This program will produce output something like this: + +``` +Reading Line: abcHVwufvyuevuy3y436782 +Reading Line: +Reading Line: +Reading Line: rehre +Reading Line: rew +Reading Line: -ql.;qa +Reading Line: eg +Parsed String: abcHVwufvyuevuy3y436782 + + +rehre +rew +-ql.;qa +eg +``` Error Reporting =============== diff --git a/examples/line_reader.c b/examples/line_reader.c new file mode 100644 index 0000000..1354d42 --- /dev/null +++ b/examples/line_reader.c @@ -0,0 +1,34 @@ +#include "../mpc.h" + +static void* read_line(void* line) { + printf("Reading Line: %s", (char*)line); + return line; +} + +int main(int argc, char **argv) { + + const char *input = + "abcHVwufvyuevuy3y436782\n" + "\n" + "\n" + "rehre\n" + "rew\n" + "-ql.;qa\n" + "eg"; + + mpc_parser_t* Line = mpc_many( + mpcf_strfold, + 
mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)); + + mpc_result_t r; + + (void)argc; (void)argv; + + mpc_parse("input", input, Line, &r); + printf("\nParsed String: %s", (char*)r.output); + free(r.output); + + mpc_delete(Line); + + return 0; +} \ No newline at end of file diff --git a/mpc.c b/mpc.c index f19371a..69540a6 100644 --- a/mpc.c +++ b/mpc.c @@ -2200,7 +2200,12 @@ static mpc_val_t *mpcf_re_escape(mpc_val_t *x) { /* Regex Special Characters */ if (s[0] == '.') { free(s); return mpc_any(); } if (s[0] == '^') { free(s); return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); } - if (s[0] == '$') { free(s); return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); } + if (s[0] == '$') { + free(s); + return mpc_or(2, + mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } /* Regex Escape */ if (s[0] == '\\') { diff --git a/tests/core.c b/tests/core.c index 3a4bd90..bd450cf 100644 --- a/tests/core.c +++ b/tests/core.c @@ -185,6 +185,17 @@ void test_reader(void) { } +void test_eoi(void) { + + mpc_parser_t* Line = mpc_re("[^\\n]*$"); + + PT_ASSERT(mpc_test_pass(Line, "blah", "blah", streq, free, strprint)); + PT_ASSERT(mpc_test_pass(Line, "blah\n", "blah\n", streq, free, strprint)); + + mpc_delete(Line); + +} + void suite_core(void) { pt_add_test(test_ident, "Test Ident", "Suite Core"); pt_add_test(test_maths, "Test Maths", "Suite Core"); @@ -192,4 +203,5 @@ void suite_core(void) { pt_add_test(test_repeat, "Test Repeat", "Suite Core"); pt_add_test(test_copy, "Test Copy", "Suite Core"); pt_add_test(test_reader, "Test Reader", "Suite Core"); + pt_add_test(test_eoi, "Test EOI", "Suite Core"); }