Added mode option to regex and also changed example from a line reader to a tokenizer.
This commit is contained in:
90
README.md
90
README.md
@@ -560,6 +560,20 @@ This function makes a copy of a parser `a`. This can be useful when you want to
|
|||||||
use a parser as input for some other parsers multiple times without retaining
|
use a parser as input for some other parsers multiple times without retaining
|
||||||
it.
|
it.
|
||||||
|
|
||||||
|
* * *
|
||||||
|
|
||||||
|
```c
|
||||||
|
mpc_parser_t *mpc_re(const char *re);
|
||||||
|
mpc_parser_t *mpc_re_mode(const char *re, int mode);
|
||||||
|
```
|
||||||
|
|
||||||
|
This function takes as input the regular expression `re` and builds a parser
|
||||||
|
for it. With the `mpc_re_mode` function optional mode flags can also be given.
|
||||||
|
Available flags are `MPC_RE_MULTILINE` / `MPC_RE_M`, where the start of input
|
||||||
|
character `^` also matches the beginning of new lines and the end of input
|
||||||
|
character `$` also matches newlines, and `MPC_RE_DOTALL` / `MPC_RE_S`, where
|
||||||
|
the any-character token `.` also matches newlines (by default it doesn't).
|
||||||
|
|
||||||
|
|
||||||
Library Reference
|
Library Reference
|
||||||
=================
|
=================
|
||||||
@@ -573,6 +587,7 @@ Common Parsers
|
|||||||
<tr><td><code>mpc_soi</code></td><td>Matches only the start of input, returns <code>NULL</code></td></tr>
|
<tr><td><code>mpc_soi</code></td><td>Matches only the start of input, returns <code>NULL</code></td></tr>
|
||||||
<tr><td><code>mpc_eoi</code></td><td>Matches only the end of input, returns <code>NULL</code></td></tr>
|
<tr><td><code>mpc_eoi</code></td><td>Matches only the end of input, returns <code>NULL</code></td></tr>
|
||||||
<tr><td><code>mpc_boundary</code></td><td>Matches only the boundary between words, returns <code>NULL</code></td></tr>
|
<tr><td><code>mpc_boundary</code></td><td>Matches only the boundary between words, returns <code>NULL</code></td></tr>
|
||||||
|
<tr><td><code>mpc_boundary_newline</code></td><td>Matches the start of a new line, returns <code>NULL</code></td></tr>
|
||||||
<tr><td><code>mpc_whitespace</code></td><td>Matches any whitespace character <code>" \f\n\r\t\v"</code></td></tr>
|
<tr><td><code>mpc_whitespace</code></td><td>Matches any whitespace character <code>" \f\n\r\t\v"</code></td></tr>
|
||||||
<tr><td><code>mpc_whitespaces</code></td><td>Matches zero or more whitespace characters</td></tr>
|
<tr><td><code>mpc_whitespaces</code></td><td>Matches zero or more whitespace characters</td></tr>
|
||||||
<tr><td><code>mpc_blank</code></td><td>Matches whitespaces and frees the result, returns <code>NULL</code></td></tr>
|
<tr><td><code>mpc_blank</code></td><td>Matches whitespaces and frees the result, returns <code>NULL</code></td></tr>
|
||||||
@@ -807,65 +822,64 @@ mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...);
|
|||||||
|
|
||||||
This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`.
|
This opens and reads in the contents of the file given by `filename` and passes it to `mpca_lang`.
|
||||||
|
|
||||||
Case Study - Line Reader
|
Case Study - Tokenizer
|
||||||
========================
|
======================
|
||||||
|
|
||||||
Another common task we might be interested in doing is parsing a file line by line and doing something on each line we encounter. For this we can setup something like the following:
|
Another common task we might be interested in doing is tokenizing some block of
|
||||||
|
text (splitting the text into individual elements) and performing some function
|
||||||
|
on each one of these elements as it is read. We can do this with `mpc` too.
|
||||||
|
|
||||||
First, we can build a regular expression which parses a single line: `mpc_re("[^\\n]*(\\n|$)")`, next we can add a callback function using `mpc_apply` which gets called every time a line is parsed successfully `mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line)`. Finally we can surround all of this in `mpc_many` to parse zero or more lines. The final thing might look something like this:
|
First, we can build a regular expression which parses an individual token. For
|
||||||
|
example if our tokens are identifiers, integers, commas, periods and colons we
|
||||||
|
could build something like this `mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")`.
|
||||||
|
Next we can strip any whitespace, and add a callback function using `mpc_apply`
|
||||||
|
which gets called every time this regex is parsed successfully
|
||||||
|
`mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)`.
|
||||||
|
Finally we can surround all of this in `mpc_many` to parse it zero or more
|
||||||
|
times. The final code might look something like this:
|
||||||
|
|
||||||
```c
|
```c
|
||||||
static void* read_line(void* line) {
|
static mpc_val_t *print_token(mpc_val_t *x) {
|
||||||
printf("Reading Line: %s", (char*)line);
|
printf("Token: '%s'\n", (char*)x);
|
||||||
return line;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
const char *input =
|
const char *input = " hello 4352 , \n foo.bar \n\n test:ing ";
|
||||||
"abcHVwufvyuevuy3y436782\n"
|
|
||||||
"\n"
|
|
||||||
"\n"
|
|
||||||
"rehre\n"
|
|
||||||
"rew\n"
|
|
||||||
"-ql.;qa\n"
|
|
||||||
"eg";
|
|
||||||
|
|
||||||
mpc_parser_t* Line = mpc_many(
|
mpc_parser_t* Tokens = mpc_many(
|
||||||
mpcf_strfold,
|
mpcf_all_free,
|
||||||
mpc_apply(mpc_re("[^\\n]*(\\n|$)"), read_line));
|
mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token));
|
||||||
|
|
||||||
mpc_result_t r;
|
mpc_result_t r;
|
||||||
|
mpc_parse("input", input, Tokens, &r);
|
||||||
|
|
||||||
mpc_parse("input", input, Line, &r);
|
mpc_delete(Tokens);
|
||||||
printf("\nParsed String: %s", (char*)r.output);
|
|
||||||
free(r.output);
|
|
||||||
|
|
||||||
mpc_delete(Line);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
This program will produce an output something like this:
|
Running this program will produce an output something like this:
|
||||||
|
|
||||||
```
|
```
|
||||||
Reading Line: abcHVwufvyuevuy3y436782
|
Token: 'hello'
|
||||||
Reading Line:
|
Token: '4352'
|
||||||
Reading Line:
|
Token: ','
|
||||||
Reading Line: rehre
|
Token: 'foo'
|
||||||
Reading Line: rew
|
Token: '.'
|
||||||
Reading Line: -ql.;qa
|
Token: 'bar'
|
||||||
Reading Line: eg
|
Token: 'test'
|
||||||
Parsed String: abcHVwufvyuevuy3y436782
|
Token: ':'
|
||||||
|
Token: 'ing'
|
||||||
|
|
||||||
rehre
|
|
||||||
rew
|
|
||||||
-ql.;qa
|
|
||||||
eg
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
By extending the regex we can support many more types of tokens, and
|
||||||
|
quickly build a tokenizer for whatever language we are
|
||||||
|
interested in.
|
||||||
|
|
||||||
|
|
||||||
Error Reporting
|
Error Reporting
|
||||||
===============
|
===============
|
||||||
|
|
||||||
|
73
mpc.c
73
mpc.c
@@ -1979,7 +1979,13 @@ static int mpc_boundary_anchor(char prev, char next) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "boundary"); }
|
static int mpc_boundary_newline_anchor(char prev, char next) {
|
||||||
|
(void)next;
|
||||||
|
return prev == '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); }
|
||||||
|
mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); }
|
||||||
|
|
||||||
mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); }
|
mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); }
|
||||||
mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); }
|
mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); }
|
||||||
@@ -2192,20 +2198,45 @@ static mpc_parser_t *mpc_re_escape_char(char c) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static mpc_val_t *mpcf_re_escape(mpc_val_t *x) {
|
static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) {
|
||||||
|
|
||||||
|
int mode = *((int*)data);
|
||||||
char *s = x;
|
char *s = x;
|
||||||
mpc_parser_t *p;
|
mpc_parser_t *p;
|
||||||
|
|
||||||
/* Regex Special Characters */
|
/* Any Character */
|
||||||
if (s[0] == '.') { free(s); return mpc_any(); }
|
if (s[0] == '.') {
|
||||||
if (s[0] == '^') { free(s); return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); }
|
free(s);
|
||||||
|
if (mode & MPC_RE_DOTALL) {
|
||||||
|
return mpc_any();
|
||||||
|
} else {
|
||||||
|
return mpc_expect(mpc_noneof("\n"), "any character except a newline");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Start of Input */
|
||||||
|
if (s[0] == '^') {
|
||||||
|
free(s);
|
||||||
|
if (mode & MPC_RE_MULTILINE) {
|
||||||
|
return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free);
|
||||||
|
} else {
|
||||||
|
return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* End of Input */
|
||||||
if (s[0] == '$') {
|
if (s[0] == '$') {
|
||||||
free(s);
|
free(s);
|
||||||
|
if (mode & MPC_RE_MULTILINE) {
|
||||||
|
return mpc_or(2,
|
||||||
|
mpc_newline(),
|
||||||
|
mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free));
|
||||||
|
} else {
|
||||||
return mpc_or(2,
|
return mpc_or(2,
|
||||||
mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free),
|
mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free),
|
||||||
mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free));
|
mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Regex Escape */
|
/* Regex Escape */
|
||||||
if (s[0] == '\\') {
|
if (s[0] == '\\') {
|
||||||
@@ -2302,6 +2333,10 @@ static mpc_val_t *mpcf_re_range(mpc_val_t *x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mpc_parser_t *mpc_re(const char *re) {
|
mpc_parser_t *mpc_re(const char *re) {
|
||||||
|
return mpc_re_mode(re, MPC_RE_DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
mpc_parser_t *mpc_re_mode(const char *re, int mode) {
|
||||||
|
|
||||||
char *err_msg;
|
char *err_msg;
|
||||||
mpc_parser_t *err_out;
|
mpc_parser_t *err_out;
|
||||||
@@ -2334,8 +2369,8 @@ mpc_parser_t *mpc_re(const char *re) {
|
|||||||
mpc_define(Base, mpc_or(4,
|
mpc_define(Base, mpc_or(4,
|
||||||
mpc_parens(Regex, (mpc_dtor_t)mpc_delete),
|
mpc_parens(Regex, (mpc_dtor_t)mpc_delete),
|
||||||
mpc_squares(Range, (mpc_dtor_t)mpc_delete),
|
mpc_squares(Range, (mpc_dtor_t)mpc_delete),
|
||||||
mpc_apply(mpc_escape(), mpcf_re_escape),
|
mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode),
|
||||||
mpc_apply(mpc_noneof(")|"), mpcf_re_escape)
|
mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode)
|
||||||
));
|
));
|
||||||
|
|
||||||
mpc_define(Range, mpc_apply(
|
mpc_define(Range, mpc_apply(
|
||||||
@@ -3320,7 +3355,7 @@ mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_
|
|||||||
** <base> : "<" (<digits> | <ident>) ">"
|
** <base> : "<" (<digits> | <ident>) ">"
|
||||||
** | <string_lit>
|
** | <string_lit>
|
||||||
** | <char_lit>
|
** | <char_lit>
|
||||||
** | <regex_lit>
|
** | <regex_lit> <regex_mode>
|
||||||
** | "(" <grammar> ")"
|
** | "(" <grammar> ")"
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -3379,11 +3414,21 @@ static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) {
|
|||||||
return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char"));
|
return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static mpc_val_t *mpcaf_grammar_regex(mpc_val_t *x, void *s) {
|
static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) {
|
||||||
mpca_grammar_st_t *st = s;
|
char *y = xs[0];
|
||||||
char *y = mpcf_unescape_regex(x);
|
char *m = xs[1];
|
||||||
mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re(y) : mpc_tok(mpc_re(y));
|
mpca_grammar_st_t *st = xs[2];
|
||||||
|
mpc_parser_t *p;
|
||||||
|
int mode = MPC_RE_DEFAULT;
|
||||||
|
|
||||||
|
(void)n;
|
||||||
|
if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; }
|
||||||
|
if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; }
|
||||||
|
y = mpcf_unescape_regex(y);
|
||||||
|
p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode));
|
||||||
free(y);
|
free(y);
|
||||||
|
free(m);
|
||||||
|
|
||||||
return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex"));
|
return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3496,7 +3541,7 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) {
|
|||||||
mpc_define(Base, mpc_or(5,
|
mpc_define(Base, mpc_or(5,
|
||||||
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
||||||
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
||||||
mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st),
|
mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)),
|
||||||
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
||||||
mpc_tok_parens(Grammar, mpc_soft_delete)
|
mpc_tok_parens(Grammar, mpc_soft_delete)
|
||||||
));
|
));
|
||||||
@@ -3658,7 +3703,7 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) {
|
|||||||
mpc_define(Base, mpc_or(5,
|
mpc_define(Base, mpc_or(5,
|
||||||
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
|
||||||
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
|
||||||
mpc_apply_to(mpc_tok(mpc_regex_lit()), mpcaf_grammar_regex, st),
|
mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)),
|
||||||
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
|
||||||
mpc_tok_parens(Grammar, mpc_soft_delete)
|
mpc_tok_parens(Grammar, mpc_soft_delete)
|
||||||
));
|
));
|
||||||
|
10
mpc.h
10
mpc.h
@@ -156,6 +156,7 @@ mpc_parser_t *mpc_eoi(void);
|
|||||||
mpc_parser_t *mpc_soi(void);
|
mpc_parser_t *mpc_soi(void);
|
||||||
|
|
||||||
mpc_parser_t *mpc_boundary(void);
|
mpc_parser_t *mpc_boundary(void);
|
||||||
|
mpc_parser_t *mpc_boundary_newline(void);
|
||||||
|
|
||||||
mpc_parser_t *mpc_whitespace(void);
|
mpc_parser_t *mpc_whitespace(void);
|
||||||
mpc_parser_t *mpc_whitespaces(void);
|
mpc_parser_t *mpc_whitespaces(void);
|
||||||
@@ -264,7 +265,16 @@ mpc_val_t *mpcf_maths(int n, mpc_val_t** xs);
|
|||||||
** Regular Expression Parsers
|
** Regular Expression Parsers
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
enum {
|
||||||
|
MPC_RE_DEFAULT = 0,
|
||||||
|
MPC_RE_M = 1,
|
||||||
|
MPC_RE_S = 2,
|
||||||
|
MPC_RE_MULTILINE = 1,
|
||||||
|
MPC_RE_DOTALL = 2
|
||||||
|
};
|
||||||
|
|
||||||
mpc_parser_t *mpc_re(const char *re);
|
mpc_parser_t *mpc_re(const char *re);
|
||||||
|
mpc_parser_t *mpc_re_mode(const char *re, int mode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** AST
|
** AST
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mpc",
|
"name": "mpc",
|
||||||
"version": "0.8.8",
|
"version": "0.9.8",
|
||||||
"repo": "orangeduck/mpc",
|
"repo": "orangeduck/mpc",
|
||||||
"description": "A Parser Combinator library for C",
|
"description": "A Parser Combinator library for C",
|
||||||
"keywords": ["parser", "combinator", "library", "c", "mpc"],
|
"keywords": ["parser", "combinator", "library", "c", "mpc"],
|
||||||
|
29
tests/core.c
29
tests/core.c
@@ -154,7 +154,7 @@ void test_copy(void) {
|
|||||||
|
|
||||||
static int line_count = 0;
|
static int line_count = 0;
|
||||||
|
|
||||||
static void* read_line(void* line) {
|
static mpc_val_t* read_line(mpc_val_t* line) {
|
||||||
line_count++;
|
line_count++;
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
@@ -185,6 +185,32 @@ void test_reader(void) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int token_count = 0;
|
||||||
|
|
||||||
|
static mpc_val_t *print_token(mpc_val_t *x) {
|
||||||
|
printf("Token: '%s'\n", (char*)x);
|
||||||
|
token_count++;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_tokens(void) {
|
||||||
|
|
||||||
|
mpc_parser_t* Tokens = mpc_many(
|
||||||
|
mpcf_strfold,
|
||||||
|
mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token));
|
||||||
|
|
||||||
|
token_count = 0;
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Tokens,
|
||||||
|
" hello 4352 , \n foo.bar \n\n test:ing ",
|
||||||
|
"hello4352,foo.bartest:ing", streq, free, strprint));
|
||||||
|
|
||||||
|
PT_ASSERT(token_count == 9);
|
||||||
|
|
||||||
|
mpc_delete(Tokens);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void test_eoi(void) {
|
void test_eoi(void) {
|
||||||
|
|
||||||
mpc_parser_t* Line = mpc_re("[^\\n]*$");
|
mpc_parser_t* Line = mpc_re("[^\\n]*$");
|
||||||
@@ -203,5 +229,6 @@ void suite_core(void) {
|
|||||||
pt_add_test(test_repeat, "Test Repeat", "Suite Core");
|
pt_add_test(test_repeat, "Test Repeat", "Suite Core");
|
||||||
pt_add_test(test_copy, "Test Copy", "Suite Core");
|
pt_add_test(test_copy, "Test Copy", "Suite Core");
|
||||||
pt_add_test(test_reader, "Test Reader", "Suite Core");
|
pt_add_test(test_reader, "Test Reader", "Suite Core");
|
||||||
|
pt_add_test(test_tokens, "Test Tokens", "Suite Core");
|
||||||
pt_add_test(test_eoi, "Test EOI", "Suite Core");
|
pt_add_test(test_eoi, "Test EOI", "Suite Core");
|
||||||
}
|
}
|
||||||
|
@@ -163,6 +163,8 @@ void test_partial(void) {
|
|||||||
(mpc_dtor_t)mpc_ast_delete,
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
(void(*)(const void*))mpc_ast_print));
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
mpc_ast_delete(t0);
|
||||||
|
|
||||||
mpc_cleanup(5, Line, Number, QuotedString, LinePragma, Parser);
|
mpc_cleanup(5, Line, Number, QuotedString, LinePragma, Parser);
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -248,6 +250,8 @@ void test_qscript(void) {
|
|||||||
(mpc_dtor_t)mpc_ast_delete,
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
(void(*)(const void*))mpc_ast_print));
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
mpc_ast_delete(t0);
|
||||||
|
|
||||||
mpc_cleanup(18, Qscript, Comment, Resource, Rtype, Rname, InnerBlock,
|
mpc_cleanup(18, Qscript, Comment, Resource, Rtype, Rname, InnerBlock,
|
||||||
Statement, Function, Parameter, Literal, Block, Seperator, Qstring,
|
Statement, Function, Parameter, Literal, Block, Seperator, Qstring,
|
||||||
SimpleStr, ComplexStr, Number, Float, Int);
|
SimpleStr, ComplexStr, Number, Float, Int);
|
||||||
@@ -278,6 +282,61 @@ void test_missingrule(void) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_regex_mode(void) {
|
||||||
|
|
||||||
|
mpc_parser_t *Line0, *Line1, *Line2, *Line3;
|
||||||
|
mpc_ast_t *t0, *t1, *t2, *t3, *t4;
|
||||||
|
|
||||||
|
Line0 = mpc_new("line0");
|
||||||
|
Line1 = mpc_new("line1");
|
||||||
|
Line2 = mpc_new("line2");
|
||||||
|
Line3 = mpc_new("line3");
|
||||||
|
|
||||||
|
mpca_lang(MPCA_LANG_DEFAULT, " line0 : /.*/; ", Line0);
|
||||||
|
mpca_lang(MPCA_LANG_DEFAULT, " line1 : /.*/s; ", Line1);
|
||||||
|
mpca_lang(MPCA_LANG_DEFAULT, " line2 : /(^[a-z]*$)*/; ", Line2);
|
||||||
|
mpca_lang(MPCA_LANG_DEFAULT, " line3 : /(^[a-z]*$)*/m; ", Line3);
|
||||||
|
|
||||||
|
t0 = mpc_ast_new("regex", "blah");
|
||||||
|
t1 = mpc_ast_new("regex", "blah\nblah");
|
||||||
|
t2 = mpc_ast_new("regex", "");
|
||||||
|
t3 = mpc_ast_new("regex", "blah");
|
||||||
|
t4 = mpc_ast_new("regex", "blah\nblah");
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Line0, "blah\nblah", t0,
|
||||||
|
(int(*)(const void*,const void*))mpc_ast_eq,
|
||||||
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Line1, "blah\nblah", t1,
|
||||||
|
(int(*)(const void*,const void*))mpc_ast_eq,
|
||||||
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Line2, "blah\nblah", t2,
|
||||||
|
(int(*)(const void*,const void*))mpc_ast_eq,
|
||||||
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Line2, "blah", t3,
|
||||||
|
(int(*)(const void*,const void*))mpc_ast_eq,
|
||||||
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
PT_ASSERT(mpc_test_pass(Line3, "blah\nblah", t4,
|
||||||
|
(int(*)(const void*,const void*))mpc_ast_eq,
|
||||||
|
(mpc_dtor_t)mpc_ast_delete,
|
||||||
|
(void(*)(const void*))mpc_ast_print));
|
||||||
|
|
||||||
|
mpc_ast_delete(t0);
|
||||||
|
mpc_ast_delete(t1);
|
||||||
|
mpc_ast_delete(t2);
|
||||||
|
mpc_ast_delete(t3);
|
||||||
|
mpc_ast_delete(t4);
|
||||||
|
|
||||||
|
mpc_cleanup(4, Line0, Line1, Line2, Line3);
|
||||||
|
}
|
||||||
|
|
||||||
void suite_grammar(void) {
|
void suite_grammar(void) {
|
||||||
pt_add_test(test_grammar, "Test Grammar", "Suite Grammar");
|
pt_add_test(test_grammar, "Test Grammar", "Suite Grammar");
|
||||||
pt_add_test(test_language, "Test Language", "Suite Grammar");
|
pt_add_test(test_language, "Test Language", "Suite Grammar");
|
||||||
@@ -286,4 +345,5 @@ void suite_grammar(void) {
|
|||||||
pt_add_test(test_partial, "Test Partial", "Suite Grammar");
|
pt_add_test(test_partial, "Test Partial", "Suite Grammar");
|
||||||
pt_add_test(test_qscript, "Test QScript", "Suite Grammar");
|
pt_add_test(test_qscript, "Test QScript", "Suite Grammar");
|
||||||
pt_add_test(test_missingrule, "Test Missing Rule", "Suite Grammar");
|
pt_add_test(test_missingrule, "Test Missing Rule", "Suite Grammar");
|
||||||
|
pt_add_test(test_regex_mode, "Test Regex Mode", "Suite Grammar");
|
||||||
}
|
}
|
||||||
|
@@ -132,6 +132,43 @@ void test_regex_newline(void) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test_regex_multiline(void) {
|
||||||
|
|
||||||
|
mpc_parser_t *re0 = mpc_re_mode("(^[a-z]*$)*", MPC_RE_MULTILINE);
|
||||||
|
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "hello\nhello", "hello\nhello"));
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "hello\nhello\n", "hello\nhello\n"));
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "\nblah\n\nblah\n", "\nblah\n\nblah\n"));
|
||||||
|
PT_ASSERT(regex_test_fail(re0, "45234", "45234"));
|
||||||
|
PT_ASSERT(regex_test_fail(re0, "\n45234", "\n45234"));
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "\n45234", "\n"));
|
||||||
|
|
||||||
|
mpc_delete(re0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_regex_dotall(void) {
|
||||||
|
|
||||||
|
mpc_parser_t *re0 = mpc_re_mode("^.*$", MPC_RE_DEFAULT);
|
||||||
|
mpc_parser_t *re1 = mpc_re_mode("^.*$", MPC_RE_DOTALL);
|
||||||
|
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "hello", "hello"));
|
||||||
|
PT_ASSERT(regex_test_fail(re0, "hello\n", "hello"));
|
||||||
|
PT_ASSERT(regex_test_fail(re0, "he\nllo\n", "he"));
|
||||||
|
PT_ASSERT(regex_test_pass(re0, "34njaksdklmasd", "34njaksdklmasd"));
|
||||||
|
PT_ASSERT(regex_test_fail(re0, "34njaksd\nklmasd", "34njaksd"));
|
||||||
|
|
||||||
|
PT_ASSERT(regex_test_pass(re1, "hello", "hello"));
|
||||||
|
PT_ASSERT(regex_test_pass(re1, "hello\n", "hello\n"));
|
||||||
|
PT_ASSERT(regex_test_pass(re1, "he\nllo\n", "he\nllo\n"));
|
||||||
|
PT_ASSERT(regex_test_pass(re1, "34njaksdklmasd", "34njaksdklmasd"));
|
||||||
|
PT_ASSERT(regex_test_pass(re1, "34njaksd\nklmasd", "34njaksd\nklmasd"));
|
||||||
|
|
||||||
|
mpc_delete(re0);
|
||||||
|
mpc_delete(re1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void suite_regex(void) {
|
void suite_regex(void) {
|
||||||
pt_add_test(test_regex_basic, "Test Regex Basic", "Suite Regex");
|
pt_add_test(test_regex_basic, "Test Regex Basic", "Suite Regex");
|
||||||
pt_add_test(test_regex_range, "Test Regex Range", "Suite Regex");
|
pt_add_test(test_regex_range, "Test Regex Range", "Suite Regex");
|
||||||
@@ -139,4 +176,6 @@ void suite_regex(void) {
|
|||||||
pt_add_test(test_regex_lisp_comment, "Test Regex Lisp Comment", "Suite Regex");
|
pt_add_test(test_regex_lisp_comment, "Test Regex Lisp Comment", "Suite Regex");
|
||||||
pt_add_test(test_regex_boundary, "Test Regex Boundary", "Suite Regex");
|
pt_add_test(test_regex_boundary, "Test Regex Boundary", "Suite Regex");
|
||||||
pt_add_test(test_regex_newline, "Test Regex Newline", "Suite Regex");
|
pt_add_test(test_regex_newline, "Test Regex Newline", "Suite Regex");
|
||||||
|
pt_add_test(test_regex_multiline, "Test Regex Multiline", "Suite Regex");
|
||||||
|
pt_add_test(test_regex_dotall, "Test Regex Dotall", "Suite Regex");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user