From a8257e5b8ecf42bedbc60ef756336cc9e761c874 Mon Sep 17 00:00:00 2001 From: Daniel Holden Date: Sun, 17 Nov 2013 18:25:06 +0000 Subject: [PATCH] Regex tests --- README.md | 2 +- mpc.c | 61 ++++++++++++++++++++++++++++++++++++++------------- tests/regex.c | 14 ++++++++++-- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4bf65f5..3f02365 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ mpc_ast_t* parse_maths(const char* input) { If you were to input something like `"(4 * 2 * 11 + 2) - 5"` into this function the `mpc_ast_t` you get out would look something like this: -```c +```python >: value|>: char: '(' diff --git a/mpc.c b/mpc.c index 4b0952e..ad21561 100644 --- a/mpc.c +++ b/mpc.c @@ -1804,11 +1804,12 @@ static mpc_val_t* mpcf_re_repeat(int n, mpc_val_t** xs) { return mpc_count(num, mpcf_strfold, xs[0], free); } -static mpc_parser_t* mpc_re_escape_char(char c, int range) { +static mpc_parser_t* mpc_re_escape_char(char c) { switch (c) { case 'a': return mpc_char('\a'); case 'f': return mpc_char('\f'); case 'n': return mpc_char('\n'); + case 'r': return mpc_char('\r'); case 't': return mpc_char('\t'); case 'v': return mpc_char('\v'); case 'b': return mpc_char('\b'); @@ -1836,7 +1837,7 @@ static mpc_val_t* mpcf_re_escape(mpc_val_t* x) { /* Regex Escape */ if (s[0] == '\\') { - p = mpc_re_escape_char(s[1], 0); + p = mpc_re_escape_char(s[1]); p = (p == NULL) ? mpc_char(s[1]) : p; free(s); return p; @@ -1848,18 +1849,35 @@ static mpc_val_t* mpcf_re_escape(mpc_val_t* x) { return p; } +static char* mpc_re_range_escape_char(char c) { + switch (c) { + case '-': return "-"; + case 'a': return "\a"; + case 'f': return "\f"; + case 'n': return "\n"; + case 'r': return "\r"; + case 't': return "\t"; + case 'v': return "\v"; + case 'b': return "\b"; + case 'd': return "0123456789"; + case 's': return " \f\n\r\t\v"; + case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + default: return NULL; + } +} + static mpc_val_t* mpcf_re_range(mpc_val_t* x) { + char* range = calloc(1,1); + char* tmp = NULL; char* s = x; - int i = 0; + char start, end; + int i, j; int comp = 0; - mpc_parser_t* q = NULL; - mpc_parser_t* p = mpc_failf("Invalid Range Specifier"); - - if (s[0] == '\0') { free(x); return p; } + if (s[0] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); } if (s[0] == '^' && - s[1] == '\0') { free(x); return p; } + s[1] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); } if (s[0] == '^') { comp = 1;} @@ -1867,28 +1885,41 @@ static mpc_val_t* mpcf_re_range(mpc_val_t* x) { /* Regex Range Escape */ if (s[i] == '\\') { - q = mpc_re_escape_char(s[i+1], 1); - q = (q == NULL) ? mpc_char(s[i+1]) : q; - p = mpc_or(2, p, q); + tmp = mpc_re_range_escape_char(s[i+1]); + if (tmp != NULL) { + range = realloc(range, strlen(range) + strlen(tmp) + 1); + strcat(range, tmp); + } i++; } /* Regex Range...Range */ else if (s[i] == '-') { if (s[i+1] == '\0' || i == 0) { - p = mpc_or(2, p, mpc_char('-')); + range = realloc(range, strlen(range) + strlen("-") + 1); + strcat(range, "-"); } else { - p = mpc_or(2, p, mpc_range(s[i-1]+1, s[i+1]-1)); + start = s[i-1]+1; + end = s[i+1]-1; + for (j = start; j <= end; j++) { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = j; + } } } /* Regex Range Normal */ - else { p = mpc_or(2, p, mpc_char(s[i])); } + else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i]; + } } free(x); - return comp ? mpc_not_lift(p, free, mpcf_ctor_str) : p; + return comp ? mpc_noneof(range) : mpc_oneof(range); } static mpc_val_t* mpcf_re_invalid(void) { diff --git a/tests/regex.c b/tests/regex.c index 0216700..89f1316 100644 --- a/tests/regex.c +++ b/tests/regex.c @@ -9,7 +9,7 @@ static void string_print(void* x) { printf("'%s'", (char*)x); } void test_regex_basic(void) { - mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5; + mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5, *re6, *re7; re0 = mpc_re("abc|bcd"); re1 = mpc_re("abc|bcd|e"); @@ -17,6 +17,8 @@ void test_regex_basic(void) { re3 = mpc_re("abc(abdd)?"); re4 = mpc_re("ab|c(abdd)?"); re5 = mpc_re("abc(ab|dd)+g$"); + re6 = mpc_re("\"(\\\\.|[^\"])*\""); + re7 = mpc_re(";[^\\n\\r]*"); PT_ASSERT(mpc_match(re0, "abc", "abc", string_eq, free, string_print)); PT_ASSERT(mpc_match(re0, "bcd", "bcd", string_eq, free, string_print)); @@ -26,7 +28,13 @@ void test_regex_basic(void) { PT_ASSERT(mpc_match(re2, "abc", "abc", string_eq, free, string_print)); PT_ASSERT(mpc_match(re2, "abcabab", "abcabab", string_eq, free, string_print)); PT_ASSERT(mpc_match(re2, "abcababd", "abcabab", string_eq, free, string_print)); - PT_ASSERT(mpc_match(re5, "abck", "", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re5, "abcddg", "abcddg", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re6, "\"there\"", "\"there\"", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re6, "\"hello\"", "\"hello\"", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re6, "\"i am dan\"", "\"i am dan\"", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re6, "\"i a\\\"m dan\"", "\"i a\\\"m dan\"", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re7, ";comment", ";comment", string_eq, free, string_print)); + PT_ASSERT(mpc_match(re7, ";i am the\nman", ";i am the", string_eq, free, string_print)); mpc_delete(re0); mpc_delete(re1); @@ -34,6 +42,8 @@ void test_regex_basic(void) { mpc_delete(re3); mpc_delete(re4); mpc_delete(re5); + mpc_delete(re6); + mpc_delete(re7); }