Regex tests
This commit is contained in:
@@ -75,7 +75,7 @@ mpc_ast_t* parse_maths(const char* input) {
|
|||||||
|
|
||||||
If you were to input something like `"(4 * 2 * 11 + 2) - 5"` into this function the `mpc_ast_t` you get out would look something like this:
|
If you were to input something like `"(4 * 2 * 11 + 2) - 5"` into this function the `mpc_ast_t` you get out would look something like this:
|
||||||
|
|
||||||
```c
|
```python
|
||||||
>:
|
>:
|
||||||
value|>:
|
value|>:
|
||||||
char: '('
|
char: '('
|
||||||
|
61
mpc.c
61
mpc.c
@@ -1804,11 +1804,12 @@ static mpc_val_t* mpcf_re_repeat(int n, mpc_val_t** xs) {
|
|||||||
return mpc_count(num, mpcf_strfold, xs[0], free);
|
return mpc_count(num, mpcf_strfold, xs[0], free);
|
||||||
}
|
}
|
||||||
|
|
||||||
static mpc_parser_t* mpc_re_escape_char(char c, int range) {
|
static mpc_parser_t* mpc_re_escape_char(char c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'a': return mpc_char('\a');
|
case 'a': return mpc_char('\a');
|
||||||
case 'f': return mpc_char('\f');
|
case 'f': return mpc_char('\f');
|
||||||
case 'n': return mpc_char('\n');
|
case 'n': return mpc_char('\n');
|
||||||
|
case 'r': return mpc_char('\r');
|
||||||
case 't': return mpc_char('\t');
|
case 't': return mpc_char('\t');
|
||||||
case 'v': return mpc_char('\v');
|
case 'v': return mpc_char('\v');
|
||||||
case 'b': return mpc_char('\b');
|
case 'b': return mpc_char('\b');
|
||||||
@@ -1836,7 +1837,7 @@ static mpc_val_t* mpcf_re_escape(mpc_val_t* x) {
|
|||||||
|
|
||||||
/* Regex Escape */
|
/* Regex Escape */
|
||||||
if (s[0] == '\\') {
|
if (s[0] == '\\') {
|
||||||
p = mpc_re_escape_char(s[1], 0);
|
p = mpc_re_escape_char(s[1]);
|
||||||
p = (p == NULL) ? mpc_char(s[1]) : p;
|
p = (p == NULL) ? mpc_char(s[1]) : p;
|
||||||
free(s);
|
free(s);
|
||||||
return p;
|
return p;
|
||||||
@@ -1848,18 +1849,35 @@ static mpc_val_t* mpcf_re_escape(mpc_val_t* x) {
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
** Translate the character that follows a backslash inside a regex
** range into the set of characters it stands for.
**
** c: the character immediately after the backslash.
**
** Returns a pointer to a string literal holding every character the
** escape denotes (a single character for the simple escapes, several
** for the class escapes), or NULL when `c` is not a recognised escape.
** The result is a literal: callers must not modify or free it.
**
** NOTE(review): the visible caller in mpcf_re_range only appends the
** result when it is non-NULL, so an unrecognised escape contributes
** nothing to the range - confirm that is intended.
*/
static char* mpc_re_range_escape_char(char c) {
|
||||||
|
switch (c) {
|
||||||
|
case '-': return "-";
|
||||||
|
case 'a': return "\a";
|
||||||
|
case 'f': return "\f";
|
||||||
|
case 'n': return "\n";
|
||||||
|
case 'r': return "\r";
|
||||||
|
case 't': return "\t";
|
||||||
|
case 'v': return "\v";
|
||||||
|
case 'b': return "\b";
|
||||||
|
case 'd': return "0123456789"; /* decimal digits */
|
||||||
|
case 's': return " \f\n\r\t\v"; /* space, form feed, newline, carriage return, tab, vertical tab */
|
||||||
|
case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; /* ASCII letters, digits and underscore */
|
||||||
|
default: return NULL; /* not a recognised range escape */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static mpc_val_t* mpcf_re_range(mpc_val_t* x) {
|
static mpc_val_t* mpcf_re_range(mpc_val_t* x) {
|
||||||
|
|
||||||
|
char* range = calloc(1,1);
|
||||||
|
char* tmp = NULL;
|
||||||
char* s = x;
|
char* s = x;
|
||||||
int i = 0;
|
char start, end;
|
||||||
|
int i, j;
|
||||||
int comp = 0;
|
int comp = 0;
|
||||||
|
|
||||||
mpc_parser_t* q = NULL;
|
if (s[0] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); }
|
||||||
mpc_parser_t* p = mpc_failf("Invalid Range Specifier");
|
|
||||||
|
|
||||||
if (s[0] == '\0') { free(x); return p; }
|
|
||||||
if (s[0] == '^' &&
|
if (s[0] == '^' &&
|
||||||
s[1] == '\0') { free(x); return p; }
|
s[1] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); }
|
||||||
|
|
||||||
if (s[0] == '^') { comp = 1;}
|
if (s[0] == '^') { comp = 1;}
|
||||||
|
|
||||||
@@ -1867,28 +1885,41 @@ static mpc_val_t* mpcf_re_range(mpc_val_t* x) {
|
|||||||
|
|
||||||
/* Regex Range Escape */
|
/* Regex Range Escape */
|
||||||
if (s[i] == '\\') {
|
if (s[i] == '\\') {
|
||||||
q = mpc_re_escape_char(s[i+1], 1);
|
tmp = mpc_re_range_escape_char(s[i+1]);
|
||||||
q = (q == NULL) ? mpc_char(s[i+1]) : q;
|
if (tmp != NULL) {
|
||||||
p = mpc_or(2, p, q);
|
range = realloc(range, strlen(range) + strlen(tmp) + 1);
|
||||||
|
strcat(range, tmp);
|
||||||
|
}
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Regex Range...Range */
|
/* Regex Range...Range */
|
||||||
else if (s[i] == '-') {
|
else if (s[i] == '-') {
|
||||||
if (s[i+1] == '\0' || i == 0) {
|
if (s[i+1] == '\0' || i == 0) {
|
||||||
p = mpc_or(2, p, mpc_char('-'));
|
range = realloc(range, strlen(range) + strlen("-") + 1);
|
||||||
|
strcat(range, "-");
|
||||||
} else {
|
} else {
|
||||||
p = mpc_or(2, p, mpc_range(s[i-1]+1, s[i+1]-1));
|
start = s[i-1]+1;
|
||||||
|
end = s[i+1]-1;
|
||||||
|
for (j = start; j <= end; j++) {
|
||||||
|
range = realloc(range, strlen(range) + 1 + 1);
|
||||||
|
range[strlen(range) + 1] = '\0';
|
||||||
|
range[strlen(range) + 0] = j;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Regex Range Normal */
|
/* Regex Range Normal */
|
||||||
else { p = mpc_or(2, p, mpc_char(s[i])); }
|
else {
|
||||||
|
range = realloc(range, strlen(range) + 1 + 1);
|
||||||
|
range[strlen(range) + 1] = '\0';
|
||||||
|
range[strlen(range) + 0] = s[i];
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(x);
|
free(x);
|
||||||
return comp ? mpc_not_lift(p, free, mpcf_ctor_str) : p;
|
return comp ? mpc_noneof(range) : mpc_oneof(range);
|
||||||
}
|
}
|
||||||
|
|
||||||
static mpc_val_t* mpcf_re_invalid(void) {
|
static mpc_val_t* mpcf_re_invalid(void) {
|
||||||
|
@@ -9,7 +9,7 @@ static void string_print(void* x) { printf("'%s'", (char*)x); }
|
|||||||
|
|
||||||
void test_regex_basic(void) {
|
void test_regex_basic(void) {
|
||||||
|
|
||||||
mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5;
|
mpc_parser_t *re0, *re1, *re2, *re3, *re4, *re5, *re6, *re7;
|
||||||
|
|
||||||
re0 = mpc_re("abc|bcd");
|
re0 = mpc_re("abc|bcd");
|
||||||
re1 = mpc_re("abc|bcd|e");
|
re1 = mpc_re("abc|bcd|e");
|
||||||
@@ -17,6 +17,8 @@ void test_regex_basic(void) {
|
|||||||
re3 = mpc_re("abc(abdd)?");
|
re3 = mpc_re("abc(abdd)?");
|
||||||
re4 = mpc_re("ab|c(abdd)?");
|
re4 = mpc_re("ab|c(abdd)?");
|
||||||
re5 = mpc_re("abc(ab|dd)+g$");
|
re5 = mpc_re("abc(ab|dd)+g$");
|
||||||
|
re6 = mpc_re("\"(\\\\.|[^\"])*\"");
|
||||||
|
re7 = mpc_re(";[^\\n\\r]*");
|
||||||
|
|
||||||
PT_ASSERT(mpc_match(re0, "abc", "abc", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re0, "abc", "abc", string_eq, free, string_print));
|
||||||
PT_ASSERT(mpc_match(re0, "bcd", "bcd", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re0, "bcd", "bcd", string_eq, free, string_print));
|
||||||
@@ -26,7 +28,13 @@ void test_regex_basic(void) {
|
|||||||
PT_ASSERT(mpc_match(re2, "abc", "abc", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re2, "abc", "abc", string_eq, free, string_print));
|
||||||
PT_ASSERT(mpc_match(re2, "abcabab", "abcabab", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re2, "abcabab", "abcabab", string_eq, free, string_print));
|
||||||
PT_ASSERT(mpc_match(re2, "abcababd", "abcabab", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re2, "abcababd", "abcabab", string_eq, free, string_print));
|
||||||
PT_ASSERT(mpc_match(re5, "abck", "", string_eq, free, string_print));
|
PT_ASSERT(mpc_match(re5, "abcddg", "abcddg", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re6, "\"there\"", "\"there\"", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re6, "\"hello\"", "\"hello\"", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re6, "\"i am dan\"", "\"i am dan\"", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re6, "\"i a\\\"m dan\"", "\"i a\\\"m dan\"", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re7, ";comment", ";comment", string_eq, free, string_print));
|
||||||
|
PT_ASSERT(mpc_match(re7, ";i am the\nman", ";i am the", string_eq, free, string_print));
|
||||||
|
|
||||||
mpc_delete(re0);
|
mpc_delete(re0);
|
||||||
mpc_delete(re1);
|
mpc_delete(re1);
|
||||||
@@ -34,6 +42,8 @@ void test_regex_basic(void) {
|
|||||||
mpc_delete(re3);
|
mpc_delete(re3);
|
||||||
mpc_delete(re4);
|
mpc_delete(re4);
|
||||||
mpc_delete(re5);
|
mpc_delete(re5);
|
||||||
|
mpc_delete(re6);
|
||||||
|
mpc_delete(re7);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user