#include "stdio.h" #include "regex.h" #include "stdlib.h" #include "stdint.h" #include "exception.h" #include "mythread.h" #include "unistd.h" #include "debug.h" #include "string.h" #include "ctype.h" // c语言词法分析 typedef enum { TOKEN_IF = 256, TOKEN_BREAK = 257, TOKEN_WHILE=258, TOKEN_SWITCH=259, TOKEN_CASE=260, TOKEN_DO=261, TOKEN_CHAR=262, TOKEN_INT=263, TOKEN_VOID=264, TOKEN_NAME = 265 , TOKEN_NUM = 266 , }token_index_def; typedef struct { const char* key; const token_index_def token; const char* token_str; }keywork_item_def; #define TOKEN_DEF(s,t) {#s,t,#t} const keywork_item_def g_keyword_table[ ] = { TOKEN_DEF(if,TOKEN_IF), TOKEN_DEF(break,TOKEN_IF), TOKEN_DEF(while,TOKEN_WHILE), TOKEN_DEF(switch,TOKEN_SWITCH), TOKEN_DEF(case,TOKEN_CASE), TOKEN_DEF(do,TOKEN_DO), TOKEN_DEF(char,TOKEN_CHAR), TOKEN_DEF(int,TOKEN_INT), TOKEN_DEF(void,TOKEN_VOID), {NULL } , }; #define TOKEN_BUFF_MAX_LEN 128 // 字母 下划线 #define cislalpha(c) (isalpha(c) || (c) == '_') // 数字 字母 下划线 #define cislalnum(c) (isalnum(c) || (c) == '_') // 数字 #define cisdigit(c) (isdigit(c)) // 空白 #define cisspace(c) (isspace(c)) // 可打印字符 #define cisprint(c) (isprint(c)) // 16进制数字 #define cisxdigit(c) (isxdigit(c)) // 转换为小写 #define ctolower(c) (tolower(c)) typedef struct{ char buff[TOKEN_BUFF_MAX_LEN]; int used; int line; int pos; int token; }token_def; typedef struct _token_list { token_def token; struct _token_list* next; }token_list_node_def; typedef struct { token_list_node_def* head; token_list_node_def* current; int len; }token_list_def; typedef struct { int current_c; int current_line; int current_line_pos; token_def token_buff; const char *input_text; int input_len; int input_pos; token_list_def tlist; }lex_def; // 对比关键字 返回其token int lex_compare_keywords(const char* key) { const keywork_item_def* ckey = g_keyword_table; int index = 0; while (ckey[index].key != NULL) { if (strcmp(ckey[index].key , key) == 0) { return ckey[index].token; } index++; } return -1; } // 获取下一个字符 返回0成功 int lex_get_next(lex_def *lex) { if (lex->input_pos >= lex->input_len) { lex->current_c = 0; return -1; } lex->current_c=lex->input_text[lex->input_pos]; lex->input_pos++; lex->current_line_pos++; return 0; } // 保存当前字符 int lex_save_char(lex_def *lex){ token_def *t=&lex->token_buff; if(t->used>=TOKEN_BUFF_MAX_LEN){ return -1; } if (t->used == 0) { t->pos = lex->current_line_pos; t->line=lex->current_line; } t->buff[t->used]=lex->current_c; t->used++; return 0; } // 保存一个token int lex_save_token(lex_def* lex) { token_list_node_def* t = mem_calloc(1 , sizeof(token_list_node_def)); token_list_def* l = &lex->tlist; memcpy(&t->token , &lex->token_buff , sizeof(token_def)); memset(&lex->token_buff , 0 , sizeof(token_def)); if (l->head == NULL) { l->head = t; } else { l->current->next = t; if (l->head->next == NULL) { l->head->next = t; } } l->current = t; l->len++; return 0; } // 删除 token list int lex_del_token_list(lex_def* lex) { token_list_def* l = &lex->tlist; token_list_node_def* t; while (l->len > 0) { t = l->head->next; mem_free(l->head); l->head = t; l->len--; } l->head = NULL; l->current = NULL; return 0; } // 打印 token list int lex_print_token_list(lex_def* lex) { token_list_def* l = &lex->tlist; token_list_node_def* t; t = l->head; while (t) { printf("\"%s\":%d[%d,%d],token=%d\n" , t->token.buff , t->token.line , t->token.pos , t->token.pos + t->token.used -1 , t->token.token); t = t->next; } return 0; } int lex_analysis(const char *text,int len){ lex_def lex = { 0 }; int in_loop = 1; lex.input_text = text; lex.input_len = len; lex_get_next(&lex); lex.current_line_pos = 0; while(in_loop){ switch (lex.current_c) { case 0: { in_loop = 0; break; } case '\r': { lex_get_next(&lex); } case '\n': { if(lex.current_c=='\n'){ lex_get_next(&lex); } lex.current_line++; lex.current_line_pos=0; break; } case ' ': case '\t': case '\v': case '\f': { lex_get_next(&lex); break; } case '(': case ')': case '{': case '}': case '[': case ']': case '~': case '!': case ',': case ';': case ':': { DBG_LOG("enter %c" , lex.current_c); lex_save_char(&lex); lex.token_buff.token = lex.current_c; lex_save_token(&lex); lex_get_next(&lex); break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':{ if (cisdigit(lex.current_c)) { do { lex_save_char(&lex); lex_get_next(&lex); } while (cisdigit(lex.current_c)); lex.token_buff.token = TOKEN_NUM; lex_save_token(&lex); } break; } default: if (cislalpha(lex.current_c)) { do { lex_save_char(&lex); lex_get_next(&lex); } while (cislalnum(lex.current_c)); int token = lex_compare_keywords(lex.token_buff.buff); if (token != -1) { lex.token_buff.token = token; } else { lex.token_buff.token = TOKEN_NAME; } lex_save_token(&lex); } else { DBG_ERR("未知的符号('%c'): 在 %d 行" , lex.current_c , lex.current_line); in_loop = 0; } break; } } lex_print_token_list(&lex); lex_del_token_list(&lex); return 0; }