304 lines
6.4 KiB
C
304 lines
6.4 KiB
C
#include "stdio.h"
|
|
#include "regex.h"
|
|
#include "stdlib.h"
|
|
#include "stdint.h"
|
|
#include "exception.h"
|
|
#include "mythread.h"
|
|
#include "unistd.h"
|
|
#include "debug.h"
|
|
#include "string.h"
|
|
#include "ctype.h"
|
|
|
|
|
|
// Lexical analysis (tokenizer) for the C language
|
|
|
|
// Token codes produced by the lexer.
// Numbering starts at 256; single-character tokens use the character's own
// value as their code (see lex_analysis), so the two ranges do not overlap.
typedef enum {
    // Keywords
    TOKEN_IF = 256,
    TOKEN_BREAK,    // 257
    TOKEN_WHILE,    // 258
    TOKEN_SWITCH,   // 259
    TOKEN_CASE,     // 260
    TOKEN_DO,       // 261
    TOKEN_CHAR,     // 262
    TOKEN_INT,      // 263
    TOKEN_VOID,     // 264
    // Non-keyword tokens
    TOKEN_NAME,     // 265: identifier that is not a keyword
    TOKEN_NUM,      // 266: run of decimal digits
} token_index_def;
|
|
|
|
|
|
typedef struct {
|
|
const char* key;
|
|
const token_index_def token;
|
|
const char* token_str;
|
|
}keywork_item_def;
|
|
|
|
// Builds one keywork_item_def initializer: `s` is the bare keyword, which is
// stringified into .key; `t` is its token enumerator, stored in .token and
// stringified into .token_str.
#define TOKEN_DEF(s, t) { .key = #s, .token = (t), .token_str = #t }
|
|
|
|
const keywork_item_def g_keyword_table[ ] = {
|
|
TOKEN_DEF(if,TOKEN_IF),
|
|
TOKEN_DEF(break,TOKEN_IF),
|
|
TOKEN_DEF(while,TOKEN_WHILE),
|
|
TOKEN_DEF(switch,TOKEN_SWITCH),
|
|
TOKEN_DEF(case,TOKEN_CASE),
|
|
TOKEN_DEF(do,TOKEN_DO),
|
|
TOKEN_DEF(char,TOKEN_CHAR),
|
|
TOKEN_DEF(int,TOKEN_INT),
|
|
TOKEN_DEF(void,TOKEN_VOID),
|
|
{NULL } ,
|
|
};
|
|
|
|
|
|
// Size in bytes of a token's text buffer (token_def.buff).
#define TOKEN_BUFF_MAX_LEN 128

// Character-class helpers built on <ctype.h>.
//
// The <ctype.h> functions have undefined behavior when passed a negative value
// other than EOF, which is what a plain `char` holding a byte >= 0x80 becomes
// where `char` is signed. Each helper therefore converts its argument to
// `unsigned char` first. Do not pass EOF to these helpers. Note that
// cislalpha and cislalnum evaluate their argument twice.

// Letter or underscore (the characters that may start an identifier)
#define cislalpha(c) (isalpha((unsigned char)(c)) || (c) == '_')

// Digit, letter or underscore (the characters that may continue an identifier)
#define cislalnum(c) (isalnum((unsigned char)(c)) || (c) == '_')

// Decimal digit
#define cisdigit(c) (isdigit((unsigned char)(c)))

// Whitespace
#define cisspace(c) (isspace((unsigned char)(c)))

// Printable character
#define cisprint(c) (isprint((unsigned char)(c)))

// Hexadecimal digit
#define cisxdigit(c) (isxdigit((unsigned char)(c)))

// Convert to lower case
#define ctolower(c) (tolower((unsigned char)(c)))
|
|
|
|
|
|
|
|
typedef struct{
|
|
char buff[TOKEN_BUFF_MAX_LEN];
|
|
int used;
|
|
int line;
|
|
int pos;
|
|
int token;
|
|
}token_def;
|
|
|
|
|
|
typedef struct _token_list {
|
|
token_def token;
|
|
struct _token_list* next;
|
|
}token_list_node_def;
|
|
|
|
|
|
typedef struct {
|
|
token_list_node_def* head;
|
|
token_list_node_def* current;
|
|
int len;
|
|
}token_list_def;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
int current_c;
|
|
int current_line;
|
|
int current_line_pos;
|
|
token_def token_buff;
|
|
const char *input_text;
|
|
int input_len;
|
|
int input_pos;
|
|
token_list_def tlist;
|
|
}lex_def;
|
|
|
|
|
|
|
|
|
|
// 对比关键字 返回其token
|
|
int lex_compare_keywords(const char* key) {
|
|
const keywork_item_def* ckey = g_keyword_table;
|
|
int index = 0;
|
|
while (ckey[index].key != NULL) {
|
|
if (strcmp(ckey[index].key , key) == 0) {
|
|
return ckey[index].token;
|
|
}
|
|
index++;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 获取下一个字符 返回0成功
|
|
int lex_get_next(lex_def *lex)
|
|
{
|
|
if (lex->input_pos >= lex->input_len) {
|
|
lex->current_c = 0;
|
|
return -1;
|
|
}
|
|
lex->current_c=lex->input_text[lex->input_pos];
|
|
lex->input_pos++;
|
|
lex->current_line_pos++;
|
|
return 0;
|
|
}
|
|
|
|
|
|
// 保存当前字符
|
|
int lex_save_char(lex_def *lex){
|
|
token_def *t=&lex->token_buff;
|
|
if(t->used>=TOKEN_BUFF_MAX_LEN){
|
|
return -1;
|
|
}
|
|
if (t->used == 0) {
|
|
t->pos = lex->current_line_pos;
|
|
t->line=lex->current_line;
|
|
}
|
|
t->buff[t->used]=lex->current_c;
|
|
t->used++;
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
// 保存一个token
|
|
int lex_save_token(lex_def* lex) {
|
|
token_list_node_def* t = mem_calloc(1 , sizeof(token_list_node_def));
|
|
token_list_def* l = &lex->tlist;
|
|
memcpy(&t->token , &lex->token_buff , sizeof(token_def));
|
|
memset(&lex->token_buff , 0 , sizeof(token_def));
|
|
if (l->head == NULL) {
|
|
l->head = t;
|
|
} else {
|
|
l->current->next = t;
|
|
if (l->head->next == NULL) {
|
|
l->head->next = t;
|
|
}
|
|
}
|
|
l->current = t;
|
|
l->len++;
|
|
return 0;
|
|
}
|
|
|
|
|
|
// 删除 token list
|
|
int lex_del_token_list(lex_def* lex) {
|
|
token_list_def* l = &lex->tlist;
|
|
token_list_node_def* t;
|
|
while (l->len > 0) {
|
|
t = l->head->next;
|
|
mem_free(l->head);
|
|
l->head = t;
|
|
l->len--;
|
|
}
|
|
l->head = NULL;
|
|
l->current = NULL;
|
|
return 0;
|
|
}
|
|
|
|
// 打印 token list
|
|
int lex_print_token_list(lex_def* lex) {
|
|
token_list_def* l = &lex->tlist;
|
|
token_list_node_def* t;
|
|
t = l->head;
|
|
while (t) {
|
|
printf("\"%s\":%d[%d,%d],token=%d\n" , t->token.buff , t->token.line , t->token.pos ,
|
|
t->token.pos + t->token.used -1 , t->token.token);
|
|
t = t->next;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
int lex_analysis(const char *text,int len){
|
|
lex_def lex = { 0 };
|
|
int in_loop = 1;
|
|
lex.input_text = text;
|
|
lex.input_len = len;
|
|
lex_get_next(&lex);
|
|
lex.current_line_pos = 0;
|
|
while(in_loop){
|
|
switch (lex.current_c)
|
|
{
|
|
case 0: {
|
|
in_loop = 0;
|
|
break;
|
|
}
|
|
case '\r': {
|
|
lex_get_next(&lex);
|
|
}
|
|
case '\n': {
|
|
if(lex.current_c=='\n'){
|
|
lex_get_next(&lex);
|
|
}
|
|
lex.current_line++;
|
|
lex.current_line_pos=0;
|
|
break;
|
|
}
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f': {
|
|
lex_get_next(&lex);
|
|
break;
|
|
}
|
|
case '(':
|
|
case ')':
|
|
case '{':
|
|
case '}':
|
|
case '[':
|
|
case ']':
|
|
case '~':
|
|
case '!':
|
|
case ',':
|
|
case ';':
|
|
case ':':
|
|
{
|
|
DBG_LOG("enter %c" , lex.current_c);
|
|
lex_save_char(&lex);
|
|
lex.token_buff.token = lex.current_c;
|
|
lex_save_token(&lex);
|
|
lex_get_next(&lex);
|
|
break;
|
|
}
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':{
|
|
if (cisdigit(lex.current_c)) {
|
|
do {
|
|
lex_save_char(&lex);
|
|
lex_get_next(&lex);
|
|
} while (cisdigit(lex.current_c));
|
|
lex.token_buff.token = TOKEN_NUM;
|
|
lex_save_token(&lex);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
if (cislalpha(lex.current_c)) {
|
|
do {
|
|
lex_save_char(&lex);
|
|
lex_get_next(&lex);
|
|
} while (cislalnum(lex.current_c));
|
|
int token = lex_compare_keywords(lex.token_buff.buff);
|
|
if (token != -1) {
|
|
lex.token_buff.token = token;
|
|
} else {
|
|
lex.token_buff.token = TOKEN_NAME;
|
|
}
|
|
lex_save_token(&lex);
|
|
} else {
|
|
DBG_ERR("未知的符号('%c'): 在 %d 行" , lex.current_c , lex.current_line);
|
|
in_loop = 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
lex_print_token_list(&lex);
|
|
lex_del_token_list(&lex);
|
|
return 0;
|
|
}
|
|
|
|
|
|
|