Files
c_soft/soft/clexical.c
2024-10-30 23:30:35 +08:00

356 lines
7.9 KiB
C

#include "stdio.h"
#include "regex.h"
#include "stdlib.h"
#include "stdint.h"
#include "exception.h"
#include "mythread.h"
#include "unistd.h"
#include "debug.h"
#include "string.h"
#include "ctype.h"
// C lexical analyzer.
// Token ids for keywords and multi-character operators. Values start at
// 256 so that single-character tokens can use their own character code
// directly as the token id without colliding.
typedef enum {
TOKEN_IF = 256,
TOKEN_BREAK = 257,
TOKEN_WHILE=258,
TOKEN_SWITCH=259,
TOKEN_CASE=260,
TOKEN_DO=261,
TOKEN_CHAR=262,
TOKEN_INT=263,
TOKEN_VOID=264,
TOKEN_NAME = 265 ,// identifier
TOKEN_NUM = 266 ,// numeric literal
TOKEN_INC = 267,// increment "++"
TOKEN_DEC = 268,// decrement "--"
TOKEN_EQ = 269,// equal "=="
TOKEN_NEQ = 270,// not equal "!="
TOKEN_LSH = 271,// left shift "<<"
TOKEN_RSH = 272,// right shift ">>"
TOKEN_LEQ = 273,// less or equal "<="
TOKEN_GEQ = 274,// greater or equal ">="
TOKEN_ELSE = 275,
TOKEN_CONTINUE = 276,
}token_index_def;
// One keyword-table entry: the keyword text, its token id, and the
// token id's name as a string (for debugging output).
typedef struct {
const char* key;
const token_index_def token;
const char* token_str;
}keywork_item_def;
#define TOKEN_DEF(s,t) {#s,t,#t}
const keywork_item_def g_keyword_table[ ] = {
TOKEN_DEF(if,TOKEN_IF),
TOKEN_DEF(else,TOKEN_ELSE),
TOKEN_DEF(break,TOKEN_IF),
TOKEN_DEF(while,TOKEN_WHILE),
TOKEN_DEF(switch,TOKEN_SWITCH),
TOKEN_DEF(case,TOKEN_CASE),
TOKEN_DEF(do,TOKEN_DO),
TOKEN_DEF(char,TOKEN_CHAR),
TOKEN_DEF(int,TOKEN_INT),
TOKEN_DEF(void,TOKEN_VOID),
TOKEN_DEF(continue,TOKEN_CONTINUE),
{NULL},
};
#define TOKEN_BUFF_MAX_LEN 128
// Character-class helpers. The <ctype.h> functions have undefined
// behavior when given a plain char with a negative value (non-ASCII
// bytes where char is signed), so the argument is cast to unsigned
// char first (CERT STR37-C).
// letter or underscore (identifier start)
#define cislalpha(c) (isalpha((unsigned char)(c)) || (c) == '_')
// digit, letter or underscore (identifier continuation)
#define cislalnum(c) (isalnum((unsigned char)(c)) || (c) == '_')
// decimal digit
#define cisdigit(c) (isdigit((unsigned char)(c)))
// whitespace
#define cisspace(c) (isspace((unsigned char)(c)))
// printable character
#define cisprint(c) (isprint((unsigned char)(c)))
// hexadecimal digit
#define cisxdigit(c) (isxdigit((unsigned char)(c)))
// convert to lower case
#define ctolower(c) (tolower((unsigned char)(c)))
// One lexed token.
typedef struct{
char buff[TOKEN_BUFF_MAX_LEN];// token text (buffer is zeroed between tokens)
int used;// number of characters stored in buff
int line;// 0-based source line of the token's first character
int pos;// column of the token's first character within its line
int token;// token id: a token_index_def value, or the character itself
}token_def;
// Singly-linked list node holding one token.
typedef struct _token_list {
token_def token;
struct _token_list* next;
}token_list_node_def;
// Token list accumulated by the lexer.
typedef struct {
token_list_node_def* head;// first node
token_list_node_def* current;// last appended node (list tail)
int len;// node count
}token_list_def;
// Full lexer state for one analysis run.
typedef struct {
int current_c;// lookahead character; 0 once input is exhausted
int current_line;// 0-based line of current_c
int current_line_pos;// position of current_c within the line
token_def token_buff;// token currently being accumulated
const char *input_text;// input buffer (not owned)
int input_len;// input length in bytes
int input_pos;// next read offset into input_text
token_list_def tlist;// tokens produced so far
}lex_def;
// Look up an identifier in the keyword table.
// Returns its token id, or -1 if it is not a reserved word.
int lex_compare_keywords(const char* key) {
    for (const keywork_item_def* item = g_keyword_table; item->key != NULL; item++) {
        if (strcmp(item->key, key) == 0) {
            return item->token;
        }
    }
    return -1;
}
// Advance the lexer by one character.
// Returns 0 on success; at end of input sets current_c to 0 and returns -1.
int lex_get_next(lex_def *lex)
{
    if (lex->input_pos < lex->input_len) {
        lex->current_c = lex->input_text[lex->input_pos++];
        lex->current_line_pos++;
        return 0;
    }
    lex->current_c = 0;
    return -1;
}
// Append the current character to the in-progress token buffer.
// Records the token's starting line/column on its first character.
// Returns 0 on success, -1 when the buffer is full.
int lex_save_char(lex_def *lex){
    token_def *t=&lex->token_buff;
    // fixed off-by-one: reserve one byte so buff is always NUL-terminated
    // (the buffer is zeroed between tokens), keeping it safe for
    // strcmp()/printf("%s"). Previously a 128-char token left no NUL.
    if(t->used >= TOKEN_BUFF_MAX_LEN - 1){
        return -1;
    }
    if (t->used == 0) {
        t->pos = lex->current_line_pos;
        t->line = lex->current_line;
    }
    t->buff[t->used] = lex->current_c;
    t->used++;
    return 0;
}
// Move the finished token out of token_buff into a new list node and
// append it to the token list. Clears token_buff for the next token.
// Returns 0 on success, -1 on allocation failure (token_buff untouched).
int lex_save_token(lex_def* lex) {
    token_list_node_def* t = mem_calloc(1 , sizeof(token_list_node_def));
    token_list_def* l = &lex->tlist;
    if (t == NULL) {
        // fixed: an allocation failure was previously dereferenced
        return -1;
    }
    memcpy(&t->token , &lex->token_buff , sizeof(token_def));
    memset(&lex->token_buff , 0 , sizeof(token_def));
    if (l->head == NULL) {
        l->head = t;
    } else {
        // current is always the tail, so appending the second node already
        // links head->next; the old extra head->next fixup was redundant.
        l->current->next = t;
    }
    l->current = t;
    l->len++;
    return 0;
}
// Free every node of the token list and reset it to empty. Returns 0.
int lex_del_token_list(lex_def* lex) {
    token_list_def* l = &lex->tlist;
    token_list_node_def* node = l->head;
    token_list_node_def* next;
    while (l->len > 0) {
        next = node->next;
        mem_free(node);
        node = next;
        l->len--;
    }
    l->head = NULL;
    l->current = NULL;
    return 0;
}
// Dump every token as: "text":line[startcol,endcol],token=id. Returns 0.
int lex_print_token_list(lex_def* lex) {
    for (token_list_node_def* node = lex->tlist.head; node != NULL; node = node->next) {
        const token_def* tk = &node->token;
        printf("\"%s\":%d[%d,%d],token=%d\n" , tk->buff , tk->line , tk->pos ,
               tk->pos + tk->used -1 , tk->token);
    }
    return 0;
}
int lex_analysis(const char *text,int len){
lex_def lex = { 0 };
int in_loop = 1;
lex.input_text = text;
lex.input_len = len;
lex_get_next(&lex);
lex.current_line_pos = 0;
while(in_loop){
switch (lex.current_c)
{
case 0: {
in_loop = 0;
break;
}
case '\r': {
lex_get_next(&lex);
}
case '\n': {
if(lex.current_c=='\n'){
lex_get_next(&lex);
}
lex.current_line++;
lex.current_line_pos=0;
break;
}
case ' ':
case '\t':
case '\v':
case '\f': {
lex_get_next(&lex);
break;
}
case '(':
case ')':
case '{':
case '}':
case '[':
case ']':
case '~':
case ',':
case ';':
case ':':
{
lex_save_char(&lex);
lex.token_buff.token = lex.current_c;
lex_save_token(&lex);
lex_get_next(&lex);
break;
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':{
if (cisdigit(lex.current_c)) {
do {
lex_save_char(&lex);
lex_get_next(&lex);
} while (cisdigit(lex.current_c));
lex.token_buff.token = TOKEN_NUM;
lex_save_token(&lex);
}
break;
}
case '+':{
lex_save_char(&lex);
lex_get_next(&lex);
if(lex.current_c=='+'){
lex_save_char(&lex);
lex_get_next(&lex);
lex.token_buff.token = TOKEN_INC;
}else{
lex.token_buff.token = lex.current_c;
}
lex_save_token(&lex);
break;
}
case '-':{
lex_save_char(&lex);
lex_get_next(&lex);
if(lex.current_c=='-'){
lex_save_char(&lex);
lex_get_next(&lex);
lex.token_buff.token = TOKEN_DEC;
}else{
lex.token_buff.token = lex.current_c;
}
lex_save_token(&lex);
break;
}
case '!':
case '>':
case '<':
case '=':{
lex_save_char(&lex);
lex_get_next(&lex);
if(lex.current_c=='='){
lex_save_char(&lex);
lex_get_next(&lex);
lex.token_buff.token = TOKEN_EQ;
}else{
lex.token_buff.token = lex.current_c;
}
lex_save_token(&lex);
break;
}
default:
if (cislalpha(lex.current_c)) {
do {
lex_save_char(&lex);
lex_get_next(&lex);
} while (cislalnum(lex.current_c));
int token = lex_compare_keywords(lex.token_buff.buff);
if (token != -1) {
lex.token_buff.token = token;
} else {
lex.token_buff.token = TOKEN_NAME;
}
lex_save_token(&lex);
} else {
DBG_ERR("未知的符号('%c'): 在 %d 行" , lex.current_c , lex.current_line);
in_loop = 0;
}
break;
}
}
lex_print_token_list(&lex);
lex_del_token_list(&lex);
return 0;
}