添加符号和数字的解析

2024-10-30 19:08:46 +08:00
parent 93a8c2c265
commit 01a7999d55
7 changed files with 264 additions and 56 deletions
--- a/main.c
+++ b/main.c
@@ -6,15 +6,26 @@
 #include "soft/mythread.h"
 #include "unistd.h"
 #include "soft/debug.h"
+#include "string.h"
+#include "soft/clexical.h"
+
+
+
+// Sample C source text that thread_fun passes to lex_analysis.
+const char g_str[ ] = "int main(){\n"
+"int index;\n"
+"while(1){\n"
+"printf(index)\n"
+"}\n"
+"}";
+
+


 int thread_fun(void* t) {
  DBG_INFO("run in thread_fun.\n");
+  lex_analysis(g_str , strlen(g_str)); // tokenize the built-in sample source

-  while(1){
-    sleep(5);
-    throw_("s");
-  }
  return 0;
 }

@@ -26,17 +37,8 @@ int main(int argc,char *argv[]){
  debug_init(NULL);
  DBG_INFO("hello world.%ld\n",(size_t)pthread_self());

-
-
-  // while(p&&(*p)){
-
-  //   printf("%s\n",*p++);
-  // }
-
-  // printf("a+b=%d",test_add(3,5));
  myth_create(thread_fun , NULL);
-  myth_create(NULL,NULL);
-  // sleep(10);
+
  myth_join( );
 }

--- a/soft/clexical.c
+++ b/soft/clexical.c
@@ -6,33 +6,68 @@
 #include "mythread.h"
 #include "unistd.h"
 #include "debug.h"
+#include "string.h"
+#include "ctype.h"


 // c语言词法分析

-
-
-
-const char *g_keyword_table[]={
-    "if","break","while","switch","case","do",
-    "char","int","void",
-};
-
 typedef enum {
    TOKEN_IF = 256,
-    TOKEN_BREAK,
-    TOKEN_WHILE,
-    TOKEN_SWITCH,
-    TOKEN_CASE,
-    TOKEN_DO,
-    TOKEN_CHAR,
-    TOKEN_INT,
-    TOKEN_VOID,
-    TOKEN_NAME,
-}token_def;
+    TOKEN_BREAK = 257,
+    TOKEN_WHILE=258,
+    TOKEN_SWITCH=259,
+    TOKEN_CASE=260,
+    TOKEN_DO=261,
+    TOKEN_CHAR=262,
+    TOKEN_INT=263,
+    TOKEN_VOID=264,
+    TOKEN_NAME = 265 , // identifier that matched no keyword
+    TOKEN_NUM = 266 , // run of decimal digits
+}token_index_def; // ids start at 256; lex_analysis stores single-character tokens as their character code
+
+// One entry of the keyword table.
+typedef struct {
+    const char* key; // keyword spelling, compared against scanned identifiers
+    const token_index_def token; // token id returned for this keyword
+    const char* token_str; // name of the enum constant as text (stringified by TOKEN_DEF)
+}keywork_item_def;
+// TOKEN_DEF(s,t) builds one table entry: spelling #s, token id t, and t's name as text.
+#define TOKEN_DEF(s,t)  {#s,t,#t}
+// Keyword table scanned by lex_compare_keywords; the entry with a NULL key ends it.
+const keywork_item_def g_keyword_table[ ] = {
+    TOKEN_DEF(if,TOKEN_IF),
+    TOKEN_DEF(break,TOKEN_BREAK),
+    TOKEN_DEF(while,TOKEN_WHILE),
+    TOKEN_DEF(switch,TOKEN_SWITCH),
+    TOKEN_DEF(case,TOKEN_CASE),
+    TOKEN_DEF(do,TOKEN_DO),
+    TOKEN_DEF(char,TOKEN_CHAR),
+    TOKEN_DEF(int,TOKEN_INT),
+    TOKEN_DEF(void,TOKEN_VOID),
+    {NULL } ,
+};
+

 #define TOKEN_BUFF_MAX_LEN      128

+// <ctype.h> wrappers; the argument is cast to unsigned char because a negative char value is undefined there.
+// letter or underscore
+#define cislalpha(c)	(isalpha((unsigned char)(c)) || (c) == '_')
+// letter, digit or underscore
+#define cislalnum(c)	(isalnum((unsigned char)(c)) || (c) == '_')
+// decimal digit
+#define cisdigit(c)	(isdigit((unsigned char)(c)))
+// whitespace
+#define cisspace(c)	(isspace((unsigned char)(c)))
+// printable character
+#define cisprint(c)	(isprint((unsigned char)(c)))
+// hexadecimal digit
+#define cisxdigit(c)	(isxdigit((unsigned char)(c)))
+// convert to lower case
+#define ctolower(c)	(tolower((unsigned char)(c)))
+
+
 typedef struct{
    char buff[TOKEN_BUFF_MAX_LEN];
    int used;
@@ -41,22 +76,58 @@ typedef struct{
    int token;
 }token_def;

+// Singly linked list node holding one finished token.
+typedef struct _token_list {
+    token_def token; // copy of the token, filled in by lex_save_token
+    struct _token_list* next; // following node; NULL on the last node
+}token_list_node_def;
+
+// Token list; lex_save_token appends tokens at the tail in the order they are saved.
+typedef struct {
+    token_list_node_def* head; // first node; NULL while the list is empty
+    token_list_node_def* current; // most recently appended node (the tail)
+    int len; // number of nodes in the list
+}token_list_def;
+
+
+
+// Lexer state used by lex_analysis and its helper functions.
 typedef struct {
    int current_c;
    int current_line;
    int current_line_pos;
    token_def token_buff;
-    char *input_text;
+    const char *input_text; // text being scanned; only read, never written
    int input_len;
    int input_pos;
+    token_list_def tlist; // tokens collected so far
 }lex_def;


+// Look up an identifier in the keyword table.
+//   key: NUL-terminated spelling to look up.
+// Returns the token id of the matching keyword, or -1 if there is none.
+int lex_compare_keywords(const char* key) {
+    const keywork_item_def* item;
+    // the table ends with an entry whose key is NULL
+    for (item = g_keyword_table; item->key != NULL; item++) {
+        if (strcmp(item->key , key) != 0) {
+            continue;
+        }
+        return item->token;
+    }
+    return -1;
+}
+
+
+
+
+
 // 获取下一个字符 返回0成功
 int lex_get_next(lex_def *lex)
 {
-    token_def *t=&lex->token_buff;
    if (lex->input_pos >= lex->input_len) {
+        lex->current_c = 0;
        return -1;
    }
    lex->current_c=lex->input_text[lex->input_pos];
@@ -72,26 +143,84 @@ int lex_save_char(lex_def *lex){
    if(t->used>=TOKEN_BUFF_MAX_LEN){
        return -1;
    }
+    if (t->used == 0) {
+        t->pos = lex->current_line_pos;
+        t->line=lex->current_line;
+    }
    t->buff[t->used]=lex->current_c;
    t->used++;
-    t->line=lex->current_line;
-    t->pos=lex->current_line_pos;
    return 0;
 }



+// Append the token under construction to the list and clear the build buffer.
+// Returns 0 on success, or -1 when no list node could be allocated (the
+// pending token then stays untouched in lex->token_buff).
+int lex_save_token(lex_def* lex) {
+    token_list_def* l = &lex->tlist;
+    token_list_node_def* t = mem_calloc(1 , sizeof(*t));
+    if (t == NULL) {
+        return -1;
+    }
+    t->token = lex->token_buff;
+    memset(&lex->token_buff , 0 , sizeof(lex->token_buff));
+    if (l->head == NULL) {
+        l->head = t;
+    } else {
+        l->current->next = t; // current is the tail, so this links t behind it
+    }
+    l->current = t;
+    l->len++;
+    return 0;
+}
+// Free every node of the token list and leave the list empty.
+// Always returns 0.
+int lex_del_token_list(lex_def* lex) {
+    token_list_def* list = &lex->tlist;
+    for (; list->len > 0; list->len--) {
+        // fetch the successor before the node that stores it is released
+        token_list_node_def* next = list->head->next;
+        mem_free(list->head);
+        list->head = next;
+    }
+    list->head = NULL;
+    list->current = NULL;
+    return 0;
+}
+
+// Print each token on its own stdout line as "text":line[first_col,last_col],token=id.
+// Always returns 0.
+int lex_print_token_list(lex_def* lex) {
+    token_list_def* l = &lex->tlist;
+    token_list_node_def* t;
+    for (t = l->head; t != NULL; t = t->next) {
+        // bound the text by `used`: a completely filled buffer has no NUL terminator
+        printf("\"%.*s\":%d[%d,%d],token=%d\n" , t->token.used , t->token.buff ,
+            t->token.line , t->token.pos ,
+            t->token.pos + t->token.used -1 , t->token.token);
+    }
+    return 0;
+}

 int lex_analysis(const char *text,int len){
    lex_def lex = { 0 };
+    int in_loop = 1;
    lex.input_text = text;
    lex.input_len = len;
    lex_get_next(&lex);
-    while(1){
+    lex.current_line_pos = 0;
+    while(in_loop){
        switch (lex.current_c)
        {
+        case 0: {
+            in_loop = 0;
+            break;
+        }
        case '\r': {
            lex_get_next(&lex);
+        }
+        case '\n': {
            if(lex.current_c=='\n'){
                lex_get_next(&lex);
            }
@@ -99,14 +228,75 @@ int lex_analysis(const char *text,int len){
            lex.current_line_pos=0;
            break;
        }
-        case ' ':case '\t':case '\v':case '\f':{
+        case ' ':
+        case '\t':
+        case '\v':
+        case '\f': {
            lex_get_next(&lex);
-            lex.current_line_pos++;
+            break;
+        }
+        case '(':
+        case ')':
+        case '{':
+        case '}':
+        case '[':
+        case ']':
+        case '~':
+        case '!':
+        case ',':
+        case ';':
+        case ':':
+        {
+            DBG_LOG("enter %c" , lex.current_c);
+            lex_save_char(&lex);
+            lex.token_buff.token = lex.current_c;
+            lex_save_token(&lex);
+            lex_get_next(&lex);
+            break;
+        }
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':{
+            if (cisdigit(lex.current_c)) {
+                do {
+                    lex_save_char(&lex);
+                    lex_get_next(&lex);
+                } while (cisdigit(lex.current_c));
+                lex.token_buff.token = TOKEN_NUM;
+                lex_save_token(&lex);
+            }
+            break;
        }
        default:
+            if (cislalpha(lex.current_c)) {
+                do {
+                    lex_save_char(&lex);
+                    lex_get_next(&lex);
+                } while (cislalnum(lex.current_c));
+                int token = lex_compare_keywords(lex.token_buff.buff);
+                if (token != -1) {
+                    lex.token_buff.token = token;
+                } else {
+                    lex.token_buff.token = TOKEN_NAME;
+                }
+                lex_save_token(&lex);
+            } else {
+                DBG_ERR("未知的符号('%c'): 在 %d 行" , lex.current_c , lex.current_line);
+                in_loop = 0;
+            }
            break;
        }
    }
+    lex_print_token_list(&lex);
+    lex_del_token_list(&lex);
+    return 0;
 }


--- a/soft/clexical.h
+++ b/soft/clexical.h
@@ -0,0 +1,13 @@
+#ifndef clexical_h__
+#define clexical_h__
+
+// Tokenize up to len bytes of text and print the resulting token list.
+// Returns 0.
+int lex_analysis(const char* text , int len);
+
+
+
+
+
+#endif
+
--- a/soft/debug.c
+++ b/soft/debug.c
@@ -44,11 +44,11 @@ int _sem_init(){
  key_t key;
  int mutex;
  key = ftok(".",5345);
-  printf("sem init, key=%llu\n",key);
+  // printf("sem init, key=%llu\n",key);
  mutex = semget(key,1,IPC_CREAT);//创建信号量
-  printf("sem init, mutex=%d\n",mutex);
+  // printf("sem init, mutex=%d\n",mutex);
  if(mutex<=0){
-    printf("%d\n",errno);
+    // printf("%d\n",errno);
  }
  union semun set;
  set.val = 1;//钥匙数量为0
@@ -138,7 +138,7 @@ void debug_log(const char *file,const char *fun,int line,int level,const char *f
  #endif
  memcpy(log_buf,level_str[level],level_str_len[level]);
  length=level_str_len[level];
-  length+=sprintf(log_buf + length,"%s|%s|%d| ",file,fun,line);
+  length+=sprintf(log_buf + length,"%s:%d|%s| ",file,line,fun);

  va_start(args, fmt);
  length += vsnprintf(log_buf + length, CONSOLEBUF_SIZE - length - 3, fmt, args);
--- a/soft/debug.h
+++ b/soft/debug.h
@@ -9,7 +9,7 @@


 /*r{ 修改日志打印等级 }c*/
-#define DBG_LOG_LEVEL DBG_LEVEL_INFO
+#define DBG_LOG_LEVEL DBG_LEVEL_LOG



--- a/soft/mystdlib.c
+++ b/soft/mystdlib.c
@@ -6,6 +6,9 @@
 #include "debug.h"


+
+
+
 static void __mem_append_m(map_def** m , void* p);


@@ -98,8 +101,8 @@ void *mem_malloc(size_t size){
  return p;
 }

-void *mem_calloc(size_t memb_size,size_t memb_num){
-  void *p=calloc(memb_size,memb_num);
+void *mem_calloc(size_t memb_num, size_t memb_size){ // element count first, then element size, as in calloc()
+  void *p=calloc(memb_num, memb_size); // zero-filled block, or NULL if the allocation fails
  __mem_append(p);
  return p;
 }
--- a/soft/mystdlib.h
+++ b/soft/mystdlib.h
@@ -21,7 +21,7 @@ typedef struct _map_def{
 void __mem_clear(map_def **m);
 void __mem_mov(map_def **d,map_def **s);

-void *mem_calloc(size_t memb_size,size_t memb_num);
+void *mem_calloc(size_t memb_num, size_t memb_size); // memb_num elements of memb_size bytes each (calloc argument order)
 void *mem_malloc(size_t size);
 void mem_free(void *p);