修复一些符号不识别的问题

2025-01-15 18:32:00 +08:00
parent 478d4c615d
commit e67ded6cc9
3 changed files with 684 additions and 542 deletions
--- a/lex_c.py
+++ b/lex_c.py
@@ -1,287 +0,0 @@
-import os
-import sys
-import dataclasses
-
-
-TOKEN_IF = 256,
-TOKEN_BREAK = 257,
-TOKEN_WHILE=258,
-TOKEN_SWITCH=259,
-TOKEN_CASE=260,
-TOKEN_DO=261,
-TOKEN_CHAR=262,
-TOKEN_INT=263,
-TOKEN_VOID=264,
-TOKEN_SYMBOL = 265 ,
-TOKEN_NUM = 266 ,# 数字
-TOKEN_INC = 267,# 自增
-TOKEN_DEC = 268,# 自减
-TOKEN_EQ = 269,# 相等
-TOKEN_NEQ = 270,# 不相等
-TOKEN_LSH = 271,# 左移
-TOKEN_RSH = 272,# 右移
-TOKEN_LEQ = 273,# 小于等于
-TOKEN_GEQ = 274,# 大于等于
-TOKEN_ELSE = 275,
-TOKEN_CONTINUE = 276 ,
-TOKEN_CONST = 277 ,
-TOKEN_STATIC = 278 ,
-TOKEN_UNSIGNED = 279 ,
-TOKEN_TYPEDEF = 280 ,
-TOKEN_STRUCT = 281 ,
-TOKEN_ENUM = 282 ,
-TOKEN_UNION = 283,
-TOKEN_STRING = 284,
-TOKEN_DEFAULT = 285,
-TOKEN_RETURN = 286,
-TOKEN_ASSIG_ADD = 287
-TOKEN_ASSIG_SUB = 288
-TOKEN_ASSIG_MUL = 289
-TOKEN_ASSIG_DIV = 290
-TOKEN_ASSIG_LSH = 291
-TOKEN_ASSIG_RSH = 292
-TOKEN_EXTERN = 293
-TOKEN_FLOAT = 294
-TOKEN_DOUBLE = 295
-TOKEN_SHORT = 296
-TOKEN_LONG = 297
-
-
-def TOKEN(t:str):
-  return t.encode("utf-8")[0]
-
-_KeyWordTable={
-  "if":TOKEN_IF,
-  "else":TOKEN_ELSE,
-  "break":TOKEN_BREAK,
-  "while":TOKEN_WHILE,
-  "switch":TOKEN_SWITCH,
-  "case":TOKEN_CASE,
-  "do":TOKEN_DO,
-  "char":TOKEN_CHAR,
-  "int":TOKEN_INT,
-  "void":TOKEN_VOID,
-  "continue":TOKEN_CONTINUE,
-  "const":TOKEN_CONST,
-  "static":TOKEN_STATIC,
-  "unisgned":TOKEN_UNSIGNED,
-  "typedef":TOKEN_TYPEDEF,
-  "struct":TOKEN_STRUCT,
-  "enum":TOKEN_ENUM,
-  "union":TOKEN_UNION,
-  "default":TOKEN_DEFAULT,
-  "return":TOKEN_RETURN,
-  "extern":TOKEN_EXTERN,
-  "float":TOKEN_FLOAT,
-  "double":TOKEN_DOUBLE,
-  "short":TOKEN_SHORT,
-  "long":TOKEN_LONG,
-}
-
-_MarkTable={
-  "<<":TOKEN_LSH,
-  ">>":TOKEN_RSH,
-  "<=":TOKEN_LEQ,
-  ">=":TOKEN_GEQ,
-  "!=":TOKEN_NEQ,
-  "==":TOKEN_EQ,
-  "++":TOKEN_INC,
-  "--":TOKEN_DEC,
-  "+=":TOKEN_ASSIG_ADD,
-  "-=":TOKEN_ASSIG_SUB,
-  "*=":TOKEN_ASSIG_MUL,
-  "<<=":TOKEN_ASSIG_LSH,
-  ">>=":TOKEN_ASSIG_RSH,
-  "=":TOKEN("="),
-  "!":TOKEN("!"),
-  "<":TOKEN("<"),
-  ">":TOKEN(">"),
-  "+":TOKEN("+"),
-  "-":TOKEN("-"),
-
-}
-
-
-
-# 是否是数字加字母
-def isalnum(num:int):
-  return bytes([num]).isalnum()
-
-# 是否是数字加字母或下划线
-def isalnum_(num:int):
-  return bytes([num]).isalnum() or num==TOKEN("_")
-
-# 是否是字母
-def isalpha(num:int):
-  return bytes([num]).isalpha()
-
-# 是否是字母或下划线
-def isalpha_(num:int):
-  return bytes([num]).isalpha() or num==TOKEN("_")
-
-# 是否是数字
-def isdigit(num:int):
-  return bytes([num]).isdigit()
-
-# 是否是数字或小数点
-def isdigitdot(num:int):
-  return bytes([num]).isdigit() or num==TOKEN(".")
-
-# 是否是空白字符 包括换行符
-def isspace(num:int):
-  return bytes([num]).isspace()
-
-# 是否是给定字符串之一
-def isinstr(num:int,t:str):
-  c=bytes([num])
-  return c in t.encode("utf-8")
-
-# 是否是操作符
-def isoperator(num:int):
-  return isinstr(num,"<>!+-=")
-
-@dataclasses.dataclass
-class lex_token:
-    name:str
-    buff:bytearray
-    token:int
-    line:int
-    pos:int
-
-
-class lex_class(object):
-  def __init__(self,text:bytes) -> None:
-    self.text=text
-    self.index=-1
-    self.line=1
-    self.pos=-1
-    self.token_list:list[lex_token]=[]
-    self.token_buff=bytearray()
-  def save_char(self,c:int):
-    self.token_buff.append(c&0xff)
-  def save_token(self,token:lex_token):
-    self.token_list.append(token)
-    self.token_buff=bytearray()
-  def _get_char(self):
-    if(self.index<len(self.text)):
-      c= self.text[self.index]
-      return c
-    return -1
-  def get_next_char(self):
-    if not self.is_end():
-      self.index+=1
-    c= self._get_char()
-    if(c==b'\n'[0]):
-      self.line+=1
-      self.pos=-1
-    else:
-      self.pos+=1
-    return c
-  def is_end(self):
-    return self.index>=len(self.text)
-  def save_one_char_token(self,c:int):
-    token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
-    self.save_token(token)
-  def read_name_and_save(self,c:int):
-    token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
-    self.save_char(c)
-    while True:
-      c=self.get_next_char()
-      if(isalnum_(c)):
-        self.save_char(c)
-      else:
-        break
-    name=self.token_buff.decode("utf-8")
-    if(name in _KeyWordTable):
-      token.token=_KeyWordTable[name]
-      token.name=name
-    token.buff=self.token_buff
-    self.save_token(token)
-    return c
-  def read_operator_and_save(self,c:int):
-    token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
-    self.save_char(c)
-    while True:
-      c=self.get_next_char()
-      if(isoperator(c)):
-        self.save_char(c)
-      else:
-        break
-    name=self.token_buff.decode("utf-8")
-    if(name in _MarkTable):
-      token.token=_MarkTable[name]
-      token.name=name
-    else:
-      raise Exception(f"不存在的操作符 {name} ")
-    token.buff=self.token_buff
-    self.save_token(token)
-    return c
-  def read_num_and_save(self,c:int):
-    token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
-    self.save_char(c)
-    while True:
-      c=self.get_next_char()
-      if(isdigitdot(c)):
-        self.save_char(c)
-      else:
-        break
-    if(self.token_buff.count(b'.')>1):
-      raise Exception("数字不能包含多个点号")
-    token.buff=self.token_buff
-    self.save_token(token)
-    return c
-  def read_str_and_save(self,c:int):
-    c=self.get_next_char()
-    while c!=b'\"'[0]:
-      self.save_char(c)
-      c=self.get_next_char()
-    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
-    return self.get_next_char()
-
-def lex(text:bytes):
-  lex_obj = lex_class(text)
-  c=lex_obj.get_next_char()
-  while not lex_obj.is_end():
-    if isalpha_(c):
-      c=lex_obj.read_name_and_save(c)
-    elif isinstr(c,"{}[]()~,;:*"):
-      lex_obj.save_one_char_token(c)
-      c=lex_obj.get_next_char()
-    elif isdigit(c):
-      c=lex_obj.read_num_and_save(c)
-    elif isspace(c):
-      c=lex_obj.get_next_char()
-    elif isoperator(c):
-      c=lex_obj.read_operator_and_save(c)
-    elif isinstr(c,"\""):
-      c=lex_obj.read_str_and_save(c)
-    elif isinstr(c,"\\"):
-      c=lex_obj.get_next_char(c)
-      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
-        raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
-    elif isinstr(c,"/"):
-      c=lex_obj.get_next_char()
-      if(c==TOKEN("/")):
-        while c!=TOKEN("\n"):
-          c=lex_obj.get_next_char()
-      elif(c==TOKEN("*")):
-        c_old=lex_obj.get_next_char()
-        c=lex_obj.get_next_char()
-        while not (c_old==TOKEN("*") and c==TOKEN("/")):
-          c_old=c
-          c=lex_obj.get_next_char()
-        c=lex_obj.get_next_char()
-      elif(c==TOKEN("=")):
-        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
-        c=lex_obj.get_next_char()
-      else:
-        lex_obj.save_one_char_token(TOKEN("/"))
-    else:
-      raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
-  # for item in lex_obj.token_list:
-  #   print(f"{item}")
-  return lex_obj.token_list
-
-if __name__ == "__main__":
-  with open("main.c",mode='rb') as f:
-    lex(f.read())
--- a/node_declear.py
+++ b/node_declear.py
@@ -1,128 +0,0 @@
-from lex_c import lex_token
-import lex_c
-from parser_c import node
-from parser_c import node_file
-from parser_c import node_variable_def
-from parser_c import node_struct_decl
-from parser_c import node_struct_def
-from parser_c import node_union_decl
-from parser_c import node_union_def
-from parser_c import node_enum_decl
-from parser_c import node_enum_def
-from parser_c import node_func_decl
-from parser_c import node_typedef
-from parser_c import node_func_def
-
-from parser_c import find_sentence
-from parser_c import dist_node_type
-from parser_c import find_close
-
-
-
-
-
-def dist_node_type_struct(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_STRUCT):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
-      if(len(token_list)==2):
-        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
-        v_list:list[node_variable_def]=[]
-        token_list_local=token_list[3:-1]
-        while len(token_list_local)>0:
-          sentence=find_sentence(token_list_local)
-          v_list.append(dist_node_type(token_list=sentence))
-          token_list_local=token_list_local[len(sentence):]
-        return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
-  raise Exception(f"语法错误 {token_list[0]}")
-
-
-
-def dist_node_type_union(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_UNION):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
-      if(len(token_list)==2):
-        return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
-        v_list:list[node_variable_def]=[]
-        token_list_local=token_list[3:-1]
-        while len(token_list_local)>0:
-          sentence=find_sentence(token_list_local)
-          v_list.append(dist_node_type(token_list=sentence))
-          token_list_local=token_list_local[len(sentence):]
-        return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
-  raise Exception(f"语法错误 {token_list[0]}")
-
-
-
-def dist_node_type_enum(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_ENUM):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
-      if(len(token_list)==2):
-        return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
-        token_list_local=token_list[3:-1]
-        index=0
-        v_list:list[dict]=[]
-        while len(token_list_local)>0:
-          if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
-            key=token_list_local[0].buff.decode("utf-8")
-            if(token_list_local[1].token==lex_c.TOKEN("=") and token_list_local[2].token==lex_c.TOKEN_NUM):
-              index=int(token_list_local[2].buff.decode("utf-8"))
-              token_list_local=token_list_local[3:]
-            else:
-              index+=1
-              token_list_local=token_list_local[1:]
-            v_list.append({key:index})
-          if(len(token_list_local)>0):
-            if(token_list_local[0].token!=lex_c.TOKEN(",")):
-              raise Exception(f"枚举类型应该使用 ',' 分隔符")
-            token_list_local=token_list_local[1:]
-        return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
-  raise Exception(f"语法错误 {token_list[0]}")
-
-
-def dist_node_type_typedef(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
-    attr=[]
-    token_list_local=token_list
-    if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
-      raise Exception(f"没有定义新类型 {token_list[-1]}")
-    name=token_list[-1].buff.decode("utf-8")
-    token_list=token_list[1:]
-    while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
-      attr.append(token_list[0].name)
-      token_list=token_list[1:]
-    if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
-      attr.append(token_list[0].name)
-      if(token_list[1].token==lex_c.TOKEN_SYMBOL):
-        node_r=None
-        attr.append(token_list[1].buff.decode("utf-8"))
-        if(token_list[2].token==lex_c.TOKEN("{")):
-          node_r=dist_node_type(token_list=token_list[1:-1])
-        elif(token_list[2].token==lex_c.TOKEN("*")):
-          attr.append(token_list[2].name)
-        return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
-    if(token_list[0].token==lex_c.TOKEN_SYMBOL):
-      # 使用typedef 定义过的自定义类型
-      attr.append(token_list[0].buff.decode("utf-8"))
-      token_list=token_list[1:]
-    else:
-      # c语言预设类型
-      while(token_list[0].token in
-        [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
-         lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
-        attr.append(token_list[0].name)
-        token_list=token_list[1:]
-    if(len(token_list)>1):
-      raise Exception(f"意外的token {token_list[0]}")
-    return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)
-  raise Exception(f"语法错误 {token_list[0]}")
-
-
--- a/parser_c.py
+++ b/parser_c.py
@@ -1,20 +1,441 @@
 import os
 import sys
+import shutil
 import dataclasses
-from lex_c import lex_token
-from lex_c import lex
-import lex_c
+import copy
+
+
+
+TOKEN_IF = 256, # ids start above 255, the largest value TOKEN() can return. NOTE(review): the trailing comma makes every TOKEN_* constant a 1-tuple such as (256,), not an int - confirm this is intended
+TOKEN_BREAK = 257,
+TOKEN_WHILE=258,
+TOKEN_SWITCH=259,
+TOKEN_CASE=260,
+TOKEN_DO=261,
+TOKEN_CHAR=262,
+TOKEN_INT=263,
+TOKEN_VOID=264,
+TOKEN_SYMBOL = 265 ,
+TOKEN_NUM = 266 ,# numeric literal
+TOKEN_INC = 267,# increment ++
+TOKEN_DEC = 268,# decrement --
+TOKEN_EQ = 269,# equal ==
+TOKEN_NEQ = 270,# not equal !=
+TOKEN_LSH = 271,# left shift <<
+TOKEN_RSH = 272,# right shift >>
+TOKEN_LEQ = 273,# less than or equal <=
+TOKEN_GEQ = 274,# greater than or equal >=
+TOKEN_ELSE = 275,
+TOKEN_CONTINUE = 276 ,
+TOKEN_CONST = 277 ,
+TOKEN_STATIC = 278 ,
+TOKEN_UNSIGNED = 279 ,
+TOKEN_TYPEDEF = 280 ,
+TOKEN_STRUCT = 281 ,
+TOKEN_ENUM = 282 ,
+TOKEN_UNION = 283,
+TOKEN_STRING = 284,
+TOKEN_DEFAULT = 285,
+TOKEN_RETURN = 286,
+TOKEN_ASSIG_ADD = 287,
+TOKEN_ASSIG_SUB = 288,
+TOKEN_ASSIG_MUL = 289,
+TOKEN_ASSIG_DIV = 290,
+TOKEN_ASSIG_LSH = 291,
+TOKEN_ASSIG_RSH = 292,
+TOKEN_EXTERN = 293,
+TOKEN_FLOAT = 294,
+TOKEN_DOUBLE = 295,
+TOKEN_SHORT = 296,
+TOKEN_LONG = 297,
+TOKEN_POINTER = 298,
+TOKEN_LOGICAL_OR = 299,# logical or ||
+TOKEN_LOGICAL_AND = 300,# logical and &&
+TOKEN_OMIT = 301,# ellipsis ...
+
+
+def TOKEN(t:str): # token id of a single-character token: the first UTF-8 byte of `t`, as an int
+  return t.encode("utf-8")[0]
+
+_KeyWordTable={ # C keyword spelling -> token id; names that are not listed here stay TOKEN_SYMBOL
+  "if":TOKEN_IF,
+  "else":TOKEN_ELSE,
+  "break":TOKEN_BREAK,
+  "while":TOKEN_WHILE,
+  "switch":TOKEN_SWITCH,
+  "case":TOKEN_CASE,
+  "do":TOKEN_DO,
+  "char":TOKEN_CHAR,
+  "int":TOKEN_INT,
+  "void":TOKEN_VOID,
+  "continue":TOKEN_CONTINUE,
+  "const":TOKEN_CONST,
+  "static":TOKEN_STATIC,
+  "unsigned":TOKEN_UNSIGNED, # key used to be misspelled "unisgned", so `unsigned` was lexed as a plain symbol
+  "typedef":TOKEN_TYPEDEF,
+  "struct":TOKEN_STRUCT,
+  "enum":TOKEN_ENUM,
+  "union":TOKEN_UNION,
+  "default":TOKEN_DEFAULT,
+  "return":TOKEN_RETURN,
+  "extern":TOKEN_EXTERN,
+  "float":TOKEN_FLOAT,
+  "double":TOKEN_DOUBLE,
+  "short":TOKEN_SHORT,
+  "long":TOKEN_LONG,
+}
+
+_MarkTable={ # operator spelling -> token id; single-character operators use their byte value
+  "<<":TOKEN_LSH,
+  ">>":TOKEN_RSH,
+  "<=":TOKEN_LEQ,
+  ">=":TOKEN_GEQ,
+  "!=":TOKEN_NEQ,
+  "==":TOKEN_EQ,
+  "++":TOKEN_INC,
+  "--":TOKEN_DEC,
+  "->":TOKEN_POINTER,
+  "+=":TOKEN_ASSIG_ADD,
+  "-=":TOKEN_ASSIG_SUB,
+  "*=":TOKEN_ASSIG_MUL, # NOTE(review): '*' is not in isoperator()'s character set, so this entry may never be looked up - confirm
+  "<<=":TOKEN_ASSIG_LSH,
+  ">>=":TOKEN_ASSIG_RSH,
+  "=":TOKEN("="),
+  "!":TOKEN("!"),
+  "<":TOKEN("<"),
+  ">":TOKEN(">"),
+  "+":TOKEN("+"),
+  "-":TOKEN("-"),
+
+}
+
+
+
+# Whether the byte is an ASCII letter or digit; False for values outside 0..255 such as -1 (end of input)
+def isalnum(num:int):
+  return 0<=num<256 and bytes([num]).isalnum()
+
+# Whether the byte is an ASCII letter, digit or underscore; False for values outside 0..255 such as -1 (end of input)
+def isalnum_(num:int):
+  return num==TOKEN("_") or (0<=num<256 and bytes([num]).isalnum())
+
+# Whether the byte is an ASCII letter; False for values outside 0..255 such as -1 (end of input)
+def isalpha(num:int):
+  return 0<=num<256 and bytes([num]).isalpha()
+
+# Whether the byte is an ASCII letter or underscore; False for values outside 0..255 such as -1 (end of input)
+def isalpha_(num:int):
+  return num==TOKEN("_") or (0<=num<256 and bytes([num]).isalpha())
+
+# Whether the byte is an ASCII digit; False for values outside 0..255 such as -1 (end of input)
+def isdigit(num:int):
+  return 0<=num<256 and bytes([num]).isdigit()
+
+# Whether the byte is an ASCII digit or a decimal point; False for values outside 0..255 such as -1 (end of input)
+def isdigitdot(num:int):
+  return num==TOKEN(".") or (0<=num<256 and bytes([num]).isdigit())
+
+# Whether the byte is ASCII whitespace, newline included; False for values outside 0..255 such as -1 (end of input)
+def isspace(num:int):
+  return 0<=num<256 and bytes([num]).isspace()
+
+# Whether the byte is one of the bytes of the UTF-8 encoding of `t`
+def isinstr(num:int,t:str):
+  if not 0<=num<256: return False # -1 (end of input) or any other non-byte value matches nothing
+  return bytes([num]) in t.encode("utf-8")
+
+# Whether the byte can be part of an operator run (one of < > ! + - =); False for -1 (end of input)
+def isoperator(num:int):
+  return 0<=num<256 and isinstr(num,"<>!+-=")
+
+@dataclasses.dataclass
+class lex_token: # one lexical token produced by lex_class
+    name:str # "symbol", "number" or "string", otherwise the keyword / operator / punctuation text itself
+    buff:bytearray # bytes collected for the token
+    token:int # token id: a TOKEN_* constant, or the byte value for single-character tokens
+    line:int # value of lex_class.line when the token was created
+    pos:int # value of lex_class.pos when the token was created
+
+# Operators written back to back: maps the run to templates of the separate tokens it stands for (name and buff hold the operator text, so the copies made by read_operator_and_save get the right text and width)
+_NotMarkTable={
+  "!!":[lex_token('!','!',TOKEN('!'),0,0),
+        lex_token('!','!',TOKEN('!'),0,0)],
+  "=-":[lex_token('=','=',TOKEN('='),0,0),
+        lex_token('-','-',TOKEN('-'),0,0)],
+  "--=":[lex_token('--','--',TOKEN_DEC,0,0),
+        lex_token('=','=',TOKEN('='),0,0)],
+  "++=":[lex_token('++','++',TOKEN_INC,0,0),
+        lex_token('=','=',TOKEN('='),0,0)],
+  "=--":[lex_token('=','=',TOKEN('='),0,0),
+        lex_token('--','--',TOKEN_DEC,0,0)],
+  "=++":[lex_token('=','=',TOKEN('='),0,0),
+        lex_token('++','++',TOKEN_INC,0,0)],
+  "!=--":[lex_token('!=','!=',TOKEN_NEQ,0,0),
+        lex_token('--','--',TOKEN_DEC,0,0)],
+  "!=++":[lex_token('!=','!=',TOKEN_NEQ,0,0),
+        lex_token('++','++',TOKEN_INC,0,0)],
+  "==--":[lex_token('==','==',TOKEN_EQ,0,0),
+        lex_token('--','--',TOKEN_DEC,0,0)],
+  "==++":[lex_token('==','==',TOKEN_EQ,0,0),
+        lex_token('++','++',TOKEN_INC,0,0)],
+}
+
+
+
+class lex_class(object):
+  # Byte-oriented lexer state for one C source text: walks `text` one byte at a time and
+  # collects the recognised tokens in `token_list`.
+  def __init__(self,text:bytes,file_name:str="") -> None:
+    self.text=text
+    self.index=-1 # index of the current byte; -1 until the first get_next_char()
+    self.line=1
+    self.pos=-1
+    self.token_list:list[lex_token]=[]
+    self.token_buff=bytearray() # bytes collected for the token being read
+    self.file_name=file_name # only used in error messages
+    self.macro_table={} # macro name -> replacement text, filled by #define
+    self.macro_result=False # outcome of the latest #if/#ifdef/#ifndef; #else negates it
+  def save_char(self,c:int): # append one byte (masked to 0..255) to the pending token text
+    self.token_buff.append(c&0xff)
+  def save_token(self,token:lex_token): # store a finished token and start a fresh text buffer
+    self.token_list.append(token)
+    self.token_buff=bytearray()
+  def _get_char(self): # current byte as an int, or -1 when the index is outside the text
+    if(0<=self.index<len(self.text)):
+      return self.text[self.index]
+    return -1
+  def get_next_char(self): # step to the next byte and return it (-1 at end of text), updating line/pos
+    if not self.is_end():
+      self.index+=1
+    c= self._get_char()
+    if(c==b'\n'[0]):
+      self.line+=1
+      self.pos=-1
+    else:
+      self.pos+=1
+    return c
+  def is_end(self): # True once the index has moved past the last byte
+    return self.index>=len(self.text)
+  def save_one_char_token(self,c:int): # token whose id and name are the character itself
+    token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
+    self.save_token(token)
+  def read_name_and_save(self,c:int): # identifier or keyword starting at c; returns the first byte after it
+    token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
+    self.save_char(c)
+    while True:
+      c=self.get_next_char()
+      if(c!=-1 and isalnum_(c)): # -1 means end of input and must not reach the byte helpers
+        self.save_char(c)
+      else:
+        break
+    name=self.token_buff.decode("utf-8")
+    if(name in _KeyWordTable):
+      token.token=_KeyWordTable[name]
+      token.name=name
+    token.buff=self.token_buff
+    self.save_token(token)
+    return c
+  def read_operator_and_save(self,c:int): # run of operator characters starting at c; returns the first byte after it
+    token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
+    self.save_char(c)
+    while True:
+      c=self.get_next_char()
+      if(c!=-1 and isoperator(c)):
+        self.save_char(c)
+      else:
+        break
+    name=self.token_buff.decode("utf-8")
+    if(name in _MarkTable):
+      token.token=_MarkTable[name]
+      token.name=name
+      token.buff=self.token_buff
+      self.save_token(token)
+    elif(name in _NotMarkTable):
+      tokens=_NotMarkTable[name]
+      for t in tokens:
+        # take the operator text from the template's buff: its name may just be the generic "operator"
+        token.token=t.token
+        token.name=t.buff if isinstance(t.buff,str) else bytes(t.buff).decode("utf-8")
+        token.buff=token.name.encode("utf-8")
+        self.save_token(copy.deepcopy(token))
+        token.pos+=len(token.name)
+    else:
+      raise Exception(f"不存在的操作符 {name} {self.file_name}:{self.line},{self.pos}")
+    return c
+  def read_num_and_save(self,c:int): # digits and dots starting at c; returns the first byte after them
+    token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
+    self.save_char(c)
+    while True:
+      c=self.get_next_char()
+      if(c!=-1 and isdigitdot(c)):
+        self.save_char(c)
+      else:
+        break
+    if(self.token_buff.count(b'.')>1):
+      raise Exception(f"数字不能包含多个点号 {self.file_name}:{self.line},{self.pos}")
+    token.buff=self.token_buff
+    self.save_token(token)
+    return c
+  # byte value produced by each supported backslash escape, keyed by the character after the backslash
+  _escape_table={'0':0,'a':7,'b':8,'t':9,'n':10,'v':11,'f':12,'r':13,'"':34,'\'':39,'?':63,'\\':92}
+  def _read_quoted_and_save(self,quote:int): # shared body of read_str_and_save and read_char_and_save
+    c=self.get_next_char()
+    while c!=quote:
+      if(c==-1): # input ended before the closing quote: report it instead of looping forever
+        raise Exception(f"没有找到闭合的引号 {self.file_name}:{self.line},{self.pos}")
+      if(c==TOKEN('\\')):
+        c=self.get_next_char()
+        # the table is keyed by str while c is an int byte, so convert before the lookup;
+        # escapes that are not in the table are stored as 0
+        self.save_char(self._escape_table.get(chr(c) if c>=0 else "",0))
+      else:
+        self.save_char(c)
+      c=self.get_next_char()
+    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
+    return self.get_next_char()
+  def read_str_and_save(self,c:int): # c is the opening '"'; returns the first byte after the closing one
+    return self._read_quoted_and_save(b'\"'[0])
+  def read_char_and_save(self,c:int): # c is the opening "'"; the literal is saved as a "string" token as well
+    return self._read_quoted_and_save(b'\''[0])
+  def deal_macro(self,buff:bytearray):
+    # Handle one preprocessor line, passed as the bytes lex() collected for it.
+    # Returns True when the lines after it should be lexed, and False when lex() should
+    # skip ahead to the next directive.
+    # Conditionals are not nested: only the result of the latest #if/#ifdef/#ifndef is
+    # remembered (in self.macro_result) so that a following #else can negate it.
+    sp=buff.decode('utf-8').split()
+    if(len(sp)==0):
+      return True
+    if(sp[0]=='#define'):
+      if(len(sp)>=2 and not (sp[1] in self.macro_table)):
+        self.macro_table[sp[1]]=' '.join(sp[2:]) # "" when the macro has no body
+      return True # used to fall through and return None, which made lex() skip the code after every #define
+    elif(sp[0] in ('#ifdef','#ifndef')):
+      defined=(len(sp)>=2 and sp[1] in self.macro_table)
+      self.macro_result= defined if sp[0]=='#ifdef' else not defined
+      return self.macro_result
+    elif(sp[0]=='#if'):
+      self.macro_result=False # expressions are not evaluated yet, so an #if branch is always skipped
+      return self.macro_result
+    elif(sp[0]=='#elif'):
+      return False # not evaluated either; macro_result is kept so a later #else still works
+    elif(sp[0]=='#else'):
+      self.macro_result= not self.macro_result
+      return self.macro_result
+    return True # #endif, #include and anything else: keep lexing
+
+def lex(text:bytes,file_name:str=""):
+  # Split the C source `text` into a list of lex_token; `file_name` is only used in error messages.
+  # Comments are dropped. A preprocessor line is collected (without comments and line
+  # continuations) and handed to lex_class.deal_macro(); when that returns a false value,
+  # everything up to the next line whose first non-blank character is '#' is skipped.
+  # Raises Exception for a character or operator sequence the lexer does not know.
+  lex_obj = lex_class(text,file_name)
+  c=lex_obj.get_next_char()
+  while not lex_obj.is_end():
+    if isalpha_(c):
+      c=lex_obj.read_name_and_save(c)
+    elif isinstr(c,"{}[]()~,;:*?%^"):
+      lex_obj.save_one_char_token(c)
+      c=lex_obj.get_next_char()
+    elif isdigit(c):
+      c=lex_obj.read_num_and_save(c)
+    elif isspace(c):
+      c=lex_obj.get_next_char()
+    elif isoperator(c):
+      c=lex_obj.read_operator_and_save(c)
+    elif isinstr(c,"\""):
+      c=lex_obj.read_str_and_save(c)
+    elif isinstr(c,"\'"):
+      c=lex_obj.read_char_and_save(c)
+    elif isinstr(c,"\\"):
+      c=lex_obj.get_next_char()
+      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
+        raise Exception(f"符号 '\\' 必须在行末, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+    elif isinstr(c,"#"): # preprocessor directive
+      c_old=c
+      buff=bytearray()
+      while (c!=TOKEN("\n") and c!=-1):
+        c=lex_obj.get_next_char()
+        if(c_old==TOKEN('/') and c==TOKEN('*')):# block comment after the directive
+          # stop at "*/" or at end of input (the previous condition could never stop at end of input)
+          while not ((c_old==TOKEN("*") and c==TOKEN("/")) or c==-1):
+            c_old=c
+            c=lex_obj.get_next_char()
+          c=lex_obj.get_next_char() # step past the closing '/' so it is not copied into buff
+        elif(c_old==TOKEN('/') and c==TOKEN('/')):
+          while not (c==TOKEN('\n') or c==-1):
+            c=lex_obj.get_next_char()
+        elif(c_old==TOKEN('\\') and c in [TOKEN('\n'),TOKEN('\r')]):# line continuation
+          c=lex_obj.get_next_char()
+        else:
+          buff.append(c_old&0xff)
+        c_old=c
+      if not (lex_obj.deal_macro(buff)): # inactive branch: skip to the next directive
+        is_space=True
+        while True:
+          c=lex_obj.get_next_char()
+          if(is_space and c==TOKEN('#')):
+            break
+          if(c==-1):
+            break
+          if not isspace(c):
+            is_space=False
+          elif(c==TOKEN('\n')):
+            is_space=True
+    elif isinstr(c,"/"):
+      c=lex_obj.get_next_char()
+      if(c==TOKEN("/")):
+        while (c!=TOKEN("\n") and c!=-1):
+          c=lex_obj.get_next_char()
+      elif(c==TOKEN("*")):
+        c_old=lex_obj.get_next_char()
+        c=lex_obj.get_next_char()
+        while not (c_old==TOKEN("*") and c==TOKEN("/")) and c!=-1: # also stop at end of input
+          c_old=c
+          c=lex_obj.get_next_char()
+        c=lex_obj.get_next_char()
+      elif(c==TOKEN("=")):
+        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
+        c=lex_obj.get_next_char()
+      else:
+        lex_obj.save_one_char_token(TOKEN("/"))
+    elif isinstr(c,"|"):
+      c=lex_obj.get_next_char()
+      if(c==TOKEN("|")):
+        lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
+        c=lex_obj.get_next_char() # consume the second '|', otherwise it is lexed again as a lone '|'
+      else:
+        lex_obj.save_one_char_token(TOKEN("|"))
+    elif isinstr(c,"&"):
+      c=lex_obj.get_next_char()
+      if(c==TOKEN("&")):
+        lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
+        c=lex_obj.get_next_char() # consume the second '&', otherwise it is lexed again as a lone '&'
+      else:
+        lex_obj.save_one_char_token(TOKEN("&"))
+    elif isinstr(c,'.'):
+      c=lex_obj.get_next_char()
+      if(c==TOKEN('.')):
+        c=lex_obj.get_next_char()
+        if(c==TOKEN('.')):
+          lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
+          c=lex_obj.get_next_char() # consume the third '.', otherwise it is lexed again as a lone '.'
+        else:
+          raise Exception (f"格式错误 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+      else:
+        lex_obj.save_one_char_token(TOKEN("."))
+    else:
+      raise Exception(f"未知的字符 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+  return lex_obj.token_list

-_NodeTypeTable=[
-  "file","vdecl","fdef"
-]


@dataclasses.dataclass
 class node:
-  name:list[str]=dataclasses.field(default_factory=list)
+  name:list=dataclasses.field(default_factory=list)
  type:str="base"
-  token_list:list[lex_token]=dataclasses.field(default_factory=list)
+  token_list:list=dataclasses.field(default_factory=list)
  child:list=dataclasses.field(default_factory=list)
  def complite(self):
    print(f"complite {self.type}")
@@ -125,7 +546,7 @@ class node_int(node):


 # 找到闭合的括号
-def find_close(token_list:list[lex_token],token:tuple[int,int]):
+def find_close(token_list:list,token:tuple):
  if token_list[0].token!=token[0]:
    return 0
  num=0
@@ -136,20 +557,33 @@ def find_close(token_list:list[lex_token],token:tuple[int,int]):
      num-=1
    if(num==0):
      return index
-  raise Exception(f"没有找到闭合的符号 {token[1]}")
+  raise Exception(f"没有找到闭合的符号 {token_list[0]}")
+
+# Find the index of the first item whose .token equals `token`.
+# When nothing matches, the number of items in token_list is returned.
+def find_token(token_list:list,token:int):
+  seen=0
+  for seen,item in enumerate(token_list,1):
+    if(item.token==token):
+      # `seen` counts the items up to and including this one, so the index is one less
+      return seen-1
+  return seen
+

 # 找到一个完整的语句
-def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex_c.TOKEN(":")]):
+def find_sentence(token_list:list,sep:list=[TOKEN(";"),TOKEN(":")]):
  bracket_flag=False
  index=0
+  if(len(token_list)==1):
+    return token_list
  while index<len(token_list):
-    if(token_list[index].token==lex_c.TOKEN("(")):
-      bracket_index=find_close(token_list[index:],(lex_c.TOKEN("("),lex_c.TOKEN(")")))
+    if(token_list[index].token==TOKEN("(")):
+      bracket_index=find_close(token_list[index:],(TOKEN("("),TOKEN(")")))
      if(bracket_index>0):
        bracket_flag=True
        index+=bracket_index
-    elif(token_list[index].token==lex_c.TOKEN("{")):
-      bracket_index=find_close(token_list[index:],(lex_c.TOKEN("{"),lex_c.TOKEN("}")))
+    elif(token_list[index].token==TOKEN("{")):
+      bracket_index=find_close(token_list[index:],(TOKEN("{"),TOKEN("}")))
      if(bracket_index>0):
        index+=bracket_index
        if(bracket_flag==True):
@@ -157,7 +591,7 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex
    elif(token_list[index].token in sep):
      return token_list[:index+1]
    index+=1
-  raise Exception(f"没有找到完整的语句")
+  raise Exception(f"没有找到完整的语句 sep={sep} token={token_list[0]}")



@@ -171,140 +605,162 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex



-def dist_node_type_struct(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_STRUCT):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_struct(token_list:list):  # classify a statement whose first token is TOKEN_STRUCT; raises when no branch matches
+  if(token_list[0].token==TOKEN_STRUCT):
+    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
-        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
-        v_list:list[node_variable_def]=[]
-        token_list_local=token_list[3:-1]
-        while len(token_list_local)>0:
-          sentence=find_sentence(token_list_local)
-          v_list.append(dist_node_type(token_list=sentence))
-          token_list_local=token_list_local[len(sentence):]
-        return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=v_list)
+        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])  # only "struct <name>": a declaration
+      elif(token_list[2].token==TOKEN("{")):  # a body follows the name; the member parsing below is disabled, so child stays empty
+        # if not token_list[-1].token==TOKEN("}"):
+        #   raise Exception("没有出现预期的符号 '}'")
+        # v_list:list[node_variable_def]=[]
+        # token_list_local=token_list[3:-1]
+        # while len(token_list_local)>0:
+        #   sentence=find_sentence(token_list_local)
+        #   v_list.append(dist_node_type(token_list=sentence))
+        #   token_list_local=token_list_local[len(sentence):]
+        return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+      else:
+        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])  # any other token after the name is also treated as a declaration
+  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):  # '(' is present and comes before the first '{', if any
+    # function definition
+    return dist_node_type_funcdef(token_list=token_list)
+
  raise Exception(f"语法错误 {token_list[0]}")



-def dist_node_type_union(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_UNION):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_union(token_list:list):  # classify a statement whose first token is TOKEN_UNION; raises when no branch matches
+  if(token_list[0].token==TOKEN_UNION):
+    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
-        v_list:list[node_variable_def]=[]
-        token_list_local=token_list[3:-1]
-        while len(token_list_local)>0:
-          sentence=find_sentence(token_list_local)
-          v_list.append(dist_node_type(token_list=sentence))
-          token_list_local=token_list_local[len(sentence):]
-        return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=v_list)
+      elif(token_list[2].token==TOKEN("{")):  # NOTE(review): unlike dist_node_type_struct there is no else branch here, so "union <name> <other>" falls through to the checks below
+        # if not token_list[-1].token==TOKEN("}"):
+        #   raise Exception("没有出现预期的符号 '}'")
+        # v_list:list[node_variable_def]=[]
+        # token_list_local=token_list[3:-1]
+        # while len(token_list_local)>0:
+        #   sentence=find_sentence(token_list_local)
+        #   v_list.append(dist_node_type(token_list=sentence))
+        #   token_list_local=token_list_local[len(sentence):]
+        return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])  # member parsing above is disabled, so child stays empty
+  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):  # '(' is present and comes before the first '{', if any
+    # function definition
+    return dist_node_type_funcdef(token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")



-def dist_node_type_enum(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_ENUM):
-    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_enum(token_list:list):  # classify a statement whose first token is TOKEN_ENUM; raises when no branch matches
+  if(token_list[0].token==TOKEN_ENUM):
+    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-      elif(token_list[2].token==lex_c.TOKEN("{")):
-        if not token_list[-1].token==lex_c.TOKEN("}"):
-          raise Exception("没有出现预期的符号 '}'")
+      elif(token_list[2].token==TOKEN("{")):  # named enum with a body; the enumerators are not parsed
+        # if not token_list[-1].token==TOKEN("}"):
+        #   raise Exception("没有出现预期的符号 '}'")
        # token_list_local=token_list[3:-1]
        return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+    elif(token_list[1].token==TOKEN("{")):  # "enum {": no name token, so a fixed placeholder name is used
+      # if not token_list[-1].token==TOKEN("}"):
+      #   raise Exception("没有出现预期的符号 '}'")
+      # token_list_local=token_list[3:-1]
+      return node_enum_def(name='unnamed_enum',token_list=token_list,child=[])
+  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):  # '(' is present and comes before the first '{', if any
+    # function definition
+    return dist_node_type_funcdef(token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")


-def dist_node_type_typedef(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+def dist_node_type_typedef(token_list:list):  # build a node_typedef for a statement starting with TOKEN_TYPEDEF; raises otherwise
+  if(token_list[0].token==TOKEN_TYPEDEF):
    attr=[]  # qualifier/type names collected below; NOTE(review): never passed to node_typedef
    token_list_local=token_list  # keep the full statement for the returned node
-    if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
-      raise Exception(f"没有定义新类型 {token_list[-1]}")
+    # if(token_list[-1].token!=TOKEN_SYMBOL):
+    #   raise Exception(f"没有定义新类型 {token_list[-1]}")
    name=token_list[-1].buff.decode("utf-8")  # the new type name is taken from the last token
    token_list=token_list[1:]
-    while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
+    while token_list[0].token in [TOKEN_UNSIGNED,TOKEN_CONST]:
      attr.append(token_list[0].name)
      token_list=token_list[1:]
-    if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
+    if(token_list[0].token==TOKEN_STRUCT or token_list[0].token==TOKEN_UNION):
      attr.append(token_list[0].name)
-      if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+      if(token_list[1].token==TOKEN_SYMBOL):
        node_r=None
        attr.append(token_list[1].buff.decode("utf-8"))
-        if(token_list[2].token==lex_c.TOKEN("{")):
+        if(token_list[2].token==TOKEN("{")):
          node_r=dist_node_type(token_list=token_list[1:-1])  # NOTE(review): node_r is no longer used now that child=[] is returned
-        elif(token_list[2].token==lex_c.TOKEN("*")):
+        elif(token_list[2].token==TOKEN("*")):
          attr.append(token_list[2].name)
-        return node_typedef(name=name,token_list=token_list_local,child=node_r)
-    if(token_list[0].token==lex_c.TOKEN_SYMBOL):
+        return node_typedef(name=name,token_list=token_list_local,child=[])
+    if(token_list[0].token==TOKEN_SYMBOL):
      # a custom type previously defined with typedef
      attr.append(token_list[0].buff.decode("utf-8"))
      token_list=token_list[1:]
    else:
      # built-in C types
      while(token_list[0].token in
-        [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
-         lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
+        [TOKEN_INT,TOKEN_CHAR,TOKEN_SHORT,TOKEN_LONG,TOKEN_FLOAT,
+         TOKEN_DOUBLE,TOKEN_VOID,TOKEN("*")]):
        attr.append(token_list[0].name)
        token_list=token_list[1:]
-    if(len(token_list)>1):
-      raise Exception(f"意外的token {token_list[0]}")
+    # if(len(token_list)>1):
+    #   raise Exception(f"意外的token {token_list[0]}")
    return node_typedef(name=name,token_list=token_list_local,child=[])
  raise Exception(f"语法错误 {token_list[0]}")



 # Parse the statements found after the first '{' into a list of child nodes
-def find_child(token_list:list[lex_token],seq:list[int]=[lex_c.TOKEN(";"),lex_c.TOKEN(":")]):
+def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):  # seq: statement separators handed to find_sentence
  child=[]
+  token_list_local=[]  # stays empty when token_list has no '{', so the while loop below is skipped
  for i in range(len(token_list)):
-    if(token_list[i].token==lex_c.TOKEN("{")):
+    if(token_list[i].token==TOKEN("{")):
      token_list_local=token_list[i+1:-1]  # tokens after the first '{', without the final token
      break
  while len(token_list_local)>0:
    sentence=find_sentence(token_list_local,seq)
    node_d=dist_node_type(sentence)
-    child.append(node_d)
+    if not node_d is None:  # dist_node_type returns None for a statement with nothing left to classify
+      child.append(node_d)
    token_list_local=token_list_local[len(sentence):]  # continue after the statement just consumed
  return child


-def dist_node_type_funcdef(token_list:list[lex_token]):
+def dist_node_type_funcdef(token_list:list):  # build a node_func_def named after the token just before the first '('
  for i in range(len(token_list)):
-    if(token_list[i].token==lex_c.TOKEN_SYMBOL):
-      name=token_list[i].buff.decode("utf-8")
+    if(token_list[i].token==TOKEN('(')):  # NOTE(review): name stays unbound when token_list contains no '('
+      name=token_list[i-1].buff.decode("utf-8")  # the function name is the token immediately before '('
      break
-  return node_func_def(name=[name],token_list=token_list,child=find_child(token_list))
+  # return node_func_def(name=[name],token_list=token_list,child=find_child(token_list))
+  return node_func_def(name=[name],token_list=token_list,child=[])  # the function body is not parsed; child is left empty

-def dist_node_type_funcdecl(token_list:list[lex_token]):
+def dist_node_type_funcdecl(token_list:list):  # build a node_func_decl named after the first TOKEN_SYMBOL; raises when there is none
  for i in range(len(token_list)):
-    if(token_list[i].token==lex_c.TOKEN_SYMBOL):
+    if(token_list[i].token==TOKEN_SYMBOL):
      name=token_list[i].buff.decode("utf-8")
      return node_func_decl(name=[name],token_list=token_list,child=[])
  raise Exception(f"函数声明格式错误 {token_list[0]}")


 # 第一个token是symbol的处理
-def dist_node_type_symbol(token_list:list[lex_token]):
+def dist_node_type_symbol(token_list:list):
  # 变量赋值或函数调用
  if(len(token_list)==1):
    return node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list)
-  if(token_list[1].token == lex_c.TOKEN("(")):
+  if(token_list[1].token == TOKEN("(")):
    child=find_child(token_list=token_list[2:-1])
    return node_call("call",token_list=token_list,child=child)
  elif(token_list[1].token in [
-    lex_c.TOKEN("="),lex_c.TOKEN_ASSIG_ADD,lex_c.TOKEN_ASSIG_DIV,lex_c.TOKEN_ASSIG_LSH,
-    lex_c.TOKEN_ASSIG_MUL,lex_c.TOKEN_ASSIG_RSH,lex_c.TOKEN_ASSIG_SUB]):
+    TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
+    TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
    name=token_list[1].name
-    child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),
-            dist_node_type(token_list=token_list[2:])]
+    child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
+    child_d=dist_node_type(token_list=token_list[2:])
+    if not child_d is None:
+      child.append(child_d)
    return node_opt(name=name,token_list=token_list,child=child)
  else:
    # 没有赋值属性的操作
@@ -325,80 +781,181 @@ def dist_node_type_symbol(token_list:list[lex_token]):


 # Determine the node type of one statement and build the matching node
-def dist_node_type(token_list:list[lex_token]):
-  if(token_list[0].token==lex_c.TOKEN_EXTERN):
+def dist_node_type(token_list:list):  # returns None when nothing is left after dropping a leading extern and a trailing ';'
+  # print(f"{token_list[0]}")
+  if(token_list[0].token==TOKEN_EXTERN):
    token_list=token_list[1:]
-  if(token_list[-1].token==lex_c.TOKEN(";")):
+  if(token_list[-1].token==TOKEN(";")):
    token_list=token_list[:-1]
-  if(token_list[0].token==lex_c.TOKEN_STRUCT):
+  if(len(token_list)==0):
+    return None
+  if(token_list[0].token==TOKEN_STRUCT):
    return dist_node_type_struct(token_list=token_list)
-  if(token_list[0].token==lex_c.TOKEN_UNION):
+  if(token_list[0].token==TOKEN_UNION):
    return dist_node_type_union(token_list=token_list)
-  if(token_list[0].token==lex_c.TOKEN_ENUM):
+  if(token_list[0].token==TOKEN_ENUM):
    return dist_node_type_enum(token_list=token_list)
-  if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+  if(token_list[0].token==TOKEN_TYPEDEF):
    return dist_node_type_typedef(token_list=token_list)
-  if(token_list[0].token==lex_c.TOKEN_SWITCH):
-    child=find_child(token_list)
-    return node_switch(name="",token_list=token_list,child=child)
-  if(token_list[0].token==lex_c.TOKEN_CASE):
-    name=token_list[1].buff.decode("utf-8")
-    return node_case(name=name,token_list=token_list,child=[])
-  if(token_list[0].token==lex_c.TOKEN_DEFAULT):
-    return node_default(name="",token_list=token_list,child=[])
-  if(token_list[0].token==lex_c.TOKEN_BREAK):
-    return node_break(name="",token_list=token_list,child=[])
-  if(token_list[0].token==lex_c.TOKEN_RETURN):
-    if(len(token_list)>1):
-      child=[dist_node_type(token_list[1:])]
-    else:
-      child=[]
-    return node_return(name="",token_list=token_list,child=child)
-  if(token_list[0].token==lex_c.TOKEN_STRING):
+  # if(token_list[0].token==TOKEN_SWITCH):
+  #   child=find_child(token_list)
+  #   return node_switch(name="",token_list=token_list,child=child)
+  # if(token_list[0].token==TOKEN_CASE):
+  #   name=token_list[1].buff.decode("utf-8")
+  #   return node_case(name=name,token_list=token_list,child=[])
+  # if(token_list[0].token==TOKEN_DEFAULT):
+  #   return node_default(name="",token_list=token_list,child=[])
+  # if(token_list[0].token==TOKEN_BREAK):
+  #   return node_break(name="",token_list=token_list,child=[])
+  # if(token_list[0].token==TOKEN_RETURN):
+  #   if(len(token_list)>1):
+  #     child=[dist_node_type(token_list[1:])]
+  #   else:
+  #     child=[]
+  #   return node_return(name="",token_list=token_list,child=child)
+  if(token_list[0].token==TOKEN_STRING):
    name=token_list[0].buff.decode("utf-8")
    return node_string(name=name,token_list=token_list,child=[])
-  if(token_list[0].token==lex_c.TOKEN_NUM):
+  if(token_list[0].token==TOKEN_NUM):
    name=token_list[0].buff.decode("utf-8")
    return node_int(name=name,token_list=token_list,child=[])

-  if(token_list[-1].token==lex_c.TOKEN(")")):
+  if(token_list[-1].token==TOKEN(")")):
    # function declaration
    return dist_node_type_funcdecl(token_list)
-  elif(token_list[-1].token==lex_c.TOKEN("}")):
-    # 函数定义
-    return dist_node_type_funcdef(token_list=token_list)
-  elif(token_list[0].token==lex_c.TOKEN_SYMBOL):
+  elif(token_list[-1].token==TOKEN("}")):
+    if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):  # '(' is present and comes before the first '{'
+      # function definition
+      return dist_node_type_funcdef(token_list=token_list)
+    else:
+      # variable definition: falls through to the loop after this chain
+      pass
+  elif(token_list[0].token==TOKEN_SYMBOL):
    # variable assignment or function call
    return dist_node_type_symbol(token_list=token_list)
-  else:
-    # 变量定义
-    for i in range(len(token_list)):
-      if(token_list[i].token==lex_c.TOKEN_SYMBOL):
-        name=token_list[i].buff.decode("utf-8")
-        return node_variable_def(name=[name],token_list=token_list,child=[])
-    raise Exception(f"变量定义格式错误 {token_list[0]}")
+
+  # variable definition: named after the first TOKEN_SYMBOL in the statement
+  for i in range(len(token_list)):
+    if(token_list[i].token==TOKEN_SYMBOL):
+      name=token_list[i].buff.decode("utf-8")
+      return node_variable_def(name=[name],token_list=token_list,child=[])
+  raise Exception(f"变量定义格式错误 {token_list[0]}")


 def print_node(n:node,deep:int):  # print the subtree rooted at n, one line per node; depth is shown as a run of '|'
  s="|"*deep
-  print(f"{s} {n.type} {n.name}")
+  print(f"{s} {n.type} {n.name} {n.token_list[0] if n.token_list else ''}")  # a node without tokens (e.g. an empty file) has no first token to show
  # n.complite()
  if (not n.child is None) and len(n.child)>0:
    for item in n.child:
      print_node(item,deep+1)

+# Depth-first search of the node tree rooted at `n` for a node whose type is
+# 'func_def' and whose first name is listed in func_name_list. The match is
+# printed and True is returned; False means nothing matched. `deep` is only
+# incremented and handed on to the recursive call.
+def find_func_def_in_file(n:node,deep:int,func_name_list:list):
+  if n.type=='func_def' and n.name[0] in func_name_list:
+    print(f"{n.type} {n.name}")
+    return True
+  for sub_node in (n.child or []):
+    if find_func_def_in_file(sub_node,deep+1,func_name_list):
+      return True
+  return False

-if __name__ == "__main__":
-  file_name="main.c"
+def check_func_def(file_name:str,func_name_list:list):  # parse one file, print its node tree, and return whether it defines a function named in func_name_list
  with open(file_name,mode='rb') as f:
-    token_list=lex(f.read())
+    read_d=f.read()
+    if(read_d[:3]==bytes([0xef,0xbb,0xbf])):  # the file starts with a UTF-8 byte-order mark
+      read_d=read_d[3:]  # drop it before lexing
+    token_list=lex(read_d,file_name)
  file=node_file(name=file_name,token_list=token_list)
  while len(token_list)>0:
-    sentence=find_sentence(token_list)
-    node_d=dist_node_type(sentence)
-    file.child.append(node_d)
-    # print('找到一个语句：')
-    # for item in sentence:
-    #   print(f"\t{item}")
+    node_d=None
+    try:
+      sentence=find_sentence(token_list)
+      node_d=dist_node_type(sentence)
+    except Exception as e:
+      print(f"in {file_name}")
+      print(f"\t {e}")
+      break  # stop parsing this file at the first statement that raises; nodes found so far are kept
+    if not node_d is None:
+      file.child.append(node_d)
    token_list=token_list[len(sentence):]  # continue after the statement just consumed
-  print_node(file,0)
+  print_node(file,0)
+  return find_func_def_in_file(file,0,func_name_list)
+
+
+# Return (ret_list, err_list): the files in file_list that define a function
+# named in func_name_list, and the files whose analysis raised an exception.
+def find_func_def(file_list:list,func_name_list:list):
+  ret_list=[]
+  err_list=[]
+  for item in file_list:
+    sys.stdout.write('.')  # one progress dot per file
+    sys.stdout.flush()
+    try:
+      if check_func_def(item,func_name_list):
+        ret_list.append(item)
+    except Exception as e:  # per-file boundary: report, record the file, keep scanning
+      print(e)
+      err_list.append(item)
+  return ret_list,err_list
+
+# Recursively collect the paths under directory `path` whose names end with
+# the suffix `fix` (e.g. '.c'). An empty suffix matches every file.
+def find_type(path:str,fix:str):
+    file_list=[]
+    for entry in os.listdir(path):
+        ps=os.path.join(path, entry)
+        if os.path.isdir(ps):
+            # descend into sub-directories
+            file_list+=find_type(ps,fix)
+        elif ps.endswith(fix):
+            # str.endswith also handles fix=='' (the slice ps[-0:] never matched)
+            file_list.append(ps)
+    return file_list
+
+
+
+
+# with open("build/build_log.log",mode="r",encoding="utf-8") as f:
+#   _out_text=f.readlines()
+
+
+# Read linker output from stdin and return, in first-seen order and without
+# duplicates, the names quoted in "undefined reference to `name'" messages.
+def get_func_list():
+  key_str='undefined reference to `'
+  func_list=[]
+  for item in sys.stdin:
+    index=item.find(key_str)
+    if(index<0):
+      continue
+    # the name runs up to the closing quote; skip lines where it is missing
+    func,quote,_=item[index+len(key_str):].partition("'")
+    if quote and not (func in func_list):
+      func_list.append(func)
+  return func_list
+
+
+
+
+# Entry point: every command-line argument is a directory to scan for .c files.
+if __name__=="__main__":
+  file_list=[]
+  for scan_dir in sys.argv[1:]:
+    file_list.extend(find_type(scan_dir,'.c'))
+  # debug: file_list=["./dtest/dtest3/kl3_core_mark/core_main.c"]
+  print(f"there is {len(file_list)} .c file.")
+  # alternative (disabled): func_list=get_func_list()
+  func_list=['main']
+  print(func_list)
+  # debug: find_func_def(['driver/src/hw3/efuse.c'],['efuse_get_d_bg_vbg_cntl'])
+  ret_list,err_list=find_func_def(file_list,func_list)
+  print("已找到的文件")
+  for found_file in ret_list:
+    print(found_file)
+  print("分析失败的文件")
+  for failed_file in err_list:
+    print(failed_file)