修复一些符号不识别的问题
This commit is contained in:
		
							
								
								
									
										287
									
								
								lex_c.py
									
									
									
									
									
								
							
							
						
						
									
										287
									
								
								lex_c.py
									
									
									
									
									
								
							| @@ -1,287 +0,0 @@ | |||||||
| import os |  | ||||||
| import sys |  | ||||||
| import dataclasses |  | ||||||
|  |  | ||||||
|  |  | ||||||
# Token type identifiers for keywords, literals and multi-character operators.
# Values start at 256 so they cannot collide with single-character tokens,
# whose identifier is the byte value of the character itself (see TOKEN()).
# They are plain ints: a trailing comma after the value would silently turn
# the constant into a one-element tuple.
TOKEN_IF = 256
TOKEN_BREAK = 257
TOKEN_WHILE = 258
TOKEN_SWITCH = 259
TOKEN_CASE = 260
TOKEN_DO = 261
TOKEN_CHAR = 262
TOKEN_INT = 263
TOKEN_VOID = 264
TOKEN_SYMBOL = 265
TOKEN_NUM = 266  # number literal
TOKEN_INC = 267  # increment (++)
TOKEN_DEC = 268  # decrement (--)
TOKEN_EQ = 269  # equal (==)
TOKEN_NEQ = 270  # not equal (!=)
TOKEN_LSH = 271  # left shift (<<)
TOKEN_RSH = 272  # right shift (>>)
TOKEN_LEQ = 273  # less than or equal (<=)
TOKEN_GEQ = 274  # greater than or equal (>=)
TOKEN_ELSE = 275
TOKEN_CONTINUE = 276
TOKEN_CONST = 277
TOKEN_STATIC = 278
TOKEN_UNSIGNED = 279
TOKEN_TYPEDEF = 280
TOKEN_STRUCT = 281
TOKEN_ENUM = 282
TOKEN_UNION = 283
TOKEN_STRING = 284
TOKEN_DEFAULT = 285
TOKEN_RETURN = 286
TOKEN_ASSIG_ADD = 287
TOKEN_ASSIG_SUB = 288
TOKEN_ASSIG_MUL = 289
TOKEN_ASSIG_DIV = 290
TOKEN_ASSIG_LSH = 291
TOKEN_ASSIG_RSH = 292
TOKEN_EXTERN = 293
TOKEN_FLOAT = 294
TOKEN_DOUBLE = 295
TOKEN_SHORT = 296
TOKEN_LONG = 297
|  |  | ||||||
|  |  | ||||||
def TOKEN(t:str):
  """Return the token identifier of a single-character token.

  The identifier is the integer value of the first byte of ``t`` once it is
  encoded as UTF-8.
  """
  encoded=t.encode("utf-8")
  return encoded[0]
|  |  | ||||||
# Maps each recognised C keyword to its token identifier. Identifiers that
# are not listed here keep the generic TOKEN_SYMBOL type.
_KeyWordTable={
  "if":TOKEN_IF,
  "else":TOKEN_ELSE,
  "break":TOKEN_BREAK,
  "while":TOKEN_WHILE,
  "switch":TOKEN_SWITCH,
  "case":TOKEN_CASE,
  "do":TOKEN_DO,
  "char":TOKEN_CHAR,
  "int":TOKEN_INT,
  "void":TOKEN_VOID,
  "continue":TOKEN_CONTINUE,
  "const":TOKEN_CONST,
  "static":TOKEN_STATIC,
  # Was misspelled "unisgned", so the real keyword was lexed as a plain symbol.
  "unsigned":TOKEN_UNSIGNED,
  "typedef":TOKEN_TYPEDEF,
  "struct":TOKEN_STRUCT,
  "enum":TOKEN_ENUM,
  "union":TOKEN_UNION,
  "default":TOKEN_DEFAULT,
  "return":TOKEN_RETURN,
  "extern":TOKEN_EXTERN,
  "float":TOKEN_FLOAT,
  "double":TOKEN_DOUBLE,
  "short":TOKEN_SHORT,
  "long":TOKEN_LONG,
}
|  |  | ||||||
# Maps an operator spelling to its token identifier. Multi-character
# operators use the TOKEN_* constants; single-character operators use their
# own byte value, obtained through TOKEN().
_MarkTable={
  "<<":TOKEN_LSH,
  ">>":TOKEN_RSH,
  "<=":TOKEN_LEQ,
  ">=":TOKEN_GEQ,
  "!=":TOKEN_NEQ,
  "==":TOKEN_EQ,
  "++":TOKEN_INC,
  "--":TOKEN_DEC,
  "+=":TOKEN_ASSIG_ADD,
  "-=":TOKEN_ASSIG_SUB,
  "*=":TOKEN_ASSIG_MUL,
  "<<=":TOKEN_ASSIG_LSH,
  ">>=":TOKEN_ASSIG_RSH,
  "=":TOKEN("="),
  "!":TOKEN("!"),
  "<":TOKEN("<"),
  ">":TOKEN(">"),
  "+":TOKEN("+"),
  "-":TOKEN("-"),

}
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def isalnum(num:int):
  """Return True when ``num`` is the byte value of an ASCII letter or digit.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isalnum()
|  |  | ||||||
def isalnum_(num:int):
  """Return True when ``num`` is an ASCII letter, digit or underscore byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isalnum() or num==ord("_"))
|  |  | ||||||
def isalpha(num:int):
  """Return True when ``num`` is the byte value of an ASCII letter.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isalpha()
|  |  | ||||||
def isalpha_(num:int):
  """Return True when ``num`` is an ASCII letter or underscore byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isalpha() or num==ord("_"))
|  |  | ||||||
def isdigit(num:int):
  """Return True when ``num`` is the byte value of an ASCII digit.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isdigit()
|  |  | ||||||
def isdigitdot(num:int):
  """Return True when ``num`` is an ASCII digit or the decimal point byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isdigit() or num==ord("."))
|  |  | ||||||
def isspace(num:int):
  """Return True when ``num`` is an ASCII whitespace byte (newlines included).

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isspace()
|  |  | ||||||
def isinstr(num:int,t:str):
  """Return True when byte value ``num`` occurs in the UTF-8 encoding of ``t``.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and num in t.encode("utf-8")
|  |  | ||||||
def isoperator(num:int):
  """Return True when ``num`` is one of the operator bytes ``< > ! + - =``.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  rather than raising ValueError.
  """
  return 0<=num<=0xFF and num in b"<>!+-="
|  |  | ||||||
@dataclasses.dataclass
class lex_token:
    """One token produced by the lexer."""
    # Category label ("symbol", "operator", "number", "string") or, for
    # keywords, operators and single-character tokens, the token's spelling.
    name:str
    # Raw bytes of the token text as collected by the lexer.
    buff:bytearray
    # Token identifier: a TOKEN_* constant or a single character's byte value.
    token:int
    # Line and column counters of the lexer at the time the token was created.
    line:int
    pos:int
|  |  | ||||||
|  |  | ||||||
class lex_class(object):
  """Byte cursor over C source text that accumulates lexer tokens.

  ``index`` is the offset of the current byte in ``text`` (``-1`` before the
  first read), ``line`` and ``pos`` are the line and column counters,
  ``token_buff`` collects the bytes of the token being read and
  ``token_list`` holds every finished token. ``-1`` is used as the
  end-of-input value for a character.
  """
  def __init__(self,text:bytes) -> None:
    self.text=text
    self.index=-1
    self.line=1
    self.pos=-1
    self.token_list:list[lex_token]=[]
    self.token_buff=bytearray()
  def save_char(self,c:int):
    """Append ``c`` (masked to one byte) to the pending token buffer."""
    self.token_buff.append(c&0xff)
  def save_token(self,token:lex_token):
    """Store a finished token and start a fresh token buffer."""
    self.token_list.append(token)
    self.token_buff=bytearray()
  def _get_char(self):
    """Return the byte at ``index``, or ``-1`` when ``index`` is out of range."""
    # The lower bound keeps index -1 from wrapping around to the last byte.
    if(0<=self.index<len(self.text)):
      return self.text[self.index]
    return -1
  def get_next_char(self):
    """Advance one byte, update line/column counters and return the byte.

    Returns ``-1`` once the end of ``text`` has been reached.
    """
    if not self.is_end():
      self.index+=1
    c= self._get_char()
    if(c==b'\n'[0]):
      self.line+=1
      self.pos=-1
    else:
      self.pos+=1
    return c
  def is_end(self):
    """Return True when the cursor has moved past the last byte."""
    return self.index>=len(self.text)
  def save_one_char_token(self,c:int):
    """Save a token whose name, text and identifier are the single byte ``c``."""
    token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
    self.save_token(token)
  def read_name_and_save(self,c:int):
    """Read an identifier or keyword starting with ``c`` and save its token.

    Returns the first character after the name (``-1`` at end of input).
    """
    token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      # Stop at end of input before classifying: -1 is not a valid byte.
      if(c!=-1 and isalnum_(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _KeyWordTable):
      token.token=_KeyWordTable[name]
      token.name=name
    token.buff=self.token_buff
    self.save_token(token)
    return c
  def read_operator_and_save(self,c:int):
    """Read a run of operator characters starting with ``c`` and save its token.

    Raises Exception when the run is not a known operator. Returns the
    first character after the operator (``-1`` at end of input).
    """
    token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(c!=-1 and isoperator(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _MarkTable):
      token.token=_MarkTable[name]
      token.name=name
    else:
      raise Exception(f"不存在的操作符 {name} ")
    token.buff=self.token_buff
    self.save_token(token)
    return c
  def read_num_and_save(self,c:int):
    """Read a number (digits and at most one dot) starting with ``c``.

    Raises Exception when the number contains more than one dot. Returns
    the first character after the number (``-1`` at end of input).
    """
    token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(c!=-1 and isdigitdot(c)):
        self.save_char(c)
      else:
        break
    if(self.token_buff.count(b'.')>1):
      raise Exception("数字不能包含多个点号")
    token.buff=self.token_buff
    self.save_token(token)
    return c
  def read_str_and_save(self,c:int):
    """Read a string literal whose opening quote is the current character.

    The bytes between the quotes are saved as one TOKEN_STRING token; escape
    sequences are not interpreted. Raises Exception when the input ends
    before the closing quote. Returns the character after the closing quote.
    """
    c=self.get_next_char()
    while c!=b'\"'[0]:
      if(c==-1):
        # Without this check an unterminated string would loop forever.
        raise Exception(f"字符串缺少结束的引号, line:{self.line} pos:{self.pos}")
      self.save_char(c)
      c=self.get_next_char()
    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
    return self.get_next_char()
|  |  | ||||||
def lex(text:bytes):
  """Split C source bytes into tokens and return them as a list.

  Whitespace, ``//`` line comments and ``/* */`` block comments are skipped.
  Raises Exception for an unknown character, for a backslash that is not
  followed by a line break, and for a block comment that is never closed.
  """
  lex_obj = lex_class(text)
  c=lex_obj.get_next_char()
  while not lex_obj.is_end():
    if isalpha_(c):
      c=lex_obj.read_name_and_save(c)
    elif isinstr(c,"{}[]()~,;:*"):
      lex_obj.save_one_char_token(c)
      c=lex_obj.get_next_char()
    elif isdigit(c):
      c=lex_obj.read_num_and_save(c)
    elif isspace(c):
      c=lex_obj.get_next_char()
    elif isoperator(c):
      c=lex_obj.read_operator_and_save(c)
    elif isinstr(c,"\""):
      c=lex_obj.read_str_and_save(c)
    elif isinstr(c,"\\"):
      # get_next_char() takes no argument; passing one raised TypeError.
      c=lex_obj.get_next_char()
      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
        raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
    elif isinstr(c,"/"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("/")):
        # Line comment: skip to the newline, or to end of input when the
        # last line has no trailing newline.
        while c!=TOKEN("\n") and c!=-1:
          c=lex_obj.get_next_char()
      elif(c==TOKEN("*")):
        # Block comment: skip until the closing "*/".
        c_old=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
        while not (c_old==TOKEN("*") and c==TOKEN("/")):
          if(c==-1):
            raise Exception(f"注释缺少结束符 '*/', line:{lex_obj.line} pos:{lex_obj.pos}")
          c_old=c
          c=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
      elif(c==TOKEN("=")):
        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
        c=lex_obj.get_next_char()
      else:
        # Plain division: ``c`` already holds the character after "/".
        lex_obj.save_one_char_token(TOKEN("/"))
    else:
      raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
  return lex_obj.token_list
|  |  | ||||||
if __name__ == "__main__":
  # Manual smoke run: tokenize the file "main.c" from the working directory.
  with open("main.c",mode='rb') as source_file:
    source_bytes=source_file.read()
  lex(source_bytes)
							
								
								
									
										128
									
								
								node_declear.py
									
									
									
									
									
								
							
							
						
						
									
										128
									
								
								node_declear.py
									
									
									
									
									
								
							| @@ -1,128 +0,0 @@ | |||||||
| from lex_c import lex_token |  | ||||||
| import lex_c |  | ||||||
| from parser_c import node |  | ||||||
| from parser_c import node_file |  | ||||||
| from parser_c import node_variable_def |  | ||||||
| from parser_c import node_struct_decl |  | ||||||
| from parser_c import node_struct_def |  | ||||||
| from parser_c import node_union_decl |  | ||||||
| from parser_c import node_union_def |  | ||||||
| from parser_c import node_enum_decl |  | ||||||
| from parser_c import node_enum_def |  | ||||||
| from parser_c import node_func_decl |  | ||||||
| from parser_c import node_typedef |  | ||||||
| from parser_c import node_func_def |  | ||||||
|  |  | ||||||
| from parser_c import find_sentence |  | ||||||
| from parser_c import dist_node_type |  | ||||||
| from parser_c import find_close |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_struct(token_list:list[lex_token]):
  """Build a struct node from the tokens of a ``struct`` statement.

  ``struct NAME`` yields a ``node_struct_decl``; ``struct NAME { ... }``
  yields a ``node_struct_def`` whose body holds one node per member
  sentence. Any other shape raises Exception.
  """
  head=token_list[0]
  if head.token!=lex_c.TOKEN_STRUCT or token_list[1].token!=lex_c.TOKEN_SYMBOL:
    raise Exception(f"语法错误 {head}")
  if len(token_list)==2:
    return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
  if token_list[2].token!=lex_c.TOKEN("{"):
    raise Exception(f"语法错误 {head}")
  if token_list[-1].token!=lex_c.TOKEN("}"):
    raise Exception("没有出现预期的符号 '}'")
  members:list[node_variable_def]=[]
  remaining=token_list[3:-1]
  # Peel one sentence at a time off the tokens between the braces.
  while remaining:
    sentence=find_sentence(remaining)
    members.append(dist_node_type(token_list=sentence))
    remaining=remaining[len(sentence):]
  return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=members)
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_union(token_list:list[lex_token]):
  """Build a union node from the tokens of a ``union`` statement.

  ``union NAME`` yields a ``node_union_decl``; ``union NAME { ... }``
  yields a ``node_union_def`` whose body holds one node per member
  sentence. Any other shape raises Exception.
  """
  head=token_list[0]
  if head.token!=lex_c.TOKEN_UNION or token_list[1].token!=lex_c.TOKEN_SYMBOL:
    raise Exception(f"语法错误 {head}")
  if len(token_list)==2:
    return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
  if token_list[2].token!=lex_c.TOKEN("{"):
    raise Exception(f"语法错误 {head}")
  if token_list[-1].token!=lex_c.TOKEN("}"):
    raise Exception("没有出现预期的符号 '}'")
  members:list[node_variable_def]=[]
  remaining=token_list[3:-1]
  # Peel one sentence at a time off the tokens between the braces.
  while remaining:
    sentence=find_sentence(remaining)
    members.append(dist_node_type(token_list=sentence))
    remaining=remaining[len(sentence):]
  return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=members)
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_enum(token_list:list[lex_token]):
  """Build an enum node from the tokens of an ``enum`` statement.

  ``enum NAME`` yields a ``node_enum_decl``; ``enum NAME { A, B = 3, C }``
  yields a ``node_enum_def`` whose body is a list of one-entry dicts mapping
  each enumerator name to its value. As in C, an enumerator without an
  explicit value takes the previous value plus one, and the first one
  defaults to 0. Raises Exception on any other shape, on a missing closing
  brace, or when enumerators are not separated by ``,``.
  """
  if(token_list[0].token==lex_c.TOKEN_ENUM):
    # The length check turns a lone "enum" into a syntax error, not IndexError.
    if(len(token_list)>1 and token_list[1].token==lex_c.TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
      elif(token_list[2].token==lex_c.TOKEN("{")):
        if not token_list[-1].token==lex_c.TOKEN("}"):
          raise Exception("没有出现预期的符号 '}'")
        token_list_local=token_list[3:-1]
        next_value=0
        v_list:list[dict]=[]
        while len(token_list_local)>0:
          if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
            key=token_list_local[0].buff.decode("utf-8")
            # Only look at "= NUM" when those tokens exist; the last
            # enumerator usually has nothing after its name.
            if(len(token_list_local)>=3
               and token_list_local[1].token==lex_c.TOKEN("=")
               and token_list_local[2].token==lex_c.TOKEN_NUM):
              value=int(token_list_local[2].buff.decode("utf-8"))
              token_list_local=token_list_local[3:]
            else:
              value=next_value
              token_list_local=token_list_local[1:]
            v_list.append({key:value})
            next_value=value+1
          if(len(token_list_local)>0):
            if(token_list_local[0].token!=lex_c.TOKEN(",")):
              raise Exception(f"枚举类型应该使用 ',' 分隔符")
            token_list_local=token_list_local[1:]
        return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
  raise Exception(f"语法错误 {token_list[0]}")
|  |  | ||||||
|  |  | ||||||
def dist_node_type_typedef(token_list:list[lex_token]):
  """Build a ``node_typedef`` from the tokens of a ``typedef`` statement.

  The last token must be a symbol and becomes the new type name. The tokens
  in between are collected into ``attr`` as text: leading ``unsigned`` /
  ``const`` qualifiers, then either a ``struct``/``union`` reference, a
  symbol naming another type, or a run of built-in type keywords and ``*``.
  Raises Exception when the statement does not start with ``typedef``, when
  the last token is not a symbol, or when unexpected tokens remain.
  """
  if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
    attr=[]
    # Keep the complete token list for the resulting node; ``token_list``
    # itself is consumed from the front below.
    token_list_local=token_list
    if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
      raise Exception(f"没有定义新类型 {token_list[-1]}")
    name=token_list[-1].buff.decode("utf-8")
    # Drop the leading ``typedef`` keyword.
    token_list=token_list[1:]
    while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
      attr.append(token_list[0].name)
      token_list=token_list[1:]
    if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
      attr.append(token_list[0].name)
      if(token_list[1].token==lex_c.TOKEN_SYMBOL):
        node_r=None
        attr.append(token_list[1].buff.decode("utf-8"))
        if(token_list[2].token==lex_c.TOKEN("{")):
          # NOTE(review): ``typedef`` was already stripped above, so this
          # slice starts at the struct/union name, not at the keyword --
          # confirm that dist_node_type expects that.
          node_r=dist_node_type(token_list=token_list[1:-1])
        elif(token_list[2].token==lex_c.TOKEN("*")):
          attr.append(token_list[2].name)
        return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
    if(token_list[0].token==lex_c.TOKEN_SYMBOL):
      # A custom type that was itself defined with typedef.
      attr.append(token_list[0].buff.decode("utf-8"))
      token_list=token_list[1:]
    else:
      # C built-in types (and pointer stars).
      while(token_list[0].token in
        [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
         lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
        attr.append(token_list[0].name)
        token_list=token_list[1:]
    # Only the new type name may remain at this point.
    if(len(token_list)>1):
      raise Exception(f"意外的token {token_list[0]}")
    return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)
  raise Exception(f"语法错误 {token_list[0]}")
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										789
									
								
								parser_c.py
									
									
									
									
									
								
							
							
						
						
									
										789
									
								
								parser_c.py
									
									
									
									
									
								
							| @@ -1,20 +1,441 @@ | |||||||
| import os | import os | ||||||
| import sys | import sys | ||||||
|  | import shutil | ||||||
| import dataclasses | import dataclasses | ||||||
| from lex_c import lex_token | import copy | ||||||
| from lex_c import lex |  | ||||||
| import lex_c |  | ||||||
|  |  | ||||||
# Token type identifiers for keywords, literals and multi-character operators.
# Values start at 256 so they cannot collide with single-character tokens,
# whose identifier is the byte value of the character itself (see TOKEN()).
# They are plain ints: a trailing comma after the value would silently turn
# the constant into a one-element tuple.
TOKEN_IF = 256
TOKEN_BREAK = 257
TOKEN_WHILE = 258
TOKEN_SWITCH = 259
TOKEN_CASE = 260
TOKEN_DO = 261
TOKEN_CHAR = 262
TOKEN_INT = 263
TOKEN_VOID = 264
TOKEN_SYMBOL = 265
TOKEN_NUM = 266  # number literal
TOKEN_INC = 267  # increment (++)
TOKEN_DEC = 268  # decrement (--)
TOKEN_EQ = 269  # equal (==)
TOKEN_NEQ = 270  # not equal (!=)
TOKEN_LSH = 271  # left shift (<<)
TOKEN_RSH = 272  # right shift (>>)
TOKEN_LEQ = 273  # less than or equal (<=)
TOKEN_GEQ = 274  # greater than or equal (>=)
TOKEN_ELSE = 275
TOKEN_CONTINUE = 276
TOKEN_CONST = 277
TOKEN_STATIC = 278
TOKEN_UNSIGNED = 279
TOKEN_TYPEDEF = 280
TOKEN_STRUCT = 281
TOKEN_ENUM = 282
TOKEN_UNION = 283
TOKEN_STRING = 284
TOKEN_DEFAULT = 285
TOKEN_RETURN = 286
TOKEN_ASSIG_ADD = 287
TOKEN_ASSIG_SUB = 288
TOKEN_ASSIG_MUL = 289
TOKEN_ASSIG_DIV = 290
TOKEN_ASSIG_LSH = 291
TOKEN_ASSIG_RSH = 292
TOKEN_EXTERN = 293
TOKEN_FLOAT = 294
TOKEN_DOUBLE = 295
TOKEN_SHORT = 296
TOKEN_LONG = 297
TOKEN_POINTER = 298
TOKEN_LOGICAL_OR = 299  # logical or
TOKEN_LOGICAL_AND = 300  # logical and
TOKEN_OMIT = 301  # ellipsis (...)
|  |  | ||||||
|  |  | ||||||
def TOKEN(t:str):
  """Return the token identifier of a single-character token.

  The identifier is the integer value of the first byte of ``t`` once it is
  encoded as UTF-8.
  """
  encoded=t.encode("utf-8")
  return encoded[0]
|  |  | ||||||
# Maps each recognised C keyword to its token identifier. Identifiers that
# are not listed here keep the generic TOKEN_SYMBOL type.
_KeyWordTable={
  "if":TOKEN_IF,
  "else":TOKEN_ELSE,
  "break":TOKEN_BREAK,
  "while":TOKEN_WHILE,
  "switch":TOKEN_SWITCH,
  "case":TOKEN_CASE,
  "do":TOKEN_DO,
  "char":TOKEN_CHAR,
  "int":TOKEN_INT,
  "void":TOKEN_VOID,
  "continue":TOKEN_CONTINUE,
  "const":TOKEN_CONST,
  "static":TOKEN_STATIC,
  # Was misspelled "unisgned", so the real keyword was lexed as a plain symbol.
  "unsigned":TOKEN_UNSIGNED,
  "typedef":TOKEN_TYPEDEF,
  "struct":TOKEN_STRUCT,
  "enum":TOKEN_ENUM,
  "union":TOKEN_UNION,
  "default":TOKEN_DEFAULT,
  "return":TOKEN_RETURN,
  "extern":TOKEN_EXTERN,
  "float":TOKEN_FLOAT,
  "double":TOKEN_DOUBLE,
  "short":TOKEN_SHORT,
  "long":TOKEN_LONG,
}
|  |  | ||||||
# Maps an operator spelling to its token identifier. Multi-character
# operators use the TOKEN_* constants; single-character operators use their
# own byte value, obtained through TOKEN().
_MarkTable={
  "<<":TOKEN_LSH,
  ">>":TOKEN_RSH,
  "<=":TOKEN_LEQ,
  ">=":TOKEN_GEQ,
  "!=":TOKEN_NEQ,
  "==":TOKEN_EQ,
  "++":TOKEN_INC,
  "--":TOKEN_DEC,
  "->":TOKEN_POINTER,
  "+=":TOKEN_ASSIG_ADD,
  "-=":TOKEN_ASSIG_SUB,
  "*=":TOKEN_ASSIG_MUL,
  "<<=":TOKEN_ASSIG_LSH,
  ">>=":TOKEN_ASSIG_RSH,
  "=":TOKEN("="),
  "!":TOKEN("!"),
  "<":TOKEN("<"),
  ">":TOKEN(">"),
  "+":TOKEN("+"),
  "-":TOKEN("-"),

}
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def isalnum(num:int):
  """Return True when ``num`` is the byte value of an ASCII letter or digit.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isalnum()
|  |  | ||||||
def isalnum_(num:int):
  """Return True when ``num`` is an ASCII letter, digit or underscore byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isalnum() or num==ord("_"))
|  |  | ||||||
def isalpha(num:int):
  """Return True when ``num`` is the byte value of an ASCII letter.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isalpha()
|  |  | ||||||
def isalpha_(num:int):
  """Return True when ``num`` is an ASCII letter or underscore byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isalpha() or num==ord("_"))
|  |  | ||||||
def isdigit(num:int):
  """Return True when ``num`` is the byte value of an ASCII digit.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isdigit()
|  |  | ||||||
def isdigitdot(num:int):
  """Return True when ``num`` is an ASCII digit or the decimal point byte.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and (bytes([num]).isdigit() or num==ord("."))
|  |  | ||||||
def isspace(num:int):
  """Return True when ``num`` is an ASCII whitespace byte (newlines included).

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and bytes([num]).isspace()
|  |  | ||||||
def isinstr(num:int,t:str):
  """Return True when byte value ``num`` occurs in the UTF-8 encoding of ``t``.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  instead of the ValueError that ``bytes([num])`` would raise.
  """
  return 0<=num<=0xFF and num in t.encode("utf-8")
|  |  | ||||||
def isoperator(num:int):
  """Return True when ``num`` is one of the operator bytes ``< > ! + - =``.

  Values outside 0..255 (such as the ``-1`` end-of-input value) yield False
  rather than raising ValueError.
  """
  return 0<=num<=0xFF and num in b"<>!+-="
|  |  | ||||||
@dataclasses.dataclass
class lex_token:
    """One token produced by the lexer."""
    # Category label ("symbol", "operator", "number", "string") or, for
    # keywords, operators and single-character tokens, the token's spelling.
    name:str
    # Raw bytes of the token text as collected by the lexer.
    buff:bytearray
    # Token identifier: a TOKEN_* constant or a single character's byte value.
    token:int
    # Line and column counters of the lexer at the time the token was created.
    line:int
    pos:int
|  |  | ||||||
# Operator runs that are really several operators written back to back.
# Each entry lists the individual tokens the run is split into. As with
# every other operator token, ``name`` is the operator's spelling and
# ``buff`` its bytes; the two were previously passed in swapped order, so
# the split tokens came out named "operator". Line and column are
# placeholders that the lexer overwrites.
_NotMarkTable={
  "!!":[lex_token('!',b'!',TOKEN('!'),0,0),
        lex_token('!',b'!',TOKEN('!'),0,0)],
  "=-":[lex_token('=',b'=',TOKEN('='),0,0),
        lex_token('-',b'-',TOKEN('-'),0,0)],
  "--=":[lex_token('--',b'--',TOKEN_DEC,0,0),
        lex_token('=',b'=',TOKEN('='),0,0)],
  "++=":[lex_token('++',b'++',TOKEN_INC,0,0),
        lex_token('=',b'=',TOKEN('='),0,0)],
  "=--":[lex_token('=',b'=',TOKEN('='),0,0),
        lex_token('--',b'--',TOKEN_DEC,0,0)],
  "=++":[lex_token('=',b'=',TOKEN('='),0,0),
        lex_token('++',b'++',TOKEN_INC,0,0)],
  "!=--":[lex_token('!=',b'!=',TOKEN_NEQ,0,0),
        lex_token('--',b'--',TOKEN_DEC,0,0)],
  "!=++":[lex_token('!=',b'!=',TOKEN_NEQ,0,0),
        lex_token('++',b'++',TOKEN_INC,0,0)],
  "==--":[lex_token('==',b'==',TOKEN_EQ,0,0),
        lex_token('--',b'--',TOKEN_DEC,0,0)],
  "==++":[lex_token('==',b'==',TOKEN_EQ,0,0),
        lex_token('++',b'++',TOKEN_INC,0,0)],
}
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
class lex_class(object):
  """Byte cursor over C source text that accumulates lexer tokens.

  ``index`` is the offset of the current byte in ``text`` (``-1`` before the
  first read), ``line`` and ``pos`` are the line and column counters,
  ``token_buff`` collects the bytes of the token being read and
  ``token_list`` holds every finished token. ``file_name`` is only used in
  error messages. ``macro_table`` maps ``#define`` names to their
  replacement text and ``macro_result`` remembers the outcome of the most
  recent conditional directive. ``-1`` is used as the end-of-input value
  for a character.
  """
  def __init__(self,text:bytes,file_name:str="") -> None:
    self.text=text
    self.index=-1
    self.line=1
    self.pos=-1
    self.token_list:list[lex_token]=[]
    self.token_buff=bytearray()
    self.file_name=file_name
    self.macro_table={}
    self.macro_result=False
  def save_char(self,c:int):
    """Append ``c`` (masked to one byte) to the pending token buffer."""
    self.token_buff.append(c&0xff)
  def save_token(self,token:lex_token):
    """Store a finished token and start a fresh token buffer."""
    self.token_list.append(token)
    self.token_buff=bytearray()
  def _get_char(self):
    """Return the byte at ``index``, or ``-1`` when ``index`` is out of range."""
    # The lower bound keeps index -1 from wrapping around to the last byte.
    if(0<=self.index<len(self.text)):
      return self.text[self.index]
    return -1
  def get_next_char(self):
    """Advance one byte, update line/column counters and return the byte.

    Returns ``-1`` once the end of ``text`` has been reached.
    """
    if not self.is_end():
      self.index+=1
    c= self._get_char()
    if(c==b'\n'[0]):
      self.line+=1
      self.pos=-1
    else:
      self.pos+=1
    return c
  def is_end(self):
    """Return True when the cursor has moved past the last byte."""
    return self.index>=len(self.text)
  def save_one_char_token(self,c:int):
    """Save a token whose name, text and identifier are the single byte ``c``."""
    token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
    self.save_token(token)
  def read_name_and_save(self,c:int):
    """Read an identifier or keyword starting with ``c`` and save its token.

    Returns the first character after the name (``-1`` at end of input).
    """
    token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      # Stop at end of input before classifying: -1 is not a valid byte.
      if(c!=-1 and isalnum_(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _KeyWordTable):
      token.token=_KeyWordTable[name]
      token.name=name
    token.buff=self.token_buff
    self.save_token(token)
    return c
  def read_operator_and_save(self,c:int):
    """Read a run of operator characters starting with ``c`` and save it.

    A run found in ``_MarkTable`` becomes one token; a run found in
    ``_NotMarkTable`` is split into the listed tokens with consecutive
    columns. Raises Exception for any other run. Returns the first
    character after the run (``-1`` at end of input).
    """
    token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(c!=-1 and isoperator(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _MarkTable):
      token.token=_MarkTable[name]
      token.name=name
      token.buff=self.token_buff
      self.save_token(token)
    elif(name in _NotMarkTable):
      for t in _NotMarkTable[name]:
        # Take the spelling from ``buff`` (str or bytes): the entry's
        # ``name`` may be the generic label "operator", which used to leak
        # into the saved token's name, text and column advance.
        text=t.buff if isinstance(t.buff,str) else bytes(t.buff).decode("utf-8")
        token.token=t.token
        token.name=text
        token.buff=text.encode("utf-8")
        self.save_token(copy.deepcopy(token))
        token.pos+=len(text)
    else:
      raise Exception(f"不存在的操作符 {name} {self.file_name}:{self.line},{self.pos}")
    return c
  def read_num_and_save(self,c:int):
    """Read a number (digits and at most one dot) starting with ``c``.

    Raises Exception when the number contains more than one dot. Returns
    the first character after the number (``-1`` at end of input).
    """
    token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(c!=-1 and isdigitdot(c)):
        self.save_char(c)
      else:
        break
    if(self.token_buff.count(b'.')>1):
      raise Exception(f"数字不能包含多个点号 {self.file_name}:{self.line},{self.pos}")
    token.buff=self.token_buff
    self.save_token(token)
    return c
  # Escape character (the one after the backslash) -> resulting byte value.
  _escape_table={'0':0,'a':7,'b':8,'t':9,'n':10,'v':11,'f':12,'r':13,'"':34,'\'':39,'?':63,'\\':92}
  def _read_quoted_and_save(self,quote:int):
    """Read up to the closing ``quote`` byte and save a TOKEN_STRING token.

    Backslash escapes are translated through ``_escape_table``; an escape
    character missing from the table becomes byte 0. Raises Exception when
    the input ends before the closing quote. Returns the character after
    the closing quote.
    """
    c=self.get_next_char()
    while c!=quote:
      if(c==-1):
        # Without this check an unterminated literal would loop forever.
        raise Exception(f"缺少结束的引号 {self.file_name}:{self.line},{self.pos}")
      if(c==TOKEN('\\')):
        c=self.get_next_char()
        if(c==-1):
          raise Exception(f"缺少结束的引号 {self.file_name}:{self.line},{self.pos}")
        # The table is keyed by characters, so convert the byte value first;
        # looking it up by int never matched and turned every escape into 0.
        self.save_char(self._escape_table.get(chr(c),0))
      else:
        self.save_char(c)
      c=self.get_next_char()
    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
    return self.get_next_char()
  def read_str_and_save(self,c:int):
    """Read a double-quoted string literal; the current character is its opening quote."""
    return self._read_quoted_and_save(b'\"'[0])
  def read_char_and_save(self,c:int):
    """Read a single-quoted character literal; it is saved as a TOKEN_STRING token."""
    return self._read_quoted_and_save(b'\''[0])
  def deal_macro(self,buff:bytearray):
    """Process one preprocessor directive line.

    ``buff`` is the directive text including the leading ``#``.
    ``#define`` records the macro in ``macro_table`` (the first definition
    of a name wins) and returns None. ``#ifdef`` returns whether the name
    is defined; ``#if`` and ``#elif`` always return False because
    conditions are not evaluated; ``#else`` returns the inverse of the last
    ``#ifdef``/``#if`` result. Every other directive returns True, and an
    empty line returns None. Nested conditionals are not tracked.
    """
    sp=buff.decode('utf-8').split()
    if(len(sp)>0):
      if(sp[0]=='#define'):
        # A bare "#define" with no name is ignored instead of raising IndexError.
        if(len(sp)>=2):
          self.macro_table.setdefault(sp[1],' '.join(sp[2:]))
        # NOTE(review): #define yields None while other non-conditional
        # directives yield True -- confirm what the caller expects.
      elif(sp[0]=='#ifdef'):
        self.macro_result=(len(sp)>1 and sp[1] in self.macro_table)
        return self.macro_result
      elif(sp[0]=='#if'):
        # Conditions are not evaluated yet; treat them as false for now.
        self.macro_result=False
        return self.macro_result
      elif(sp[0]=='#elif'):
        # Also unevaluated; macro_result is left alone so that a following
        # #else still reflects the opening #ifdef/#if.
        return False
      elif(sp[0]=='#else'):
        # macro_result is no longer reset on entry, so this really inverts
        # the previous conditional instead of always yielding True.
        self.macro_result= not self.macro_result
        return self.macro_result
      elif(sp[0]=='#endif'):
        return True
      else:
        return True
    return None
|  |  | ||||||
def lex(text:bytes,file_name:str=""):
  """Tokenize C source text and return the resulting token list.

  A ``lex_class`` object is created for ``text`` and driven one character at
  a time until ``is_end()`` reports the end of input.  Identifiers, numbers,
  operators, string and character literals are delegated to the matching
  ``read_*_and_save`` method; comments are discarded; preprocessor lines are
  collected and handed to ``deal_macro``, and when that returns a false value
  the input is skipped up to the next line that starts with '#'.

  Returns ``lex_obj.token_list``.
  Raises Exception for a backslash that is not at the end of a line, for
  ".." that is not followed by a third '.', and for any unknown character.
  """
  lex_obj = lex_class(text,file_name)
  c=lex_obj.get_next_char()
  while not lex_obj.is_end():
    if isalpha_(c):
      c=lex_obj.read_name_and_save(c)
    elif isinstr(c,"{}[]()~,;:*?%^"):
      lex_obj.save_one_char_token(c)
      c=lex_obj.get_next_char()
    elif isdigit(c):
      c=lex_obj.read_num_and_save(c)
    elif isspace(c):
      c=lex_obj.get_next_char()
    elif isoperator(c):
      c=lex_obj.read_operator_and_save(c)
    elif isinstr(c,"\""):
      c=lex_obj.read_str_and_save(c)
    elif isinstr(c,"\'"):
      c=lex_obj.read_char_and_save(c)
    elif isinstr(c,"\\"):
      c=lex_obj.get_next_char()
      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
        raise Exception(f"符号 '\\' 必须在行末, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
    elif isinstr(c,"#"): # preprocessor directive
      c_old=c
      buff=bytearray()
      # Collect the directive one character behind the cursor (c_old) so
      # that two-character sequences can be recognised before being stored.
      while (c!=TOKEN("\n") and c!=-1):
        c=lex_obj.get_next_char()
        if(c_old==TOKEN('/') and c==TOKEN('*')):
          # Block comment after the directive: skip it, stopping at end of
          # input instead of spinning forever.
          while c!=-1 and not (c_old==TOKEN("*") and c==TOKEN("/")):
            c_old=c
            c=lex_obj.get_next_char()
          if(c!=-1):
            # Step past the closing '/' so it is not stored in buff.
            c=lex_obj.get_next_char()
        elif(c_old==TOKEN('/') and c==TOKEN('/')):
          # Line comment after the directive: skip to the end of the line.
          while not (c==TOKEN('\n') or c==-1):
            c=lex_obj.get_next_char()
        elif(c_old==TOKEN('\\') and c in [TOKEN('\n'),TOKEN('\r')]):
          # Line continuation: drop the backslash and the line break
          # (both bytes of a "\r\n" pair).
          was_cr=(c==TOKEN('\r'))
          c=lex_obj.get_next_char()
          if(was_cr and c==TOKEN('\n')):
            c=lex_obj.get_next_char()
        else:
          buff.append(c_old&0xff)
        c_old=c
      if not (lex_obj.deal_macro(buff)):
        # Condition failed: skip input until a '#' that is the first
        # non-blank character of a line, or until end of input.
        is_space=True
        while True:
          c=lex_obj.get_next_char()
          if(is_space and c==TOKEN('#')):
            break
          if(c==-1):
            break
          if not isspace(c):
            is_space=False
          elif(c==TOKEN('\n')):
            is_space=True
    elif isinstr(c,"/"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("/")):
        # Line comment.
        while (c!=TOKEN("\n") and c!=-1):
          c=lex_obj.get_next_char()
      elif(c==TOKEN("*")):
        # Block comment; an unterminated comment simply ends at end of input.
        c_old=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
        while c!=-1 and not (c_old==TOKEN("*") and c==TOKEN("/")):
          c_old=c
          c=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
      elif(c==TOKEN("=")):
        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
        c=lex_obj.get_next_char()
      else:
        lex_obj.save_one_char_token(TOKEN("/"))
    elif isinstr(c,"|"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("|")):
        lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
        # Consume the second '|' so it is not lexed again as a lone '|'.
        c=lex_obj.get_next_char()
      else:
        lex_obj.save_one_char_token(TOKEN("|"))
    elif isinstr(c,"&"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("&")):
        lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
        # Consume the second '&' so it is not lexed again as a lone '&'.
        c=lex_obj.get_next_char()
      else:
        lex_obj.save_one_char_token(TOKEN("&"))
    elif isinstr(c,'.'):
      c=lex_obj.get_next_char()
      if(c==TOKEN('.')):
        c=lex_obj.get_next_char()
        if(c==TOKEN('.')):
          lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
          # Consume the third '.' so it is not lexed again as a lone '.'.
          c=lex_obj.get_next_char()
        else:
          raise Exception (f"格式错误 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
      else:
        lex_obj.save_one_char_token(TOKEN("."))
    else:
      raise Exception(f"未知的字符 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
  return lex_obj.token_list
|  |  | ||||||
| _NodeTypeTable=[ |  | ||||||
|   "file","vdecl","fdef" |  | ||||||
| ] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @dataclasses.dataclass | @dataclasses.dataclass | ||||||
| class node: | class node: | ||||||
|   name:list[str]=dataclasses.field(default_factory=list) |   name:list=dataclasses.field(default_factory=list) | ||||||
|   type:str="base" |   type:str="base" | ||||||
|   token_list:list[lex_token]=dataclasses.field(default_factory=list) |   token_list:list=dataclasses.field(default_factory=list) | ||||||
|   child:list=dataclasses.field(default_factory=list) |   child:list=dataclasses.field(default_factory=list) | ||||||
|   def complite(self): |   def complite(self): | ||||||
|     print(f"complite {self.type}") |     print(f"complite {self.type}") | ||||||
| @@ -125,7 +546,7 @@ class node_int(node): | |||||||
|  |  | ||||||
|  |  | ||||||
| # 找到闭合的括号 | # 找到闭合的括号 | ||||||
| def find_close(token_list:list[lex_token],token:tuple[int,int]): | def find_close(token_list:list,token:tuple): | ||||||
|   if token_list[0].token!=token[0]: |   if token_list[0].token!=token[0]: | ||||||
|     return 0 |     return 0 | ||||||
|   num=0 |   num=0 | ||||||
| @@ -136,20 +557,33 @@ def find_close(token_list:list[lex_token],token:tuple[int,int]): | |||||||
|       num-=1 |       num-=1 | ||||||
|     if(num==0): |     if(num==0): | ||||||
|       return index |       return index | ||||||
|   raise Exception(f"没有找到闭合的符号 {token[1]}") |   raise Exception(f"没有找到闭合的符号 {token_list[0]}") | ||||||
|  |  | ||||||
|  | # 找到指定token的index | ||||||
def find_token(token_list:list,token:int):
  """Return the index of the first item whose ``.token`` equals ``token``.

  When no item matches, ``len(token_list)`` is returned, so a missing token
  compares as "after everything" when two positions are compared.
  """
  for index,item in enumerate(token_list):
    if(item.token==token):
      return index
  return len(token_list)
|  |  | ||||||
|  |  | ||||||
| # 找到一个完整的语句 | # 找到一个完整的语句 | ||||||
| def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex_c.TOKEN(":")]): | def find_sentence(token_list:list,sep:list=[TOKEN(";"),TOKEN(":")]): | ||||||
|   bracket_flag=False |   bracket_flag=False | ||||||
|   index=0 |   index=0 | ||||||
|  |   if(len(token_list)==1): | ||||||
|  |     return token_list | ||||||
|   while index<len(token_list): |   while index<len(token_list): | ||||||
|     if(token_list[index].token==lex_c.TOKEN("(")): |     if(token_list[index].token==TOKEN("(")): | ||||||
|       bracket_index=find_close(token_list[index:],(lex_c.TOKEN("("),lex_c.TOKEN(")"))) |       bracket_index=find_close(token_list[index:],(TOKEN("("),TOKEN(")"))) | ||||||
|       if(bracket_index>0): |       if(bracket_index>0): | ||||||
|         bracket_flag=True |         bracket_flag=True | ||||||
|         index+=bracket_index |         index+=bracket_index | ||||||
|     elif(token_list[index].token==lex_c.TOKEN("{")): |     elif(token_list[index].token==TOKEN("{")): | ||||||
|       bracket_index=find_close(token_list[index:],(lex_c.TOKEN("{"),lex_c.TOKEN("}"))) |       bracket_index=find_close(token_list[index:],(TOKEN("{"),TOKEN("}"))) | ||||||
|       if(bracket_index>0): |       if(bracket_index>0): | ||||||
|         index+=bracket_index |         index+=bracket_index | ||||||
|         if(bracket_flag==True): |         if(bracket_flag==True): | ||||||
| @@ -157,7 +591,7 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex | |||||||
|     elif(token_list[index].token in sep): |     elif(token_list[index].token in sep): | ||||||
|       return token_list[:index+1] |       return token_list[:index+1] | ||||||
|     index+=1 |     index+=1 | ||||||
|   raise Exception(f"没有找到完整的语句") |   raise Exception(f"没有找到完整的语句 sep={sep} token={token_list[0]}") | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -171,140 +605,162 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_struct(token_list:list):
  """Classify a statement whose first token is ``struct``.

  * ``struct NAME``              -> node_struct_decl
  * ``struct NAME { ...``        -> node_struct_def (members are not parsed)
  * ``struct NAME ... f( ... {`` -> function definition, when the token in
    front of the first '(' is a symbol and a '{' follows that '('
  * ``struct NAME ...`` otherwise -> node_struct_decl

  Raises Exception when the statement matches none of these forms.
  """
  if(token_list[0].token==TOKEN_STRUCT and len(token_list)>1):
    if(token_list[1].token==TOKEN_SYMBOL):
      name=token_list[1].buff.decode("utf-8")
      if(len(token_list)==2):
        return node_struct_decl(name=name,token_list=token_list,child=[])
      if(token_list[2].token==TOKEN("{")):
        return node_struct_def(name=name,token_list=token_list,child=[])
      # A function returning a struct also starts with "struct NAME"; test
      # for it before falling back to a declaration, otherwise the
      # definition is never reported as a function.
      paren=find_token(token_list,TOKEN('('))
      brace=find_token(token_list,TOKEN('{'))
      if(paren<brace<len(token_list) and token_list[paren-1].token==TOKEN_SYMBOL):
        return dist_node_type_funcdef(token_list=token_list)
      return node_struct_decl(name=name,token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # Function definition.
    return dist_node_type_funcdef(token_list=token_list)

  raise Exception(f"语法错误 {token_list[0]}")
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_union(token_list:list):
  """Classify a statement whose first token is ``union``.

  * ``union NAME``         -> node_union_decl
  * ``union NAME { ...``   -> node_union_def (members are not parsed)
  * a '(' in front of the first '{' (or with no '{' at all) -> function
    definition
  * any other ``union NAME ...`` -> node_union_decl, matching how the
    struct classifier treats the same form

  Raises Exception when the statement matches none of these forms.
  """
  named=False
  # The length test keeps a lone "union" from indexing past the list.
  if(token_list[0].token==TOKEN_UNION and len(token_list)>1):
    if(token_list[1].token==TOKEN_SYMBOL):
      named=True
      if(len(token_list)==2):
        return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
      if(token_list[2].token==TOKEN("{")):
        return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # Function definition.
    return dist_node_type_funcdef(token_list=token_list)
  if(named):
    # e.g. a variable of union type: report it as a declaration instead of
    # failing the whole statement.
    return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
def dist_node_type_enum(token_list:list):
  """Classify a statement whose first token is ``enum``.

  * ``enum NAME``        -> node_enum_decl
  * ``enum NAME { ...``  -> node_enum_def (enumerators are not parsed)
  * ``enum { ...``       -> node_enum_def named 'unnamed_enum'
  * a '(' in front of the first '{' (or with no '{' at all) -> function
    definition
  * any other ``enum NAME ...`` -> node_enum_decl, matching how the struct
    classifier treats the same form

  Raises Exception when the statement matches none of these forms.
  """
  named=False
  # The length test keeps a lone "enum" from indexing past the list.
  if(token_list[0].token==TOKEN_ENUM and len(token_list)>1):
    if(token_list[1].token==TOKEN_SYMBOL):
      named=True
      if(len(token_list)==2):
        return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
      if(token_list[2].token==TOKEN("{")):
        return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
    elif(token_list[1].token==TOKEN("{")):
      return node_enum_def(name='unnamed_enum',token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # Function definition.
    return dist_node_type_funcdef(token_list=token_list)
  if(named):
    # e.g. a variable of enum type: report it as a declaration instead of
    # failing the whole statement.
    return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")
|  |  | ||||||
|  |  | ||||||
def dist_node_type_typedef(token_list:list):
  """Build a node_typedef for a statement whose first token is ``typedef``.

  The new type name is taken from the text of the last token of the
  statement; the node keeps the full token list and has no children.

  The earlier scan over the type specifiers only filled a local list that
  was never used, so its sole observable effect was an IndexError (or an
  exception from the nested classification) on short or unusual input; it
  has been dropped.

  NOTE(review): for declarators whose last token is not the new name
  (function pointers, arrays) the recorded name is that last token's text.

  Raises Exception when the statement does not start with ``typedef`` or
  consists of the keyword alone.
  """
  if(token_list[0].token!=TOKEN_TYPEDEF or len(token_list)<2):
    raise Exception(f"语法错误 {token_list[0]}")
  name=token_list[-1].buff.decode("utf-8")
  return node_typedef(name=name,token_list=token_list,child=[])
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # 找到子节点 | # 找到子节点 | ||||||
def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
  """Parse the statements enclosed by the first '{' of ``token_list``.

  The body is everything after the first '{' up to, but not including, the
  last token of ``token_list``.  It is cut into statements with
  ``find_sentence`` (using ``seq`` as separators) and each statement is
  classified with ``dist_node_type``; classifications that come back as
  None are dropped.

  Returns the list of child nodes, empty when there is no '{'.
  """
  open_brace=TOKEN("{")
  remaining=[]
  for pos,tok in enumerate(token_list):
    if tok.token==open_brace:
      remaining=token_list[pos+1:-1]
      break
  nodes=[]
  while remaining:
    sentence=find_sentence(remaining,seq)
    parsed=dist_node_type(sentence)
    if parsed is not None:
      nodes.append(parsed)
    # Continue with whatever follows the statement just handled.
    remaining=remaining[len(sentence):]
  return nodes
|  |  | ||||||
|  |  | ||||||
def dist_node_type_funcdef(token_list:list):
  """Build a node_func_def for a function definition.

  The function name is the text of the token directly in front of the first
  '('.  The body is not parsed, so the node has no children.

  Raises Exception when there is no '(' or when '(' is the first token, in
  which case there is no token that could hold the name.
  """
  for i,item in enumerate(token_list):
    if(item.token==TOKEN('(')):
      if(i==0):
        # Nothing in front of '(': index i-1 would wrap to the last token.
        break
      name=token_list[i-1].buff.decode("utf-8")
      return node_func_def(name=[name],token_list=token_list,child=[])
  first=token_list[0] if len(token_list)>0 else None
  raise Exception(f"函数定义格式错误 {first}")
|  |  | ||||||
def dist_node_type_funcdecl(token_list:list):
  """Build a node_func_decl named after the first symbol token.

  Raises Exception when the statement contains no symbol token at all.
  """
  first_symbol=next((tok for tok in token_list if tok.token==TOKEN_SYMBOL),None)
  if first_symbol is None:
    raise Exception(f"函数声明格式错误 {token_list[0]}")
  decl_name=first_symbol.buff.decode("utf-8")
  return node_func_decl(name=[decl_name],token_list=token_list,child=[])
|  |  | ||||||
|  |  | ||||||
| # 第一个token是symbol的处理 | # 第一个token是symbol的处理 | ||||||
| def dist_node_type_symbol(token_list:list[lex_token]): | def dist_node_type_symbol(token_list:list): | ||||||
|   # 变量赋值或函数调用 |   # 变量赋值或函数调用 | ||||||
|   if(len(token_list)==1): |   if(len(token_list)==1): | ||||||
|     return node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list) |     return node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list) | ||||||
|   if(token_list[1].token == lex_c.TOKEN("(")): |   if(token_list[1].token == TOKEN("(")): | ||||||
|     child=find_child(token_list=token_list[2:-1]) |     child=find_child(token_list=token_list[2:-1]) | ||||||
|     return node_call("call",token_list=token_list,child=child) |     return node_call("call",token_list=token_list,child=child) | ||||||
|   elif(token_list[1].token in [ |   elif(token_list[1].token in [ | ||||||
|     lex_c.TOKEN("="),lex_c.TOKEN_ASSIG_ADD,lex_c.TOKEN_ASSIG_DIV,lex_c.TOKEN_ASSIG_LSH, |     TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH, | ||||||
|     lex_c.TOKEN_ASSIG_MUL,lex_c.TOKEN_ASSIG_RSH,lex_c.TOKEN_ASSIG_SUB]): |     TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]): | ||||||
|     name=token_list[1].name |     name=token_list[1].name | ||||||
|     child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]), |     child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),] | ||||||
|             dist_node_type(token_list=token_list[2:])] |     child_d=dist_node_type(token_list=token_list[2:]) | ||||||
|  |     if not child_d is None: | ||||||
|  |       child.append(child_d) | ||||||
|     return node_opt(name=name,token_list=token_list,child=child) |     return node_opt(name=name,token_list=token_list,child=child) | ||||||
|   else: |   else: | ||||||
|     # 没有赋值属性的操作 |     # 没有赋值属性的操作 | ||||||
| @@ -325,55 +781,62 @@ def dist_node_type_symbol(token_list:list[lex_token]): | |||||||
|  |  | ||||||
|  |  | ||||||
| # 判断一个语句的类型 | # 判断一个语句的类型 | ||||||
def dist_node_type(token_list:list):
  """Classify one statement and return the node that represents it.

  A leading ``extern`` and a trailing ';' are stripped first.  The
  statement is then dispatched on its first token (struct / union / enum /
  typedef / string / number); otherwise on its last token: ')' is a
  function declaration, '}' with a '(' in front of the first '{' is a
  function definition.  A statement that starts with a symbol goes to
  ``dist_node_type_symbol``.  Everything else is treated as a variable
  definition named after the first symbol token.

  Returns None when nothing is left after stripping.
  Raises Exception when a variable definition contains no symbol token.
  """
  # Each step checks for emptiness first so that "", "extern" or ";" alone
  # yield None instead of an IndexError.
  if(len(token_list)>0 and token_list[0].token==TOKEN_EXTERN):
    token_list=token_list[1:]
  if(len(token_list)>0 and token_list[-1].token==TOKEN(";")):
    token_list=token_list[:-1]
  if(len(token_list)==0):
    return None
  if(token_list[0].token==TOKEN_STRUCT):
    return dist_node_type_struct(token_list=token_list)
  if(token_list[0].token==TOKEN_UNION):
    return dist_node_type_union(token_list=token_list)
  if(token_list[0].token==TOKEN_ENUM):
    return dist_node_type_enum(token_list=token_list)
  if(token_list[0].token==TOKEN_TYPEDEF):
    return dist_node_type_typedef(token_list=token_list)
  if(token_list[0].token==TOKEN_STRING):
    name=token_list[0].buff.decode("utf-8")
    return node_string(name=name,token_list=token_list,child=[])
  if(token_list[0].token==TOKEN_NUM):
    name=token_list[0].buff.decode("utf-8")
    return node_int(name=name,token_list=token_list,child=[])

  if(token_list[-1].token==TOKEN(")")):
    # Function declaration.
    return dist_node_type_funcdecl(token_list)
  elif(token_list[-1].token==TOKEN("}")):
    if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
      # Function definition.
      return dist_node_type_funcdef(token_list=token_list)
    # Otherwise a brace-initialised variable: handled below.
  elif(token_list[0].token==TOKEN_SYMBOL):
    # Assignment or function call.
    return dist_node_type_symbol(token_list=token_list)

  # Variable definition: named after the first symbol token.
  for item in token_list:
    if(item.token==TOKEN_SYMBOL):
      name=item.buff.decode("utf-8")
      return node_variable_def(name=[name],token_list=token_list,child=[])
  raise Exception(f"变量定义格式错误 {token_list[0]}")
| @@ -381,24 +844,118 @@ def dist_node_type(token_list:list[lex_token]): | |||||||
|  |  | ||||||
def print_node(n:node,deep:int):
  """Print ``n`` and all of its descendants, one node per line.

  Each line is prefixed with ``deep`` '|' characters and shows the node's
  type, name and first token.  A node without tokens (for example the file
  node of a source file that produced no tokens) prints ``None`` in place
  of the token instead of raising IndexError.
  """
  s="|"*deep
  first=n.token_list[0] if n.token_list else None
  print(f"{s} {n.type} {n.name} {first}")
  if (n.child is not None) and len(n.child)>0:
    for item in n.child:
      print_node(item,deep+1)
|  |  | ||||||
def find_func_def_in_file(n:node,deep:int,func_name_list:list):
  """Report whether ``n`` or any descendant defines a listed function.

  A node matches when its type is 'func_def' and the first entry of its
  name is in ``func_name_list``; the first match found is printed and the
  search stops there.  ``deep`` is only passed along, increased by one per
  level.

  Returns True on a match, False otherwise.
  """
  if n.type=='func_def' and n.name[0] in func_name_list:
    print(f"{n.type} {n.name}")
    return True
  children=n.child
  if children is None or len(children)==0:
    return False
  for item in children:
    if find_func_def_in_file(item,deep+1,func_name_list):
      return True
  return False
|  |  | ||||||
def check_func_def(file_name:str,func_name_list:list):
  """Parse one source file and report whether it defines a listed function.

  The file is read as bytes, a leading UTF-8 byte-order mark is removed,
  and the content is tokenized with ``lex``.  The tokens are cut into
  statements, each statement is classified and attached to a file node.
  When a statement cannot be cut or classified, the error is printed and
  the rest of the file is ignored.  The resulting tree is printed.

  Returns the result of ``find_func_def_in_file`` for the tree.
  """
  with open(file_name,mode='rb') as f:
    raw=f.read()
  # Drop the UTF-8 BOM (EF BB BF) if the file starts with one.
  if raw.startswith(b'\xef\xbb\xbf'):
    raw=raw[3:]
  token_list=lex(raw,file_name)
  file_node=node_file(name=file_name,token_list=token_list)
  remaining=token_list
  while remaining:
    try:
      sentence=find_sentence(remaining)
      parsed=dist_node_type(sentence)
    except Exception as e:
      print(f"in {file_name}")
      print(f"\t {e}")
      break
    if parsed is not None:
      file_node.child.append(parsed)
    remaining=remaining[len(sentence):]
  print_node(file_node,0)
  return find_func_def_in_file(file_node,0,func_name_list)
|  |  | ||||||
|  |  | ||||||
|  | # 找到定义函数的文件 | ||||||
def find_func_def(file_list:list,func_name_list:list):
  """Find which files define any of the given functions.

  Each file is checked with ``check_func_def``. A file whose check raises is
  reported and recorded instead of aborting the scan, so one unreadable or
  unparsable file does not hide the results for the remaining files.

  Args:
    file_list: Paths of the files to check.
    func_name_list: Function names to search for.

  Returns:
    A tuple ``(ret_list, err_list)``: the files in which a definition was
    found, and the files whose check raised an exception.
  """
  ret_list=[]
  err_list=[]
  for item in file_list:
    # One dot per file as a progress indicator.
    sys.stdout.write('.')
    sys.stdout.flush()
    try:
      ack=check_func_def(item,func_name_list)
    except Exception as e:
      # Boundary handler: note the failure and carry on with the next file.
      print(f"in {item}")
      print(f"\t {e}")
      err_list.append(item)
      continue
    if ack:
      ret_list.append(item)
  return ret_list,err_list
|  |  | ||||||
|  | # 找到指定后缀的文件 | ||||||
def find_type(path:str,fix:str):
    """Recursively collect the files under ``path`` whose name ends with ``fix``.

    Args:
        path: Directory to scan.
        fix: Suffix to match, e.g. ``'.c'``. An empty suffix matches every file.

    Returns:
        A list of matching paths, each built by joining ``path`` with the
        entry names found below it. Entries appear in ``os.listdir`` order,
        with the contents of a sub-directory inserted where it is listed.
    """
    file_list=[]
    for entry in os.listdir(path):
        ps=os.path.join(path, entry)
        if os.path.isdir(ps):
            file_list+=find_type(ps,fix)
        elif ps.endswith(fix):
            # endswith also handles an empty suffix, which the slice
            # comparison ps[-len(fix):]==fix never matched.
            file_list.append(ps)
    return file_list
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # with open("build/build_log.log",mode="r",encoding="utf-8") as f: | ||||||
|  | #   _out_text=f.readlines() | ||||||
|  |  | ||||||
|  |  | ||||||
def get_func_list():
  """Collect the names reported as undefined references on standard input.

  Every input line is searched for the text ``undefined reference to `` followed
  by a backtick; the name is whatever sits between that backtick and the next
  single quote. Lines without the marker, or without the closing quote, are
  ignored.

  Returns:
    The distinct names in order of first appearance.
  """
  key_str='undefined reference to `'
  func_list=[]
  for item in sys.stdin:
    index=item.find(key_str)
    if index<0:
      continue
    index+=len(key_str)
    index_end=item.find('\'',index)
    if index_end<0:
      # No closing quote: slicing with -1 would record an empty name.
      continue
    func=item[index:index_end]
    if func not in func_list:
      func_list.append(func)
  return func_list
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # 参数是扫描的目录列表 | ||||||
if __name__=="__main__":
  # Each command-line argument is a directory that is scanned for .c files.
  file_list=[]
  for item in sys.argv[1:]:
    file_list.extend(find_type(item,'.c'))
  print(f"there is {len(file_list)} .c file.")
  # The wanted names are fixed here; get_func_list() can instead supply them
  # from linker output read on standard input.
  func_list=['main']
  print(func_list)
  ret_list,err_list=find_func_def(file_list,func_list)
  # Two report sections: files found, then files that failed analysis.
  for title,paths in (("已找到的文件",ret_list),("分析失败的文件",err_list)):
    print(title)
    for item in paths:
      print(item)
		Reference in New Issue
	
	Block a user
	 ranchuan
					ranchuan