diff --git a/find_func_def.py b/find_func_def.py
index d9c181f..e06af4f 100644
--- a/find_func_def.py
+++ b/find_func_def.py
@@ -2,7 +2,7 @@
 import os
 import sys
 import shutil
 import dataclasses
-
+import copy
 
 
@@ -37,18 +37,21 @@
 TOKEN_UNION = 283,
 TOKEN_STRING = 284,
 TOKEN_DEFAULT = 285,
 TOKEN_RETURN = 286,
-TOKEN_ASSIG_ADD = 287
-TOKEN_ASSIG_SUB = 288
-TOKEN_ASSIG_MUL = 289
-TOKEN_ASSIG_DIV = 290
-TOKEN_ASSIG_LSH = 291
-TOKEN_ASSIG_RSH = 292
-TOKEN_EXTERN = 293
-TOKEN_FLOAT = 294
-TOKEN_DOUBLE = 295
-TOKEN_SHORT = 296
-TOKEN_LONG = 297
-TOKEN_POINTER = 298
+TOKEN_ASSIG_ADD = 287,
+TOKEN_ASSIG_SUB = 288,
+TOKEN_ASSIG_MUL = 289,
+TOKEN_ASSIG_DIV = 290,
+TOKEN_ASSIG_LSH = 291,
+TOKEN_ASSIG_RSH = 292,
+TOKEN_EXTERN = 293,
+TOKEN_FLOAT = 294,
+TOKEN_DOUBLE = 295,
+TOKEN_SHORT = 296,
+TOKEN_LONG = 297,
+TOKEN_POINTER = 298,
+TOKEN_LOGICAL_OR = 299,# logical OR
+TOKEN_LOGICAL_AND = 300,# logical AND
+TOKEN_OMIT = 301,# ellipsis
 ...
 def TOKEN(t:str):
@@ -153,15 +156,41 @@ class lex_token:
     line:int
     pos:int
 
+# run-together operators: these are really several operators written back to back
+_NotMarkTable={
+    "!!":[lex_token("operator",'!',TOKEN('!'),0,0),
+          lex_token("operator",'!',TOKEN('!'),0,0)],
+    "=-":[lex_token("operator",'=',TOKEN('='),0,0),
+          lex_token("operator",'-',TOKEN('-'),0,0)],
+    "--=":[lex_token("operator",'--',TOKEN_DEC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "++=":[lex_token("operator",'++',TOKEN_INC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "=--":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "=++":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'++',TOKEN_INC,0,0)],
+    "!=--":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "!=++":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+    "==--":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "==++":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+}
+
+
 class lex_class(object):
-    def __init__(self,text:bytes) -> None:
+    def __init__(self,text:bytes,file_name:str="") -> None:
         self.text=text
         self.index=-1
         self.line=1
         self.pos=-1
         self.token_list:list[lex_token]=[]
         self.token_buff=bytearray()
+        self.file_name=file_name
     def save_char(self,c:int):
         self.token_buff.append(c&0xff)
     def save_token(self,token:lex_token):
@@ -218,9 +247,17 @@ class lex_class(object):
             token.name=name
             token.buff=self.token_buff
             self.save_token(token)
+        elif(name in _NotMarkTable):
+            tokens=_NotMarkTable[name]
+            for t in tokens:
+                token.token=t.token
+                token.name=t.name
+                token.buff=token.name.encode("utf-8")
+                self.save_token(copy.deepcopy(token))
+                token.pos+=len(token.name)
         else:
-            # raise Exception(f"unknown operator {name} ")
-            print(f"unknown operator {name} ")
+            raise Exception(f"unknown operator {name} {self.file_name}:{self.line},{self.pos}")
+            # print(f"unknown operator {name} ")
         return c
     def read_num_and_save(self,c:int):
         token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
@@ -232,7 +269,7 @@
             else:
                 break
         if(self.token_buff.count(b'.')>1):
-            raise Exception("a number may not contain more than one dot")
+            raise Exception(f"a number may not contain more than one dot {self.file_name}:{self.line},{self.pos}")
         token.buff=self.token_buff
         self.save_token(token)
         return c
@@ -242,8 +279,6 @@
         while c!=b'\"'[0]:
             if(c==TOKEN('\\')):# \
                 c=self.get_next_char()
-                s=str(bytearray([c]),encoding='utf-8')
-                # if(c in self._escape_table.keys()):
                 self.save_char(self._escape_table.get(c,0))
             else:
                 self.save_char(c)
@@ -255,8 +290,6 @@ class lex_class(object):
         while c!=b'\''[0]:
             if(c==TOKEN('\\')):# \
                 c=self.get_next_char()
-                s=str(bytearray([c]),encoding='utf-8')
-                # if(c in self._escape_table.keys()):
                 self.save_char(self._escape_table.get(c,0))
             else:
                 self.save_char(c)
@@ -264,8 +297,8 @@ class lex_class(object):
         self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
         return self.get_next_char()
 
-def lex(text:bytes):
-    lex_obj = lex_class(text)
+def lex(text:bytes,file_name:str=""):
+    lex_obj = lex_class(text,file_name)
     c=lex_obj.get_next_char()
     line_old=0
     pos_old=0
@@ -274,7 +307,7 @@
         pos_old=lex_obj.pos
         if isalpha_(c):
             c=lex_obj.read_name_and_save(c)
-        elif isinstr(c,"{}[]()~,;:*"):
+        elif isinstr(c,"{}[]()~,;:*?%^"):
            lex_obj.save_one_char_token(c)
            c=lex_obj.get_next_char()
         elif isdigit(c):
@@ -290,7 +323,7 @@
         elif isinstr(c,"\\"):
             c=lex_obj.get_next_char()
             if(c!=TOKEN("\r") and c!=TOKEN("\n")):
-                raise Exception(f"the '\\' character must be at the end of the line, line:{lex_obj.line} pos:{lex_obj.pos}")
+                raise Exception(f"the '\\' character must be at the end of the line, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
         elif isinstr(c,"#"): # macro definition
             c_old=c
             while (c!=TOKEN("\n") and c!=-1):
@@ -319,9 +352,31 @@
                 c=lex_obj.get_next_char()
             else:
                 lex_obj.save_one_char_token(TOKEN("/"))
-        else:
-            # raise Exception(f"unknown character {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
+        elif isinstr(c,"|"):
             c=lex_obj.get_next_char()
+            if(c==TOKEN("|")):
+                lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
+            else:
+                lex_obj.save_one_char_token(TOKEN("|"))
+        elif isinstr(c,"&"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("&")):
+                lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
+            else:
+                lex_obj.save_one_char_token(TOKEN("&"))
+        elif isinstr(c,'.'):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN('.')):
+                c=lex_obj.get_next_char()
+                if(c==TOKEN('.')):
+                    lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
+                else:
+                    raise Exception (f"malformed token {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+            else:
+                lex_obj.save_one_char_token(TOKEN("."))
+        else:
+            raise Exception(f"unknown character {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+            # c=lex_obj.get_next_char()
         # if(line_old==lex_obj.line and pos_old==lex_obj.pos):
         #     print(f"pointer not move.")
         #     print(line_old,pos_old)
@@ -457,7 +512,7 @@ def find_close(token_list:list,token:tuple):
             num-=1
             if(num==0):
                 return index
-    raise Exception(f"no closing symbol found for {token_list[0]} {token[1]}")
+    raise Exception(f"no closing symbol found for {token_list[0]}")
 
 # find the index of the given token
 def find_token(token_list:list,token:int):
@@ -615,6 +670,7 @@ def dist_node_type_typedef(token_list:list):
 # find the child nodes
 def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
     child=[]
+    token_list_local=[]
     for i in range(len(token_list)):
         if(token_list[i].token==TOKEN("{")):
             token_list_local=token_list[i+1:-1]
@@ -622,7 +678,8 @@
     while len(token_list_local)>0:
         sentence=find_sentence(token_list_local,seq)
         node_d=dist_node_type(sentence)
-        child.append(node_d)
+        if not node_d is None:
+            child.append(node_d)
         token_list_local=token_list_local[len(sentence):]
     return child
 
@@ -655,8 +712,10 @@ def dist_node_type_symbol(token_list:list):
                 TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
                 TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
             name=token_list[1].name
-            child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),
-                   dist_node_type(token_list=token_list[2:])]
+            child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
+            child_d=dist_node_type(token_list=token_list[2:])
+            if not child_d is None:
+                child.append(child_d)
             return node_opt(name=name,token_list=token_list,child=child)
         else:
             # operation without an assignment
@@ -678,10 +737,13 @@
 
 # determine the type of a statement
 def dist_node_type(token_list:list):
+    # print(f"{token_list[0]}")
     if(token_list[0].token==TOKEN_EXTERN):
         token_list=token_list[1:]
     if(token_list[-1].token==TOKEN(";")):
         token_list=token_list[:-1]
+    if(len(token_list)==0):
+        return None
     if(token_list[0].token==TOKEN_STRUCT):
         return dist_node_type_struct(token_list=token_list)
     if(token_list[0].token==TOKEN_UNION):
@@ -758,17 +820,22 @@ def find_func_def_in_file(n:node,deep:int,func_name_list:list):
 
 def check_func_def(file_name:str,func_name_list:list):
     with open(file_name,mode='rb') as f:
-        # print("start read")
-        token_list=lex(f.read())
-        # print("end read")
+        read_d=f.read()
+        if(read_d[:3]==bytes([0xef,0xbb,0xbf])):
+            read_d=read_d[3:]
+        token_list=lex(read_d,file_name)
     file=node_file(name=file_name,token_list=token_list)
     while len(token_list)>0:
-        sentence=find_sentence(token_list)
-        node_d=dist_node_type(sentence)
-        file.child.append(node_d)
-        # print('found a statement:')
-        # for item in sentence:
-        #     print(f"\t{item}")
+        node_d=None
+        try:
+            sentence=find_sentence(token_list)
+            node_d=dist_node_type(sentence)
+        except Exception as e:
+            print(f"in {file_name}")
+            print(f"\t {e}")
+            break
+        if not node_d is None:
+            file.child.append(node_d)
         token_list=token_list[len(sentence):]
     # print_node(file,0)
     return find_func_def_in_file(file,0,func_name_list)
@@ -779,7 +846,8 @@ def find_func_def(file_list:list,func_name_list:str):
     ret_list=[]
    err_list=[]
     for item in file_list:
-        print(f"check {item}")
+        sys.stdout.write('.')
+        sys.stdout.flush()
         try:
             ack=check_func_def(item,func_name_list)
             if(ack):
@@ -821,6 +889,7 @@ if __name__=="__main__":
     file_list=[]
     for item in sys.argv[1:]:
         file_list+=find_type(item,'.c')
+    # file_list=["./app/iot_plc_uart/iot_plc_demo.c"]
     print(f"there is {len(file_list)} .c file.")
     func_list=[]
     for item in _out_text:
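
Why _NotMarkTable exists: the lexer consumes a whole run of operator characters at once, so C source such as `a=-1` or `if(x!=++y)` produces glued runs like `=-` and `!=++` that are really two operators written back to back. The table maps each known run to the individual tokens it should be split into (saved through copy.deepcopy so every copy keeps its own position). A minimal standalone sketch of the same idea; GLUED_OPS and split_glued are hypothetical names for illustration, not the module's API:

# Hypothetical sketch of the _NotMarkTable idea, not find_func_def.py's API.
GLUED_OPS = {
    "=-":   ["=", "-"],
    "=--":  ["=", "--"],
    "!=++": ["!=", "++"],
}

def split_glued(run: str) -> list[str]:
    # Known glued runs are split into their parts; anything else passes through.
    return GLUED_OPS.get(run, [run])

print(split_glued("=--"))  # ['=', '--']
print(split_glued("=="))   # ['==']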
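
The new `||`, `&&` and `...` branches in lex() are plain lookahead: read one character, peek at the next, and either emit the multi-character token or fall back to the single-character one. Note that the `.` branch accepts `...` or a lone `.` but raises on a bare `..`. A standalone sketch of that decision logic; read_dots is a made-up helper for illustration, not part of the module:

# Hypothetical helper mirroring the '.' branch added to lex().
def read_dots(src: str, i: int) -> tuple[str, int]:
    if src.startswith("...", i):
        return "...", i + 3  # ellipsis token (TOKEN_OMIT in the patch)
    if src.startswith("..", i):
        raise SyntaxError(f"malformed token at index {i}")  # the patch raises here as well
    return ".", i + 1        # ordinary member-access dot

print(read_dots("...x", 0))  # ('...', 3)
print(read_dots(".x", 0))    # ('.', 1)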
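
Finally, the three-byte check in check_func_def strips a UTF-8 byte-order mark (EF BB BF) that some Windows editors prepend to source files; without it, the BOM would reach the lexer and trip the new "unknown character" exception on the first character of the file. A minimal sketch of the same guard, with strip_bom as a hypothetical name:

UTF8_BOM = bytes([0xEF, 0xBB, 0xBF])

def strip_bom(data: bytes) -> bytes:
    # Drop a leading UTF-8 BOM if present; return everything else unchanged.
    return data[3:] if data[:3] == UTF8_BOM else data

print(strip_bom(UTF8_BOM + b"int x;"))  # b'int x;'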