解决一些符号不识别的问题
This commit is contained in:
151
find_func_def.py
151
find_func_def.py
@@ -2,7 +2,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -37,18 +37,21 @@ TOKEN_UNION = 283,
|
|||||||
TOKEN_STRING = 284,
|
TOKEN_STRING = 284,
|
||||||
TOKEN_DEFAULT = 285,
|
TOKEN_DEFAULT = 285,
|
||||||
TOKEN_RETURN = 286,
|
TOKEN_RETURN = 286,
|
||||||
TOKEN_ASSIG_ADD = 287
|
TOKEN_ASSIG_ADD = 287,
|
||||||
TOKEN_ASSIG_SUB = 288
|
TOKEN_ASSIG_SUB = 288,
|
||||||
TOKEN_ASSIG_MUL = 289
|
TOKEN_ASSIG_MUL = 289,
|
||||||
TOKEN_ASSIG_DIV = 290
|
TOKEN_ASSIG_DIV = 290,
|
||||||
TOKEN_ASSIG_LSH = 291
|
TOKEN_ASSIG_LSH = 291,
|
||||||
TOKEN_ASSIG_RSH = 292
|
TOKEN_ASSIG_RSH = 292,
|
||||||
TOKEN_EXTERN = 293
|
TOKEN_EXTERN = 293,
|
||||||
TOKEN_FLOAT = 294
|
TOKEN_FLOAT = 294,
|
||||||
TOKEN_DOUBLE = 295
|
TOKEN_DOUBLE = 295,
|
||||||
TOKEN_SHORT = 296
|
TOKEN_SHORT = 296,
|
||||||
TOKEN_LONG = 297
|
TOKEN_LONG = 297,
|
||||||
TOKEN_POINTER = 298
|
TOKEN_POINTER = 298,
|
||||||
|
TOKEN_LOGICAL_OR = 299,# 逻辑或
|
||||||
|
TOKEN_LOGICAL_AND = 300,# 逻辑与
|
||||||
|
TOKEN_OMIT = 301,# 省略符 ...
|
||||||
|
|
||||||
|
|
||||||
def TOKEN(t:str):
|
def TOKEN(t:str):
|
||||||
@@ -153,15 +156,41 @@ class lex_token:
|
|||||||
line:int
|
line:int
|
||||||
pos:int
|
pos:int
|
||||||
|
|
||||||
|
# 连写的操作符,这些实际上是多个操作符写在一起的结果
|
||||||
|
_NotMarkTable={
|
||||||
|
"!!":[lex_token("operator",'!',TOKEN('!'),0,0),
|
||||||
|
lex_token("operator",'!',TOKEN('!'),0,0)],
|
||||||
|
"=-":[lex_token("operator",'=',TOKEN('='),0,0),
|
||||||
|
lex_token("operator",'-',TOKEN('-'),0,0)],
|
||||||
|
"--=":[lex_token("operator",'--',TOKEN_DEC,0,0),
|
||||||
|
lex_token("operator",'=',TOKEN('='),0,0)],
|
||||||
|
"++=":[lex_token("operator",'++',TOKEN_INC,0,0),
|
||||||
|
lex_token("operator",'=',TOKEN('='),0,0)],
|
||||||
|
"=--":[lex_token("operator",'=',TOKEN('='),0,0),
|
||||||
|
lex_token("operator",'--',TOKEN_DEC,0,0)],
|
||||||
|
"=++":[lex_token("operator",'=',TOKEN('='),0,0),
|
||||||
|
lex_token("operator",'++',TOKEN_INC,0,0)],
|
||||||
|
"!=--":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
|
||||||
|
lex_token("operator",'--',TOKEN_DEC,0,0)],
|
||||||
|
"!=++":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
|
||||||
|
lex_token("operator",'++',TOKEN_INC,0,0)],
|
||||||
|
"==--":[lex_token("operator",'==',TOKEN_EQ,0,0),
|
||||||
|
lex_token("operator",'--',TOKEN_DEC,0,0)],
|
||||||
|
"==++":[lex_token("operator",'==',TOKEN_EQ,0,0),
|
||||||
|
lex_token("operator",'++',TOKEN_INC,0,0)],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class lex_class(object):
|
class lex_class(object):
|
||||||
def __init__(self,text:bytes) -> None:
|
def __init__(self,text:bytes,file_name:str="") -> None:
|
||||||
self.text=text
|
self.text=text
|
||||||
self.index=-1
|
self.index=-1
|
||||||
self.line=1
|
self.line=1
|
||||||
self.pos=-1
|
self.pos=-1
|
||||||
self.token_list:list[lex_token]=[]
|
self.token_list:list[lex_token]=[]
|
||||||
self.token_buff=bytearray()
|
self.token_buff=bytearray()
|
||||||
|
self.file_name=file_name
|
||||||
def save_char(self,c:int):
|
def save_char(self,c:int):
|
||||||
self.token_buff.append(c&0xff)
|
self.token_buff.append(c&0xff)
|
||||||
def save_token(self,token:lex_token):
|
def save_token(self,token:lex_token):
|
||||||
@@ -218,9 +247,17 @@ class lex_class(object):
|
|||||||
token.name=name
|
token.name=name
|
||||||
token.buff=self.token_buff
|
token.buff=self.token_buff
|
||||||
self.save_token(token)
|
self.save_token(token)
|
||||||
|
elif(name in _NotMarkTable):
|
||||||
|
tokens=_NotMarkTable[name]
|
||||||
|
for t in tokens:
|
||||||
|
token.token=t.token
|
||||||
|
token.name=t.name
|
||||||
|
token.buff=token.name.encode("utf-8")
|
||||||
|
self.save_token(copy.deepcopy(token))
|
||||||
|
token.pos+=len(token.name)
|
||||||
else:
|
else:
|
||||||
# raise Exception(f"不存在的操作符 {name} ")
|
raise Exception(f"不存在的操作符 {name} {self.file_name}:{self.line},{self.pos}")
|
||||||
print(f"不存在的操作符 {name} ")
|
# print(f"不存在的操作符 {name} ")
|
||||||
return c
|
return c
|
||||||
def read_num_and_save(self,c:int):
|
def read_num_and_save(self,c:int):
|
||||||
token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
|
token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
|
||||||
@@ -232,7 +269,7 @@ class lex_class(object):
|
|||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if(self.token_buff.count(b'.')>1):
|
if(self.token_buff.count(b'.')>1):
|
||||||
raise Exception("数字不能包含多个点号")
|
raise Exception(f"数字不能包含多个点号 {self.file_name}:{self.line},{self.pos}")
|
||||||
token.buff=self.token_buff
|
token.buff=self.token_buff
|
||||||
self.save_token(token)
|
self.save_token(token)
|
||||||
return c
|
return c
|
||||||
@@ -242,8 +279,6 @@ class lex_class(object):
|
|||||||
while c!=b'\"'[0]:
|
while c!=b'\"'[0]:
|
||||||
if(c==TOKEN('\\')):# \
|
if(c==TOKEN('\\')):# \
|
||||||
c=self.get_next_char()
|
c=self.get_next_char()
|
||||||
s=str(bytearray([c]),encoding='utf-8')
|
|
||||||
# if(c in self._escape_table.keys()):
|
|
||||||
self.save_char(self._escape_table.get(c,0))
|
self.save_char(self._escape_table.get(c,0))
|
||||||
else:
|
else:
|
||||||
self.save_char(c)
|
self.save_char(c)
|
||||||
@@ -255,8 +290,6 @@ class lex_class(object):
|
|||||||
while c!=b'\''[0]:
|
while c!=b'\''[0]:
|
||||||
if(c==TOKEN('\\')):# \
|
if(c==TOKEN('\\')):# \
|
||||||
c=self.get_next_char()
|
c=self.get_next_char()
|
||||||
s=str(bytearray([c]),encoding='utf-8')
|
|
||||||
# if(c in self._escape_table.keys()):
|
|
||||||
self.save_char(self._escape_table.get(c,0))
|
self.save_char(self._escape_table.get(c,0))
|
||||||
else:
|
else:
|
||||||
self.save_char(c)
|
self.save_char(c)
|
||||||
@@ -264,8 +297,8 @@ class lex_class(object):
|
|||||||
self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
|
self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
|
||||||
return self.get_next_char()
|
return self.get_next_char()
|
||||||
|
|
||||||
def lex(text:bytes):
|
def lex(text:bytes,file_name:str=""):
|
||||||
lex_obj = lex_class(text)
|
lex_obj = lex_class(text,file_name)
|
||||||
c=lex_obj.get_next_char()
|
c=lex_obj.get_next_char()
|
||||||
line_old=0
|
line_old=0
|
||||||
pos_old=0
|
pos_old=0
|
||||||
@@ -274,7 +307,7 @@ def lex(text:bytes):
|
|||||||
pos_old=lex_obj.pos
|
pos_old=lex_obj.pos
|
||||||
if isalpha_(c):
|
if isalpha_(c):
|
||||||
c=lex_obj.read_name_and_save(c)
|
c=lex_obj.read_name_and_save(c)
|
||||||
elif isinstr(c,"{}[]()~,;:*"):
|
elif isinstr(c,"{}[]()~,;:*?%^"):
|
||||||
lex_obj.save_one_char_token(c)
|
lex_obj.save_one_char_token(c)
|
||||||
c=lex_obj.get_next_char()
|
c=lex_obj.get_next_char()
|
||||||
elif isdigit(c):
|
elif isdigit(c):
|
||||||
@@ -290,7 +323,7 @@ def lex(text:bytes):
|
|||||||
elif isinstr(c,"\\"):
|
elif isinstr(c,"\\"):
|
||||||
c=lex_obj.get_next_char()
|
c=lex_obj.get_next_char()
|
||||||
if(c!=TOKEN("\r") and c!=TOKEN("\n")):
|
if(c!=TOKEN("\r") and c!=TOKEN("\n")):
|
||||||
raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
|
raise Exception(f"符号 '\\' 必须在行末, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
|
||||||
elif isinstr(c,"#"): # 宏定义
|
elif isinstr(c,"#"): # 宏定义
|
||||||
c_old=c
|
c_old=c
|
||||||
while (c!=TOKEN("\n") and c!=-1):
|
while (c!=TOKEN("\n") and c!=-1):
|
||||||
@@ -319,9 +352,31 @@ def lex(text:bytes):
|
|||||||
c=lex_obj.get_next_char()
|
c=lex_obj.get_next_char()
|
||||||
else:
|
else:
|
||||||
lex_obj.save_one_char_token(TOKEN("/"))
|
lex_obj.save_one_char_token(TOKEN("/"))
|
||||||
else:
|
elif isinstr(c,"|"):
|
||||||
# raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
|
|
||||||
c=lex_obj.get_next_char()
|
c=lex_obj.get_next_char()
|
||||||
|
if(c==TOKEN("|")):
|
||||||
|
lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
|
||||||
|
else:
|
||||||
|
lex_obj.save_one_char_token(TOKEN("|"))
|
||||||
|
elif isinstr(c,"&"):
|
||||||
|
c=lex_obj.get_next_char()
|
||||||
|
if(c==TOKEN("&")):
|
||||||
|
lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
|
||||||
|
else:
|
||||||
|
lex_obj.save_one_char_token(TOKEN("&"))
|
||||||
|
elif isinstr(c,'.'):
|
||||||
|
c=lex_obj.get_next_char()
|
||||||
|
if(c==TOKEN('.')):
|
||||||
|
c=lex_obj.get_next_char()
|
||||||
|
if(c==TOKEN('.')):
|
||||||
|
lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
|
||||||
|
else:
|
||||||
|
raise Exception (f"格式错误 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
|
||||||
|
else:
|
||||||
|
lex_obj.save_one_char_token(TOKEN("."))
|
||||||
|
else:
|
||||||
|
raise Exception(f"未知的字符 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
|
||||||
|
# c=lex_obj.get_next_char()
|
||||||
# if(line_old==lex_obj.line and pos_old==lex_obj.pos):
|
# if(line_old==lex_obj.line and pos_old==lex_obj.pos):
|
||||||
# print(f"pointer not move.")
|
# print(f"pointer not move.")
|
||||||
# print(line_old,pos_old)
|
# print(line_old,pos_old)
|
||||||
@@ -457,7 +512,7 @@ def find_close(token_list:list,token:tuple):
|
|||||||
num-=1
|
num-=1
|
||||||
if(num==0):
|
if(num==0):
|
||||||
return index
|
return index
|
||||||
raise Exception(f"没有找到闭合的符号 {token_list[0]} {token[1]}")
|
raise Exception(f"没有找到闭合的符号 {token_list[0]}")
|
||||||
|
|
||||||
# 找到指定token的index
|
# 找到指定token的index
|
||||||
def find_token(token_list:list,token:int):
|
def find_token(token_list:list,token:int):
|
||||||
@@ -615,6 +670,7 @@ def dist_node_type_typedef(token_list:list):
|
|||||||
# 找到子节点
|
# 找到子节点
|
||||||
def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
|
def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
|
||||||
child=[]
|
child=[]
|
||||||
|
token_list_local=[]
|
||||||
for i in range(len(token_list)):
|
for i in range(len(token_list)):
|
||||||
if(token_list[i].token==TOKEN("{")):
|
if(token_list[i].token==TOKEN("{")):
|
||||||
token_list_local=token_list[i+1:-1]
|
token_list_local=token_list[i+1:-1]
|
||||||
@@ -622,7 +678,8 @@ def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
|
|||||||
while len(token_list_local)>0:
|
while len(token_list_local)>0:
|
||||||
sentence=find_sentence(token_list_local,seq)
|
sentence=find_sentence(token_list_local,seq)
|
||||||
node_d=dist_node_type(sentence)
|
node_d=dist_node_type(sentence)
|
||||||
child.append(node_d)
|
if not node_d is None:
|
||||||
|
child.append(node_d)
|
||||||
token_list_local=token_list_local[len(sentence):]
|
token_list_local=token_list_local[len(sentence):]
|
||||||
return child
|
return child
|
||||||
|
|
||||||
@@ -655,8 +712,10 @@ def dist_node_type_symbol(token_list:list):
|
|||||||
TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
|
TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
|
||||||
TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
|
TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
|
||||||
name=token_list[1].name
|
name=token_list[1].name
|
||||||
child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),
|
child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
|
||||||
dist_node_type(token_list=token_list[2:])]
|
child_d=dist_node_type(token_list=token_list[2:])
|
||||||
|
if not child_d is None:
|
||||||
|
child.append(child_d)
|
||||||
return node_opt(name=name,token_list=token_list,child=child)
|
return node_opt(name=name,token_list=token_list,child=child)
|
||||||
else:
|
else:
|
||||||
# 没有赋值属性的操作
|
# 没有赋值属性的操作
|
||||||
@@ -678,10 +737,13 @@ def dist_node_type_symbol(token_list:list):
|
|||||||
|
|
||||||
# 判断一个语句的类型
|
# 判断一个语句的类型
|
||||||
def dist_node_type(token_list:list):
|
def dist_node_type(token_list:list):
|
||||||
|
# print(f"{token_list[0]}")
|
||||||
if(token_list[0].token==TOKEN_EXTERN):
|
if(token_list[0].token==TOKEN_EXTERN):
|
||||||
token_list=token_list[1:]
|
token_list=token_list[1:]
|
||||||
if(token_list[-1].token==TOKEN(";")):
|
if(token_list[-1].token==TOKEN(";")):
|
||||||
token_list=token_list[:-1]
|
token_list=token_list[:-1]
|
||||||
|
if(len(token_list)==0):
|
||||||
|
return None
|
||||||
if(token_list[0].token==TOKEN_STRUCT):
|
if(token_list[0].token==TOKEN_STRUCT):
|
||||||
return dist_node_type_struct(token_list=token_list)
|
return dist_node_type_struct(token_list=token_list)
|
||||||
if(token_list[0].token==TOKEN_UNION):
|
if(token_list[0].token==TOKEN_UNION):
|
||||||
@@ -758,17 +820,22 @@ def find_func_def_in_file(n:node,deep:int,func_name_list:list):
|
|||||||
|
|
||||||
def check_func_def(file_name:str,func_name_list:list):
|
def check_func_def(file_name:str,func_name_list:list):
|
||||||
with open(file_name,mode='rb') as f:
|
with open(file_name,mode='rb') as f:
|
||||||
# print("start read")
|
read_d=f.read()
|
||||||
token_list=lex(f.read())
|
if(read_d[:3]==bytes([0xef,0xbb,0xbf])):
|
||||||
# print("end read")
|
read_d=read_d[3:]
|
||||||
|
token_list=lex(read_d,file_name)
|
||||||
file=node_file(name=file_name,token_list=token_list)
|
file=node_file(name=file_name,token_list=token_list)
|
||||||
while len(token_list)>0:
|
while len(token_list)>0:
|
||||||
sentence=find_sentence(token_list)
|
node_d=None
|
||||||
node_d=dist_node_type(sentence)
|
try:
|
||||||
file.child.append(node_d)
|
sentence=find_sentence(token_list)
|
||||||
# print('找到一个语句:')
|
node_d=dist_node_type(sentence)
|
||||||
# for item in sentence:
|
except Exception as e:
|
||||||
# print(f"\t{item}")
|
print(f"in {file_name}")
|
||||||
|
print(f"\t {e}")
|
||||||
|
break
|
||||||
|
if not node_d is None:
|
||||||
|
file.child.append(node_d)
|
||||||
token_list=token_list[len(sentence):]
|
token_list=token_list[len(sentence):]
|
||||||
# print_node(file,0)
|
# print_node(file,0)
|
||||||
return find_func_def_in_file(file,0,func_name_list)
|
return find_func_def_in_file(file,0,func_name_list)
|
||||||
@@ -779,7 +846,8 @@ def find_func_def(file_list:list,func_name_list:str):
|
|||||||
ret_list=[]
|
ret_list=[]
|
||||||
err_list=[]
|
err_list=[]
|
||||||
for item in file_list:
|
for item in file_list:
|
||||||
print(f"check {item}")
|
sys.stdout.write('.')
|
||||||
|
sys.stdout.flush()
|
||||||
try:
|
try:
|
||||||
ack=check_func_def(item,func_name_list)
|
ack=check_func_def(item,func_name_list)
|
||||||
if(ack):
|
if(ack):
|
||||||
@@ -821,6 +889,7 @@ if __name__=="__main__":
|
|||||||
file_list=[]
|
file_list=[]
|
||||||
for item in sys.argv[1:]:
|
for item in sys.argv[1:]:
|
||||||
file_list+=find_type(item,'.c')
|
file_list+=find_type(item,'.c')
|
||||||
|
# file_list=["./app/iot_plc_uart/iot_plc_demo.c"]
|
||||||
print(f"there is {len(file_list)} .c file.")
|
print(f"there is {len(file_list)} .c file.")
|
||||||
func_list=[]
|
func_list=[]
|
||||||
for item in _out_text:
|
for item in _out_text:
|
||||||
|
Reference in New Issue
Block a user