Fix some symbols not being recognized

2025-01-14 14:32:23 +08:00
parent 6e6c2909a7
commit ddb81ff6b0


@@ -2,7 +2,7 @@ import os
 import sys
 import shutil
 import dataclasses
+import copy
@@ -37,18 +37,21 @@ TOKEN_UNION = 283,
 TOKEN_STRING = 284,
 TOKEN_DEFAULT = 285,
 TOKEN_RETURN = 286,
-TOKEN_ASSIG_ADD = 287
-TOKEN_ASSIG_SUB = 288
-TOKEN_ASSIG_MUL = 289
-TOKEN_ASSIG_DIV = 290
-TOKEN_ASSIG_LSH = 291
-TOKEN_ASSIG_RSH = 292
-TOKEN_EXTERN = 293
-TOKEN_FLOAT = 294
-TOKEN_DOUBLE = 295
-TOKEN_SHORT = 296
-TOKEN_LONG = 297
-TOKEN_POINTER = 298
+TOKEN_ASSIG_ADD = 287,
+TOKEN_ASSIG_SUB = 288,
+TOKEN_ASSIG_MUL = 289,
+TOKEN_ASSIG_DIV = 290,
+TOKEN_ASSIG_LSH = 291,
+TOKEN_ASSIG_RSH = 292,
+TOKEN_EXTERN = 293,
+TOKEN_FLOAT = 294,
+TOKEN_DOUBLE = 295,
+TOKEN_SHORT = 296,
+TOKEN_LONG = 297,
+TOKEN_POINTER = 298,
+TOKEN_LOGICAL_OR = 299,# logical OR
+TOKEN_LOGICAL_AND = 300,# logical AND
+TOKEN_OMIT = 301,# ellipsis ...
 def TOKEN(t:str):
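For context, the body of the TOKEN() helper is outside this diff; given comparisons elsewhere such as c==TOKEN("\n") against raw input bytes, it presumably maps a one-character string to its byte value. A minimal sketch under that assumption:

def TOKEN(t:str):
    # Assumed behavior (not shown in this diff): return the byte value of a
    # one-character string so TOKEN('{') compares directly with input bytes.
    return t.encode("utf-8")[0]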
@@ -153,15 +156,41 @@ class lex_token:
     line:int
     pos:int
+# Run-together operators: each entry is really several operators written back to back
+_NotMarkTable={
+    "!!":[lex_token("operator",'!',TOKEN('!'),0,0),
+          lex_token("operator",'!',TOKEN('!'),0,0)],
+    "=-":[lex_token("operator",'=',TOKEN('='),0,0),
+          lex_token("operator",'-',TOKEN('-'),0,0)],
+    "--=":[lex_token("operator",'--',TOKEN_DEC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "++=":[lex_token("operator",'++',TOKEN_INC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "=--":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "=++":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'++',TOKEN_INC,0,0)],
+    "!=--":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "!=++":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+    "==--":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "==++":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+}
 class lex_class(object):
-    def __init__(self,text:bytes) -> None:
+    def __init__(self,text:bytes,file_name:str="") -> None:
         self.text=text
         self.index=-1
         self.line=1
         self.pos=-1
         self.token_list:list[lex_token]=[]
         self.token_buff=bytearray()
+        self.file_name=file_name
     def save_char(self,c:int):
         self.token_buff.append(c&0xff)
     def save_token(self,token:lex_token):
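The idea behind _NotMarkTable, reduced to a self-contained sketch (illustrative names, plain strings instead of lex_token objects): a greedy operator scanner can swallow adjacent operators such as "=-" as one unrecognized blob, and the table maps each known blob back to the real operator sequence.

# Hypothetical, simplified stand-in for _NotMarkTable.
SPLITS = {
    "=-":   ["=", "-"],
    "=++":  ["=", "++"],
    "!=--": ["!=", "--"],
}

def split_run(op: str) -> list[str]:
    # Unknown runs come back unchanged here; the real lexer raises instead.
    return SPLITS.get(op, [op])

assert split_run("=-") == ["=", "-"]
assert split_run("!=--") == ["!=", "--"]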
@@ -218,9 +247,17 @@ class lex_class(object):
             token.name=name
             token.buff=self.token_buff
             self.save_token(token)
+        elif(name in _NotMarkTable):
+            tokens=_NotMarkTable[name]
+            for t in tokens:
+                token.token=t.token
+                token.name=t.name
+                token.buff=token.name.encode("utf-8")
+                self.save_token(copy.deepcopy(token))
+                token.pos+=len(token.name)
         else:
-            # raise Exception(f"unknown operator {name} ")
-            print(f"unknown operator {name} ")
+            raise Exception(f"unknown operator {name} {self.file_name}:{self.line},{self.pos}")
+            # print(f"unknown operator {name} ")
         return c
     def read_num_and_save(self,c:int):
         token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
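Why copy.deepcopy (and the new import copy above) matters in that loop, as an isolated sketch: the same token object is mutated once per sub-operator, so saving it without a copy would leave every saved entry aliasing the final state. Tok and emit_split below are illustrative names, not part of the module.

import copy
import dataclasses

@dataclasses.dataclass
class Tok:
    name: str
    pos: int

def emit_split(base: Tok, parts: list[str], out: list[Tok]) -> None:
    for p in parts:
        base.name = p
        out.append(copy.deepcopy(base))  # snapshot before the next mutation
        base.pos += len(p)               # advance position past this operator

out: list[Tok] = []
emit_split(Tok("", 5), ["=", "-"], out)
assert [(t.name, t.pos) for t in out] == [("=", 5), ("-", 6)]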
@@ -232,7 +269,7 @@ class lex_class(object):
             else:
                 break
         if(self.token_buff.count(b'.')>1):
-            raise Exception("a number cannot contain multiple dots")
+            raise Exception(f"a number cannot contain multiple dots {self.file_name}:{self.line},{self.pos}")
         token.buff=self.token_buff
         self.save_token(token)
         return c
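The dot check in isolation: token_buff is a bytearray, and bytearray.count(b'.') counts occurrences of the subsequence, so anything like 3.14.15 is rejected before it becomes a number token. A quick illustration:

buf = bytearray(b"3.14.15")
assert buf.count(b'.') > 1                    # two dots -> rejected
assert bytearray(b"3.14").count(b'.') == 1    # one dot -> accepted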
@@ -242,8 +279,6 @@ class lex_class(object):
         while c!=b'\"'[0]:
             if(c==TOKEN('\\')):# \
                 c=self.get_next_char()
-                s=str(bytearray([c]),encoding='utf-8')
-                # if(c in self._escape_table.keys()):
                 self.save_char(self._escape_table.get(c,0))
             else:
                 self.save_char(c)
@@ -255,8 +290,6 @@ class lex_class(object):
         while c!=b'\''[0]:
             if(c==TOKEN('\\')):# \
                 c=self.get_next_char()
-                s=str(bytearray([c]),encoding='utf-8')
-                # if(c in self._escape_table.keys()):
                 self.save_char(self._escape_table.get(c,0))
             else:
                 self.save_char(c)
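Both escape branches (string and character literals) now rely solely on dict.get with a default of 0, meaning an unrecognized escape silently becomes a NUL byte rather than raising. A sketch with a hypothetical table (the module's real _escape_table is not shown in this diff):

_escape_table = {ord('n'): ord('\n'), ord('t'): ord('\t'), ord('\\'): ord('\\')}

assert _escape_table.get(ord('n'), 0) == ord('\n')   # known escape
assert _escape_table.get(ord('q'), 0) == 0           # unknown escape -> NUL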
@@ -264,8 +297,8 @@ class lex_class(object):
         self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
         return self.get_next_char()
-def lex(text:bytes):
-    lex_obj = lex_class(text)
+def lex(text:bytes,file_name:str=""):
+    lex_obj = lex_class(text,file_name)
     c=lex_obj.get_next_char()
     line_old=0
     pos_old=0
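A hypothetical call site for the new signature; the file name is only carried along for diagnostics, so existing callers that pass a single argument keep working thanks to the default:

token_list = lex(b"int x = 1;")              # old style still works
token_list = lex(b"int x = 1;", "demo.c")    # errors now report demo.c:line,pos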
@@ -274,7 +307,7 @@ def lex(text:bytes):
         pos_old=lex_obj.pos
         if isalpha_(c):
             c=lex_obj.read_name_and_save(c)
-        elif isinstr(c,"{}[]()~,;:*"):
+        elif isinstr(c,"{}[]()~,;:*?%^"):
             lex_obj.save_one_char_token(c)
             c=lex_obj.get_next_char()
         elif isdigit(c):
@@ -290,7 +323,7 @@ def lex(text:bytes):
         elif isinstr(c,"\\"):
             c=lex_obj.get_next_char()
             if(c!=TOKEN("\r") and c!=TOKEN("\n")):
-                raise Exception(f"the '\\' symbol must be at the end of a line, line:{lex_obj.line} pos:{lex_obj.pos}")
+                raise Exception(f"the '\\' symbol must be at the end of a line, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
         elif isinstr(c,"#"): # macro definition
             c_old=c
             while (c!=TOKEN("\n") and c!=-1):
@@ -319,9 +352,31 @@ def lex(text:bytes):
                 c=lex_obj.get_next_char()
             else:
                 lex_obj.save_one_char_token(TOKEN("/"))
-        else:
-            # raise Exception(f"unknown character {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
+        elif isinstr(c,"|"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("|")):
+                lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
+            else:
+                lex_obj.save_one_char_token(TOKEN("|"))
+        elif isinstr(c,"&"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("&")):
+                lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
+            else:
+                lex_obj.save_one_char_token(TOKEN("&"))
+        elif isinstr(c,'.'):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN('.')):
+                c=lex_obj.get_next_char()
+                if(c==TOKEN('.')):
+                    lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
+                else:
+                    raise Exception(f"malformed token {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+            else:
+                lex_obj.save_one_char_token(TOKEN("."))
+        else:
+            raise Exception(f"unknown character {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
             # c=lex_obj.get_next_char()
             # if(line_old==lex_obj.line and pos_old==lex_obj.pos):
             # print(f"pointer not move.")
             # print(line_old,pos_old)
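The new branches all follow one peek-ahead pattern: read the next character, fuse it into a two- (or, for the ellipsis, three-) character token when it matches, otherwise fall back to the single-character operator. The pattern in isolation, with illustrative names:

def scan_double(stream: bytes, i: int, two: bytes):
    # Returns the token recognized at index i plus the index just after it.
    if i + 1 < len(stream) and stream[i:i + 2] == two:
        return two, i + 2        # e.g. b"||" or b"&&"
    return two[:1], i + 1        # lone b"|" or b"&"

assert scan_double(b"||x", 0, b"||") == (b"||", 2)
assert scan_double(b"|x", 0, b"||") == (b"|", 1)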
@@ -457,7 +512,7 @@ def find_close(token_list:list,token:tuple):
             num-=1
             if(num==0):
                 return index
-    raise Exception(f"no closing symbol found {token_list[0]} {token[1]}")
+    raise Exception(f"no closing symbol found {token_list[0]}")
 # find the index of the given token
 def find_token(token_list:list,token:int):
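For reference, find_close pairs brackets with a plain nesting counter, as the surviving fragment shows. A standalone sketch of the counting scheme, with plain strings standing in for lex_token objects:

def find_close_sketch(tokens: list[str], open_t: str, close_t: str) -> int:
    num = 0
    for index, t in enumerate(tokens):
        if t == open_t:
            num += 1
        elif t == close_t:
            num -= 1
            if num == 0:
                return index
    raise Exception(f"no closing symbol found {tokens[0]}")

assert find_close_sketch(list("(a(b)c)"), "(", ")") == 6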
@@ -615,6 +670,7 @@ def dist_node_type_typedef(token_list:list):
 # find the child nodes
 def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
     child=[]
+    token_list_local=[]
     for i in range(len(token_list)):
         if(token_list[i].token==TOKEN("{")):
             token_list_local=token_list[i+1:-1]
@@ -622,6 +678,7 @@ def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
     while len(token_list_local)>0:
         sentence=find_sentence(token_list_local,seq)
         node_d=dist_node_type(sentence)
+        if not node_d is None:
             child.append(node_d)
         token_list_local=token_list_local[len(sentence):]
     return child
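The guard pairs with the dist_node_type change further down: statements that reduce to nothing now yield None, and silently skipping them keeps None out of the child list. The effect, reduced to plain data:

results = [None, "decl_a", None, "decl_b"]   # per-sentence dist_node_type outputs
child = [n for n in results if n is not None]
assert child == ["decl_a", "decl_b"]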
@@ -655,8 +712,10 @@ def dist_node_type_symbol(token_list:list):
                     TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
                     TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
         name=token_list[1].name
-        child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),
-               dist_node_type(token_list=token_list[2:])]
+        child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
+        child_d=dist_node_type(token_list=token_list[2:])
+        if not child_d is None:
+            child.append(child_d)
         return node_opt(name=name,token_list=token_list,child=child)
     else:
         # operations without an assignment
@@ -678,10 +737,13 @@ def dist_node_type_symbol(token_list:list):
 # determine the type of a statement
 def dist_node_type(token_list:list):
     # print(f"{token_list[0]}")
     if(token_list[0].token==TOKEN_EXTERN):
         token_list=token_list[1:]
     if(token_list[-1].token==TOKEN(";")):
         token_list=token_list[:-1]
+    if(len(token_list)==0):
+        return None
     if(token_list[0].token==TOKEN_STRUCT):
         return dist_node_type_struct(token_list=token_list)
     if(token_list[0].token==TOKEN_UNION):
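What the early return fixes, in a simplified model: after stripping a leading extern and a trailing semicolon, a statement like extern; is empty, and the very next line would index token_list[0] and crash. A sketch with strings standing in for tokens (classify is an illustrative name):

def classify(tokens: list[str]):
    if tokens and tokens[0] == "extern":
        tokens = tokens[1:]
    if tokens and tokens[-1] == ";":
        tokens = tokens[:-1]
    if len(tokens) == 0:
        return None            # nothing left to classify
    return tokens[0]           # the real code dispatches on the first token

assert classify(["extern", ";"]) is None
assert classify(["struct", "foo", ";"]) == "struct"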
@@ -758,17 +820,22 @@ def find_func_def_in_file(n:node,deep:int,func_name_list:list):
 def check_func_def(file_name:str,func_name_list:list):
     with open(file_name,mode='rb') as f:
         # print("start read")
-        token_list=lex(f.read())
-        # print("end read")
+        read_d=f.read()
+        if(read_d[:3]==bytes([0xef,0xbb,0xbf])):
+            read_d=read_d[3:]
+        token_list=lex(read_d,file_name)
         file=node_file(name=file_name,token_list=token_list)
         while len(token_list)>0:
+            node_d=None
+            try:
                 sentence=find_sentence(token_list)
                 node_d=dist_node_type(sentence)
+            except Exception as e:
+                print(f"in {file_name}")
+                print(f"\t {e}")
+                break
+            if not node_d is None:
                 file.child.append(node_d)
             # print('found a statement:')
             # for item in sentence:
             #     print(f"\t{item}")
             token_list=token_list[len(sentence):]
         # print_node(file,0)
         return find_func_def_in_file(file,0,func_name_list)
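The three magic bytes are the UTF-8 byte order mark; some editors prepend it, and without stripping it the first token of the file starts with bytes the lexer cannot classify, which is exactly the "unknown character" failure this commit targets. In isolation:

data = bytes([0xEF, 0xBB, 0xBF]) + b"int x;"   # file saved with a UTF-8 BOM
if data[:3] == bytes([0xEF, 0xBB, 0xBF]):
    data = data[3:]
assert data == b"int x;"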
@@ -779,7 +846,8 @@ def find_func_def(file_list:list,func_name_list:str):
     ret_list=[]
     err_list=[]
     for item in file_list:
-        print(f"check {item}")
+        sys.stdout.write('.')
+        sys.stdout.flush()
         try:
             ack=check_func_def(item,func_name_list)
             if(ack):
@@ -821,6 +889,7 @@ if __name__=="__main__":
     file_list=[]
     for item in sys.argv[1:]:
         file_list+=find_type(item,'.c')
+    # file_list=["./app/iot_plc_uart/iot_plc_demo.c"]
     print(f"there is {len(file_list)} .c file.")
     func_list=[]
     for item in _out_text: