Fix some symbols not being recognized
lex_c.py (deleted, 287 lines)
@@ -1,287 +0,0 @@
import os
import sys
import dataclasses


TOKEN_IF = 256,
TOKEN_BREAK = 257,
TOKEN_WHILE = 258,
TOKEN_SWITCH = 259,
TOKEN_CASE = 260,
TOKEN_DO = 261,
TOKEN_CHAR = 262,
TOKEN_INT = 263,
TOKEN_VOID = 264,
TOKEN_SYMBOL = 265,
TOKEN_NUM = 266,       # number
TOKEN_INC = 267,       # increment
TOKEN_DEC = 268,       # decrement
TOKEN_EQ = 269,        # equal
TOKEN_NEQ = 270,       # not equal
TOKEN_LSH = 271,       # left shift
TOKEN_RSH = 272,       # right shift
TOKEN_LEQ = 273,       # less than or equal
TOKEN_GEQ = 274,       # greater than or equal
TOKEN_ELSE = 275,
TOKEN_CONTINUE = 276,
TOKEN_CONST = 277,
TOKEN_STATIC = 278,
TOKEN_UNSIGNED = 279,
TOKEN_TYPEDEF = 280,
TOKEN_STRUCT = 281,
TOKEN_ENUM = 282,
TOKEN_UNION = 283,
TOKEN_STRING = 284,
TOKEN_DEFAULT = 285,
TOKEN_RETURN = 286,
TOKEN_ASSIG_ADD = 287
TOKEN_ASSIG_SUB = 288
TOKEN_ASSIG_MUL = 289
TOKEN_ASSIG_DIV = 290
TOKEN_ASSIG_LSH = 291
TOKEN_ASSIG_RSH = 292
TOKEN_EXTERN = 293
TOKEN_FLOAT = 294
TOKEN_DOUBLE = 295
TOKEN_SHORT = 296
TOKEN_LONG = 297


def TOKEN(t:str):
    return t.encode("utf-8")[0]

_KeyWordTable={
    "if":TOKEN_IF,
    "else":TOKEN_ELSE,
    "break":TOKEN_BREAK,
    "while":TOKEN_WHILE,
    "switch":TOKEN_SWITCH,
    "case":TOKEN_CASE,
    "do":TOKEN_DO,
    "char":TOKEN_CHAR,
    "int":TOKEN_INT,
    "void":TOKEN_VOID,
    "continue":TOKEN_CONTINUE,
    "const":TOKEN_CONST,
    "static":TOKEN_STATIC,
    "unsigned":TOKEN_UNSIGNED,
    "typedef":TOKEN_TYPEDEF,
    "struct":TOKEN_STRUCT,
    "enum":TOKEN_ENUM,
    "union":TOKEN_UNION,
    "default":TOKEN_DEFAULT,
    "return":TOKEN_RETURN,
    "extern":TOKEN_EXTERN,
    "float":TOKEN_FLOAT,
    "double":TOKEN_DOUBLE,
    "short":TOKEN_SHORT,
    "long":TOKEN_LONG,
}

_MarkTable={
    "<<":TOKEN_LSH,
    ">>":TOKEN_RSH,
    "<=":TOKEN_LEQ,
    ">=":TOKEN_GEQ,
    "!=":TOKEN_NEQ,
    "==":TOKEN_EQ,
    "++":TOKEN_INC,
    "--":TOKEN_DEC,
    "+=":TOKEN_ASSIG_ADD,
    "-=":TOKEN_ASSIG_SUB,
    "*=":TOKEN_ASSIG_MUL,
    "<<=":TOKEN_ASSIG_LSH,
    ">>=":TOKEN_ASSIG_RSH,
    "=":TOKEN("="),
    "!":TOKEN("!"),
    "<":TOKEN("<"),
    ">":TOKEN(">"),
    "+":TOKEN("+"),
    "-":TOKEN("-"),
}


# alphanumeric?
def isalnum(num:int):
    return bytes([num]).isalnum()

# alphanumeric or underscore?
def isalnum_(num:int):
    return bytes([num]).isalnum() or num==TOKEN("_")

# a letter?
def isalpha(num:int):
    return bytes([num]).isalpha()

# a letter or underscore?
def isalpha_(num:int):
    return bytes([num]).isalpha() or num==TOKEN("_")

# a digit?
def isdigit(num:int):
    return bytes([num]).isdigit()

# a digit or a decimal point?
def isdigitdot(num:int):
    return bytes([num]).isdigit() or num==TOKEN(".")

# whitespace, including newlines?
def isspace(num:int):
    return bytes([num]).isspace()

# one of the characters of the given string?
def isinstr(num:int,t:str):
    c=bytes([num])
    return c in t.encode("utf-8")

# an operator character?
def isoperator(num:int):
    return isinstr(num,"<>!+-=")

@dataclasses.dataclass
class lex_token:
    name:str
    buff:bytearray
    token:int
    line:int
    pos:int


class lex_class(object):
    def __init__(self,text:bytes) -> None:
        self.text=text
        self.index=-1
        self.line=1
        self.pos=-1
        self.token_list:list[lex_token]=[]
        self.token_buff=bytearray()
    def save_char(self,c:int):
        self.token_buff.append(c&0xff)
    def save_token(self,token:lex_token):
        self.token_list.append(token)
        self.token_buff=bytearray()
    def _get_char(self):
        if(self.index<len(self.text)):
            c=self.text[self.index]
            return c
        return -1
    def get_next_char(self):
        if not self.is_end():
            self.index+=1
        c=self._get_char()
        if(c==b'\n'[0]):
            self.line+=1
            self.pos=-1
        else:
            self.pos+=1
        return c
    def is_end(self):
        return self.index>=len(self.text)
    def save_one_char_token(self,c:int):
        token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
        self.save_token(token)
    def read_name_and_save(self,c:int):
        token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
        self.save_char(c)
        while True:
            c=self.get_next_char()
            if(isalnum_(c)):
                self.save_char(c)
            else:
                break
        name=self.token_buff.decode("utf-8")
        if(name in _KeyWordTable):
            token.token=_KeyWordTable[name]
            token.name=name
        token.buff=self.token_buff
        self.save_token(token)
        return c
    def read_operator_and_save(self,c:int):
        token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
        self.save_char(c)
        while True:
            c=self.get_next_char()
            if(isoperator(c)):
                self.save_char(c)
            else:
                break
        name=self.token_buff.decode("utf-8")
        if(name in _MarkTable):
            token.token=_MarkTable[name]
            token.name=name
        else:
            raise Exception(f"unknown operator {name}")
        token.buff=self.token_buff
        self.save_token(token)
        return c
    def read_num_and_save(self,c:int):
        token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
        self.save_char(c)
        while True:
            c=self.get_next_char()
            if(isdigitdot(c)):
                self.save_char(c)
            else:
                break
        if(self.token_buff.count(b'.')>1):
            raise Exception("a number cannot contain more than one decimal point")
        token.buff=self.token_buff
        self.save_token(token)
        return c
    def read_str_and_save(self,c:int):
        c=self.get_next_char()
        while c!=b'\"'[0]:
            self.save_char(c)
            c=self.get_next_char()
        self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
        return self.get_next_char()


def lex(text:bytes):
    lex_obj=lex_class(text)
    c=lex_obj.get_next_char()
    while not lex_obj.is_end():
        if isalpha_(c):
            c=lex_obj.read_name_and_save(c)
        elif isinstr(c,"{}[]()~,;:*"):
            lex_obj.save_one_char_token(c)
            c=lex_obj.get_next_char()
        elif isdigit(c):
            c=lex_obj.read_num_and_save(c)
        elif isspace(c):
            c=lex_obj.get_next_char()
        elif isoperator(c):
            c=lex_obj.read_operator_and_save(c)
        elif isinstr(c,"\""):
            c=lex_obj.read_str_and_save(c)
        elif isinstr(c,"\\"):
            c=lex_obj.get_next_char()
            if(c!=TOKEN("\r") and c!=TOKEN("\n")):
                raise Exception(f"'\\' must be at the end of a line, line:{lex_obj.line} pos:{lex_obj.pos}")
        elif isinstr(c,"/"):
            c=lex_obj.get_next_char()
            if(c==TOKEN("/")):
                while c!=TOKEN("\n"):
                    c=lex_obj.get_next_char()
            elif(c==TOKEN("*")):
                c_old=lex_obj.get_next_char()
                c=lex_obj.get_next_char()
                while not (c_old==TOKEN("*") and c==TOKEN("/")):
                    c_old=c
                    c=lex_obj.get_next_char()
                c=lex_obj.get_next_char()
            elif(c==TOKEN("=")):
                lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
                c=lex_obj.get_next_char()
            else:
                lex_obj.save_one_char_token(TOKEN("/"))
        else:
            raise Exception(f"unknown character {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
    # for item in lex_obj.token_list:
    #     print(f"{item}")
    return lex_obj.token_list


if __name__ == "__main__":
    with open("main.c",mode='rb') as f:
        lex(f.read())
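For orientation, a minimal usage sketch of the removed module's API; the C snippet is illustrative, not a file from this repository.

# Hedged sketch: drive the (now-removed) lex_c.py lexer over an in-memory C fragment.
from lex_c import lex

for t in lex(b"int add(int a, int b) { return a + b; }"):
    # each t is a lex_token(name, buff, token, line, pos)
    print(t.name, t.buff, t.line, t.pos)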
node_declear.py (deleted, 128 lines)
@@ -1,128 +0,0 @@
from lex_c import lex_token
import lex_c
from parser_c import node
from parser_c import node_file
from parser_c import node_variable_def
from parser_c import node_struct_decl
from parser_c import node_struct_def
from parser_c import node_union_decl
from parser_c import node_union_def
from parser_c import node_enum_decl
from parser_c import node_enum_def
from parser_c import node_func_decl
from parser_c import node_typedef
from parser_c import node_func_def

from parser_c import find_sentence
from parser_c import dist_node_type
from parser_c import find_close


def dist_node_type_struct(token_list:list[lex_token]):
    if(token_list[0].token==lex_c.TOKEN_STRUCT):
        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
            if(len(token_list)==2):
                return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
            elif(token_list[2].token==lex_c.TOKEN("{")):
                if not token_list[-1].token==lex_c.TOKEN("}"):
                    raise Exception("expected symbol '}' not found")
                v_list:list[node_variable_def]=[]
                token_list_local=token_list[3:-1]
                while len(token_list_local)>0:
                    sentence=find_sentence(token_list_local)
                    v_list.append(dist_node_type(token_list=sentence))
                    token_list_local=token_list_local[len(sentence):]
                return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
    raise Exception(f"syntax error {token_list[0]}")


def dist_node_type_union(token_list:list[lex_token]):
    if(token_list[0].token==lex_c.TOKEN_UNION):
        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
            if(len(token_list)==2):
                return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
            elif(token_list[2].token==lex_c.TOKEN("{")):
                if not token_list[-1].token==lex_c.TOKEN("}"):
                    raise Exception("expected symbol '}' not found")
                v_list:list[node_variable_def]=[]
                token_list_local=token_list[3:-1]
                while len(token_list_local)>0:
                    sentence=find_sentence(token_list_local)
                    v_list.append(dist_node_type(token_list=sentence))
                    token_list_local=token_list_local[len(sentence):]
                return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
    raise Exception(f"syntax error {token_list[0]}")


def dist_node_type_enum(token_list:list[lex_token]):
    if(token_list[0].token==lex_c.TOKEN_ENUM):
        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
            if(len(token_list)==2):
                return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
            elif(token_list[2].token==lex_c.TOKEN("{")):
                if not token_list[-1].token==lex_c.TOKEN("}"):
                    raise Exception("expected symbol '}' not found")
                token_list_local=token_list[3:-1]
                index=0
                v_list:list[dict]=[]
                while len(token_list_local)>0:
                    if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
                        key=token_list_local[0].buff.decode("utf-8")
                        if(token_list_local[1].token==lex_c.TOKEN("=") and token_list_local[2].token==lex_c.TOKEN_NUM):
                            index=int(token_list_local[2].buff.decode("utf-8"))
                            token_list_local=token_list_local[3:]
                        else:
                            index+=1
                            token_list_local=token_list_local[1:]
                        v_list.append({key:index})
                    if(len(token_list_local)>0):
                        if(token_list_local[0].token!=lex_c.TOKEN(",")):
                            raise Exception(f"enum entries must be separated by ','")
                        token_list_local=token_list_local[1:]
                return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
    raise Exception(f"syntax error {token_list[0]}")


def dist_node_type_typedef(token_list:list[lex_token]):
    if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
        attr=[]
        token_list_local=token_list
        if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
            raise Exception(f"no new type defined {token_list[-1]}")
        name=token_list[-1].buff.decode("utf-8")
        token_list=token_list[1:]
        while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
            attr.append(token_list[0].name)
            token_list=token_list[1:]
        if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
            attr.append(token_list[0].name)
            if(token_list[1].token==lex_c.TOKEN_SYMBOL):
                node_r=None
                attr.append(token_list[1].buff.decode("utf-8"))
                if(token_list[2].token==lex_c.TOKEN("{")):
                    node_r=dist_node_type(token_list=token_list[1:-1])
                elif(token_list[2].token==lex_c.TOKEN("*")):
                    attr.append(token_list[2].name)
                return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
        if(token_list[0].token==lex_c.TOKEN_SYMBOL):
            # a custom type previously defined via typedef
            attr.append(token_list[0].buff.decode("utf-8"))
            token_list=token_list[1:]
        else:
            # built-in C types
            while(token_list[0].token in
                    [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
                     lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
                attr.append(token_list[0].name)
                token_list=token_list[1:]
        if(len(token_list)>1):
            raise Exception(f"unexpected token {token_list[0]}")
        return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)
    raise Exception(f"syntax error {token_list[0]}")
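A worked trace of the (now-removed) enumerator loop above, for an illustrative declaration "enum color { RED, GREEN = 5, BLUE }":

# RED        -> no '=' follows: index = 0+1 = 1,  v_list = [{'RED': 1}]
# GREEN = 5  -> explicit value:  index = 5,       v_list += [{'GREEN': 5}]
# BLUE       -> no '=' follows: index = 5+1 = 6,  v_list += [{'BLUE': 6}]
# Note: as written, implicit values start at 1, whereas C starts at 0.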
parser_c.py (modified)
@@ -1,20 +1,441 @@
 import os
 import sys
+import shutil
 import dataclasses
-from lex_c import lex_token
-from lex_c import lex
-import lex_c
+import copy
+
+TOKEN_IF = 256,
+TOKEN_BREAK = 257,
+TOKEN_WHILE = 258,
+TOKEN_SWITCH = 259,
+TOKEN_CASE = 260,
+TOKEN_DO = 261,
+TOKEN_CHAR = 262,
+TOKEN_INT = 263,
+TOKEN_VOID = 264,
+TOKEN_SYMBOL = 265,
+TOKEN_NUM = 266,         # number
+TOKEN_INC = 267,         # increment
+TOKEN_DEC = 268,         # decrement
+TOKEN_EQ = 269,          # equal
+TOKEN_NEQ = 270,         # not equal
+TOKEN_LSH = 271,         # left shift
+TOKEN_RSH = 272,         # right shift
+TOKEN_LEQ = 273,         # less than or equal
+TOKEN_GEQ = 274,         # greater than or equal
+TOKEN_ELSE = 275,
+TOKEN_CONTINUE = 276,
+TOKEN_CONST = 277,
+TOKEN_STATIC = 278,
+TOKEN_UNSIGNED = 279,
+TOKEN_TYPEDEF = 280,
+TOKEN_STRUCT = 281,
+TOKEN_ENUM = 282,
+TOKEN_UNION = 283,
+TOKEN_STRING = 284,
+TOKEN_DEFAULT = 285,
+TOKEN_RETURN = 286,
+TOKEN_ASSIG_ADD = 287,
+TOKEN_ASSIG_SUB = 288,
+TOKEN_ASSIG_MUL = 289,
+TOKEN_ASSIG_DIV = 290,
+TOKEN_ASSIG_LSH = 291,
+TOKEN_ASSIG_RSH = 292,
+TOKEN_EXTERN = 293,
+TOKEN_FLOAT = 294,
+TOKEN_DOUBLE = 295,
+TOKEN_SHORT = 296,
+TOKEN_LONG = 297,
+TOKEN_POINTER = 298,
+TOKEN_LOGICAL_OR = 299,  # logical or
+TOKEN_LOGICAL_AND = 300, # logical and
+TOKEN_OMIT = 301,        # ellipsis ...
+
+
+def TOKEN(t:str):
+    return t.encode("utf-8")[0]
+
+_KeyWordTable={
+    "if":TOKEN_IF,
+    "else":TOKEN_ELSE,
+    "break":TOKEN_BREAK,
+    "while":TOKEN_WHILE,
+    "switch":TOKEN_SWITCH,
+    "case":TOKEN_CASE,
+    "do":TOKEN_DO,
+    "char":TOKEN_CHAR,
+    "int":TOKEN_INT,
+    "void":TOKEN_VOID,
+    "continue":TOKEN_CONTINUE,
+    "const":TOKEN_CONST,
+    "static":TOKEN_STATIC,
+    "unsigned":TOKEN_UNSIGNED,
+    "typedef":TOKEN_TYPEDEF,
+    "struct":TOKEN_STRUCT,
+    "enum":TOKEN_ENUM,
+    "union":TOKEN_UNION,
+    "default":TOKEN_DEFAULT,
+    "return":TOKEN_RETURN,
+    "extern":TOKEN_EXTERN,
+    "float":TOKEN_FLOAT,
+    "double":TOKEN_DOUBLE,
+    "short":TOKEN_SHORT,
+    "long":TOKEN_LONG,
+}
+
+_MarkTable={
+    "<<":TOKEN_LSH,
+    ">>":TOKEN_RSH,
+    "<=":TOKEN_LEQ,
+    ">=":TOKEN_GEQ,
+    "!=":TOKEN_NEQ,
+    "==":TOKEN_EQ,
+    "++":TOKEN_INC,
+    "--":TOKEN_DEC,
+    "->":TOKEN_POINTER,
+    "+=":TOKEN_ASSIG_ADD,
+    "-=":TOKEN_ASSIG_SUB,
+    "*=":TOKEN_ASSIG_MUL,
+    "<<=":TOKEN_ASSIG_LSH,
+    ">>=":TOKEN_ASSIG_RSH,
+    "=":TOKEN("="),
+    "!":TOKEN("!"),
+    "<":TOKEN("<"),
+    ">":TOKEN(">"),
+    "+":TOKEN("+"),
+    "-":TOKEN("-"),
+}
+
+
+# alphanumeric?
+def isalnum(num:int):
+    return bytes([num]).isalnum()
+
+# alphanumeric or underscore?
+def isalnum_(num:int):
+    return bytes([num]).isalnum() or num==TOKEN("_")
+
+# a letter?
+def isalpha(num:int):
+    return bytes([num]).isalpha()
+
+# a letter or underscore?
+def isalpha_(num:int):
+    return bytes([num]).isalpha() or num==TOKEN("_")
+
+# a digit?
+def isdigit(num:int):
+    return bytes([num]).isdigit()
+
+# a digit or a decimal point?
+def isdigitdot(num:int):
+    return bytes([num]).isdigit() or num==TOKEN(".")
+
+# whitespace, including newlines?
+def isspace(num:int):
+    return bytes([num]).isspace()
+
+# one of the characters of the given string?
+def isinstr(num:int,t:str):
+    c=bytes([num])
+    return c in t.encode("utf-8")
+
+# an operator character?
+def isoperator(num:int):
+    return isinstr(num,"<>!+-=")
+
+@dataclasses.dataclass
+class lex_token:
+    name:str
+    buff:bytearray
+    token:int
+    line:int
+    pos:int
+
+# run-together operators: these are really several operators written back to back
+_NotMarkTable={
+    "!!":[lex_token("operator",'!',TOKEN('!'),0,0),
+          lex_token("operator",'!',TOKEN('!'),0,0)],
+    "=-":[lex_token("operator",'=',TOKEN('='),0,0),
+          lex_token("operator",'-',TOKEN('-'),0,0)],
+    "--=":[lex_token("operator",'--',TOKEN_DEC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "++=":[lex_token("operator",'++',TOKEN_INC,0,0),
+           lex_token("operator",'=',TOKEN('='),0,0)],
+    "=--":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "=++":[lex_token("operator",'=',TOKEN('='),0,0),
+           lex_token("operator",'++',TOKEN_INC,0,0)],
+    "!=--":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "!=++":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+    "==--":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'--',TOKEN_DEC,0,0)],
+    "==++":[lex_token("operator",'==',TOKEN_EQ,0,0),
+            lex_token("operator",'++',TOKEN_INC,0,0)],
+}
+
+
+class lex_class(object):
+    def __init__(self,text:bytes,file_name:str="") -> None:
+        self.text=text
+        self.index=-1
+        self.line=1
+        self.pos=-1
+        self.token_list:list[lex_token]=[]
+        self.token_buff=bytearray()
+        self.file_name=file_name
+        self.macro_table={}
+    def save_char(self,c:int):
+        self.token_buff.append(c&0xff)
+    def save_token(self,token:lex_token):
+        self.token_list.append(token)
+        self.token_buff=bytearray()
+    def _get_char(self):
+        if(self.index<len(self.text)):
+            c=self.text[self.index]
+            return c
+        return -1
+    def get_next_char(self):
+        if not self.is_end():
+            self.index+=1
+        c=self._get_char()
+        if(c==b'\n'[0]):
+            self.line+=1
+            self.pos=-1
+        else:
+            self.pos+=1
+        return c
+    def is_end(self):
+        return self.index>=len(self.text)
+    def save_one_char_token(self,c:int):
+        token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
+        self.save_token(token)
+    def read_name_and_save(self,c:int):
+        token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
+        self.save_char(c)
+        while True:
+            c=self.get_next_char()
+            if(isalnum_(c)):
+                self.save_char(c)
+            else:
+                break
+        name=self.token_buff.decode("utf-8")
+        if(name in _KeyWordTable):
+            token.token=_KeyWordTable[name]
+            token.name=name
+        token.buff=self.token_buff
+        self.save_token(token)
+        return c
+    def read_operator_and_save(self,c:int):
+        token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
+        self.save_char(c)
+        while True:
+            c=self.get_next_char()
+            if(isoperator(c)):
+                self.save_char(c)
+            else:
+                break
+        name=self.token_buff.decode("utf-8")
+        if(name in _MarkTable):
+            token.token=_MarkTable[name]
+            token.name=name
+            token.buff=self.token_buff
+            self.save_token(token)
+        elif(name in _NotMarkTable):
+            # split a run-together operator into its component tokens
+            tokens=_NotMarkTable[name]
+            for t in tokens:
+                token.token=t.token
+                token.name=t.name
+                token.buff=token.name.encode("utf-8")
+                self.save_token(copy.deepcopy(token))
+                token.pos+=len(token.name)
+        else:
+            raise Exception(f"unknown operator {name} {self.file_name}:{self.line},{self.pos}")
+            # print(f"unknown operator {name}")
+        return c
+    def read_num_and_save(self,c:int):
+        token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
+        self.save_char(c)
+        while True:
+            c=self.get_next_char()
+            if(isdigitdot(c)):
+                self.save_char(c)
+            else:
+                break
+        if(self.token_buff.count(b'.')>1):
+            raise Exception(f"a number cannot contain more than one decimal point {self.file_name}:{self.line},{self.pos}")
+        token.buff=self.token_buff
+        self.save_token(token)
+        return c
+    _escape_table={'0':0,'a':7,'b':8,'t':9,'n':10,'v':11,'f':12,'r':13,'"':34,'\'':39,'?':63,'\\':92}
+    def read_str_and_save(self,c:int):
+        c=self.get_next_char()
+        while c!=b'\"'[0]:
+            if(c==TOKEN('\\')):  # escape sequence
+                c=self.get_next_char()
+                self.save_char(self._escape_table.get(chr(c),0))
+            else:
+                self.save_char(c)
+            c=self.get_next_char()
+        self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
+        return self.get_next_char()
+    def read_char_and_save(self,c:int):
+        c=self.get_next_char()
+        while c!=b'\''[0]:
+            if(c==TOKEN('\\')):  # escape sequence
+                c=self.get_next_char()
+                self.save_char(self._escape_table.get(chr(c),0))
+            else:
+                self.save_char(c)
+            c=self.get_next_char()
+        self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
+        return self.get_next_char()
+    def deal_macro(self,buff:bytearray):
+        self.macro_result=False
+        sp=buff.decode('utf-8').split()
+        if(len(sp)>0):
+            if(sp[0]=='#define'):
+                if(len(sp)>=3):
+                    if not (sp[1] in self.macro_table):
+                        self.macro_table[sp[1]]=' '.join(sp[2:])
+                else:
+                    if not (sp[1] in self.macro_table):
+                        self.macro_table[sp[1]]=""
+            elif(sp[0]=='#ifdef'):
+                self.macro_result= (sp[1] in self.macro_table)
+                return self.macro_result
+            elif(sp[0]=='#if'):
+                t=' '.join(sp[1:])  # the condition can be complex; for now always treat it as false
+                return self.macro_result
+            elif(sp[0]=='#elif'):
+                return self.macro_result
+            elif(sp[0]=='#else'):
+                self.macro_result= not self.macro_result
+                return self.macro_result
+            elif(sp[0]=='#endif'):
+                return True
+        else:
+            return True
+
+
+def lex(text:bytes,file_name:str=""):
+    lex_obj=lex_class(text,file_name)
+    c=lex_obj.get_next_char()
+    line_old=0
+    pos_old=0
+    while not lex_obj.is_end():
+        line_old=lex_obj.line
+        pos_old=lex_obj.pos
+        if isalpha_(c):
+            c=lex_obj.read_name_and_save(c)
+        elif isinstr(c,"{}[]()~,;:*?%^"):
+            lex_obj.save_one_char_token(c)
+            c=lex_obj.get_next_char()
+        elif isdigit(c):
+            c=lex_obj.read_num_and_save(c)
+        elif isspace(c):
+            c=lex_obj.get_next_char()
+        elif isoperator(c):
+            c=lex_obj.read_operator_and_save(c)
+        elif isinstr(c,"\""):
+            c=lex_obj.read_str_and_save(c)
+        elif isinstr(c,"\'"):
+            c=lex_obj.read_char_and_save(c)
+        elif isinstr(c,"\\"):
+            c=lex_obj.get_next_char()
+            if(c!=TOKEN("\r") and c!=TOKEN("\n")):
+                raise Exception(f"'\\' must be at the end of a line, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+        elif isinstr(c,"#"):  # preprocessor directive
+            c_old=c
+            buff=bytearray()
+            while (c!=TOKEN("\n") and c!=-1):
+                c=lex_obj.get_next_char()
+                if(c_old==TOKEN('/') and c==TOKEN('*')):  # handle a comment after the directive
+                    while not (c_old==TOKEN("*") and c==TOKEN("/")) or c==-1:
+                        c_old=c
+                        c=lex_obj.get_next_char()
+                elif(c_old==TOKEN('/') and c==TOKEN('/')):
+                    while not (c==TOKEN('\n') or c==-1):
+                        c=lex_obj.get_next_char()
+                elif(c_old==TOKEN('\\') and c in [TOKEN('\n'),TOKEN('\r')]):  # handle line continuations
+                    c=lex_obj.get_next_char()
+                else:
+                    buff.append(c_old&0xff)
+                    c_old=c
+            if not (lex_obj.deal_macro(buff)):  # process the directive
+                is_space=True
+                while True:
+                    c=lex_obj.get_next_char()
+                    if(is_space and c==TOKEN('#')):
+                        break
+                    if(c==-1):
+                        break
+                    if not isspace(c):
+                        is_space=False
+                    elif(c==TOKEN('\n')):
+                        is_space=True
+        elif isinstr(c,"/"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("/")):
+                while (c!=TOKEN("\n") and c!=-1):
+                    c=lex_obj.get_next_char()
+            elif(c==TOKEN("*")):
+                c_old=lex_obj.get_next_char()
+                c=lex_obj.get_next_char()
+                while not (c_old==TOKEN("*") and c==TOKEN("/")):
+                    c_old=c
+                    c=lex_obj.get_next_char()
+                c=lex_obj.get_next_char()
+            elif(c==TOKEN("=")):
+                lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
+                c=lex_obj.get_next_char()
+            else:
+                lex_obj.save_one_char_token(TOKEN("/"))
+        elif isinstr(c,"|"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("|")):
+                lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
+                c=lex_obj.get_next_char()
+            else:
+                lex_obj.save_one_char_token(TOKEN("|"))
+        elif isinstr(c,"&"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("&")):
+                lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
+                c=lex_obj.get_next_char()
+            else:
+                lex_obj.save_one_char_token(TOKEN("&"))
+        elif isinstr(c,'.'):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN('.')):
+                c=lex_obj.get_next_char()
+                if(c==TOKEN('.')):
+                    lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
+                    c=lex_obj.get_next_char()
+                else:
+                    raise Exception(f"malformed token {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+            else:
+                lex_obj.save_one_char_token(TOKEN("."))
+        else:
+            raise Exception(f"unknown character {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
+        # c=lex_obj.get_next_char()
+        # if(line_old==lex_obj.line and pos_old==lex_obj.pos):
+        #     print(f"pointer not move.")
+        #     print(line_old,pos_old)
+    # for item in lex_obj.token_list:
+    #     print(f"{item}")
+    return lex_obj.token_list
+
+
+_NodeTypeTable=[
+    "file","vdecl","fdef"
+]
+
+
 @dataclasses.dataclass
 class node:
-    name:list[str]=dataclasses.field(default_factory=list)
+    name:list=dataclasses.field(default_factory=list)
     type:str="base"
-    token_list:list[lex_token]=dataclasses.field(default_factory=list)
+    token_list:list=dataclasses.field(default_factory=list)
     child:list=dataclasses.field(default_factory=list)
     def complite(self):
         print(f"complite {self.type}")
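The new _NotMarkTable is the heart of this commit: the greedy operator scan used to reject run-together operators such as "=-", and now splits them into their components. A minimal sketch (the input fragment is illustrative):

from parser_c import lex

for t in lex(b"a=-1;\n","demo.c"):
    print(t.name, t.buff)
# before this commit: Exception "unknown operator =-"
# now: symbol b'a', = b'=', - b'-', number b'1', ; b';'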
@@ -125,7 +546,7 @@ class node_int(node):

 # find the matching closing bracket
-def find_close(token_list:list[lex_token],token:tuple[int,int]):
+def find_close(token_list:list,token:tuple):
     if token_list[0].token!=token[0]:
         return 0
     num=0
@@ -136,20 +557,33 @@ def find_close(token_list:list,token:tuple):
             num-=1
         if(num==0):
             return index
-    raise Exception(f"no matching closing symbol found {token[1]}")
+    raise Exception(f"no matching closing symbol found {token_list[0]}")
+
+
+# find the index of the given token
+def find_token(token_list:list,token:int):
+    num=0
+    for index,item in enumerate(token_list):
+        if(item.token!=token):
+            num+=1
+        else:
+            return num
+    return num


 # find one complete statement
-def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex_c.TOKEN(":")]):
+def find_sentence(token_list:list,sep:list=[TOKEN(";"),TOKEN(":")]):
     bracket_flag=False
     index=0
+    if(len(token_list)==1):
+        return token_list
     while index<len(token_list):
-        if(token_list[index].token==lex_c.TOKEN("(")):
-            bracket_index=find_close(token_list[index:],(lex_c.TOKEN("("),lex_c.TOKEN(")")))
+        if(token_list[index].token==TOKEN("(")):
+            bracket_index=find_close(token_list[index:],(TOKEN("("),TOKEN(")")))
             if(bracket_index>0):
                 bracket_flag=True
                 index+=bracket_index
-        elif(token_list[index].token==lex_c.TOKEN("{")):
-            bracket_index=find_close(token_list[index:],(lex_c.TOKEN("{"),lex_c.TOKEN("}")))
+        elif(token_list[index].token==TOKEN("{")):
+            bracket_index=find_close(token_list[index:],(TOKEN("{"),TOKEN("}")))
             if(bracket_index>0):
                 index+=bracket_index
             if(bracket_flag==True):
@@ -157,7 +591,7 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex
         elif(token_list[index].token in sep):
             return token_list[:index+1]
         index+=1
-    raise Exception(f"no complete statement found")
+    raise Exception(f"no complete statement found sep={sep} token={token_list[0]}")
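find_sentence and the new find_token are the parser's slicing primitives. A sketch of the former (input illustrative):

from parser_c import lex, find_sentence

tokens = lex(b"int a; int b;\n", "demo.c")
first = find_sentence(tokens)   # 'int' 'a' ';' -- the separator is included
rest = tokens[len(first):]      # 'int' 'b' ';'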
@@ -171,140 +605,162 @@ def find_sentence(token_list:list[lex_token],sep:list[int]=[lex_c.TOKEN(";"),lex


-def dist_node_type_struct(token_list:list[lex_token]):
-    if(token_list[0].token==lex_c.TOKEN_STRUCT):
-        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_struct(token_list:list):
+    if(token_list[0].token==TOKEN_STRUCT):
+        if(token_list[1].token==TOKEN_SYMBOL):
             if(len(token_list)==2):
-                return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-            elif(token_list[2].token==lex_c.TOKEN("{")):
-                if not token_list[-1].token==lex_c.TOKEN("}"):
-                    raise Exception("expected symbol '}' not found")
-                v_list:list[node_variable_def]=[]
-                token_list_local=token_list[3:-1]
-                while len(token_list_local)>0:
-                    sentence=find_sentence(token_list_local)
-                    v_list.append(dist_node_type(token_list=sentence))
-                    token_list_local=token_list_local[len(sentence):]
-                return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=v_list)
+                return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+            elif(token_list[2].token==TOKEN("{")):
+                # if not token_list[-1].token==TOKEN("}"):
+                #     raise Exception("expected symbol '}' not found")
+                # v_list:list[node_variable_def]=[]
+                # token_list_local=token_list[3:-1]
+                # while len(token_list_local)>0:
+                #     sentence=find_sentence(token_list_local)
+                #     v_list.append(dist_node_type(token_list=sentence))
+                #     token_list_local=token_list_local[len(sentence):]
+                return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+            else:
+                return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+        if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
+            # function definition
+            return dist_node_type_funcdef(token_list=token_list)

     raise Exception(f"syntax error {token_list[0]}")


-def dist_node_type_union(token_list:list[lex_token]):
-    if(token_list[0].token==lex_c.TOKEN_UNION):
-        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_union(token_list:list):
+    if(token_list[0].token==TOKEN_UNION):
+        if(token_list[1].token==TOKEN_SYMBOL):
             if(len(token_list)==2):
                 return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-            elif(token_list[2].token==lex_c.TOKEN("{")):
-                if not token_list[-1].token==lex_c.TOKEN("}"):
-                    raise Exception("expected symbol '}' not found")
-                v_list:list[node_variable_def]=[]
-                token_list_local=token_list[3:-1]
-                while len(token_list_local)>0:
-                    sentence=find_sentence(token_list_local)
-                    v_list.append(dist_node_type(token_list=sentence))
-                    token_list_local=token_list_local[len(sentence):]
-                return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=v_list)
+            elif(token_list[2].token==TOKEN("{")):
+                # if not token_list[-1].token==TOKEN("}"):
+                #     raise Exception("expected symbol '}' not found")
+                # v_list:list[node_variable_def]=[]
+                # token_list_local=token_list[3:-1]
+                # while len(token_list_local)>0:
+                #     sentence=find_sentence(token_list_local)
+                #     v_list.append(dist_node_type(token_list=sentence))
+                #     token_list_local=token_list_local[len(sentence):]
+                return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+        if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
+            # function definition
+            return dist_node_type_funcdef(token_list=token_list)
     raise Exception(f"syntax error {token_list[0]}")


-def dist_node_type_enum(token_list:list[lex_token]):
-    if(token_list[0].token==lex_c.TOKEN_ENUM):
-        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+def dist_node_type_enum(token_list:list):
+    if(token_list[0].token==TOKEN_ENUM):
+        if(token_list[1].token==TOKEN_SYMBOL):
             if(len(token_list)==2):
                 return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
-            elif(token_list[2].token==lex_c.TOKEN("{")):
-                if not token_list[-1].token==lex_c.TOKEN("}"):
-                    raise Exception("expected symbol '}' not found")
+            elif(token_list[2].token==TOKEN("{")):
+                # if not token_list[-1].token==TOKEN("}"):
+                #     raise Exception("expected symbol '}' not found")
                 # token_list_local=token_list[3:-1]
                 return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
+        elif(token_list[1].token==TOKEN("{")):
+            # if not token_list[-1].token==TOKEN("}"):
+            #     raise Exception("expected symbol '}' not found")
+            # token_list_local=token_list[3:-1]
+            return node_enum_def(name='unnamed_enum',token_list=token_list,child=[])
+        if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
+            # function definition
+            return dist_node_type_funcdef(token_list=token_list)
     raise Exception(f"syntax error {token_list[0]}")


-def dist_node_type_typedef(token_list:list[lex_token]):
-    if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+def dist_node_type_typedef(token_list:list):
+    if(token_list[0].token==TOKEN_TYPEDEF):
         attr=[]
         token_list_local=token_list
-        if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
-            raise Exception(f"no new type defined {token_list[-1]}")
+        # if(token_list[-1].token!=TOKEN_SYMBOL):
+        #     raise Exception(f"no new type defined {token_list[-1]}")
         name=token_list[-1].buff.decode("utf-8")
         token_list=token_list[1:]
-        while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
+        while token_list[0].token in [TOKEN_UNSIGNED,TOKEN_CONST]:
             attr.append(token_list[0].name)
             token_list=token_list[1:]
-        if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
+        if(token_list[0].token==TOKEN_STRUCT or token_list[0].token==TOKEN_UNION):
             attr.append(token_list[0].name)
-            if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+            if(token_list[1].token==TOKEN_SYMBOL):
                 node_r=None
                 attr.append(token_list[1].buff.decode("utf-8"))
-                if(token_list[2].token==lex_c.TOKEN("{")):
+                if(token_list[2].token==TOKEN("{")):
                     node_r=dist_node_type(token_list=token_list[1:-1])
-                elif(token_list[2].token==lex_c.TOKEN("*")):
+                elif(token_list[2].token==TOKEN("*")):
                     attr.append(token_list[2].name)
-                return node_typedef(name=name,token_list=token_list_local,child=node_r)
-        if(token_list[0].token==lex_c.TOKEN_SYMBOL):
+                return node_typedef(name=name,token_list=token_list_local,child=[])
+        if(token_list[0].token==TOKEN_SYMBOL):
             # a custom type previously defined via typedef
             attr.append(token_list[0].buff.decode("utf-8"))
             token_list=token_list[1:]
         else:
             # built-in C types
             while(token_list[0].token in
-                    [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
-                     lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
+                    [TOKEN_INT,TOKEN_CHAR,TOKEN_SHORT,TOKEN_LONG,TOKEN_FLOAT,
+                     TOKEN_DOUBLE,TOKEN_VOID,TOKEN("*")]):
                 attr.append(token_list[0].name)
                 token_list=token_list[1:]
-        if(len(token_list)>1):
-            raise Exception(f"unexpected token {token_list[0]}")
+        # if(len(token_list)>1):
+        #     raise Exception(f"unexpected token {token_list[0]}")
         return node_typedef(name=name,token_list=token_list_local,child=[])
     raise Exception(f"syntax error {token_list[0]}")


 # find child nodes
-def find_child(token_list:list[lex_token],seq:list[int]=[lex_c.TOKEN(";"),lex_c.TOKEN(":")]):
+def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
     child=[]
+    token_list_local=[]
     for i in range(len(token_list)):
-        if(token_list[i].token==lex_c.TOKEN("{")):
+        if(token_list[i].token==TOKEN("{")):
             token_list_local=token_list[i+1:-1]
             break
     while len(token_list_local)>0:
         sentence=find_sentence(token_list_local,seq)
         node_d=dist_node_type(sentence)
-        child.append(node_d)
+        if not node_d is None:
+            child.append(node_d)
         token_list_local=token_list_local[len(sentence):]
     return child


-def dist_node_type_funcdef(token_list:list[lex_token]):
+def dist_node_type_funcdef(token_list:list):
     for i in range(len(token_list)):
-        if(token_list[i].token==lex_c.TOKEN_SYMBOL):
-            name=token_list[i].buff.decode("utf-8")
+        if(token_list[i].token==TOKEN('(')):
+            name=token_list[i-1].buff.decode("utf-8")
             break
-    return node_func_def(name=[name],token_list=token_list,child=find_child(token_list))
+    # return node_func_def(name=[name],token_list=token_list,child=find_child(token_list))
+    return node_func_def(name=[name],token_list=token_list,child=[])


-def dist_node_type_funcdecl(token_list:list[lex_token]):
+def dist_node_type_funcdecl(token_list:list):
     for i in range(len(token_list)):
-        if(token_list[i].token==lex_c.TOKEN_SYMBOL):
+        if(token_list[i].token==TOKEN_SYMBOL):
             name=token_list[i].buff.decode("utf-8")
             return node_func_decl(name=[name],token_list=token_list,child=[])
     raise Exception(f"malformed function declaration {token_list[0]}")


 # handle a statement whose first token is a symbol
-def dist_node_type_symbol(token_list:list[lex_token]):
+def dist_node_type_symbol(token_list:list):
     # variable assignment or function call
     if(len(token_list)==1):
         return node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list)
-    if(token_list[1].token == lex_c.TOKEN("(")):
+    if(token_list[1].token == TOKEN("(")):
         child=find_child(token_list=token_list[2:-1])
         return node_call("call",token_list=token_list,child=child)
     elif(token_list[1].token in [
-            lex_c.TOKEN("="),lex_c.TOKEN_ASSIG_ADD,lex_c.TOKEN_ASSIG_DIV,lex_c.TOKEN_ASSIG_LSH,
-            lex_c.TOKEN_ASSIG_MUL,lex_c.TOKEN_ASSIG_RSH,lex_c.TOKEN_ASSIG_SUB]):
+            TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
+            TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
         name=token_list[1].name
-        child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),
-               dist_node_type(token_list=token_list[2:])]
+        child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
+        child_d=dist_node_type(token_list=token_list[2:])
+        if not child_d is None:
+            child.append(child_d)
         return node_opt(name=name,token_list=token_list,child=child)
     else:
         # operations that carry no assignment
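The recurring guard find_token(token_list,TOKEN('(')) < find_token(token_list,TOKEN('{')) is how the parser now tells a function definition (parameter list before the body brace) from other brace constructs. A sketch (inputs illustrative):

from parser_c import lex, find_token, TOKEN

defn = lex(b"int max(int a,int b){return a;}\n", "demo.c")
print(find_token(defn, TOKEN('(')) < find_token(defn, TOKEN('{')))  # True: '(' comes first

var = lex(b"struct point p = {0};\n", "demo.c")
print(find_token(var, TOKEN('(')) < find_token(var, TOKEN('{')))    # False: no '(' at all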
@@ -325,80 +781,181 @@ def dist_node_type_symbol(token_list:list[lex_token]):


 # determine the kind of a statement
-def dist_node_type(token_list:list[lex_token]):
-    if(token_list[0].token==lex_c.TOKEN_EXTERN):
+def dist_node_type(token_list:list):
+    # print(f"{token_list[0]}")
+    if(token_list[0].token==TOKEN_EXTERN):
         token_list=token_list[1:]
-    if(token_list[-1].token==lex_c.TOKEN(";")):
+    if(token_list[-1].token==TOKEN(";")):
         token_list=token_list[:-1]
-    if(token_list[0].token==lex_c.TOKEN_STRUCT):
+    if(len(token_list)==0):
+        return None
+    if(token_list[0].token==TOKEN_STRUCT):
         return dist_node_type_struct(token_list=token_list)
-    if(token_list[0].token==lex_c.TOKEN_UNION):
+    if(token_list[0].token==TOKEN_UNION):
         return dist_node_type_union(token_list=token_list)
-    if(token_list[0].token==lex_c.TOKEN_ENUM):
+    if(token_list[0].token==TOKEN_ENUM):
         return dist_node_type_enum(token_list=token_list)
-    if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+    if(token_list[0].token==TOKEN_TYPEDEF):
         return dist_node_type_typedef(token_list=token_list)
-    if(token_list[0].token==lex_c.TOKEN_SWITCH):
-        child=find_child(token_list)
-        return node_switch(name="",token_list=token_list,child=child)
-    if(token_list[0].token==lex_c.TOKEN_CASE):
-        name=token_list[1].buff.decode("utf-8")
-        return node_case(name=name,token_list=token_list,child=[])
-    if(token_list[0].token==lex_c.TOKEN_DEFAULT):
-        return node_default(name="",token_list=token_list,child=[])
-    if(token_list[0].token==lex_c.TOKEN_BREAK):
-        return node_break(name="",token_list=token_list,child=[])
-    if(token_list[0].token==lex_c.TOKEN_RETURN):
-        if(len(token_list)>1):
-            child=[dist_node_type(token_list[1:])]
-        else:
-            child=[]
-        return node_return(name="",token_list=token_list,child=child)
-    if(token_list[0].token==lex_c.TOKEN_STRING):
+    # if(token_list[0].token==TOKEN_SWITCH):
+    #     child=find_child(token_list)
+    #     return node_switch(name="",token_list=token_list,child=child)
+    # if(token_list[0].token==TOKEN_CASE):
+    #     name=token_list[1].buff.decode("utf-8")
+    #     return node_case(name=name,token_list=token_list,child=[])
+    # if(token_list[0].token==TOKEN_DEFAULT):
+    #     return node_default(name="",token_list=token_list,child=[])
+    # if(token_list[0].token==TOKEN_BREAK):
+    #     return node_break(name="",token_list=token_list,child=[])
+    # if(token_list[0].token==TOKEN_RETURN):
+    #     if(len(token_list)>1):
+    #         child=[dist_node_type(token_list[1:])]
+    #     else:
+    #         child=[]
+    #     return node_return(name="",token_list=token_list,child=child)
+    if(token_list[0].token==TOKEN_STRING):
         name=token_list[0].buff.decode("utf-8")
         return node_string(name=name,token_list=token_list,child=[])
-    if(token_list[0].token==lex_c.TOKEN_NUM):
+    if(token_list[0].token==TOKEN_NUM):
         name=token_list[0].buff.decode("utf-8")
         return node_int(name=name,token_list=token_list,child=[])

-    if(token_list[-1].token==lex_c.TOKEN(")")):
+    if(token_list[-1].token==TOKEN(")")):
         # function declaration
         return dist_node_type_funcdecl(token_list)
-    elif(token_list[-1].token==lex_c.TOKEN("}")):
-        # function definition
-        return dist_node_type_funcdef(token_list=token_list)
-    elif(token_list[0].token==lex_c.TOKEN_SYMBOL):
+    elif(token_list[-1].token==TOKEN("}")):
+        if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
+            # function definition
+            return dist_node_type_funcdef(token_list=token_list)
+        else:
+            # variable definition
+            pass
+    elif(token_list[0].token==TOKEN_SYMBOL):
         # variable assignment or function call
         return dist_node_type_symbol(token_list=token_list)
-    else:
     # variable definition
     for i in range(len(token_list)):
-        if(token_list[i].token==lex_c.TOKEN_SYMBOL):
+        if(token_list[i].token==TOKEN_SYMBOL):
             name=token_list[i].buff.decode("utf-8")
             return node_variable_def(name=[name],token_list=token_list,child=[])
     raise Exception(f"malformed variable definition {token_list[0]}")


 def print_node(n:node,deep:int):
     s="|"*deep
-    print(f"{s} {n.type} {n.name}")
+    print(f"{s} {n.type} {n.name} {n.token_list[0]}")
     # n.complite()
     if (not n.child is None) and len(n.child)>0:
         for item in n.child:
             print_node(item,deep+1)


+def find_func_def_in_file(n:node,deep:int,func_name_list:list):
+    ack=False
+    if(n.type=='func_def') and (n.name[0] in func_name_list):
+        print(f"{n.type} {n.name}")
+        return True
+    # n.complite()
+    if (not n.child is None) and len(n.child)>0:
+        for item in n.child:
+            ack=find_func_def_in_file(item,deep+1,func_name_list)
+            if(ack):
+                return ack
+    return False


-if __name__ == "__main__":
-    file_name="main.c"
+def check_func_def(file_name:str,func_name_list:list):
     with open(file_name,mode='rb') as f:
-        token_list=lex(f.read())
+        read_d=f.read()
+        if(read_d[:3]==bytes([0xef,0xbb,0xbf])):  # skip a UTF-8 BOM
+            read_d=read_d[3:]
+        token_list=lex(read_d,file_name)
         file=node_file(name=file_name,token_list=token_list)
         while len(token_list)>0:
-            sentence=find_sentence(token_list)
-            node_d=dist_node_type(sentence)
-            file.child.append(node_d)
-            # print('found a statement:')
-            # for item in sentence:
-            #     print(f"\t{item}")
+            node_d=None
+            try:
+                sentence=find_sentence(token_list)
+                node_d=dist_node_type(sentence)
+            except Exception as e:
+                print(f"in {file_name}")
+                print(f"\t {e}")
+                break
+            if not node_d is None:
+                file.child.append(node_d)
             token_list=token_list[len(sentence):]
         print_node(file,0)
+        return find_func_def_in_file(file,0,func_name_list)
+
+
+# find the files that define the given functions
+def find_func_def(file_list:list,func_name_list:str):
+    ret_list=[]
+    err_list=[]
+    for item in file_list:
+        sys.stdout.write('.')
+        sys.stdout.flush()
+        # try:
+        ack=check_func_def(item,func_name_list)
+        if(ack):
+            ret_list.append(item)
+        # except Exception as e:
+        #     print(e)
+        #     err_list.append(item)
+    return ret_list,err_list
+
+# find files with the given suffix
+def find_type(path:str,fix:str):
+    dlist=os.listdir(path)
+    file_list=[]
+    for i in dlist:
+        ps=os.path.join(path, i)
+        if os.path.isdir(ps):
+            file_list+=find_type(ps,fix)
+            pass
+        else:
+            if(ps[-len(fix):]==fix):
+                file_list.append(ps)
+    return file_list
+
+
+# with open("build/build_log.log",mode="r",encoding="utf-8") as f:
+#     _out_text=f.readlines()
+
+def get_func_list():
+    func_list=[]
+    _out_text=sys.stdin.readlines()
+    for item in _out_text:
+        key_str='undefined reference to `'
+        index=item.find(key_str)
+        if(index<0):
+            continue
+        index+=len(key_str)
+        index_end=item[index:].find('\'')
+        func=item[index:index+index_end]
+        if not (func in func_list):
+            func_list.append(func)
+    return func_list
+
+
+# the arguments are the directories to scan
+if __name__=="__main__":
+    file_list=[]
+    for item in sys.argv[1:]:
+        file_list+=find_type(item,'.c')
+    # file_list=["./dtest/dtest3/kl3_core_mark/core_main.c"]
+    print(f"there are {len(file_list)} .c files.")
+    # func_list=get_func_list()
+    func_list=['main']
+    print(func_list)
+    # find_func_def(['driver/src/hw3/efuse.c'],['efuse_get_d_bg_vbg_cntl'])
+    ret_list,err_list=find_func_def(file_list,func_list)
+    print("files found:")
+    for item in ret_list:
+        print(item)
+    print("files that failed to parse:")
+    for item in err_list:
+        print(item)
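Taken together, the new __main__ flow scans directories for .c files and reports which ones define the sought functions; it is run as python parser_c.py <dir> ... A programmatic sketch (the "src" path is illustrative):

from parser_c import find_type, find_func_def

c_files = find_type("src", ".c")                  # recursive scan for *.c
found, failed = find_func_def(c_files, ["main"])
print(found)                                       # files whose parse located a definition of main
# note: 'failed' stays empty here, since the error-collecting except is commented out above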