diff --git a/lex_c.py b/lex_c.py
index 819450a..b2e2192 100644
--- a/lex_c.py
+++ b/lex_c.py
@@ -34,6 +34,18 @@
 TOKEN_UNION = 283
 TOKEN_STRING = 284
 TOKEN_DEFAULT = 285
 TOKEN_RETURN = 286
+TOKEN_ASSIG_ADD = 287
+TOKEN_ASSIG_SUB = 288
+TOKEN_ASSIG_MUL = 289
+TOKEN_ASSIG_DIV = 290
+TOKEN_ASSIG_LSH = 291
+TOKEN_ASSIG_RSH = 292
+TOKEN_EXTERN = 293
+TOKEN_FLOAT = 294
+TOKEN_DOUBLE = 295
+TOKEN_SHORT = 296
+TOKEN_LONG = 297
+
 def TOKEN(t:str):
     return t.encode("utf-8")[0]
@@ -59,6 +71,11 @@ _KeyWordTable={
     "union":TOKEN_UNION,
     "default":TOKEN_DEFAULT,
     "return":TOKEN_RETURN,
+    "extern":TOKEN_EXTERN,
+    "float":TOKEN_FLOAT,
+    "double":TOKEN_DOUBLE,
+    "short":TOKEN_SHORT,
+    "long":TOKEN_LONG,
 }
 
 _MarkTable={
@@ -70,6 +87,11 @@ _MarkTable={
     "==":TOKEN_EQ,
     "++":TOKEN_INC,
     "--":TOKEN_DEC,
+    "+=":TOKEN_ASSIG_ADD,
+    "-=":TOKEN_ASSIG_SUB,
+    "*=":TOKEN_ASSIG_MUL,
+    "<<=":TOKEN_ASSIG_LSH,
+    ">>=":TOKEN_ASSIG_RSH,
     "=":TOKEN("="),
     "!":TOKEN("!"),
     "<":TOKEN("<"),
@@ -233,8 +255,31 @@ def lex(text:bytes):
             c=lex_obj.read_operator_and_save(c)
         elif isinstr(c,"\""):
             c=lex_obj.read_str_and_save(c)
+        elif isinstr(c,"\\"):
+            c=lex_obj.get_next_char()
+            if(c!=TOKEN("\r") and c!=TOKEN("\n")):
+                raise Exception(f"the character '\\' must appear at the end of a line, line:{lex_obj.line} pos:{lex_obj.pos}")
+        elif isinstr(c,"/"):
+            c=lex_obj.get_next_char()
+            if(c==TOKEN("/")):
+                # line comment: discard everything up to the newline
+                while c!=TOKEN("\n"):
+                    c=lex_obj.get_next_char()
+            elif(c==TOKEN("*")):
+                # block comment: discard everything up to the closing "*/"
+                c_old=lex_obj.get_next_char()
+                c=lex_obj.get_next_char()
+                while not (c_old==TOKEN("*") and c==TOKEN("/")):
+                    c_old=c
+                    c=lex_obj.get_next_char()
+                c=lex_obj.get_next_char()
+            elif(c==TOKEN("=")):
+                lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
+                c=lex_obj.get_next_char()
+            else:
+                lex_obj.save_one_char_token(TOKEN("/"))
         else:
-            raise Exception(f"err char {bytes([c])} at line:{lex_obj.line} pos:{lex_obj.pos}")
+            raise Exception(f"unknown character {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
     # for item in lex_obj.token_list:
     #     print(f"{item}")
     return lex_obj.token_list
diff --git a/main.c b/main.c
index 998cf87..fde8bd5 100644
--- a/main.c
+++ b/main.c
@@ -2,6 +2,49 @@
 
 
 
+// line comment
+struct _struct_a;
+
+typedef const struct _struct_a _typedef_struct_a;
+
+struct _struct_a /* block comment */ {
+    int a;
+    int b;
+};
+
+
+
+
+enum _enum_a;
+
+enum _enum_a {
+    Enum0=0,
+    Enum1,
+    Enum2,
+};
+
+// anonymous enum types are not supported yet
+// enum {
+//     Enumb0=0,
+//     Enumb1,
+//     Enumb2,
+// };
+
+
+union _union_a {
+    int a;
+    float b;
+    double c;
+    short d;
+};
+
+
+
+
+typedef int _typedef_int;
+
+
+
 const char* get_type(int s) {
     const char* ret;
     switch (s)
@@ -23,3 +66,7 @@ const char* get_type(int s) {
 
     return ret;
 }
+
+int main(){
+    return 0;
+}
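
The new operator tokens and the comment-skipping branches can be exercised without the parser. Below is a minimal smoke test, assuming `lex_c.py` with the changes above is on the import path; only `lex` and the token constants shown in the diff are relied on, and the sample source is made up:

```python
import lex_c

# covers the new "+=" compound-assignment token and both comment styles
source = b"""
int x = 1;   // a line comment is skipped entirely
/* a block comment
   spanning lines is skipped too */
x += 2;
"""

for tok in lex_c.lex(source):
    print(tok)  # "+=" should come out as a single TOKEN_ASSIG_ADD (287) token
```

Note that `/=` is not in `_MarkTable`; it is recognized directly in the `/` branch of `lex`, since that branch already has to disambiguate `/`, `//`, `/*`, and `/=`.
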
diff --git a/node_declear.py b/node_declear.py
new file mode 100644
index 0000000..a178e28
--- /dev/null
+++ b/node_declear.py
@@ -0,0 +1,128 @@
+from lex_c import lex_token
+import lex_c
+from parser_c import node
+from parser_c import node_file
+from parser_c import node_variable_def
+from parser_c import node_struct_decl
+from parser_c import node_struct_def
+from parser_c import node_union_decl
+from parser_c import node_union_def
+from parser_c import node_enum_decl
+from parser_c import node_enum_def
+from parser_c import node_func_decl
+from parser_c import node_typedef
+from parser_c import node_func_def
+
+from parser_c import find_sentence
+from parser_c import dist_node_type
+from parser_c import find_close
+
+
+def dist_node_type_struct(token_list:list[lex_token]):
+    if(token_list[0].token==lex_c.TOKEN_STRUCT):
+        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+            if(len(token_list)==2):
+                return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+            elif(token_list[2].token==lex_c.TOKEN("{")):
+                if not token_list[-1].token==lex_c.TOKEN("}"):
+                    raise Exception("expected symbol '}' not found")
+                v_list:list[node_variable_def]=[]
+                token_list_local=token_list[3:-1]
+                while len(token_list_local)>0:
+                    sentence=find_sentence(token_list_local)
+                    v_list.append(dist_node_type(token_list=sentence))
+                    token_list_local=token_list_local[len(sentence):]
+                return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
+    raise Exception(f"syntax error {token_list[0]}")
+
+
+def dist_node_type_union(token_list:list[lex_token]):
+    if(token_list[0].token==lex_c.TOKEN_UNION):
+        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+            if(len(token_list)==2):
+                return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+            elif(token_list[2].token==lex_c.TOKEN("{")):
+                if not token_list[-1].token==lex_c.TOKEN("}"):
+                    raise Exception("expected symbol '}' not found")
+                v_list:list[node_variable_def]=[]
+                token_list_local=token_list[3:-1]
+                while len(token_list_local)>0:
+                    sentence=find_sentence(token_list_local)
+                    v_list.append(dist_node_type(token_list=sentence))
+                    token_list_local=token_list_local[len(sentence):]
+                return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
+    raise Exception(f"syntax error {token_list[0]}")
+
+
+def dist_node_type_enum(token_list:list[lex_token]):
+    if(token_list[0].token==lex_c.TOKEN_ENUM):
+        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+            if(len(token_list)==2):
+                return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+            elif(token_list[2].token==lex_c.TOKEN("{")):
+                if not token_list[-1].token==lex_c.TOKEN("}"):
+                    raise Exception("expected symbol '}' not found")
+                token_list_local=token_list[3:-1]
+                # start below zero so the first implicit member gets 0, as in C
+                index=-1
+                v_list:list[dict]=[]
+                while len(token_list_local)>0:
+                    if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
+                        key=token_list_local[0].buff.decode("utf-8")
+                        if(len(token_list_local)>=3 and token_list_local[1].token==lex_c.TOKEN("=") and token_list_local[2].token==lex_c.TOKEN_NUM):
+                            index=int(token_list_local[2].buff.decode("utf-8"))
+                            token_list_local=token_list_local[3:]
+                        else:
+                            index+=1
+                            token_list_local=token_list_local[1:]
+                        v_list.append({key:index})
+                    if(len(token_list_local)>0):
+                        if(token_list_local[0].token!=lex_c.TOKEN(",")):
+                            raise Exception(f"enum members must be separated by ','")
+                        token_list_local=token_list_local[1:]
+                return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
+    raise Exception(f"syntax error {token_list[0]}")
+
+
+def dist_node_type_typedef(token_list:list[lex_token]):
+    if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+        attr=[]
+        token_list_local=token_list
+        if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
+            raise Exception(f"no new type name defined {token_list[-1]}")
+        name=token_list[-1].buff.decode("utf-8")
+        token_list=token_list[1:]
+        while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
+            attr.append(token_list[0].name)
+            token_list=token_list[1:]
+        if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
+            attr.append(token_list[0].name)
+            if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+                node_r=None
+                attr.append(token_list[1].buff.decode("utf-8"))
+                if(token_list[2].token==lex_c.TOKEN("{")):
+                    # inline definition: hand "struct <name> { ... }" back to the dispatcher
+                    node_r=dist_node_type(token_list=token_list[:-1])
+                elif(token_list[2].token==lex_c.TOKEN("*")):
+                    attr.append(token_list[2].name)
+                return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
+        if(token_list[0].token==lex_c.TOKEN_SYMBOL):
+            # a custom type previously defined via typedef
+            attr.append(token_list[0].buff.decode("utf-8"))
+            token_list=token_list[1:]
+        else:
+            # built-in C types
+            while(token_list[0].token in
+                [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
+                 lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
+                attr.append(token_list[0].name)
+                token_list=token_list[1:]
+        if(len(token_list)>1):
+            raise Exception(f"unexpected token {token_list[0]}")
+        return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)
+    raise Exception(f"syntax error {token_list[0]}")
+
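
A sketch of how the new helpers compose, assuming `lex_c`, `parser_c`, and `node_declear` from this diff sit in one directory; the enum source line is made up. `dist_node_type` strips an optional `extern` and the trailing `;`, then dispatches on the first token:

```python
from lex_c import lex
from parser_c import find_sentence, dist_node_type

tokens = lex(b"enum color { RED = 1, GREEN, BLUE, };")
sentence = find_sentence(tokens)   # the whole declaration through the ";"
decl = dist_node_type(sentence)    # dispatches to dist_node_type_enum
print(decl.name)   # color
print(decl.body)   # [{'RED': 1}, {'GREEN': 2}, {'BLUE': 3}]
```

Implicit members continue counting from the last explicit value, matching C's enum numbering.
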
diff --git a/node_run.py b/node_run.py
new file mode 100644
index 0000000..a937d28
--- /dev/null
+++ b/node_run.py
@@ -0,0 +1,37 @@
+from lex_c import lex_token
+import lex_c
+from parser_c import node
+from parser_c import node_file
+from parser_c import node_variable_def
+from parser_c import node_struct_decl
+from parser_c import node_struct_def
+from parser_c import node_union_decl
+from parser_c import node_union_def
+from parser_c import node_enum_decl
+from parser_c import node_enum_def
+from parser_c import node_func_decl
+from parser_c import node_typedef
+from parser_c import node_func_def
+
+
+
+
+class variable(object):
+    def __init__(self,name:str,value=None,attr:list[str]|None=None):
+        self.name=name
+        self.value_=value
+        self.attr=attr if attr is not None else []  # no shared mutable default
+    def set_value(self,value):
+        if("const" in self.attr):
+            raise Exception(f"variable {self.name} is not writable")
+        self.value_=value
+    def value(self):
+        return self.value_
+
+
+class file(object):
+    def __init__(self):
+        self.variable_list:list[variable]=[]
+        self.function_list:list=[]
+        self.variable_type_list:list=[]
+
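
The runtime side is only stubbed out so far; `variable` wraps a value and enforces `const` on writes. A small usage sketch with arbitrary values:

```python
from node_run import variable

v = variable("x", value=41)
v.set_value(42)
print(v.value())   # 42

c = variable("lim", value=7, attr=["const"])
c.set_value(8)     # raises: variable lim is not writable
```
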
diff --git a/parser_c.py b/parser_c.py
index 11b75ac..2ab59e3 100644
--- a/parser_c.py
+++ b/parser_c.py
@@ -3,28 +3,160 @@ import sys
 import dataclasses
 from lex_c import lex_token
 from lex_c import lex
+import lex_c
+
+_NodeTypeTable=[
+    "file","vdecl","fdef"
+]
 
 @dataclasses.dataclass
 class node:
     name:str
-    next:None
-    chid:None
-    token_list:list[lex_token]
+    type:str="base"
+    token_list:list[lex_token]=dataclasses.field(default_factory=list)
 
-# variable declaration node
+# file node
 @dataclasses.dataclass
-class node_vdecl(node):
-    vvalue:None
-    vtype:str
-    vattr:list[str]
+class node_file(node):
+    type:str="file"
+    body:list=dataclasses.field(default_factory=list)
+
+# variable definition node
+@dataclasses.dataclass
+class node_variable_def(node):
+    type:str="variable_def"
+    vvalue:object=None
+    vtype:str="unknown"
+    vattr:list[str]=dataclasses.field(default_factory=list)
+
+# struct declaration node
+@dataclasses.dataclass
+class node_struct_decl(node):
+    type:str="struct_decl"
+
+# struct definition node
+@dataclasses.dataclass
+class node_struct_def(node):
+    type:str="struct_def"
+    body:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# union declaration node
+@dataclasses.dataclass
+class node_union_decl(node):
+    type:str="union_decl"
+
+# union definition node
+@dataclasses.dataclass
+class node_union_def(node):
+    type:str="union_def"
+    body:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# enum declaration node
+@dataclasses.dataclass
+class node_enum_decl(node):
+    type:str="enum_decl"
+
+# enum definition node
+@dataclasses.dataclass
+class node_enum_def(node):
+    type:str="enum_def"
+    body:list[dict]=dataclasses.field(default_factory=list)
+
+# function declaration node
+@dataclasses.dataclass
+class node_func_decl(node):
+    type:str="func_decl"
+    rettype:str="unknown"
+    retattr:list[str]=dataclasses.field(default_factory=list)
+    para:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# typedef node
+@dataclasses.dataclass
+class node_typedef(node):
+    type:str="typedef"
+    attr:list[str]=dataclasses.field(default_factory=list)
+    body:node=None
 
 # function definition node
 @dataclasses.dataclass
-class node_fdef(node):
-    rettype:str
+class node_func_def(node):
+    type:str="func_def"
+    rettype:str="unknown"
+    retattr:list[str]=dataclasses.field(default_factory=list)
+    para:list[node_variable_def]=dataclasses.field(default_factory=list)
+    body:list[node]=dataclasses.field(default_factory=list)
+
+
+# find the index of the token that closes token_list[0]
+def find_close(token_list:list[lex_token],token:tuple[int,int]):
+    if token_list[0].token!=token[0]:
+        return 0
+    num=0
+    for index,item in enumerate(token_list):
+        if(item.token==token[0]):
+            num+=1
+        elif(item.token==token[1]):
+            num-=1
+            if(num==0):
+                return index
+    raise Exception(f"no matching closing symbol {token[1]} found")
+
+# find one complete statement
+def find_sentence(token_list:list[lex_token]):
+    bracket_flag=False
+    index=0
+    while index<len(token_list):
+        if(token_list[index].token==lex_c.TOKEN("(")):
+            bracket_index=find_close(token_list[index:],(lex_c.TOKEN("("),lex_c.TOKEN(")")))
+            if(bracket_index>0):
+                bracket_flag=True
+                index+=bracket_index
+        elif(token_list[index].token==lex_c.TOKEN("{")):
+            bracket_index=find_close(token_list[index:],(lex_c.TOKEN("{"),lex_c.TOKEN("}")))
+            if(bracket_index>0):
+                index+=bracket_index
+            if(bracket_flag==True):
+                # a "(...)" followed by "{...}" is a function definition; no ";" follows
+                return token_list[:index+1]
+        elif(token_list[index].token==lex_c.TOKEN(";")):
+            return token_list[:index+1]
+        index+=1
+    raise Exception(f"no complete statement found")
+
+
+# determine which kind of node a statement produces
+def dist_node_type(token_list:list[lex_token]):
+    # imported here (not at module top) to avoid a circular import with node_declear
+    from node_declear import dist_node_type_struct
+    from node_declear import dist_node_type_union
+    from node_declear import dist_node_type_enum
+    from node_declear import dist_node_type_typedef
+    if(token_list[0].token==lex_c.TOKEN_EXTERN):
+        token_list=token_list[1:]
+    if(token_list[-1].token==lex_c.TOKEN(";")):
+        token_list=token_list[:-1]
+    if(token_list[0].token==lex_c.TOKEN_STRUCT):
+        return dist_node_type_struct(token_list=token_list)
+    if(token_list[0].token==lex_c.TOKEN_UNION):
+        return dist_node_type_union(token_list=token_list)
+    if(token_list[0].token==lex_c.TOKEN_ENUM):
+        return dist_node_type_enum(token_list=token_list)
+    if(token_list[0].token==lex_c.TOKEN_TYPEDEF):
+        return dist_node_type_typedef(token_list=token_list)
+
+    raise Exception(f"unhandled token type {token_list[0]}")
+
 
 if __name__ == "__main__":
-    with open("main.c",mode='rb') as f:
+    file_name="main.c"
+    with open(file_name,mode='rb') as f:
         token_list=lex(f.read())
+    file=node_file(name=file_name,token_list=token_list,body=[])
+    while len(token_list)>0:
+        sentence=find_sentence(token_list)
+        node_d=dist_node_type(sentence)
+        file.body.append(node_d)
+        print('found a statement:')
+        for item in sentence:
+            print(f"\t{item}")
+        token_list=token_list[len(sentence):]
\ No newline at end of file
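
`find_close` and `find_sentence` are the backbone of the statement splitter. A hand-driven sketch with a made-up source line; note that `find_close` returns an index relative to the sub-list it is given, or 0 when the list does not start with the opening token:

```python
import lex_c
from parser_c import find_close, find_sentence

tokens = lex_c.lex(b"struct s { int a; int b; };")
# tokens: struct(0) s(1) {(2) ... }(9) ;(10)
close = find_close(tokens[2:], (lex_c.TOKEN("{"), lex_c.TOKEN("}")))
print(2 + close)                      # absolute index of the matching "}"

sentence = find_sentence(tokens)      # runs through the braces to the ";"
print(len(sentence) == len(tokens))   # True: the declaration is one statement
```

The depth counter in `find_close` makes nested braces safe, and `bracket_flag` lets `find_sentence` treat `(...) {...}` function definitions as complete without a trailing `;`.
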