添加一些语法

This commit is contained in:
2024-12-03 08:53:09 +08:00
parent c736ef5b1b
commit dac9dad45d
5 changed files with 399 additions and 12 deletions

View File

@@ -34,6 +34,18 @@ TOKEN_UNION = 283,
TOKEN_STRING = 284,
TOKEN_DEFAULT = 285,
TOKEN_RETURN = 286,
# NOTE(review): the constants just above this group are written with a
# trailing comma, which in Python makes them 1-tuples rather than ints, while
# the ones below are plain ints -- confirm the mix is intended.
# Compound-assignment operator tokens ("+=", "-=", "*=", "/=", "<<=", ">>=").
TOKEN_ASSIG_ADD = 287
TOKEN_ASSIG_SUB = 288
TOKEN_ASSIG_MUL = 289
TOKEN_ASSIG_DIV = 290
TOKEN_ASSIG_LSH = 291
TOKEN_ASSIG_RSH = 292
# Keyword tokens for "extern", "float", "double", "short" and "long".
TOKEN_EXTERN = 293
TOKEN_FLOAT = 294
TOKEN_DOUBLE = 295
TOKEN_SHORT = 296
TOKEN_LONG = 297
def TOKEN(t:str):
    """Return the token id of a single-character mark.

    The id is the value of the first byte of ``t`` once encoded as UTF-8, so
    for an ASCII character it is simply its code point.
    """
    encoded = t.encode("utf-8")
    return encoded[0]
@@ -59,6 +71,11 @@ _KeyWordTable={
"union":TOKEN_UNION,
"default":TOKEN_DEFAULT,
"return":TOKEN_RETURN,
"extern":TOKEN_EXTERN,
"float":TOKEN_FLOAT,
"double":TOKEN_DOUBLE,
"short":TOKEN_SHORT,
"long":TOKEN_LONG,
}
_MarkTable={
@@ -70,6 +87,11 @@ _MarkTable={
"==":TOKEN_EQ,
"++":TOKEN_INC,
"--":TOKEN_DEC,
"+=":TOKEN_ASSIG_ADD,
"-=":TOKEN_ASSIG_SUB,
"*=":TOKEN_ASSIG_MUL,
"<<=":TOKEN_ASSIG_LSH,
">>=":TOKEN_ASSIG_RSH,
"=":TOKEN("="),
"!":TOKEN("!"),
"<":TOKEN("<"),
@@ -233,8 +255,29 @@ def lex(text:bytes):
c=lex_obj.read_operator_and_save(c)
elif isinstr(c,"\""):
c=lex_obj.read_str_and_save(c)
elif isinstr(c,"\\"):
c=lex_obj.get_next_char(c)
if(c!=TOKEN("\r") and c!=TOKEN("\n")):
raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
elif isinstr(c,"/"):
c=lex_obj.get_next_char()
if(c==TOKEN("/")):
while c!=TOKEN("\n"):
c=lex_obj.get_next_char()
elif(c==TOKEN("*")):
c_old=lex_obj.get_next_char()
c=lex_obj.get_next_char()
while not (c_old==TOKEN("*") and c==TOKEN("/")):
c_old=c
c=lex_obj.get_next_char()
c=lex_obj.get_next_char()
elif(c==TOKEN("=")):
lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
c=lex_obj.get_next_char()
else:
lex_obj.save_one_char_token(TOKEN("/"))
else:
raise Exception(f"err char {bytes([c])} at line:{lex_obj.line} pos:{lex_obj.pos}")
raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
# for item in lex_obj.token_list:
# print(f"{item}")
return lex_obj.token_list

47
main.c
View File

@@ -2,6 +2,49 @@
// 行注释
struct _struct_a;
typedef const struct _struct_a _typedef_struct_a;
struct _struct_a /* 块注释 */ {
int a;
int b;
};
enum _enum_a;
enum _enum_a {
Enum0=0,
Enum1,
Enum2,
};
// 暂不支持匿名枚举类型
// enum {
// Enumb0=0,
// Enumb1,
// Enumb2,
// };
union _union_a {
int a;
float b;
double c;
short d;
};
typedef int _typedef_int;
const char* get_type(int s) {
const char* ret;
switch (s)
@@ -23,3 +66,7 @@ const char* get_type(int s) {
return ret;
}
int main(){
return 0;
}

128
node_declear.py Normal file
View File

@@ -0,0 +1,128 @@
from lex_c import lex_token
import lex_c
from parser_c import node
from parser_c import node_file
from parser_c import node_variable_def
from parser_c import node_struct_decl
from parser_c import node_struct_def
from parser_c import node_union_decl
from parser_c import node_union_def
from parser_c import node_enum_decl
from parser_c import node_enum_def
from parser_c import node_func_decl
from parser_c import node_typedef
from parser_c import node_func_def
from parser_c import find_sentence
from parser_c import dist_node_type
from parser_c import find_close
def dist_node_type_struct(token_list:list[lex_token]):
    """Build a struct node from the tokens of one ``struct`` statement.

    ``struct NAME`` yields a ``node_struct_decl``. ``struct NAME { ... }``
    yields a ``node_struct_def`` whose body holds one node per member
    statement, each produced by ``dist_node_type``.

    Raises:
        Exception: if the tokens do not start with ``struct NAME``, if NAME is
            followed by something other than ``{``, or if the statement does
            not end with ``}``.
    """
    # Guard: the statement must open with `struct <identifier>`.
    if token_list[0].token != lex_c.TOKEN_STRUCT or token_list[1].token != lex_c.TOKEN_SYMBOL:
        raise Exception(f"语法错误 {token_list[0]}")
    # Only two tokens: a forward declaration.
    if len(token_list) == 2:
        return node_struct_decl(name=token_list[1].buff.decode("utf-8"), token_list=token_list)
    if token_list[2].token != lex_c.TOKEN("{"):
        raise Exception(f"语法错误 {token_list[0]}")
    if token_list[-1].token != lex_c.TOKEN("}"):
        raise Exception("没有出现预期的符号 '}'")
    members:list[node_variable_def] = []
    # Tokens strictly between the braces, consumed one statement at a time.
    remaining = token_list[3:-1]
    while remaining:
        statement = find_sentence(remaining)
        members.append(dist_node_type(token_list=statement))
        remaining = remaining[len(statement):]
    return node_struct_def(name=token_list[1].buff.decode("utf-8"), token_list=token_list, body=members)
def dist_node_type_union(token_list:list[lex_token]):
    """Build a union node from the tokens of one ``union`` statement.

    ``union NAME`` yields a ``node_union_decl``. ``union NAME { ... }`` yields
    a ``node_union_def`` whose body holds one node per member statement, each
    produced by ``dist_node_type``.

    Raises:
        Exception: if the tokens do not start with ``union NAME``, if NAME is
            followed by something other than ``{``, or if the statement does
            not end with ``}``.
    """
    # Guard: the statement must open with `union <identifier>`.
    if token_list[0].token != lex_c.TOKEN_UNION or token_list[1].token != lex_c.TOKEN_SYMBOL:
        raise Exception(f"语法错误 {token_list[0]}")
    # Only two tokens: a forward declaration.
    if len(token_list) == 2:
        return node_union_decl(name=token_list[1].buff.decode("utf-8"), token_list=token_list)
    if token_list[2].token != lex_c.TOKEN("{"):
        raise Exception(f"语法错误 {token_list[0]}")
    if token_list[-1].token != lex_c.TOKEN("}"):
        raise Exception("没有出现预期的符号 '}'")
    members:list[node_variable_def] = []
    # Tokens strictly between the braces, consumed one statement at a time.
    remaining = token_list[3:-1]
    while remaining:
        statement = find_sentence(remaining)
        members.append(dist_node_type(token_list=statement))
        remaining = remaining[len(statement):]
    return node_union_def(name=token_list[1].buff.decode("utf-8"), token_list=token_list, body=members)
def dist_node_type_enum(token_list:list[lex_token]):
    """Build an enum node from the tokens of one ``enum`` statement.

    ``enum NAME`` yields a ``node_enum_decl``. ``enum NAME { A=0, B, ... }``
    yields a ``node_enum_def`` whose body is a list of single-entry dicts
    ``{enumerator_name: value}`` in source order.

    Enumerator values follow C rules: an enumerator without ``= NUM`` takes
    the previous enumerator's value plus one, and the first one defaults to 0.
    A trailing comma after the last enumerator is accepted.

    Raises:
        Exception: on a malformed statement, a missing closing ``}``, an
            enumerator that is not an identifier, or a missing ``,``.
    """
    if(len(token_list)>1 and token_list[0].token==lex_c.TOKEN_ENUM
            and token_list[1].token==lex_c.TOKEN_SYMBOL):
        if(len(token_list)==2):
            return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
        if(token_list[2].token==lex_c.TOKEN("{")):
            if(token_list[-1].token!=lex_c.TOKEN("}")):
                raise Exception("没有出现预期的符号 '}'")
            token_list_local=token_list[3:-1]
            # Value the next enumerator receives when it has no explicit "= NUM".
            next_value=0
            v_list:list[dict]=[]
            while len(token_list_local)>0:
                head=token_list_local[0]
                # Reject anything that is not an identifier; otherwise the token
                # would never be consumed and the loop could not terminate.
                if(head.token!=lex_c.TOKEN_SYMBOL):
                    raise Exception(f"枚举成员必须是标识符 {head}")
                key=head.buff.decode("utf-8")
                # Check the length before looking ahead so that a final
                # enumerator without a trailing comma does not index past the end.
                if(len(token_list_local)>=3
                        and token_list_local[1].token==lex_c.TOKEN("=")
                        and token_list_local[2].token==lex_c.TOKEN_NUM):
                    value=int(token_list_local[2].buff.decode("utf-8"))
                    token_list_local=token_list_local[3:]
                else:
                    value=next_value
                    token_list_local=token_list_local[1:]
                v_list.append({key:value})
                next_value=value+1
                if(len(token_list_local)>0):
                    if(token_list_local[0].token!=lex_c.TOKEN(",")):
                        raise Exception(f"枚举类型应该使用 ',' 分隔符")
                    token_list_local=token_list_local[1:]
            return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
    raise Exception(f"语法错误 {token_list[0]}")
def dist_node_type_typedef(token_list:list[lex_token]):
    """Build a ``node_typedef`` from the tokens of one ``typedef`` statement.

    The last token must be an identifier and becomes the new type name. The
    tokens between ``typedef`` and that name are collected into ``attr``
    (qualifiers, ``struct``/``union`` plus tag name, built-in type keywords,
    ``*``, or a previously defined type name). When a struct/union body is
    given inline, the parsed definition node is stored as ``body``.

    Raises:
        Exception: if the statement does not start with ``typedef``, does not
            end with an identifier, names a struct/union tag without a new
            type name after it, or contains unexpected tokens before the name.
    """
    if(token_list[0].token!=lex_c.TOKEN_TYPEDEF):
        raise Exception(f"语法错误 {token_list[0]}")
    attr=[]
    # Keep the untouched statement for the resulting node; `token_list` is
    # consumed from the front below.
    token_list_local=token_list
    if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
        raise Exception(f"没有定义新类型 {token_list[-1]}")
    name=token_list[-1].buff.decode("utf-8")
    token_list=token_list[1:]
    while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
        attr.append(token_list[0].name)
        token_list=token_list[1:]
    if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
        attr.append(token_list[0].name)
        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
            # `struct TAG` with nothing after it: the tag itself was taken as
            # the new name, so no new type is actually being defined.
            if(len(token_list)<3):
                raise Exception(f"没有定义新类型 {token_list[-1]}")
            node_r=None
            attr.append(token_list[1].buff.decode("utf-8"))
            if(token_list[2].token==lex_c.TOKEN("{")):
                # Hand over the complete `struct TAG { ... }` span, keyword
                # included, because dist_node_type dispatches on the first token.
                node_r=dist_node_type(token_list=token_list[:-1])
            elif(token_list[2].token==lex_c.TOKEN("*")):
                attr.append(token_list[2].name)
            return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
    if(token_list[0].token==lex_c.TOKEN_SYMBOL):
        # A user-defined type that was itself introduced by typedef.
        attr.append(token_list[0].buff.decode("utf-8"))
        token_list=token_list[1:]
    else:
        # Built-in C types (and pointer stars).
        while(token_list[0].token in
                [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
                 lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
            attr.append(token_list[0].name)
            token_list=token_list[1:]
    # Only the new type name may be left at this point.
    if(len(token_list)>1):
        raise Exception(f"意外的token {token_list[0]}")
    return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)

37
node_run.py Normal file
View File

@@ -0,0 +1,37 @@
from lex_c import lex_token
import lex_c
from parser_c import node
from parser_c import node_file
from parser_c import node_variable_def
from parser_c import node_struct_decl
from parser_c import node_struct_def
from parser_c import node_union_decl
from parser_c import node_union_def
from parser_c import node_enum_decl
from parser_c import node_enum_def
from parser_c import node_func_decl
from parser_c import node_typedef
from parser_c import node_func_def
class variable(object):
    """A named runtime variable with an optional value and attribute list.

    Args:
        name: the variable's name.
        value: the initial value (defaults to ``None``).
        attr: attribute strings such as ``"const"``; a fresh empty list is
            created when omitted, so instances never share one list.
    """
    def __init__(self,name:str,value=None,attr:"list[str] | None"=None):
        self.name=name
        self.value_=value
        # Avoid a mutable default argument: each instance gets its own list.
        self.attr=[] if attr is None else attr

    def set_value(self,value):
        """Store ``value``; raise ``Exception`` if the variable is ``const``."""
        if("const" in self.attr):
            raise Exception(f"变量 {self.name} 不可写")
        self.value_=value

    def value(self):
        """Return the currently stored value."""
        return self.value_
class file(object):
    """Container for the variables, functions and variable types of one file."""

    def __init__(self):
        # Every instance starts with three independent, empty lists.
        self.variable_type_list:list=list()
        self.function_list:list=list()
        self.variable_list:list[variable]=list()

View File

@@ -3,28 +3,160 @@ import sys
import dataclasses
from lex_c import lex_token
from lex_c import lex
import lex_c
from node_declear import dist_node_type_struct
from node_declear import dist_node_type_union
from node_declear import dist_node_type_enum
from node_declear import dist_node_type_typedef
_NodeTypeTable=[
"file","vdecl","fdef"
]
@dataclasses.dataclass
class node:
name:str
next:None
chid:None
token_list:list[lex_token]
type:str="base"
token_list:list[lex_token]=dataclasses.field(default_factory=list)
# 变量声明节点
# 文件节点
@dataclasses.dataclass
class node_vdecl(node):
vvalue:None
vtype:str
vattr:list[str]
class node_file(node):
type:str="file"
body:list=dataclasses.field(default_factory=list)
# Variable definition node.
@dataclasses.dataclass
class node_variable_def(node):
    """Syntax-tree node for a variable definition."""
    type:str="variable_def"
    # NOTE(review): no annotation here, so dataclasses treats this as a plain
    # class attribute, not a field -- it is not an __init__ parameter. Confirm
    # whether that is intended.
    vvalue=None
    # Type name of the variable; "unknown" until set.
    vtype:str="unknown"
    # Attribute strings of the variable (presumably qualifiers -- TODO confirm).
    vattr:list[str]=dataclasses.field(default_factory=list)
# Struct declaration node.
@dataclasses.dataclass
class node_struct_decl(node):
    """Syntax-tree node for a struct declaration without a body."""
    type:str="struct_decl"
# Struct definition node.
@dataclasses.dataclass
class node_struct_def(node):
    """Syntax-tree node for a struct definition with a member body."""
    type:str="struct_def"
    # One node per member statement found between the braces.
    body:list[node_variable_def]=dataclasses.field(default_factory=list)
# Union declaration node.
@dataclasses.dataclass
class node_union_decl(node):
    """Syntax-tree node for a union declaration without a body."""
    type:str="union_decl"
# Union definition node.
@dataclasses.dataclass
class node_union_def(node):
    """Syntax-tree node for a union definition with a member body."""
    type:str="union_def"
    # One node per member statement found between the braces.
    body:list[node_variable_def]=dataclasses.field(default_factory=list)
# Enum declaration node.
@dataclasses.dataclass
class node_enum_decl(node):
    """Syntax-tree node for an enum declaration without a body."""
    type:str="enum_decl"
# Enum definition node.
@dataclasses.dataclass
class node_enum_def(node):
    """Syntax-tree node for an enum definition with its enumerators."""
    type:str="enum_def"
    # List of single-entry dicts, one per enumerator: {enumerator_name: value}.
    body:list[dict]=dataclasses.field(default_factory=list)
# Function declaration node.
@dataclasses.dataclass
class node_func_decl(node):
    """Syntax-tree node for a function declaration.

    NOTE(review): nothing in the visible code constructs this node yet, so the
    field meanings below are read from their names -- confirm.
    """
    type:str="func_decl"
    # Presumably the return type name; "unknown" until set.
    rettype:str="unknown"
    # Presumably attribute strings of the return type.
    retattr:list[str]=dataclasses.field(default_factory=list)
    # Presumably the parameter list, one variable-definition node each.
    para:list[node_variable_def]=dataclasses.field(default_factory=list)
# typedef node.
@dataclasses.dataclass
class node_typedef(node):
    """Syntax-tree node for a typedef statement; ``name`` is the new type name."""
    type:str="typedef"
    # Names of the tokens that make up the aliased type (qualifiers, keywords,
    # struct/union tag, "*", or an existing type name).
    attr:list[str]=dataclasses.field(default_factory=list)
    # Parsed struct/union definition when a body is given inline, else None.
    body:node=None
# 函数定义节点
@dataclasses.dataclass
class node_fdef(node):
rettype:str
class node_func_def(node):
type:str="func_def"
rettype:str="unknown"
retattr:list[str]=dataclasses.field(default_factory=list)
para:list[node_variable_def]=dataclasses.field(default_factory=list)
body:list[node]=dataclasses.field(default_factory=list)
# Find the matching closing bracket.
def find_close(token_list:list[lex_token],token:tuple[int,int]):
    """Return the index of the bracket that closes the one at index 0.

    ``token`` is an ``(opening, closing)`` pair of token ids. Nested pairs are
    tracked with a depth counter. If ``token_list`` does not start with the
    opening token, 0 is returned.

    Raises:
        Exception: if the opening bracket is never closed.
    """
    opener = token[0]
    if token_list[0].token != opener:
        return 0
    depth = 0
    for position, current in enumerate(token_list):
        kind = current.token
        if kind == opener:
            depth += 1
            continue
        if kind == token[1]:
            depth -= 1
            # Depth back to zero: this closer matches the bracket at index 0.
            if depth == 0:
                return position
    raise Exception(f"没有找到闭合的符号 {token[1]}")
# Find one complete statement.
def find_sentence(token_list:list[lex_token]):
    """Return the leading slice of ``token_list`` that forms one statement.

    A statement ends at the first top-level ``;``, or at the ``}`` closing a
    brace block when a parenthesised group was seen earlier in the statement.
    Parenthesised and braced groups are skipped as a whole via ``find_close``.

    Raises:
        Exception: if no statement end is found (``find_close`` may also raise
            on an unclosed bracket).
    """
    seen_paren = False
    cursor = 0
    while cursor < len(token_list):
        kind = token_list[cursor].token
        if kind == lex_c.TOKEN("("):
            offset = find_close(token_list[cursor:], (lex_c.TOKEN("("), lex_c.TOKEN(")")))
            if offset > 0:
                seen_paren = True
                # Jump to the matching ")".
                cursor += offset
        elif kind == lex_c.TOKEN("{"):
            offset = find_close(token_list[cursor:], (lex_c.TOKEN("{"), lex_c.TOKEN("}")))
            if offset > 0:
                # Jump to the matching "}".
                cursor += offset
                if seen_paren:
                    return token_list[:cursor + 1]
        elif kind == lex_c.TOKEN(";"):
            return token_list[:cursor + 1]
        cursor += 1
    raise Exception(f"没有找到完整的语句")
# Determine the kind of a statement.
def dist_node_type(token_list:list[lex_token]):
    """Turn the tokens of one statement into the matching syntax-tree node.

    A leading ``extern`` and a trailing ``;`` are stripped first. The first
    remaining token then selects the handler: ``struct``, ``union``, ``enum``
    or ``typedef``.

    Raises:
        Exception: if the first token is none of the handled kinds.
    """
    if token_list[0].token == lex_c.TOKEN_EXTERN:
        token_list = token_list[1:]
    if token_list[-1].token == lex_c.TOKEN(";"):
        token_list = token_list[:-1]
    # (token id, handler) pairs, tried in order with plain equality.
    handlers = (
        (lex_c.TOKEN_STRUCT, dist_node_type_struct),
        (lex_c.TOKEN_UNION, dist_node_type_union),
        (lex_c.TOKEN_ENUM, dist_node_type_enum),
        (lex_c.TOKEN_TYPEDEF, dist_node_type_typedef),
    )
    leading = token_list[0].token
    for kind, handler in handlers:
        if leading == kind:
            return handler(token_list=token_list)
    raise Exception(f"无法处理的token类型 {token_list[0]}")
if __name__ == "__main__":
with open("main.c",mode='rb') as f:
file_name="main.c"
with open(file_name,mode='rb') as f:
token_list=lex(f.read())
file=node_file(name=file_name,token_list=token_list,body=[])
while len(token_list)>0:
sentence=find_sentence(token_list)
node_d=dist_node_type(sentence)
file.body.append(node_d)
print('找到一个语句:')
for item in sentence:
print(f"\t{item}")
token_list=token_list[len(sentence):]