添加一些语法
This commit is contained in:
45
lex_c.py
45
lex_c.py
@@ -34,6 +34,18 @@ TOKEN_UNION = 283,
|
||||
TOKEN_STRING = 284,
|
||||
TOKEN_DEFAULT = 285,
|
||||
TOKEN_RETURN = 286,
|
||||
TOKEN_ASSIG_ADD = 287
|
||||
TOKEN_ASSIG_SUB = 288
|
||||
TOKEN_ASSIG_MUL = 289
|
||||
TOKEN_ASSIG_DIV = 290
|
||||
TOKEN_ASSIG_LSH = 291
|
||||
TOKEN_ASSIG_RSH = 292
|
||||
TOKEN_EXTERN = 293
|
||||
TOKEN_FLOAT = 294
|
||||
TOKEN_DOUBLE = 295
|
||||
TOKEN_SHORT = 296
|
||||
TOKEN_LONG = 297
|
||||
|
||||
|
||||
def TOKEN(t:str):
    """Return the first byte of ``t`` after UTF-8 encoding, as an int.

    Only the first encoded byte is used; any further characters in ``t``
    are ignored. An empty string raises ``IndexError``.
    """
    encoded=bytes(t,"utf-8")
    return encoded[0]
|
||||
@@ -59,6 +71,11 @@ _KeyWordTable={
|
||||
"union":TOKEN_UNION,
|
||||
"default":TOKEN_DEFAULT,
|
||||
"return":TOKEN_RETURN,
|
||||
"extern":TOKEN_EXTERN,
|
||||
"float":TOKEN_FLOAT,
|
||||
"double":TOKEN_DOUBLE,
|
||||
"short":TOKEN_SHORT,
|
||||
"long":TOKEN_LONG,
|
||||
}
|
||||
|
||||
_MarkTable={
|
||||
@@ -70,6 +87,11 @@ _MarkTable={
|
||||
"==":TOKEN_EQ,
|
||||
"++":TOKEN_INC,
|
||||
"--":TOKEN_DEC,
|
||||
"+=":TOKEN_ASSIG_ADD,
|
||||
"-=":TOKEN_ASSIG_SUB,
|
||||
"*=":TOKEN_ASSIG_MUL,
|
||||
"<<=":TOKEN_ASSIG_LSH,
|
||||
">>=":TOKEN_ASSIG_RSH,
|
||||
"=":TOKEN("="),
|
||||
"!":TOKEN("!"),
|
||||
"<":TOKEN("<"),
|
||||
@@ -233,8 +255,29 @@ def lex(text:bytes):
|
||||
c=lex_obj.read_operator_and_save(c)
|
||||
elif isinstr(c,"\""):
|
||||
c=lex_obj.read_str_and_save(c)
|
||||
elif isinstr(c,"\\"):
|
||||
c=lex_obj.get_next_char(c)
|
||||
if(c!=TOKEN("\r") and c!=TOKEN("\n")):
|
||||
raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
|
||||
elif isinstr(c,"/"):
|
||||
c=lex_obj.get_next_char()
|
||||
if(c==TOKEN("/")):
|
||||
while c!=TOKEN("\n"):
|
||||
c=lex_obj.get_next_char()
|
||||
elif(c==TOKEN("*")):
|
||||
c_old=lex_obj.get_next_char()
|
||||
c=lex_obj.get_next_char()
|
||||
while not (c_old==TOKEN("*") and c==TOKEN("/")):
|
||||
c_old=c
|
||||
c=lex_obj.get_next_char()
|
||||
c=lex_obj.get_next_char()
|
||||
elif(c==TOKEN("=")):
|
||||
lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
|
||||
c=lex_obj.get_next_char()
|
||||
else:
|
||||
raise Exception(f"err char {bytes([c])} at line:{lex_obj.line} pos:{lex_obj.pos}")
|
||||
lex_obj.save_one_char_token(TOKEN("/"))
|
||||
else:
|
||||
raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
|
||||
# for item in lex_obj.token_list:
|
||||
# print(f"{item}")
|
||||
return lex_obj.token_list
|
||||
|
47
main.c
47
main.c
@@ -2,6 +2,49 @@
|
||||
|
||||
|
||||
|
||||
// 行注释
|
||||
struct _struct_a;
|
||||
|
||||
typedef const struct _struct_a _typedef_struct_a;
|
||||
|
||||
struct _struct_a /* 块注释 */ {
|
||||
int a;
|
||||
int b;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
enum _enum_a;
|
||||
|
||||
enum _enum_a {
|
||||
Enum0=0,
|
||||
Enum1,
|
||||
Enum2,
|
||||
};
|
||||
|
||||
// 暂不支持匿名枚举类型
|
||||
// enum {
|
||||
// Enumb0=0,
|
||||
// Enumb1,
|
||||
// Enumb2,
|
||||
// };
|
||||
|
||||
|
||||
union _union_a {
|
||||
int a;
|
||||
float b;
|
||||
double c;
|
||||
short d;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
typedef int _typedef_int;
|
||||
|
||||
|
||||
|
||||
const char* get_type(int s) {
|
||||
const char* ret;
|
||||
switch (s)
|
||||
@@ -23,3 +66,7 @@ const char* get_type(int s) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int main(){
|
||||
return 0;
|
||||
}
|
||||
|
128
node_declear.py
Normal file
128
node_declear.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from lex_c import lex_token
|
||||
import lex_c
|
||||
from parser_c import node
|
||||
from parser_c import node_file
|
||||
from parser_c import node_variable_def
|
||||
from parser_c import node_struct_decl
|
||||
from parser_c import node_struct_def
|
||||
from parser_c import node_union_decl
|
||||
from parser_c import node_union_def
|
||||
from parser_c import node_enum_decl
|
||||
from parser_c import node_enum_def
|
||||
from parser_c import node_func_decl
|
||||
from parser_c import node_typedef
|
||||
from parser_c import node_func_def
|
||||
|
||||
from parser_c import find_sentence
|
||||
from parser_c import dist_node_type
|
||||
from parser_c import find_close
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def _dist_node_type_aggregate(token_list:list[lex_token],keyword,decl_cls,def_cls):
    """Parse a ``struct``/``union`` statement; shared by both public parsers.

    Args:
        token_list: tokens of one statement, without the trailing ';'.
        keyword: token value the statement must start with.
        decl_cls: node class built for ``<keyword> name``.
        def_cls: node class built for ``<keyword> name { ... }``.

    Returns:
        A ``decl_cls`` instance for a two-token statement, otherwise a
        ``def_cls`` instance whose ``body`` holds one node per member statement.

    Raises:
        Exception: on any other token shape, or when the body is not closed
            by '}'.
    """
    # The length guard turns a truncated statement into the regular syntax
    # error instead of an IndexError on token_list[1].
    if(len(token_list)>=2 and token_list[0].token==keyword
            and token_list[1].token==lex_c.TOKEN_SYMBOL):
        if(len(token_list)==2):
            return decl_cls(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
        if(token_list[2].token==lex_c.TOKEN("{")):
            if(token_list[-1].token!=lex_c.TOKEN("}")):
                raise Exception("没有出现预期的符号 '}'")
            members:list[node_variable_def]=[]
            # Everything between the braces, consumed one statement at a time.
            remaining=token_list[3:-1]
            while len(remaining)>0:
                sentence=find_sentence(remaining)
                members.append(dist_node_type(token_list=sentence))
                remaining=remaining[len(sentence):]
            return def_cls(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=members)
    raise Exception(f"语法错误 {token_list[0]}")


def dist_node_type_struct(token_list:list[lex_token]):
    """Parse a ``struct`` declaration or definition.

    Returns ``node_struct_decl`` for ``struct name`` and ``node_struct_def``
    for ``struct name { ... }``; raises ``Exception`` for anything else.
    """
    return _dist_node_type_aggregate(
        token_list,lex_c.TOKEN_STRUCT,node_struct_decl,node_struct_def)


def dist_node_type_union(token_list:list[lex_token]):
    """Parse a ``union`` declaration or definition.

    Returns ``node_union_decl`` for ``union name`` and ``node_union_def``
    for ``union name { ... }``; raises ``Exception`` for anything else.
    """
    return _dist_node_type_aggregate(
        token_list,lex_c.TOKEN_UNION,node_union_decl,node_union_def)
|
||||
|
||||
|
||||
|
||||
def dist_node_type_enum(token_list:list[lex_token]):
    """Parse an ``enum`` declaration or definition.

    Args:
        token_list: tokens of one statement, without the trailing ';'.

    Returns:
        ``node_enum_decl`` for ``enum name``; ``node_enum_def`` for
        ``enum name { ... }``, whose ``body`` is a list of one-entry dicts
        ``{enumerator_name: value}`` in source order.

    Raises:
        Exception: on any other token shape, a missing '}', or enumerators
            that are not separated by ','.
    """
    if(len(token_list)>=2 and token_list[0].token==lex_c.TOKEN_ENUM
            and token_list[1].token==lex_c.TOKEN_SYMBOL):
        if(len(token_list)==2):
            return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
        if(token_list[2].token==lex_c.TOKEN("{")):
            if(token_list[-1].token!=lex_c.TOKEN("}")):
                raise Exception("没有出现预期的符号 '}'")
            token_list_local=token_list[3:-1]
            # Value for the next enumerator without "= N": 0 for the first
            # one, previous value + 1 afterwards (C enumeration rules).
            next_value=0
            v_list:list[dict]=[]
            while len(token_list_local)>0:
                if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
                    key=token_list_local[0].buff.decode("utf-8")
                    # The length check keeps a final enumerator without a
                    # trailing ',' from indexing past the end of the list.
                    if(len(token_list_local)>=3
                            and token_list_local[1].token==lex_c.TOKEN("=")
                            and token_list_local[2].token==lex_c.TOKEN_NUM):
                        # int() accepts decimal text only.
                        value=int(token_list_local[2].buff.decode("utf-8"))
                        token_list_local=token_list_local[3:]
                    else:
                        value=next_value
                        token_list_local=token_list_local[1:]
                    v_list.append({key:value})
                    next_value=value+1
                if(len(token_list_local)>0):
                    if(token_list_local[0].token!=lex_c.TOKEN(",")):
                        raise Exception("枚举类型应该使用 ',' 分隔符")
                    token_list_local=token_list_local[1:]
            return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
    raise Exception(f"语法错误 {token_list[0]}")
|
||||
|
||||
|
||||
def dist_node_type_typedef(token_list:list[lex_token]):
    """Parse a ``typedef`` statement into a ``node_typedef``.

    Args:
        token_list: tokens of one statement, without the trailing ';'. The
            last token must be a symbol; it becomes the new type name.

    Returns:
        ``node_typedef`` whose ``attr`` lists the qualifier/type words between
        ``typedef`` and the new name. ``body`` is the parsed struct/union
        definition when one is written inline, otherwise ``None``.

    Raises:
        Exception: if the statement does not start with ``typedef``, does not
            end with a symbol, names no underlying type, or has unexpected
            tokens before the new name.
    """
    if(token_list[0].token!=lex_c.TOKEN_TYPEDEF):
        raise Exception(f"语法错误 {token_list[0]}")
    attr=[]
    # Keep the full statement for the resulting node; token_list is consumed below.
    token_list_local=token_list
    if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
        raise Exception(f"没有定义新类型 {token_list[-1]}")
    name=token_list[-1].buff.decode("utf-8")
    token_list=token_list[1:]
    # Leading qualifiers. The loop cannot run off the end: the last token is
    # a symbol and therefore never matches.
    while token_list[0].token in (lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST):
        attr.append(token_list[0].name)
        token_list=token_list[1:]
    if(token_list[0].token in (lex_c.TOKEN_STRUCT,lex_c.TOKEN_UNION)):
        attr.append(token_list[0].name)
        if(token_list[1].token==lex_c.TOKEN_SYMBOL):
            if(len(token_list)<3):
                # "typedef struct a": the tag is the last token, so there is
                # no separate new type name.
                raise Exception(f"没有定义新类型 {token_list[-1]}")
            node_r=None
            attr.append(token_list[1].buff.decode("utf-8"))
            if(token_list[2].token==lex_c.TOKEN("{")):
                # Hand over "struct tag { ... }" including the keyword, minus
                # the trailing new type name, so dist_node_type can dispatch it.
                node_r=dist_node_type(token_list=token_list[:-1])
            elif(token_list[2].token==lex_c.TOKEN("*")):
                attr.append(token_list[2].name)
            return node_typedef(name=name,token_list=token_list_local,attr=attr,body=node_r)
    if(token_list[0].token==lex_c.TOKEN_SYMBOL):
        # A user-defined type (previously introduced with typedef).
        attr.append(token_list[0].buff.decode("utf-8"))
        token_list=token_list[1:]
    else:
        # Built-in C types and pointer stars.
        builtin_tokens=(lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,
                        lex_c.TOKEN_FLOAT,lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*"))
        while(token_list[0].token in builtin_tokens):
            attr.append(token_list[0].name)
            token_list=token_list[1:]
    # Exactly the new type name must be left over.
    if(len(token_list)>1):
        raise Exception(f"意外的token {token_list[0]}")
    if(len(token_list)==0):
        # "typedef b": the only symbol was consumed as the underlying type.
        raise Exception(f"没有定义新类型 {token_list_local[-1]}")
    return node_typedef(name=name,token_list=token_list_local,attr=attr,body=None)
|
||||
|
||||
|
37
node_run.py
Normal file
37
node_run.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from lex_c import lex_token
|
||||
import lex_c
|
||||
from parser_c import node
|
||||
from parser_c import node_file
|
||||
from parser_c import node_variable_def
|
||||
from parser_c import node_struct_decl
|
||||
from parser_c import node_struct_def
|
||||
from parser_c import node_union_decl
|
||||
from parser_c import node_union_def
|
||||
from parser_c import node_enum_decl
|
||||
from parser_c import node_enum_def
|
||||
from parser_c import node_func_decl
|
||||
from parser_c import node_typedef
|
||||
from parser_c import node_func_def
|
||||
|
||||
|
||||
|
||||
|
||||
class variable(object):
    """A named value with a list of attribute keywords such as "const"."""

    def __init__(self,name:str,value=None,attr:"list[str]|None"=None):
        """Store the name, the initial value and the attribute list.

        When ``attr`` is omitted each instance gets its own empty list; a
        shared ``[]`` default would leak attributes between variables.
        """
        self.name=name
        self.value_=value
        self.attr=attr if attr is not None else []

    def set_value(self,value):
        """Replace the stored value.

        Raises:
            Exception: if "const" is among the variable's attributes.
        """
        if("const" in self.attr):
            raise Exception(f"变量 {self.name} 不可写")
        self.value_=value

    def value(self):
        """Return the stored value."""
        return self.value_
|
||||
|
||||
|
||||
class file(object):
    # Holds three lists that all start out empty: variables, functions and
    # variable types.
    def __init__(self):
        self.variable_list:list[variable]=list()
        self.function_list:list=list()
        self.variable_type_list:list=list()
|
||||
|
154
parser_c.py
154
parser_c.py
@@ -3,28 +3,160 @@ import sys
|
||||
import dataclasses
|
||||
from lex_c import lex_token
|
||||
from lex_c import lex
|
||||
import lex_c
|
||||
from node_declear import dist_node_type_struct
|
||||
from node_declear import dist_node_type_union
|
||||
from node_declear import dist_node_type_enum
|
||||
from node_declear import dist_node_type_typedef
|
||||
|
||||
_NodeTypeTable=[
|
||||
"file","vdecl","fdef"
|
||||
]
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class node:
|
||||
name:str
|
||||
next:None
|
||||
chid:None
|
||||
token_list:list[lex_token]
|
||||
type:str="base"
|
||||
token_list:list[lex_token]=dataclasses.field(default_factory=list)
|
||||
|
||||
# 变量声明节点
|
||||
# 文件节点
|
||||
@dataclasses.dataclass
|
||||
class node_vdecl(node):
|
||||
vvalue:None
|
||||
vtype:str
|
||||
vattr:list[str]
|
||||
class node_file(node):
|
||||
type:str="file"
|
||||
body:list=dataclasses.field(default_factory=list)
|
||||
|
||||
# Variable definition node
@dataclasses.dataclass
class node_variable_def(node):
    # Tag identifying this node kind.
    type: str = "variable_def"
    # No annotation, so this is a plain class attribute and not a dataclass
    # field: it is not an __init__ parameter.
    vvalue = None
    vtype: str = "unknown"
    vattr: list[str] = dataclasses.field(default_factory=list)
|
||||
|
||||
# Struct declaration node
@dataclasses.dataclass
class node_struct_decl(node):
    # Tag identifying this node kind.
    type: str = "struct_decl"
|
||||
|
||||
# Struct definition node
@dataclasses.dataclass
class node_struct_def(node):
    # Tag identifying this node kind.
    type: str = "struct_def"
    # Member nodes; each instance gets its own list.
    body: list[node_variable_def] = dataclasses.field(default_factory=list)
|
||||
|
||||
# Union declaration node
@dataclasses.dataclass
class node_union_decl(node):
    # Tag identifying this node kind.
    type: str = "union_decl"
|
||||
|
||||
# Union definition node
@dataclasses.dataclass
class node_union_def(node):
    # Tag identifying this node kind.
    type: str = "union_def"
    # Member nodes; each instance gets its own list.
    body: list[node_variable_def] = dataclasses.field(default_factory=list)
|
||||
|
||||
# Enum declaration node
@dataclasses.dataclass
class node_enum_decl(node):
    # Tag identifying this node kind.
    type: str = "enum_decl"
|
||||
|
||||
# Enum definition node
@dataclasses.dataclass
class node_enum_def(node):
    # Tag identifying this node kind.
    type: str = "enum_def"
    # Enumerator entries as dicts; each instance gets its own list.
    body: list[dict] = dataclasses.field(default_factory=list)
|
||||
|
||||
# Function declaration node
@dataclasses.dataclass
class node_func_decl(node):
    # Tag identifying this node kind.
    type: str = "func_decl"
    rettype: str = "unknown"
    # Mutable fields use default_factory so each instance gets its own list.
    retattr: list[str] = dataclasses.field(default_factory=list)
    para: list[node_variable_def] = dataclasses.field(default_factory=list)
|
||||
|
||||
# typedef node
@dataclasses.dataclass
class node_typedef(node):
    # Tag identifying this node kind.
    type: str = "typedef"
    # Each instance gets its own attr list.
    attr: list[str] = dataclasses.field(default_factory=list)
    # Defaults to None.
    body: node = None
|
||||
|
||||
# 函数定义节点
|
||||
@dataclasses.dataclass
|
||||
class node_fdef(node):
|
||||
rettype:str
|
||||
class node_func_def(node):
|
||||
type:str="func_def"
|
||||
rettype:str="unknown"
|
||||
retattr:list[str]=dataclasses.field(default_factory=list)
|
||||
para:list[node_variable_def]=dataclasses.field(default_factory=list)
|
||||
body:list[node]=dataclasses.field(default_factory=list)
|
||||
|
||||
|
||||
# Find the closing bracket
def find_close(token_list:list[lex_token],token:tuple[int,int]):
    """Return the index of the token that closes the bracket at index 0.

    Args:
        token_list: tokens to scan; each item exposes a ``token`` attribute.
        token: ``(opening, closing)`` token values.

    Returns:
        0 when the first token is not the opening bracket, otherwise the
        index at which the nesting depth returns to zero.

    Raises:
        Exception: if the end of the list is reached while still nested.
    """
    opener=token[0]
    if token_list[0].token!=opener:
        return 0
    closer=token[1]
    depth=0
    for position,item in enumerate(token_list):
        kind=item.token
        if(kind==opener):
            depth+=1
        elif(kind==closer):
            depth-=1
        if(depth==0):
            return position
    raise Exception(f"没有找到闭合的符号 {token[1]}")
|
||||
|
||||
# Find one complete statement
def find_sentence(token_list:list[lex_token]):
    """Return the leading slice of ``token_list`` that forms one statement.

    A statement ends at the first ';' outside any bracket pair, or at the
    '}' closing a brace block when a parenthesised group was seen earlier
    in the same statement. Bracketed groups are skipped as a whole.

    Raises:
        Exception: if the list ends before a statement is complete, or (from
            ``find_close``) if a bracket is never closed.
    """
    paren=(lex_c.TOKEN("("),lex_c.TOKEN(")"))
    brace=(lex_c.TOKEN("{"),lex_c.TOKEN("}"))
    semicolon=lex_c.TOKEN(";")
    seen_paren=False
    cursor=0
    while cursor<len(token_list):
        current=token_list[cursor].token
        if(current==paren[0]):
            offset=find_close(token_list[cursor:],paren)
            if(offset>0):
                seen_paren=True
                # Jump to the matching ')'.
                cursor+=offset
        elif(current==brace[0]):
            offset=find_close(token_list[cursor:],brace)
            if(offset>0):
                # Jump to the matching '}'.
                cursor+=offset
            if(seen_paren):
                return token_list[:cursor+1]
        elif(current==semicolon):
            return token_list[:cursor+1]
        cursor+=1
    raise Exception("没有找到完整的语句")
|
||||
|
||||
|
||||
|
||||
# Determine the kind of a statement
def dist_node_type(token_list:list[lex_token]):
    """Dispatch one statement to the parser matching its first keyword.

    A leading ``extern`` and a trailing ';' are dropped first. Statements
    starting with ``struct``, ``union``, ``enum`` or ``typedef`` are passed
    to the corresponding ``dist_node_type_*`` function.

    Raises:
        Exception: if the first remaining token is none of those keywords.
    """
    if(token_list[0].token==lex_c.TOKEN_EXTERN):
        token_list=token_list[1:]
    if(token_list[-1].token==lex_c.TOKEN(";")):
        token_list=token_list[:-1]
    head=token_list[0].token
    handlers=(
        (lex_c.TOKEN_STRUCT,dist_node_type_struct),
        (lex_c.TOKEN_UNION,dist_node_type_union),
        (lex_c.TOKEN_ENUM,dist_node_type_enum),
        (lex_c.TOKEN_TYPEDEF,dist_node_type_typedef),
    )
    for keyword,handler in handlers:
        if(head==keyword):
            return handler(token_list=token_list)
    raise Exception(f"无法处理的token类型 {token_list[0]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
with open("main.c",mode='rb') as f:
|
||||
file_name="main.c"
|
||||
with open(file_name,mode='rb') as f:
|
||||
token_list=lex(f.read())
|
||||
file=node_file(name=file_name,token_list=token_list,body=[])
|
||||
while len(token_list)>0:
|
||||
sentence=find_sentence(token_list)
|
||||
node_d=dist_node_type(sentence)
|
||||
file.body.append(node_d)
|
||||
print('找到一个语句:')
|
||||
for item in sentence:
|
||||
print(f"\t{item}")
|
||||
token_list=token_list[len(sentence):]
|
Reference in New Issue
Block a user