添加一些语法

2024-12-03 08:53:09 +08:00
parent c736ef5b1b
commit dac9dad45d
5 changed files with 399 additions and 12 deletions
--- a/lex_c.py
+++ b/lex_c.py
@@ -34,6 +34,18 @@ TOKEN_UNION = 283,
 TOKEN_STRING = 284,
 TOKEN_DEFAULT = 285,
 TOKEN_RETURN = 286,
+# Compound-assignment operator tokens.
+# NOTE(review): these are plain ints, while the constants above end with a
+# trailing comma (which makes each of them a 1-tuple) — confirm the mix is intended.
+TOKEN_ASSIG_ADD = 287
+TOKEN_ASSIG_SUB = 288
+TOKEN_ASSIG_MUL = 289
+TOKEN_ASSIG_DIV = 290
+TOKEN_ASSIG_LSH = 291
+TOKEN_ASSIG_RSH = 292
+# Additional keyword tokens.
+TOKEN_EXTERN = 293
+TOKEN_FLOAT = 294
+TOKEN_DOUBLE = 295
+TOKEN_SHORT = 296
+TOKEN_LONG = 297
+

 def TOKEN(t:str):
  return t.encode("utf-8")[0]
@@ -59,6 +71,11 @@ _KeyWordTable={
  "union":TOKEN_UNION,
  "default":TOKEN_DEFAULT,
  "return":TOKEN_RETURN,
+  "extern":TOKEN_EXTERN,
+  "float":TOKEN_FLOAT,
+  "double":TOKEN_DOUBLE,
+  "short":TOKEN_SHORT,
+  "long":TOKEN_LONG,
 }

 _MarkTable={
@@ -70,6 +87,11 @@ _MarkTable={
  "==":TOKEN_EQ,
  "++":TOKEN_INC,
  "--":TOKEN_DEC,
+  "+=":TOKEN_ASSIG_ADD,
+  "-=":TOKEN_ASSIG_SUB,
+  "*=":TOKEN_ASSIG_MUL,
+  "<<=":TOKEN_ASSIG_LSH,
+  ">>=":TOKEN_ASSIG_RSH,
  "=":TOKEN("="),
  "!":TOKEN("!"),
  "<":TOKEN("<"),
@@ -233,8 +255,29 @@ def lex(text:bytes):
      c=lex_obj.read_operator_and_save(c)
    elif isinstr(c,"\""):
      c=lex_obj.read_str_and_save(c)
+    elif isinstr(c,"\\"):
+      c=lex_obj.get_next_char(c)
+      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
+        raise Exception(f"符号 '\\' 必须在行末, line:{lex_obj.line} pos:{lex_obj.pos}")
+    elif isinstr(c,"/"):
+      c=lex_obj.get_next_char()
+      if(c==TOKEN("/")):
+        while c!=TOKEN("\n"):
+          c=lex_obj.get_next_char()
+      elif(c==TOKEN("*")):
+        c_old=lex_obj.get_next_char()
+        c=lex_obj.get_next_char()
+        while not (c_old==TOKEN("*") and c==TOKEN("/")):
+          c_old=c
+          c=lex_obj.get_next_char()
+        c=lex_obj.get_next_char()
+      elif(c==TOKEN("=")):
+        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
+        c=lex_obj.get_next_char()
+      else:
+        lex_obj.save_one_char_token(TOKEN("/"))
    else:
-      raise Exception(f"err char {bytes([c])} at line:{lex_obj.line} pos:{lex_obj.pos}")
+      raise Exception(f"未知的字符 {bytes([c])}, line:{lex_obj.line} pos:{lex_obj.pos}")
  # for item in lex_obj.token_list:
  #   print(f"{item}")
  return lex_obj.token_list
--- a/main.c
+++ b/main.c
@@ -2,6 +2,49 @@



+// 行注释
+struct _struct_a;
+
+typedef const struct _struct_a _typedef_struct_a;
+
+struct _struct_a /* 块注释 */ {
+  int a;
+  int b;
+};
+
+
+
+
+enum _enum_a;
+
+enum _enum_a {
+  Enum0=0,
+  Enum1,
+  Enum2,
+};
+
+// 暂不支持匿名枚举类型
+// enum {
+//   Enumb0=0,
+//   Enumb1,
+//   Enumb2,
+// };
+
+
+union _union_a {
+  int a;
+  float b;
+  double c;
+  short d;
+};
+
+
+
+
+typedef int _typedef_int;
+
+
+
 const char* get_type(int s) {
  const char* ret;
  switch (s)
@@ -23,3 +66,7 @@ const char* get_type(int s) {
  return ret;
 }

+
+int main(){
+  return 0;
+}
--- a/node_declear.py
+++ b/node_declear.py
@@ -0,0 +1,128 @@
+from lex_c import lex_token
+import lex_c
+from parser_c import node
+from parser_c import node_file
+from parser_c import node_variable_def
+from parser_c import node_struct_decl
+from parser_c import node_struct_def
+from parser_c import node_union_decl
+from parser_c import node_union_def
+from parser_c import node_enum_decl
+from parser_c import node_enum_def
+from parser_c import node_func_decl
+from parser_c import node_typedef
+from parser_c import node_func_def
+
+from parser_c import find_sentence
+from parser_c import dist_node_type
+from parser_c import find_close
+
+
+
+
+
+def dist_node_type_struct(token_list:list[lex_token]):
+  """Build a struct node from the tokens of one `struct` statement.
+
+  Exactly two tokens (`struct NAME`) give a node_struct_decl.
+  `struct NAME { ... }` gives a node_struct_def whose body holds the result
+  of dist_node_type for every sentence found between the braces.
+  Any other token sequence raises Exception.
+  """
+  is_named_struct=(token_list[0].token==lex_c.TOKEN_STRUCT
+                   and token_list[1].token==lex_c.TOKEN_SYMBOL)
+  if is_named_struct and len(token_list)==2:
+    # Only the keyword and the tag name are present: a declaration.
+    return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+  if not is_named_struct or token_list[2].token!=lex_c.TOKEN("{"):
+    raise Exception(f"语法错误 {token_list[0]}")
+  if token_list[-1].token!=lex_c.TOKEN("}"):
+    raise Exception("没有出现预期的符号 '}'")
+  members:list[node_variable_def]=[]
+  # Tokens strictly between the opening and the closing brace.
+  remaining=token_list[3:-1]
+  while remaining:
+    stmt=find_sentence(remaining)
+    members.append(dist_node_type(token_list=stmt))
+    remaining=remaining[len(stmt):]
+  return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=members)
+
+
+
+def dist_node_type_union(token_list:list[lex_token]):
+  """Build a union node from the tokens of one `union` statement.
+
+  Exactly two tokens (`union NAME`) give a node_union_decl.
+  `union NAME { ... }` gives a node_union_def whose body holds the result
+  of dist_node_type for every sentence found between the braces.
+  Any other token sequence raises Exception.
+  """
+  is_named_union=(token_list[0].token==lex_c.TOKEN_UNION
+                  and token_list[1].token==lex_c.TOKEN_SYMBOL)
+  if is_named_union and len(token_list)==2:
+    # Only the keyword and the tag name are present: a declaration.
+    return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+  if not is_named_union or token_list[2].token!=lex_c.TOKEN("{"):
+    raise Exception(f"语法错误 {token_list[0]}")
+  if token_list[-1].token!=lex_c.TOKEN("}"):
+    raise Exception("没有出现预期的符号 '}'")
+  members:list[node_variable_def]=[]
+  # Tokens strictly between the opening and the closing brace.
+  remaining=token_list[3:-1]
+  while remaining:
+    stmt=find_sentence(remaining)
+    members.append(dist_node_type(token_list=stmt))
+    remaining=remaining[len(stmt):]
+  return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=members)
+
+
+
+def dist_node_type_enum(token_list:list[lex_token]):
+  """Build an enum node from the tokens of one `enum` statement.
+
+  Exactly two tokens (`enum NAME`) give a node_enum_decl.
+  `enum NAME { A=0, B, ... }` gives a node_enum_def whose body is a list of
+  one-item dicts `{enumerator_name: value}`. An enumerator without an
+  explicit `= NUMBER` gets the previous value plus one, starting at 0.
+  The final enumerator may or may not be followed by a comma.
+
+  Raises Exception when the statement is not a named enum, the closing
+  brace is missing, or enumerators are not separated by ','.
+  """
+  if(len(token_list)<2 or token_list[0].token!=lex_c.TOKEN_ENUM
+     or token_list[1].token!=lex_c.TOKEN_SYMBOL):
+    raise Exception(f"语法错误 {token_list[0]}")
+  if(len(token_list)==2):
+    return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
+  if(token_list[2].token!=lex_c.TOKEN("{")):
+    raise Exception(f"语法错误 {token_list[0]}")
+  if(token_list[-1].token!=lex_c.TOKEN("}")):
+    raise Exception("没有出现预期的符号 '}'")
+  token_list_local=token_list[3:-1]
+  # Value the next enumerator receives when it has no explicit initializer.
+  next_value=0
+  v_list:list[dict]=[]
+  while len(token_list_local)>0:
+    if(token_list_local[0].token==lex_c.TOKEN_SYMBOL):
+      key=token_list_local[0].buff.decode("utf-8")
+      # Check the length first so a trailing enumerator without a comma
+      # does not index past the end of the list.
+      has_value=(len(token_list_local)>=3
+                 and token_list_local[1].token==lex_c.TOKEN("=")
+                 and token_list_local[2].token==lex_c.TOKEN_NUM)
+      if(has_value):
+        next_value=int(token_list_local[2].buff.decode("utf-8"))
+        token_list_local=token_list_local[3:]
+      else:
+        token_list_local=token_list_local[1:]
+      v_list.append({key:next_value})
+      next_value+=1
+    if(len(token_list_local)>0):
+      if(token_list_local[0].token!=lex_c.TOKEN(",")):
+        raise Exception(f"枚举类型应该使用 ',' 分隔符")
+      token_list_local=token_list_local[1:]
+  return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,body=v_list)
+
+
+def dist_node_type_typedef(token_list:list[lex_token]):
+  """Build a node_typedef from the tokens of one `typedef` statement.
+
+  The last token must be a symbol; it becomes the new type name. The tokens
+  between `typedef` and that name are recorded in `attr`: leading
+  `unsigned`/`const` qualifiers, then either `struct`/`union` plus its tag
+  (optionally followed by `*`), a single symbol (a user-defined type name),
+  or a run of built-in type keywords and `*`. When a struct/union body
+  (`{ ... }`) is written inline, it is parsed with dist_node_type and stored
+  as the node's body; otherwise body is None.
+
+  Raises Exception on any other token sequence.
+  """
+  if(token_list[0].token!=lex_c.TOKEN_TYPEDEF):
+    raise Exception(f"语法错误 {token_list[0]}")
+  attr=[]
+  # Keep the untouched statement for the node; token_list is consumed below.
+  all_tokens=token_list
+  if(token_list[-1].token!=lex_c.TOKEN_SYMBOL):
+    raise Exception(f"没有定义新类型 {token_list[-1]}")
+  name=token_list[-1].buff.decode("utf-8")
+  token_list=token_list[1:]
+  while token_list[0].token in [lex_c.TOKEN_UNSIGNED,lex_c.TOKEN_CONST]:
+    attr.append(token_list[0].name)
+    token_list=token_list[1:]
+  if(token_list[0].token==lex_c.TOKEN_STRUCT or token_list[0].token==lex_c.TOKEN_UNION):
+    attr.append(token_list[0].name)
+    if(token_list[1].token==lex_c.TOKEN_SYMBOL):
+      node_r=None
+      attr.append(token_list[1].buff.decode("utf-8"))
+      if(token_list[2].token==lex_c.TOKEN("{")):
+        # Pass keyword, tag and body (everything except the new type name).
+        # dist_node_type dispatches on the leading struct/union keyword, so
+        # the keyword must stay at the front of the slice.
+        node_r=dist_node_type(token_list=token_list[:-1])
+      elif(token_list[2].token==lex_c.TOKEN("*")):
+        attr.append(token_list[2].name)
+      return node_typedef(name=name,token_list=all_tokens,attr=attr,body=node_r)
+  if(token_list[0].token==lex_c.TOKEN_SYMBOL):
+    # A type name that is itself user-defined (e.g. by an earlier typedef).
+    attr.append(token_list[0].buff.decode("utf-8"))
+    token_list=token_list[1:]
+  else:
+    # Built-in C type keywords and pointer markers.
+    while(token_list[0].token in
+      [lex_c.TOKEN_INT,lex_c.TOKEN_CHAR,lex_c.TOKEN_SHORT,lex_c.TOKEN_LONG,lex_c.TOKEN_FLOAT,
+       lex_c.TOKEN_DOUBLE,lex_c.TOKEN_VOID,lex_c.TOKEN("*")]):
+      attr.append(token_list[0].name)
+      token_list=token_list[1:]
+  # Only the new type name may be left at this point.
+  if(len(token_list)>1):
+    raise Exception(f"意外的token {token_list[0]}")
+  return node_typedef(name=name,token_list=all_tokens,attr=attr,body=None)
+
+
--- a/node_run.py
+++ b/node_run.py
@@ -0,0 +1,37 @@
+from lex_c import lex_token
+import lex_c
+from parser_c import node
+from parser_c import node_file
+from parser_c import node_variable_def
+from parser_c import node_struct_decl
+from parser_c import node_struct_def
+from parser_c import node_union_decl
+from parser_c import node_union_def
+from parser_c import node_enum_decl
+from parser_c import node_enum_def
+from parser_c import node_func_decl
+from parser_c import node_typedef
+from parser_c import node_func_def
+
+
+
+
+class variable(object):
+    """A named runtime value with a list of attribute strings.
+
+    Args:
+        name: Name of the variable.
+        value: Initial value (None by default).
+        attr: Attribute strings; a variable whose attr contains "const"
+            cannot be written through set_value. Defaults to a new empty
+            list for every instance.
+    """
+    def __init__(self,name:str,value=None,attr:list[str]=None):
+        self.name=name
+        self.value_=value
+        # A fresh list per instance; a literal [] default would be shared
+        # between every variable created without an explicit attr.
+        self.attr=[] if attr is None else attr
+    def set_value(self,value):
+        """Store a new value; raise Exception if the variable is const."""
+        if("const" in self.attr):
+            raise Exception(f"变量 {self.name} 不可写")
+        self.value_=value
+    def value(self):
+        """Return the currently stored value."""
+        return self.value_
+
+
+class file:
+    """Holds three lists that all start empty: variable_list, function_list
+    and variable_type_list."""
+    def __init__(self):
+        self.variable_list:list[variable]=list()
+        self.function_list:list=list()
+        self.variable_type_list:list=list()
+
--- a/parser_c.py
+++ b/parser_c.py
@@ -3,28 +3,160 @@ import sys
 import dataclasses
 from lex_c import lex_token
 from lex_c import lex
+import lex_c
+from node_declear import dist_node_type_struct
+from node_declear import dist_node_type_union
+from node_declear import dist_node_type_enum
+from node_declear import dist_node_type_typedef

+# List of node type names.
+# NOTE(review): of these, only "file" matches a `type` value used by the node
+# classes in this file ("vdecl" and "fdef" do not) — confirm the table is current.
+_NodeTypeTable=[
+  "file","vdecl","fdef"
+]


+# Base syntax-tree node: `name` is required, `type` is a string tag that the
+# subclasses in this file override, and `token_list` defaults to a new empty list.
 @dataclasses.dataclass
 class node:
  name:str
-  next:None
-  chid:None
-  token_list:list[lex_token]
+  type:str="base"
+  token_list:list[lex_token]=dataclasses.field(default_factory=list)

-# 变量声明节点
+# File node: tagged "file"; `body` is a list that defaults to empty.
@dataclasses.dataclass
-class node_vdecl(node):
-  vvalue:None
-  vtype:str
-  vattr:list[str]
+class node_file(node):
+  type:str="file"
+  body:list=dataclasses.field(default_factory=list)
+
+# Variable-definition node: tagged "variable_def"; `vtype` defaults to
+# "unknown" and `vattr` to a new empty list.
+# NOTE(review): `vvalue` has no type annotation, so @dataclass does not treat
+# it as a field (it is a plain class attribute and is not an __init__
+# parameter) — confirm this is intended.
+@dataclasses.dataclass
+class node_variable_def(node):
+  type:str="variable_def"
+  vvalue=None
+  vtype:str="unknown"
+  vattr:list[str]=dataclasses.field(default_factory=list)
+
+# Struct declaration node, tagged "struct_decl"; adds no fields to node.
+@dataclasses.dataclass
+class node_struct_decl(node):
+  type:str="struct_decl"
+
+# Struct definition node, tagged "struct_def"; `body` is the list of member
+# nodes and defaults to empty.
+@dataclasses.dataclass
+class node_struct_def(node):
+  type:str="struct_def"
+  body:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# Union declaration node, tagged "union_decl"; adds no fields to node.
+@dataclasses.dataclass
+class node_union_decl(node):
+  type:str="union_decl"
+
+# Union definition node, tagged "union_def"; `body` is the list of member
+# nodes and defaults to empty.
+@dataclasses.dataclass
+class node_union_def(node):
+  type:str="union_def"
+  body:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# Enum declaration node, tagged "enum_decl"; adds no fields to node.
+@dataclasses.dataclass
+class node_enum_decl(node):
+  type:str="enum_decl"
+
+# Enum definition node, tagged "enum_def"; `body` is a list of dicts (one per
+# enumerator) and defaults to empty.
+@dataclasses.dataclass
+class node_enum_def(node):
+  type:str="enum_def"
+  body:list[dict]=dataclasses.field(default_factory=list)
+
+# Function declaration node, tagged "func_decl": `rettype` defaults to
+# "unknown"; `retattr` and `para` default to new empty lists.
+@dataclasses.dataclass
+class node_func_decl(node):
+  type:str="func_decl"
+  rettype:str="unknown"
+  retattr:list[str]=dataclasses.field(default_factory=list)
+  para:list[node_variable_def]=dataclasses.field(default_factory=list)
+
+# typedef node, tagged "typedef": `attr` is a list of strings (empty by
+# default) and `body` is an optional nested node (None by default).
+@dataclasses.dataclass
+class node_typedef(node):
+  type:str="typedef"
+  attr:list[str]=dataclasses.field(default_factory=list)
+  body:node=None

 # 函数定义节点
+# Function definition node, tagged "func_def": `rettype` defaults to
+# "unknown"; `retattr`, `para` and `body` default to new empty lists.
@dataclasses.dataclass
-class node_fdef(node):
-  rettype:str
+class node_func_def(node):
+  type:str="func_def"
+  rettype:str="unknown"
+  retattr:list[str]=dataclasses.field(default_factory=list)
+  para:list[node_variable_def]=dataclasses.field(default_factory=list)
+  body:list[node]=dataclasses.field(default_factory=list)
+
+
+def find_close(token_list:list[lex_token],token:tuple[int,int]):
+  """Return the index of the token that closes the group opened at index 0.
+
+  `token` is an (opening, closing) pair of token values. Returns 0 when
+  token_list does not start with the opening token. Nested pairs are
+  balanced by counting. Raises Exception when the group is never closed.
+  """
+  opener=token[0]
+  if token_list[0].token!=opener:
+    return 0
+  closer=token[1]
+  depth=0
+  for pos in range(len(token_list)):
+    kind=token_list[pos].token
+    if kind==opener:
+      depth+=1
+    elif kind==closer:
+      depth-=1
+    # Depth returns to zero exactly at the matching closing token.
+    if depth==0:
+      return pos
+  raise Exception(f"没有找到闭合的符号 {token[1]}")
+
+def find_sentence(token_list:list[lex_token]):
+  """Return the leading tokens of token_list that form one complete sentence.
+
+  A sentence ends at the first `;` that is outside any `(...)` or `{...}`
+  group, or at the brace closing a `{...}` group when a `(...)` group was
+  seen earlier (presumably a function definition). The terminating token is
+  included in the result. Raises Exception if no sentence end is found.
+  """
+  paren_pair=(lex_c.TOKEN("("),lex_c.TOKEN(")"))
+  brace_pair=(lex_c.TOKEN("{"),lex_c.TOKEN("}"))
+  semicolon=lex_c.TOKEN(";")
+  seen_paren=False
+  pos=0
+  total=len(token_list)
+  while pos<total:
+    kind=token_list[pos].token
+    if kind==paren_pair[0]:
+      # Jump to the matching ')' so nothing inside the parentheses is examined.
+      skip=find_close(token_list[pos:],paren_pair)
+      if skip>0:
+        seen_paren=True
+        pos+=skip
+    elif kind==brace_pair[0]:
+      # Jump to the matching '}' so nothing inside the braces is examined.
+      skip=find_close(token_list[pos:],brace_pair)
+      if skip>0:
+        pos+=skip
+        if seen_paren:
+          return token_list[:pos+1]
+    elif kind==semicolon:
+      return token_list[:pos+1]
+    pos+=1
+  raise Exception(f"没有找到完整的语句")
+
+
+
+def dist_node_type(token_list:list[lex_token]):
+  """Turn the tokens of one sentence into the matching node.
+
+  A leading `extern` and a trailing `;` are dropped first. The sentence is
+  then handed to the struct, union, enum or typedef handler according to
+  its first token. Raises Exception for any other first token.
+  """
+  tokens=token_list
+  if tokens[0].token==lex_c.TOKEN_EXTERN:
+    tokens=tokens[1:]
+  if tokens[-1].token==lex_c.TOKEN(";"):
+    tokens=tokens[:-1]
+  head=tokens[0].token
+  # (first token, handler) pairs, tried in order.
+  handlers=(
+    (lex_c.TOKEN_STRUCT,dist_node_type_struct),
+    (lex_c.TOKEN_UNION,dist_node_type_union),
+    (lex_c.TOKEN_ENUM,dist_node_type_enum),
+    (lex_c.TOKEN_TYPEDEF,dist_node_type_typedef),
+  )
+  for kind,handler in handlers:
+    if head==kind:
+      return handler(token_list=tokens)
+
+  raise Exception(f"无法处理的token类型 {tokens[0]}")
+

 if __name__ == "__main__":
-  with open("main.c",mode='rb') as f:
+  # Read the input file as bytes and lex it into a token list.
+  file_name="main.c"
+  with open(file_name,mode='rb') as f:
    token_list=lex(f.read())
+  file=node_file(name=file_name,token_list=token_list,body=[])
+  # Take one complete sentence at a time off the front of the token list,
+  # turn it into a node, and collect the nodes in the file node's body.
+  while len(token_list)>0:
+    sentence=find_sentence(token_list)
+    node_d=dist_node_type(sentence)
+    file.body.append(node_d)
+    print('找到一个语句：')
+    for item in sentence:
+      print(f"\t{item}")
+    token_list=token_list[len(sentence):]