parser_c/parser_c.py

import os
import sys
import shutil
import dataclasses
import copy


TOKEN_IF = 256,
TOKEN_BREAK = 257,
TOKEN_WHILE=258,
TOKEN_SWITCH=259,
TOKEN_CASE=260,
TOKEN_DO=261,
TOKEN_CHAR=262,
TOKEN_INT=263,
TOKEN_VOID=264,
TOKEN_SYMBOL = 265 ,
TOKEN_NUM = 266 ,# 数字
TOKEN_INC = 267,# 自增
TOKEN_DEC = 268,# 自减
TOKEN_EQ = 269,# 相等
TOKEN_NEQ = 270,# 不相等
TOKEN_LSH = 271,# 左移
TOKEN_RSH = 272,# 右移
TOKEN_LEQ = 273,# 小于等于
TOKEN_GEQ = 274,# 大于等于
TOKEN_ELSE = 275,
TOKEN_CONTINUE = 276 ,
TOKEN_CONST = 277 ,
TOKEN_STATIC = 278 ,
TOKEN_UNSIGNED = 279 ,
TOKEN_TYPEDEF = 280 ,
TOKEN_STRUCT = 281 ,
TOKEN_ENUM = 282 ,
TOKEN_UNION = 283,
TOKEN_STRING = 284,
TOKEN_DEFAULT = 285,
TOKEN_RETURN = 286,
TOKEN_ASSIG_ADD = 287,
TOKEN_ASSIG_SUB = 288,
TOKEN_ASSIG_MUL = 289,
TOKEN_ASSIG_DIV = 290,
TOKEN_ASSIG_LSH = 291,
TOKEN_ASSIG_RSH = 292,
TOKEN_EXTERN = 293,
TOKEN_FLOAT = 294,
TOKEN_DOUBLE = 295,
TOKEN_SHORT = 296,
TOKEN_LONG = 297,
TOKEN_POINTER = 298,
TOKEN_LOGICAL_OR = 299,# 逻辑或
TOKEN_LOGICAL_AND = 300,# 逻辑与
TOKEN_OMIT = 301,# 省略符 ...


def TOKEN(t:str):
  return t.encode("utf-8")[0]

_KeyWordTable={
  "if":TOKEN_IF,
  "else":TOKEN_ELSE,
  "break":TOKEN_BREAK,
  "while":TOKEN_WHILE,
  "switch":TOKEN_SWITCH,
  "case":TOKEN_CASE,
  "do":TOKEN_DO,
  "char":TOKEN_CHAR,
  "int":TOKEN_INT,
  "void":TOKEN_VOID,
  "continue":TOKEN_CONTINUE,
  "const":TOKEN_CONST,
  "static":TOKEN_STATIC,
  "unisgned":TOKEN_UNSIGNED,
  "typedef":TOKEN_TYPEDEF,
  "struct":TOKEN_STRUCT,
  "enum":TOKEN_ENUM,
  "union":TOKEN_UNION,
  "default":TOKEN_DEFAULT,
  "return":TOKEN_RETURN,
  "extern":TOKEN_EXTERN,
  "float":TOKEN_FLOAT,
  "double":TOKEN_DOUBLE,
  "short":TOKEN_SHORT,
  "long":TOKEN_LONG,
}

_MarkTable={
  "<<":TOKEN_LSH,
  ">>":TOKEN_RSH,
  "<=":TOKEN_LEQ,
  ">=":TOKEN_GEQ,
  "!=":TOKEN_NEQ,
  "==":TOKEN_EQ,
  "++":TOKEN_INC,
  "--":TOKEN_DEC,
  "->":TOKEN_POINTER,
  "+=":TOKEN_ASSIG_ADD,
  "-=":TOKEN_ASSIG_SUB,
  "*=":TOKEN_ASSIG_MUL,
  "<<=":TOKEN_ASSIG_LSH,
  ">>=":TOKEN_ASSIG_RSH,
  "=":TOKEN("="),
  "!":TOKEN("!"),
  "<":TOKEN("<"),
  ">":TOKEN(">"),
  "+":TOKEN("+"),
  "-":TOKEN("-"),

}


# 是否是数字加字母
def isalnum(num:int):
  return bytes([num]).isalnum()

# 是否是数字加字母或下划线
def isalnum_(num:int):
  return bytes([num]).isalnum() or num==TOKEN("_")

# 是否是字母
def isalpha(num:int):
  return bytes([num]).isalpha()

# 是否是字母或下划线
def isalpha_(num:int):
  return bytes([num]).isalpha() or num==TOKEN("_")

# 是否是数字
def isdigit(num:int):
  return bytes([num]).isdigit()

# 是否是数字或小数点
def isdigitdot(num:int):
  return bytes([num]).isdigit() or num==TOKEN(".")

# 是否是空白字符 包括换行符
def isspace(num:int):
  return bytes([num]).isspace()

# 是否是给定字符串之一
def isinstr(num:int,t:str):
  c=bytes([num])
  return c in t.encode("utf-8")

# 是否是操作符
def isoperator(num:int):
  return isinstr(num,"<>!+-=")

@dataclasses.dataclass
class lex_token:
    name:str
    buff:bytearray
    token:int
    line:int
    pos:int

# 连写的操作符，这些实际上是多个操作符写在一起的结果
_NotMarkTable={
  "!!":[lex_token("operator",'!',TOKEN('!'),0,0),
        lex_token("operator",'!',TOKEN('!'),0,0)],
  "=-":[lex_token("operator",'=',TOKEN('='),0,0),
        lex_token("operator",'-',TOKEN('-'),0,0)],
  "--=":[lex_token("operator",'--',TOKEN_DEC,0,0),
        lex_token("operator",'=',TOKEN('='),0,0)],
  "++=":[lex_token("operator",'++',TOKEN_INC,0,0),
        lex_token("operator",'=',TOKEN('='),0,0)],
  "=--":[lex_token("operator",'=',TOKEN('='),0,0),
        lex_token("operator",'--',TOKEN_DEC,0,0)],
  "=++":[lex_token("operator",'=',TOKEN('='),0,0),
        lex_token("operator",'++',TOKEN_INC,0,0)],
  "!=--":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
        lex_token("operator",'--',TOKEN_DEC,0,0)],
  "!=++":[lex_token("operator",'!=',TOKEN_NEQ,0,0),
        lex_token("operator",'++',TOKEN_INC,0,0)],
  "==--":[lex_token("operator",'==',TOKEN_EQ,0,0),
        lex_token("operator",'--',TOKEN_DEC,0,0)],
  "==++":[lex_token("operator",'==',TOKEN_EQ,0,0),
        lex_token("operator",'++',TOKEN_INC,0,0)],
}


class lex_class(object):
  def __init__(self,text:bytes,file_name:str="") -> None:
    self.text=text
    self.index=-1
    self.line=1
    self.pos=-1
    self.token_list:list[lex_token]=[]
    self.token_buff=bytearray()
    self.file_name=file_name
    self.macro_table={}
  def save_char(self,c:int):
    self.token_buff.append(c&0xff)
  def save_token(self,token:lex_token):
    self.token_list.append(token)
    self.token_buff=bytearray()
  def _get_char(self):
    if(self.index<len(self.text)):
      c= self.text[self.index]
      return c
    return -1
  def get_next_char(self):
    if not self.is_end():
      self.index+=1
    c= self._get_char()
    if(c==b'\n'[0]):
      self.line+=1
      self.pos=-1
    else:
      self.pos+=1
    return c
  def is_end(self):
    return self.index>=len(self.text)
  def save_one_char_token(self,c:int):
    token=lex_token(bytes([c]).decode("utf-8"),bytes([c]),c,self.line,self.pos)
    self.save_token(token)
  def read_name_and_save(self,c:int):
    token=lex_token("symbol",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(isalnum_(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _KeyWordTable):
      token.token=_KeyWordTable[name]
      token.name=name
    token.buff=self.token_buff
    self.save_token(token)
    return c
  def read_operator_and_save(self,c:int):
    token=lex_token("operator",bytearray(),TOKEN_SYMBOL,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(isoperator(c)):
        self.save_char(c)
      else:
        break
    name=self.token_buff.decode("utf-8")
    if(name in _MarkTable):
      token.token=_MarkTable[name]
      token.name=name
      token.buff=self.token_buff
      self.save_token(token)
    elif(name in _NotMarkTable):
      tokens=_NotMarkTable[name]
      for t in tokens:
        token.token=t.token
        token.name=t.name
        token.buff=token.name.encode("utf-8")
        self.save_token(copy.deepcopy(token))
        token.pos+=len(token.name)
    else:
      raise Exception(f"不存在的操作符 {name} {self.file_name}:{self.line},{self.pos}")
      # print(f"不存在的操作符 {name} ")
    return c
  def read_num_and_save(self,c:int):
    token=lex_token("number",bytearray(),TOKEN_NUM,self.line,self.pos)
    self.save_char(c)
    while True:
      c=self.get_next_char()
      if(isdigitdot(c)):
        self.save_char(c)
      else:
        break
    if(self.token_buff.count(b'.')>1):
      raise Exception(f"数字不能包含多个点号 {self.file_name}:{self.line},{self.pos}")
    token.buff=self.token_buff
    self.save_token(token)
    return c
  _escape_table={'0':0,'a':7,'b':8,'t':9,'n':10,'v':11,'f':12,'r':13,'"':34,'\'':39,'?':63,'\\':92}
  def read_str_and_save(self,c:int):
    c=self.get_next_char()
    while c!=b'\"'[0]:
      if(c==TOKEN('\\')):# \
        c=self.get_next_char()
        self.save_char(self._escape_table.get(c,0))
      else:
        self.save_char(c)
      c=self.get_next_char()
    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
    return self.get_next_char()
  def read_char_and_save(self,c:int):
    c=self.get_next_char()
    while c!=b'\''[0]:
      if(c==TOKEN('\\')):# \
        c=self.get_next_char()
        self.save_char(self._escape_table.get(c,0))
      else:
        self.save_char(c)
      c=self.get_next_char()
    self.save_token(lex_token("string",self.token_buff,TOKEN_STRING,self.line,self.pos))
    return self.get_next_char()
  def deal_macro(self,buff:bytearray):
    self.macro_result=False
    sp=buff.decode('utf-8').split()
    if(len(sp)>0):
      if(sp[0]=='#define'):
        if(len(sp)>=3):
          if not (sp[1] in self.macro_table):
            self.macro_table[sp[1]]=' '.join(sp[2:])
        else:
          if not (sp[1] in self.macro_table):
            self.macro_table[sp[1]]=""
      elif(sp[0]=='#ifdef'):
        self.macro_result= (sp[1] in self.macro_table)
        return self.macro_result
      elif(sp[0]=='#if'):
        t=' '.join(sp[1:])# 判断条件比较复杂，暂时固定返回失败
        return self.macro_result
      elif(sp[0]=='#elif'):
        return self.macro_result
      elif(sp[0]=='#else'):
        self.macro_result= not self.macro_result
        return self.macro_result
      elif(sp[0]=='#endif'):
        return True
      else:
        return True

def lex(text:bytes,file_name:str=""):
  lex_obj = lex_class(text,file_name)
  c=lex_obj.get_next_char()
  line_old=0
  pos_old=0
  while not lex_obj.is_end():
    line_old=lex_obj.line
    pos_old=lex_obj.pos
    if isalpha_(c):
      c=lex_obj.read_name_and_save(c)
    elif isinstr(c,"{}[]()~,;:*?%^"):
      lex_obj.save_one_char_token(c)
      c=lex_obj.get_next_char()
    elif isdigit(c):
      c=lex_obj.read_num_and_save(c)
    elif isspace(c):
      c=lex_obj.get_next_char()
    elif isoperator(c):
      c=lex_obj.read_operator_and_save(c)
    elif isinstr(c,"\""):
      c=lex_obj.read_str_and_save(c)
    elif isinstr(c,"\'"):
      c=lex_obj.read_char_and_save(c)
    elif isinstr(c,"\\"):
      c=lex_obj.get_next_char()
      if(c!=TOKEN("\r") and c!=TOKEN("\n")):
        raise Exception(f"符号 '\\' 必须在行末, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
    elif isinstr(c,"#"): # 宏定义
      c_old=c
      buff=bytearray()
      while (c!=TOKEN("\n") and c!=-1):
        c=lex_obj.get_next_char()
        if(c_old==TOKEN('/') and c==TOKEN('*')):# 适配宏后面有注释的情况
          while not (c_old==TOKEN("*") and c==TOKEN("/")) or c==-1:
            c_old=c
            c=lex_obj.get_next_char()
        elif(c_old==TOKEN('/') and c==TOKEN('/')):
          while not (c==TOKEN('\n') or c==-1):
            c=lex_obj.get_next_char()
        elif(c_old==TOKEN('\\') and c in [TOKEN('\n'),TOKEN('\r')]):# 适配多行
          c=lex_obj.get_next_char()
        else:
          buff.append(c_old&0xff)
        c_old=c
      if not (lex_obj.deal_macro(buff)): # 处理宏
        is_space=True
        while True:
          c=lex_obj.get_next_char()
          if(is_space and c==TOKEN('#')):
            break
          if(c==-1):
            break
          if not isspace(c):
            is_space=False
          elif(c==TOKEN('\n')):
            is_space=True
    elif isinstr(c,"/"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("/")):
        while (c!=TOKEN("\n") and c!=-1):
          c=lex_obj.get_next_char()
      elif(c==TOKEN("*")):
        c_old=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
        while not (c_old==TOKEN("*") and c==TOKEN("/")):
          c_old=c
          c=lex_obj.get_next_char()
        c=lex_obj.get_next_char()
      elif(c==TOKEN("=")):
        lex_obj.save_token(lex_token("/=",b"/=",TOKEN_ASSIG_DIV,lex_obj.line,lex_obj.pos))
        c=lex_obj.get_next_char()
      else:
        lex_obj.save_one_char_token(TOKEN("/"))
    elif isinstr(c,"|"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("|")):
        lex_obj.save_token(lex_token("||",b"||",TOKEN_LOGICAL_OR,lex_obj.line,lex_obj.pos))
      else:
        lex_obj.save_one_char_token(TOKEN("|"))
    elif isinstr(c,"&"):
      c=lex_obj.get_next_char()
      if(c==TOKEN("&")):
        lex_obj.save_token(lex_token("&&",b"&&",TOKEN_LOGICAL_AND,lex_obj.line,lex_obj.pos))
      else:
        lex_obj.save_one_char_token(TOKEN("&"))
    elif isinstr(c,'.'):
      c=lex_obj.get_next_char()
      if(c==TOKEN('.')):
        c=lex_obj.get_next_char()
        if(c==TOKEN('.')):
          lex_obj.save_token(lex_token("...",b"...",TOKEN_OMIT,lex_obj.line,lex_obj.pos))
        else:
          raise Exception (f"格式错误 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
      else:
        lex_obj.save_one_char_token(TOKEN("."))
    else:
      raise Exception(f"未知的字符 {bytes([c])}, {lex_obj.file_name}:{lex_obj.line},{lex_obj.pos}")
      # c=lex_obj.get_next_char()
    # if(line_old==lex_obj.line and pos_old==lex_obj.pos):
    #   print(f"pointer not move.")
    # print(line_old,pos_old)
  # for item in lex_obj.token_list:
  #   print(f"{item}")
  return lex_obj.token_list


@dataclasses.dataclass
class node:
  name:list=dataclasses.field(default_factory=list)
  type:str="base"
  token_list:list=dataclasses.field(default_factory=list)
  child:list=dataclasses.field(default_factory=list)
  def complite(self):
    print(f"complite {self.type}")
# 文件节点
@dataclasses.dataclass
class node_file(node):
  type:str="file"
# 变量定义节点
@dataclasses.dataclass
class node_variable_def(node):
  type:str="variable_def"

# 结构体声明节点
@dataclasses.dataclass
class node_struct_decl(node):
  type:str="struct_decl"

# 结构体定义节点
@dataclasses.dataclass
class node_struct_def(node):
  type:str="struct_def"

# 联合体声明节点
@dataclasses.dataclass
class node_union_decl(node):
  type:str="union_decl"

# 联合体定义节点
@dataclasses.dataclass
class node_union_def(node):
  type:str="union_def"

# 枚举声明节点
@dataclasses.dataclass
class node_enum_decl(node):
  type:str="enum_decl"

# 枚举定义节点
@dataclasses.dataclass
class node_enum_def(node):
  type:str="enum_def"

# 函数声明节点
@dataclasses.dataclass
class node_func_decl(node):
  type:str="func_decl"

#typedef 节点
@dataclasses.dataclass
class node_typedef(node):
  type:str="typedef"

# 函数定义节点
@dataclasses.dataclass
class node_func_def(node):
  type:str="func_def"

# switch节点
@dataclasses.dataclass
class node_switch(node):
  type:str="switch"

# case节点
@dataclasses.dataclass
class node_case(node):
  type:str="case"

# default
@dataclasses.dataclass
class node_default(node):
  type:str="default"

# break节点
@dataclasses.dataclass
class node_break(node):
  type:str="break"

# return节点
@dataclasses.dataclass
class node_return(node):
  type:str="return"

# 函数调用节点
@dataclasses.dataclass
class node_call(node):
  type:str="call"

# 变量操作节点
@dataclasses.dataclass
class node_opt(node):
  type:str="opt_var"

# 符号节点
@dataclasses.dataclass
class node_symbol(node):
  type:str="symbol"

# string节点
@dataclasses.dataclass
class node_string(node):
  type:str="string"

# int节点
@dataclasses.dataclass
class node_int(node):
  type:str="int"


# 找到闭合的括号
def find_close(token_list:list,token:tuple):
  if token_list[0].token!=token[0]:
    return 0
  num=0
  for index,item in enumerate(token_list):
    if(item.token==token[0]):
      num+=1
    elif(item.token==token[1]):
      num-=1
    if(num==0):
      return index
  raise Exception(f"没有找到闭合的符号 {token_list[0]}")

# 找到指定token的index
def find_token(token_list:list,token:int):
  num=0
  for index,item in enumerate(token_list):
    if(item.token!=token):
      num+=1
    else:
      return num
  return num


# 找到一个完整的语句
def find_sentence(token_list:list,sep:list=[TOKEN(";"),TOKEN(":")]):
  bracket_flag=False
  index=0
  if(len(token_list)==1):
    return token_list
  while index<len(token_list):
    if(token_list[index].token==TOKEN("(")):
      bracket_index=find_close(token_list[index:],(TOKEN("("),TOKEN(")")))
      if(bracket_index>0):
        bracket_flag=True
        index+=bracket_index
    elif(token_list[index].token==TOKEN("{")):
      bracket_index=find_close(token_list[index:],(TOKEN("{"),TOKEN("}")))
      if(bracket_index>0):
        index+=bracket_index
        if(bracket_flag==True):
          return token_list[:index+1]
    elif(token_list[index].token in sep):
      return token_list[:index+1]
    index+=1
  raise Exception(f"没有找到完整的语句 sep={sep} token={token_list[0]}")


def dist_node_type_struct(token_list:list):
  if(token_list[0].token==TOKEN_STRUCT):
    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
      elif(token_list[2].token==TOKEN("{")):
        # if not token_list[-1].token==TOKEN("}"):
        #   raise Exception("没有出现预期的符号 '}'")
        # v_list:list[node_variable_def]=[]
        # token_list_local=token_list[3:-1]
        # while len(token_list_local)>0:
        #   sentence=find_sentence(token_list_local)
        #   v_list.append(dist_node_type(token_list=sentence))
        #   token_list_local=token_list_local[len(sentence):]
        return node_struct_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
      else:
        return node_struct_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # 函数定义
    return dist_node_type_funcdef(token_list=token_list)

  raise Exception(f"语法错误 {token_list[0]}")


def dist_node_type_union(token_list:list):
  if(token_list[0].token==TOKEN_UNION):
    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_union_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
      elif(token_list[2].token==TOKEN("{")):
        # if not token_list[-1].token==TOKEN("}"):
        #   raise Exception("没有出现预期的符号 '}'")
        # v_list:list[node_variable_def]=[]
        # token_list_local=token_list[3:-1]
        # while len(token_list_local)>0:
        #   sentence=find_sentence(token_list_local)
        #   v_list.append(dist_node_type(token_list=sentence))
        #   token_list_local=token_list_local[len(sentence):]
        return node_union_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # 函数定义
    return dist_node_type_funcdef(token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")


def dist_node_type_enum(token_list:list):
  if(token_list[0].token==TOKEN_ENUM):
    if(token_list[1].token==TOKEN_SYMBOL):
      if(len(token_list)==2):
        return node_enum_decl(name=token_list[1].buff.decode("utf-8"),token_list=token_list)
      elif(token_list[2].token==TOKEN("{")):
        # if not token_list[-1].token==TOKEN("}"):
        #   raise Exception("没有出现预期的符号 '}'")
        # token_list_local=token_list[3:-1]
        return node_enum_def(name=token_list[1].buff.decode("utf-8"),token_list=token_list,child=[])
    elif(token_list[1].token==TOKEN("{")):
      # if not token_list[-1].token==TOKEN("}"):
      #   raise Exception("没有出现预期的符号 '}'")
      # token_list_local=token_list[3:-1]
      return node_enum_def(name='unnamed_enum',token_list=token_list,child=[])
  if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
    # 函数定义
    return dist_node_type_funcdef(token_list=token_list)
  raise Exception(f"语法错误 {token_list[0]}")


def dist_node_type_typedef(token_list:list):
  if(token_list[0].token==TOKEN_TYPEDEF):
    attr=[]
    token_list_local=token_list
    # if(token_list[-1].token!=TOKEN_SYMBOL):
    #   raise Exception(f"没有定义新类型 {token_list[-1]}")
    name=token_list[-1].buff.decode("utf-8")
    token_list=token_list[1:]
    while token_list[0].token in [TOKEN_UNSIGNED,TOKEN_CONST]:
      attr.append(token_list[0].name)
      token_list=token_list[1:]
    if(token_list[0].token==TOKEN_STRUCT or token_list[0].token==TOKEN_UNION):
      attr.append(token_list[0].name)
      if(token_list[1].token==TOKEN_SYMBOL):
        node_r=None
        attr.append(token_list[1].buff.decode("utf-8"))
        if(token_list[2].token==TOKEN("{")):
          node_r=dist_node_type(token_list=token_list[1:-1])
        elif(token_list[2].token==TOKEN("*")):
          attr.append(token_list[2].name)
        return node_typedef(name=name,token_list=token_list_local,child=[])
    if(token_list[0].token==TOKEN_SYMBOL):
      # 使用typedef 定义过的自定义类型
      attr.append(token_list[0].buff.decode("utf-8"))
      token_list=token_list[1:]
    else:
      # c语言预设类型
      while(token_list[0].token in
        [TOKEN_INT,TOKEN_CHAR,TOKEN_SHORT,TOKEN_LONG,TOKEN_FLOAT,
         TOKEN_DOUBLE,TOKEN_VOID,TOKEN("*")]):
        attr.append(token_list[0].name)
        token_list=token_list[1:]
    # if(len(token_list)>1):
    #   raise Exception(f"意外的token {token_list[0]}")
    return node_typedef(name=name,token_list=token_list_local,child=[])
  raise Exception(f"语法错误 {token_list[0]}")


# 找到子节点
def find_child(token_list:list,seq:list=[TOKEN(";"),TOKEN(":")]):
  child=[]
  token_list_local=[]
  for i in range(len(token_list)):
    if(token_list[i].token==TOKEN("{")):
      token_list_local=token_list[i+1:-1]
      break
  while len(token_list_local)>0:
    sentence=find_sentence(token_list_local,seq)
    node_d=dist_node_type(sentence)
    if not node_d is None:
      child.append(node_d)
    token_list_local=token_list_local[len(sentence):]
  return child


def dist_node_type_funcdef(token_list:list):
  for i in range(len(token_list)):
    if(token_list[i].token==TOKEN('(')):
      name=token_list[i-1].buff.decode("utf-8")
      break
  # return node_func_def(name=[name],token_list=token_list,child=find_child(token_list))
  return node_func_def(name=[name],token_list=token_list,child=[])

def dist_node_type_funcdecl(token_list:list):
  for i in range(len(token_list)):
    if(token_list[i].token==TOKEN_SYMBOL):
      name=token_list[i].buff.decode("utf-8")
      return node_func_decl(name=[name],token_list=token_list,child=[])
  raise Exception(f"函数声明格式错误 {token_list[0]}")


# 第一个token是symbol的处理
def dist_node_type_symbol(token_list:list):
  # 变量赋值或函数调用
  if(len(token_list)==1):
    return node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list)
  if(token_list[1].token == TOKEN("(")):
    child=find_child(token_list=token_list[2:-1])
    return node_call("call",token_list=token_list,child=child)
  elif(token_list[1].token in [
    TOKEN("="),TOKEN_ASSIG_ADD,TOKEN_ASSIG_DIV,TOKEN_ASSIG_LSH,
    TOKEN_ASSIG_MUL,TOKEN_ASSIG_RSH,TOKEN_ASSIG_SUB]):
    name=token_list[1].name
    child=[node_symbol(name=token_list[0].buff.decode("utf-8"),token_list=token_list[:1]),]
    child_d=dist_node_type(token_list=token_list[2:])
    if not child_d is None:
      child.append(child_d)
    return node_opt(name=name,token_list=token_list,child=child)
  else:
    # 没有赋值属性的操作
    name=token_list[1].name
    return node_opt(name=name,token_list=token_list,child=[])


# 判断一个语句的类型
def dist_node_type(token_list:list):
  # print(f"{token_list[0]}")
  if(token_list[0].token==TOKEN_EXTERN):
    token_list=token_list[1:]
  if(token_list[-1].token==TOKEN(";")):
    token_list=token_list[:-1]
  if(len(token_list)==0):
    return None
  if(token_list[0].token==TOKEN_STRUCT):
    return dist_node_type_struct(token_list=token_list)
  if(token_list[0].token==TOKEN_UNION):
    return dist_node_type_union(token_list=token_list)
  if(token_list[0].token==TOKEN_ENUM):
    return dist_node_type_enum(token_list=token_list)
  if(token_list[0].token==TOKEN_TYPEDEF):
    return dist_node_type_typedef(token_list=token_list)
  # if(token_list[0].token==TOKEN_SWITCH):
  #   child=find_child(token_list)
  #   return node_switch(name="",token_list=token_list,child=child)
  # if(token_list[0].token==TOKEN_CASE):
  #   name=token_list[1].buff.decode("utf-8")
  #   return node_case(name=name,token_list=token_list,child=[])
  # if(token_list[0].token==TOKEN_DEFAULT):
  #   return node_default(name="",token_list=token_list,child=[])
  # if(token_list[0].token==TOKEN_BREAK):
  #   return node_break(name="",token_list=token_list,child=[])
  # if(token_list[0].token==TOKEN_RETURN):
  #   if(len(token_list)>1):
  #     child=[dist_node_type(token_list[1:])]
  #   else:
  #     child=[]
  #   return node_return(name="",token_list=token_list,child=child)
  if(token_list[0].token==TOKEN_STRING):
    name=token_list[0].buff.decode("utf-8")
    return node_string(name=name,token_list=token_list,child=[])
  if(token_list[0].token==TOKEN_NUM):
    name=token_list[0].buff.decode("utf-8")
    return node_int(name=name,token_list=token_list,child=[])

  if(token_list[-1].token==TOKEN(")")):
    # 函数声明
    return dist_node_type_funcdecl(token_list)
  elif(token_list[-1].token==TOKEN("}")):
    if(find_token(token_list,TOKEN('('))<find_token(token_list,TOKEN('{'))):
      # 函数定义
      return dist_node_type_funcdef(token_list=token_list)
    else:
      # 变量定义
      pass
  elif(token_list[0].token==TOKEN_SYMBOL):
    # 变量赋值或函数调用
    return dist_node_type_symbol(token_list=token_list)

  # 变量定义
  for i in range(len(token_list)):
    if(token_list[i].token==TOKEN_SYMBOL):
      name=token_list[i].buff.decode("utf-8")
      return node_variable_def(name=[name],token_list=token_list,child=[])
  raise Exception(f"变量定义格式错误 {token_list[0]}")


def print_node(n:node,deep:int):
  s="|"*deep
  print(f"{s} {n.type} {n.name} {n.token_list[0]}")
  # n.complite()
  if (not n.child is None) and len(n.child)>0:
    for item in n.child:
      print_node(item,deep+1)

def find_func_def_in_file(n:node,deep:int,func_name_list:list):
  ack=False
  if(n.type=='func_def') and (n.name[0] in func_name_list):
    print(f"{n.type} {n.name}")
    return True
  # n.complite()
  if (not n.child is None) and len(n.child)>0:
    for item in n.child:
      ack=find_func_def_in_file(item,deep+1,func_name_list)
      if(ack):
        return ack
  return False

def check_func_def(file_name:str,func_name_list:list):
  with open(file_name,mode='rb') as f:
    read_d=f.read()
    if(read_d[:3]==bytes([0xef,0xbb,0xbf])):
      read_d=read_d[3:]
    token_list=lex(read_d,file_name)
  file=node_file(name=file_name,token_list=token_list)
  while len(token_list)>0:
    node_d=None
    try:
      sentence=find_sentence(token_list)
      node_d=dist_node_type(sentence)
    except Exception as e:
      print(f"in {file_name}")
      print(f"\t {e}")
      break
    if not node_d is None:
      file.child.append(node_d)
    token_list=token_list[len(sentence):]
  print_node(file,0)
  return find_func_def_in_file(file,0,func_name_list)


# 找到定义函数的文件
def find_func_def(file_list:list,func_name_list:str):
  ret_list=[]
  err_list=[]
  for item in file_list:
    sys.stdout.write('.')
    sys.stdout.flush()
    # try:
    ack=check_func_def(item,func_name_list)
    if(ack):
      ret_list.append(item)
    # except Exception as e:
    #   print(e)
    #   err_list.append(item)
  return ret_list,err_list

# 找到指定后缀的文件
def find_type(path:str,fix:str):
    dlist=os.listdir(path)
    file_list=[]
    for i in dlist:
        ps=os.path.join(path, i)
        if os.path.isdir(ps):
            file_list+=find_type(ps,fix)
            pass
        else:
            if(ps[-len(fix):]==fix):
                file_list.append(ps)
    return file_list


# with open("build/build_log.log",mode="r",encoding="utf-8") as f:
#   _out_text=f.readlines()


def get_func_list():
  func_list=[]
  _out_text=sys.stdin.readlines()
  for item in _out_text:
    key_str='undefined reference to `'
    index=item.find(key_str)
    if(index<0):
      continue
    index+=len(key_str)
    index_end=item[index:].find('\'')
    func=item[index:index+index_end]
    if not (func in func_list):
      func_list.append(func)
  return func_list


# 参数是扫描的目录列表
if __name__=="__main__":
  file_list=[]
  for item in sys.argv[1:]:
    file_list+=find_type(item,'.c')
  # file_list=["./dtest/dtest3/kl3_core_mark/core_main.c"]
  print(f"there is {len(file_list)} .c file.")
  # func_list=get_func_list()
  func_list=['main']
  print(func_list)
  # find_func_def(['driver/src/hw3/efuse.c'],['efuse_get_d_bg_vbg_cntl'])
  ret_list,err_list=find_func_def(file_list,func_list)
  print("已找到的文件")
  for item in ret_list:
    print(item)
  print("分析失败的文件")
  for item in err_list:
    print(item)