Abstract syntax tree
This commit is contained in:
137
ast.py
Normal file
137
ast.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import token
|
||||
from enum import Enum
|
||||
|
||||
class AstType(Enum):
    """Kinds of node that can appear in the abstract syntax tree."""

    # Value-like nodes.
    CONSTANT = 0
    VARIABLE = 1

    # Nodes built from comparison and arithmetic tokens.
    COMPARITOR = 2
    OPERATOR = 3

    # Statement nodes.
    IF = 4
    WHILE = 5
    # Assigned by build_ast to the sub-tree holding the lines of an `if` block.
    BODY = 6
    SET = 7

    # Top of a tree returned by build_ast.
    ROOT = 8
    # Assigned by build_expression_ast to a variable token followed by PARAMOPEN.
    FUNC_CALL = 9
|
||||
|
||||
# Token types that AstNode classifies as AstType.OPERATOR.
operators = [
    token.TokenType.ADD,
    token.TokenType.SUBTRACT,
    token.TokenType.MULTIPLY,
    token.TokenType.DIVIDE,
]

# Token types that AstNode classifies as AstType.COMPARITOR.
comparitors = [
    token.TokenType.EQUAL,
    token.TokenType.GREATER,
    token.TokenType.LESSER,
]

# Token types that AstNode classifies as AstType.CONSTANT.
constants = [
    token.TokenType.INTEGER,
    token.TokenType.DOUBLE,
    token.TokenType.STRING,
    token.TokenType.CHAR,
    token.TokenType.BOOLEAN,
]
|
||||
|
||||
class AstNode:
    """One node of the syntax tree.

    Attributes:
        children: child nodes, in the order they were appended.
        tok: the token this node was created from.
        type: the AstType derived from the token's type, or None when the
            token type is not one this class recognises.  Callers may
            overwrite it after construction.
    """

    def __init__(self, intok: token.Token):
        self.children = []
        self.tok = intok
        self.type = self._classify(intok.type)

    @staticmethod
    def _classify(tok_type):
        """Map a token type to its AstType, or None if there is no mapping."""
        if tok_type == token.TokenType.ROOT:
            return AstType.ROOT

        # Group membership is checked in the same order as the lists are
        # declared at module level: operators, comparitors, constants.
        grouped = (
            (operators, AstType.OPERATOR),
            (comparitors, AstType.COMPARITOR),
            (constants, AstType.CONSTANT),
        )
        for members, node_type in grouped:
            if tok_type in members:
                return node_type

        if tok_type == token.TokenType.VARIABLE:
            return AstType.VARIABLE
        if tok_type == token.TokenType.IF:
            return AstType.IF
        if tok_type == token.TokenType.WHILE:
            return AstType.WHILE
        if tok_type == token.TokenType.SET:
            return AstType.SET
        return None

    def __repr__(self, level=0):
        """Render this node and its descendants, one per line.

        Each line is indented with one tab per level of depth; *level* is the
        depth of this node.
        """
        pieces = ["\t" * level + f"AstNode(type={self.type}, token={self.tok})\n"]
        pieces.extend(child.__repr__(level + 1) for child in self.children)
        return "".join(pieces)
|
||||
|
||||
def _operator_rank(tok_type) -> int:
    """Return how tightly an operator/comparator binds (lower binds looser)."""
    if tok_type in comparitors:
        return 0
    if tok_type in (token.TokenType.MULTIPLY, token.TokenType.DIVIDE):
        return 2
    # Remaining entries of `operators` (ADD, SUBTRACT).
    return 1


def _find_split_index(line_toks):
    """Return the index of the operator an expression should be split on.

    The preferred split point is the right-most binary operator with the
    lowest rank, which yields left-associative trees that respect precedence
    (comparison < add/subtract < multiply/divide).  An operator counts as
    binary only when the token before it is not itself an operator or
    comparator, so a leading sign is never chosen ahead of a real binary
    operator.

    Only tokens before the first PARAMOPEN are ranked: the closing token type
    is not visible in this module, so operators after an opening parenthesis
    cannot be told apart from top-level ones.  When ranking finds nothing,
    the first operator/comparator anywhere in the list is returned, or None
    if there is none.
    """
    split_index = None
    split_rank = None
    first_operator = None
    seen_paren = False
    # Start of the expression: an operator here has no left operand.
    prev_is_operator = True

    for i, tok in enumerate(line_toks):
        is_operator = tok.type in comparitors or tok.type in operators
        if tok.type == token.TokenType.PARAMOPEN:
            seen_paren = True
        elif is_operator:
            if first_operator is None:
                first_operator = i
            if not seen_paren and not prev_is_operator:
                rank = _operator_rank(tok.type)
                # `<=` keeps the right-most candidate of equal rank, which
                # makes `a - b - c` group as `(a - b) - c`.
                if split_rank is None or rank <= split_rank:
                    split_index, split_rank = i, rank
        prev_is_operator = is_operator

    return split_index if split_index is not None else first_operator


def build_expression_ast(line_toks: list[token.Token]) -> "AstNode | None":
    """Build an expression tree from the tokens of one expression.

    Args:
        line_toks: tokens of the expression, in source order.

    Returns:
        The root node of the expression, or None when *line_toks* is empty.

        * A single token becomes a leaf node.
        * A VARIABLE token followed by PARAMOPEN becomes a FUNC_CALL node;
          the tokens between the opening token and the last token are parsed
          as one expression and attached as its only child.
        * Otherwise the tokens are split on an operator or comparator (see
          _find_split_index) and each non-empty side becomes a child.
        * If no operator is present, a node for the first token is returned
          and the remaining tokens are ignored.
    """
    if not line_toks:
        return None

    if len(line_toks) == 1:
        return AstNode(line_toks[0])

    if (
        len(line_toks) > 2
        and line_toks[0].type == token.TokenType.VARIABLE
        and line_toks[1].type == token.TokenType.PARAMOPEN
    ):
        func_call_node = AstNode(line_toks[0])
        func_call_node.type = AstType.FUNC_CALL
        # The last token is assumed to close the call; it is dropped unchecked.
        arg_toks = line_toks[2:-1]
        if arg_toks:  # there are arguments
            func_call_node.children.append(build_expression_ast(arg_toks))
        return func_call_node

    split_at = _find_split_index(line_toks)
    if split_at is not None:
        node = AstNode(line_toks[split_at])
        for side in (line_toks[:split_at], line_toks[split_at + 1:]):
            child = build_expression_ast(side)
            # An empty side (e.g. a leading sign) contributes no child.
            if child is not None:
                node.children.append(child)
        return node

    return AstNode(line_toks[0])
|
||||
|
||||
|
||||
def _find_block_end(toks, start):
    """Return the index of the `end` line closing a block whose body starts at *start*.

    Lines that begin with an IF token open a nested block, so their own `end`
    line is skipped rather than taken as the end of the outer block.  Returns
    len(toks) when no closing line is found.
    """
    depth = 0
    for index in range(start, len(toks)):
        line = toks[index]
        if not line:
            continue
        if line[0].type == token.TokenType.IF:
            depth += 1
        elif len(line) == 1 and line[0].type == token.TokenType.END:
            if depth == 0:
                return index
            depth -= 1
    return len(toks)


def build_ast(toks: list[list[token.Token]], line_offset: int = 0) -> AstNode:
    """Build a syntax tree from tokenised lines.

    Args:
        toks: one list of tokens per line; empty lists are skipped.
        line_offset: number of lines preceding toks[0] in the original input.
            It is only used to report line numbers in error messages and is
            passed down when parsing the body of an `if` block.

    Returns:
        A ROOT node whose children are, in order:

        * a SET node (variable, expression) for each `let <var> = <expr>` line,
        * an IF node (condition, optional BODY sub-tree) for each `if` block,
        * an expression node for any other non-empty line.

        Lines starting with an END token add nothing.  Syntax problems are
        reported with print() and parsing continues.
    """
    root = AstNode(token.Token("", True))

    line_index = 0
    while line_index < len(toks):
        line_toks = toks[line_index]

        if not line_toks:
            line_index += 1
            continue

        first_tok = line_toks[0]

        if first_tok.type == token.TokenType.LET:
            if (
                len(line_toks) >= 4
                and line_toks[1].type == token.TokenType.VARIABLE
                and line_toks[2].type == token.TokenType.SET
            ):
                assignment_node = AstNode(line_toks[2])
                var_node = AstNode(line_toks[1])
                expr_node = build_expression_ast(line_toks[3:])

                assignment_node.children.append(var_node)
                if expr_node:
                    assignment_node.children.append(expr_node)
                root.children.append(assignment_node)
            else:
                print(f"Syntax error on line {line_offset + line_index + 1}: Invalid assignment.")

        elif first_tok.type == token.TokenType.IF:
            if_node = AstNode(first_tok)
            condition_node = build_expression_ast(line_toks[1:])
            if condition_node:
                if_node.children.append(condition_node)

            body_start_index = line_index + 1
            body_end_index = _find_block_end(toks, body_start_index)
            if body_end_index == len(toks):
                print(f"Syntax error on line {line_offset + line_index + 1}: 'if' statement without matching 'end'.")

            body_toks = toks[body_start_index:body_end_index]
            if body_toks:
                # The body is parsed as its own tree and its root is relabelled.
                body_node = build_ast(body_toks, line_offset + body_start_index)
                body_node.type = AstType.BODY
                if_node.children.append(body_node)

            root.children.append(if_node)
            # Resume after the closing line (the increment below skips it).
            line_index = body_end_index

        elif first_tok.type == token.TokenType.END:
            pass

        else:
            expr_node = build_expression_ast(line_toks)
            if expr_node:
                root.children.append(expr_node)

        line_index += 1

    return root
|
Reference in New Issue
Block a user