Abstract syntax tree

This commit is contained in:
2025-09-02 20:41:17 +10:00
parent 5ebf653342
commit 8d3ffb7b9c
5 changed files with 222 additions and 50 deletions

137
ast.py Normal file
View File

@@ -0,0 +1,137 @@
import token
from enum import Enum
class AstType(Enum):
    """Kinds of node that can appear in the abstract syntax tree."""

    CONSTANT = 0
    VARIABLE = 1
    COMPARITOR = 2
    OPERATOR = 3
    IF = 4
    WHILE = 5
    BODY = 6
    SET = 7
    ROOT = 8
    FUNC_CALL = 9
# Token types that AstNode classifies as AstType.OPERATOR.
operators = [
    token.TokenType.ADD,
    token.TokenType.SUBTRACT,
    token.TokenType.MULTIPLY,
    token.TokenType.DIVIDE,
]

# Token types that AstNode classifies as AstType.COMPARITOR.
comparitors = [
    token.TokenType.EQUAL,
    token.TokenType.GREATER,
    token.TokenType.LESSER,
]

# Token types that AstNode classifies as AstType.CONSTANT.
constants = [
    token.TokenType.INTEGER,
    token.TokenType.DOUBLE,
    token.TokenType.STRING,
    token.TokenType.CHAR,
    token.TokenType.BOOLEAN,
]
class AstNode:
    """A tree node wrapping a single token.

    ``tok`` holds the wrapped token, ``children`` starts out as an empty
    list, and ``type`` is the AstType derived from the token's type (or
    None when the token type has no mapping here). AstType.BODY and
    AstType.FUNC_CALL are never assigned by the constructor.
    """

    def __init__(self, intok: token.Token):
        self.children = []
        self.tok = intok

        kind = intok.type

        if kind == token.TokenType.ROOT:
            self.type = AstType.ROOT
            return

        # Token types that are classified by group membership.
        grouped = (
            (operators, AstType.OPERATOR),
            (comparitors, AstType.COMPARITOR),
            (constants, AstType.CONSTANT),
        )
        for group, node_type in grouped:
            if kind in group:
                self.type = node_type
                return

        # Token types that map one-to-one onto a node type; anything else
        # is left as None.
        direct = {
            token.TokenType.VARIABLE: AstType.VARIABLE,
            token.TokenType.IF: AstType.IF,
            token.TokenType.WHILE: AstType.WHILE,
            token.TokenType.SET: AstType.SET,
        }
        self.type = direct.get(kind)

    def __repr__(self, level=0):
        """Render this node and its subtree, one node per line, tab-indented by depth."""
        header = "\t" * level + f"AstNode(type={self.type}, token={self.tok})\n"
        return header + "".join(
            child.__repr__(level + 1) for child in self.children
        )
def _binary_rank(tok_type):
    """Return how tightly an operator token type binds, or None if it is not one.

    Lower numbers bind more loosely: comparisons (0), then add/subtract (1),
    then every other entry of ``operators`` (multiply/divide) (2).
    """
    if tok_type in comparitors:
        return 0
    if tok_type in (token.TokenType.ADD, token.TokenType.SUBTRACT):
        return 1
    if tok_type in operators:
        return 2
    return None


def _matching_close(line_toks, open_index):
    """Return the index of the PARAMCLOSE that balances the PARAMOPEN at open_index, or None."""
    depth = 0
    for index in range(open_index, len(line_toks)):
        kind = line_toks[index].type
        if kind == token.TokenType.PARAMOPEN:
            depth += 1
        elif kind == token.TokenType.PARAMCLOSE:
            depth -= 1
            if depth == 0:
                return index
    return None


def _find_split(line_toks):
    """Return the index of the operator the expression should be split on, or None.

    Only operators outside parentheses are considered. Among those, the
    loosest-binding one wins, and the rightmost of equals is taken so that
    chains such as ``a - b - c`` group as ``(a - b) - c``.
    """
    depth = 0
    best_index = None
    best_rank = None
    for index, tok in enumerate(line_toks):
        kind = tok.type
        if kind == token.TokenType.PARAMOPEN:
            depth += 1
            continue
        if kind == token.TokenType.PARAMCLOSE:
            # Clamp so a stray ")" does not hide every later operator.
            depth = max(depth - 1, 0)
            continue
        if depth:
            continue
        rank = _binary_rank(kind)
        if rank is None:
            continue
        # An operator directly after another operator has no left operand:
        # it is a sign belonging to the right-hand side (e.g. "3 - -5").
        if index > 0 and _binary_rank(line_toks[index - 1].type) is not None:
            continue
        if best_rank is None or rank <= best_rank:
            best_index, best_rank = index, rank
    return best_index


def build_expression_ast(line_toks: list[token.Token]) -> "AstNode | None":
    """Build the AST for one expression.

    Args:
        line_toks: The tokens of the expression, in source order.

    Returns:
        The root node of the expression, or None when there are no tokens
        (including an empty pair of parentheses).

    Handles, in order: a single token; an expression wrapped entirely in
    parentheses; a call of the form ``name( ... )`` spanning the whole
    expression (the tokens between the parentheses are parsed as a single
    argument expression); and binary operators/comparators, honouring
    parentheses, precedence and left associativity. Anything else falls
    back to a node for the first token.
    """
    if not line_toks:
        return None
    if len(line_toks) == 1:
        return AstNode(line_toks[0])

    last_index = len(line_toks) - 1
    first_type = line_toks[0].type

    # "( expr )": the parentheses only group, so parse what is inside.
    if (
        first_type == token.TokenType.PARAMOPEN
        and _matching_close(line_toks, 0) == last_index
    ):
        return build_expression_ast(line_toks[1:-1])

    # "name( args )": only a call when the parenthesis opened right after
    # the name is closed by the final token; otherwise something like
    # "f(x) + 1" would be swallowed as one call.
    if (
        first_type == token.TokenType.VARIABLE
        and line_toks[1].type == token.TokenType.PARAMOPEN
        and _matching_close(line_toks, 1) == last_index
    ):
        func_call_node = AstNode(line_toks[0])
        func_call_node.type = AstType.FUNC_CALL
        arg_node = build_expression_ast(line_toks[2:-1])
        if arg_node is not None:
            func_call_node.children.append(arg_node)
        return func_call_node

    split = _find_split(line_toks)
    if split is not None:
        node = AstNode(line_toks[split])
        left = build_expression_ast(line_toks[:split])
        right = build_expression_ast(line_toks[split + 1:])
        if left is not None:
            node.children.append(left)
        if right is not None:
            node.children.append(right)
        return node

    # No recognisable structure: keep the first token, as before.
    return AstNode(line_toks[0])
def _find_block_end(toks, start):
    """Return the index of the 'end' line closing a block whose body starts at ``start``.

    Nested ``if`` lines open their own block, so their ``end`` lines are
    skipped. Returns None when no matching ``end`` exists.
    """
    # NOTE(review): only IF opens a block here because build_ast does not
    # parse WHILE blocks yet; add WHILE when it does.
    depth = 0
    for index in range(start, len(toks)):
        line = toks[index]
        if not line:
            continue
        if line[0].type == token.TokenType.IF:
            depth += 1
        elif len(line) == 1 and line[0].type == token.TokenType.END:
            if depth == 0:
                return index
            depth -= 1
    return None


def build_ast(toks: list[list[token.Token]], line_offset: int = 0) -> AstNode:
    """Build the AST for a sequence of tokenised lines.

    Args:
        toks: One list of tokens per source line; empty lists are skipped.
        line_offset: Number of lines preceding ``toks`` in the original
            input. Only used so that syntax-error messages printed for a
            nested body report the line number within the whole input.

    Returns:
        A ROOT node whose children are the statements found, in order:
        SET nodes for ``let name = expr``, IF nodes (condition, then an
        optional BODY node) for ``if ... end``, and expression nodes for
        every other line. Stray ``end`` lines are ignored.

    Syntax errors are reported with ``print`` and parsing continues.
    """
    root = AstNode(token.Token("", True))
    line_index = 0
    while line_index < len(toks):
        line_toks = toks[line_index]
        if not line_toks:
            line_index += 1
            continue

        line_number = line_offset + line_index + 1
        first_tok = line_toks[0]

        if first_tok.type == token.TokenType.LET:
            is_assignment = (
                len(line_toks) >= 4
                and line_toks[1].type == token.TokenType.VARIABLE
                and line_toks[2].type == token.TokenType.SET
            )
            if is_assignment:
                assignment_node = AstNode(line_toks[2])
                assignment_node.children.append(AstNode(line_toks[1]))
                expr_node = build_expression_ast(line_toks[3:])
                if expr_node:
                    assignment_node.children.append(expr_node)
                root.children.append(assignment_node)
            else:
                print(f"Syntax error on line {line_number}: Invalid assignment.")
        elif first_tok.type == token.TokenType.IF:
            if_node = AstNode(first_tok)
            condition_node = build_expression_ast(line_toks[1:])
            if condition_node:
                if_node.children.append(condition_node)

            body_start_index = line_index + 1
            body_end_index = _find_block_end(toks, body_start_index)
            if body_end_index is None:
                print(f"Syntax error on line {line_number}: 'if' statement without matching 'end'.")
                # Treat the rest of the input as the body.
                body_end_index = len(toks)

            body_toks = toks[body_start_index:body_end_index]
            if body_toks:
                body_node = build_ast(body_toks, line_offset + body_start_index)
                body_node.type = AstType.BODY
                if_node.children.append(body_node)
            root.children.append(if_node)
            # Resume after the 'end' line (the increment below skips it).
            line_index = body_end_index
        elif first_tok.type == token.TokenType.END:
            pass
        else:
            expr_node = build_expression_ast(line_toks)
            if expr_node:
                root.children.append(expr_node)
        line_index += 1
    return root

View File

@@ -1,5 +1,6 @@
import sys
import preprocessor
import ast
if len(sys.argv) < 2:
print("Usage: hgc (file)")
@@ -8,5 +9,6 @@ if len(sys.argv) < 2:
with open(sys.argv[1], "r") as file:
lines = [preprocessor.process_line(line) for line in file]
for line in lines:
print(line)
ast = ast.build_ast(lines)
print(ast)

View File

@@ -1,44 +1,66 @@
import token
delimiters = ["=", ">", "<", "+", "-", "*", "/", " "]
delimiters = ["=", ">", "<", "+", "-", "*", "/", "(", ")"]
quick_tokens = [">", "<", "+", "-", "*", "/"]
quick_tokens = [">", "<", "+", "-", "*", "/", "(", ")"]
def doNothing():
return
def process_line(process: str) -> list[token.Token]:
buf = ""
tokens: list[token.Token] = []
prevEquals = False
for c in process:
if c in delimiters and buf != "":
tokens.append(token.Token(buf))
buf = ""
if prevEquals and c != '=':
tokens.append(token.Token("="))
prevEquals = False
if c in quick_tokens:
tokens.append(token.Token(c))
if buf != "":
tokens.append(token.Token(buf))
buf = ""
else:
match c:
case '\n':
doNothing()
case ' ':
doNothing()
case '=':
if prevEquals:
prevEquals = False
tokens.append(token.Token("=="))
else:
prevEquals = True
case _:
buf += c
if buf != "":
tokens.append(token.Token(buf))
i = 0
while i < len(process):
char = process[i]
if char == '"':
# End of buffer before string starts
if buf:
tokens.append(token.Token(buf))
buf = ""
i += 1
start = i
while i < len(process) and process[i] != '"':
i += 1
string_content = process[start:i]
# Create string token, assuming constructor wants quotes
tokens.append(token.Token(f'"{string_content}"'))
if i < len(process) and process[i] == '"':
i += 1 # Skip closing quote
continue
if char.isspace():
if buf:
tokens.append(token.Token(buf))
buf = ""
i += 1
continue
# Handle multi-char operators like '=='
if char == '=' and i + 1 < len(process) and process[i+1] == '=':
if buf:
tokens.append(token.Token(buf))
buf = ""
tokens.append(token.Token('=='))
i += 2
continue
if char in delimiters:
if buf:
tokens.append(token.Token(buf))
buf = ""
tokens.append(token.Token(char))
i += 1
continue
buf += char
i += 1
if buf:
tokens.append(token.Token(buf))
return tokens

View File

@@ -1,7 +1,7 @@
let dingus = 10
if "test" == "test"
print("Test is the same as test")
end
print(dingus)

View File

@@ -7,30 +7,37 @@ class TokenType(Enum):
CHAR = 4
BOOLEAN = 5
FUNCTION = 6
VARIABLE = 7
IF = 8
ELSE = 9
WHILE = 10
LET = 11
END = 12
VARIABLE = 8
IF = 9
ELSE = 10
WHILE = 11
LET = 12
END = 13
ADD = 13
SUBTRACT = 14
MULTIPLY = 15
DIVIDE = 16
ADD = 14
SUBTRACT = 15
MULTIPLY = 16
DIVIDE = 17
SET = 17
SET = 18
EQUAL = 18
GREATER = 19
LESSER = 20
EQUAL = 19
GREATER = 20
LESSER = 21
PARAMOPEN = 22
PARAMCLOSE = 23
ROOT = 24
UNKNOWN = 0
class Token:
def __init__(self, tok: str):
def __init__(self, tok: str, isroot = False):
self.value = tok
if isroot:
self.type = TokenType.ROOT
else:
self.type = get_type(tok)
def __repr__(self) -> str:
@@ -69,6 +76,10 @@ def get_type(process: str) -> TokenType:
return TokenType.GREATER
case "<":
return TokenType.LESSER
case "(":
return TokenType.PARAMOPEN
case ")":
return TokenType.PARAMCLOSE
# String/Char Literals
if len(process) >= 2: