Abstract syntax tree
This commit is contained in:
137
ast.py
Normal file
137
ast.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import token
|
||||
from enum import Enum
|
||||
|
||||
class AstType(Enum):
    """Category assigned to a node of the abstract syntax tree."""

    # Leaves of an expression.
    CONSTANT = 0
    VARIABLE = 1

    # Infix tokens: comparisons and arithmetic.
    COMPARITOR = 2
    OPERATOR = 3

    # Block statements and the container for their nested lines.
    IF = 4
    WHILE = 5
    BODY = 6

    # Assignment, the tree root, and function invocation.
    SET = 7
    ROOT = 8
    FUNC_CALL = 9
|
||||
|
||||
# Token types that AstNode classifies as AstType.OPERATOR.
operators = [
    token.TokenType.ADD,
    token.TokenType.SUBTRACT,
    token.TokenType.MULTIPLY,
    token.TokenType.DIVIDE,
]

# Token types that AstNode classifies as AstType.COMPARITOR.
comparitors = [
    token.TokenType.EQUAL,
    token.TokenType.GREATER,
    token.TokenType.LESSER,
]

# Token types that AstNode classifies as AstType.CONSTANT.
constants = [
    token.TokenType.INTEGER,
    token.TokenType.DOUBLE,
    token.TokenType.STRING,
    token.TokenType.CHAR,
    token.TokenType.BOOLEAN,
]
|
||||
|
||||
class AstNode:
    """A node of the syntax tree: one token, a category, and child nodes."""

    def __init__(self, intok: token.Token):
        """Wrap ``intok`` and derive the node category from its token type.

        ``type`` is left as ``None`` when the token type matches none of the
        known categories.
        """
        self.children = []
        self.tok = intok

        kinds = token.TokenType
        resolved = None
        if intok.type == kinds.ROOT:
            resolved = AstType.ROOT
        else:
            # Group membership is tested first, in the same order as before:
            # operators, comparitors, constants.
            grouped = (
                (operators, AstType.OPERATOR),
                (comparitors, AstType.COMPARITOR),
                (constants, AstType.CONSTANT),
            )
            for members, category in grouped:
                if intok.type in members:
                    resolved = category
                    break
            else:
                # Then the token types that map one-to-one onto a category.
                direct = (
                    (kinds.VARIABLE, AstType.VARIABLE),
                    (kinds.IF, AstType.IF),
                    (kinds.WHILE, AstType.WHILE),
                    (kinds.SET, AstType.SET),
                )
                for tok_kind, category in direct:
                    if intok.type == tok_kind:
                        resolved = category
                        break
        self.type = resolved

    def __repr__(self, level=0):
        """Render this node and its subtree, one node per line.

        Each line is indented with ``level`` tab characters; children are
        rendered one level deeper.
        """
        indent = "\t" * level
        pieces = [f"{indent}AstNode(type={self.type}, token={self.tok})\n"]
        pieces.extend(child.__repr__(level + 1) for child in self.children)
        return "".join(pieces)
|
||||
|
||||
def _binding_power(tok_type):
    """Return the precedence rank of an infix token type, or None.

    Lower ranks bind more loosely: comparisons (0), then add/subtract (1),
    then every other arithmetic operator (2).
    """
    if tok_type in comparitors:
        return 0
    if tok_type in operators:
        additive = (token.TokenType.ADD, token.TokenType.SUBTRACT)
        return 1 if tok_type in additive else 2
    return None


def _matching_close(line_toks, open_index):
    """Return the index of the PARAMCLOSE pairing the PARAMOPEN at ``open_index``, or None."""
    depth = 0
    for i in range(open_index, len(line_toks)):
        kind = line_toks[i].type
        if kind == token.TokenType.PARAMOPEN:
            depth += 1
        elif kind == token.TokenType.PARAMCLOSE:
            depth -= 1
            if depth == 0:
                return i
    return None


def _find_split(line_toks):
    """Return the index of the infix token the expression should split at, or None.

    Only tokens outside parentheses and preceded by an operand qualify. Among
    them the loosest-binding one wins; ties go to the rightmost so that
    operators of equal precedence associate to the left.
    """
    depth = 0
    best_index = None
    best_rank = None
    prev_is_operand = False
    for i, tok in enumerate(line_toks):
        kind = tok.type
        if kind == token.TokenType.PARAMOPEN:
            depth += 1
            prev_is_operand = False
            continue
        if kind == token.TokenType.PARAMCLOSE:
            # Clamp so a stray ')' cannot hide later top-level operators.
            depth = max(depth - 1, 0)
            prev_is_operand = True
            continue
        rank = _binding_power(kind)
        if rank is None:
            prev_is_operand = True
            continue
        if depth == 0 and prev_is_operand and (best_rank is None or rank <= best_rank):
            best_index, best_rank = i, rank
        # Whatever follows an operator is in prefix position.
        prev_is_operand = False
    return best_index


def build_expression_ast(line_toks: list[token.Token]) -> AstNode | None:
    """Build an expression tree from the tokens of one expression.

    The expression is split at its loosest-binding top-level infix token, so
    ``a + b * c`` groups as ``a + (b * c)`` and ``a - b - c`` as
    ``(a - b) - c``. Tokens inside parentheses are never split points.

    Args:
        line_toks: Tokens of the expression, in source order.

    Returns:
        The root node of the expression, or ``None`` when there is nothing to
        build (empty input or an empty pair of parentheses).
    """
    if not line_toks:
        return None

    if len(line_toks) == 1:
        return AstNode(line_toks[0])

    split_at = _find_split(line_toks)
    if split_at is not None:
        node = AstNode(line_toks[split_at])
        for side in (line_toks[:split_at], line_toks[split_at + 1:]):
            child = build_expression_ast(side)
            if child is not None:
                node.children.append(child)
        return node

    first = line_toks[0]

    # A leading operator/comparitor with no left operand: keep it as a node
    # with the remainder as its only child.
    if _binding_power(first.type) is not None:
        node = AstNode(first)
        operand = build_expression_ast(line_toks[1:])
        if operand is not None:
            node.children.append(operand)
        return node

    # Function call: name '(' args ')'. The whole argument span is parsed as
    # a single expression.
    if (
        len(line_toks) > 2
        and first.type == token.TokenType.VARIABLE
        and line_toks[1].type == token.TokenType.PARAMOPEN
    ):
        func_call_node = AstNode(first)
        func_call_node.type = AstType.FUNC_CALL
        close = _matching_close(line_toks, 1)
        # An unclosed call keeps everything after '(' as the argument.
        arg_toks = line_toks[2:close] if close is not None else line_toks[2:]
        argument = build_expression_ast(arg_toks)
        if argument is not None:
            func_call_node.children.append(argument)
        return func_call_node

    # Parenthesised group: parse what is inside the parentheses.
    if first.type == token.TokenType.PARAMOPEN:
        close = _matching_close(line_toks, 0)
        inner = line_toks[1:close] if close is not None else line_toks[1:]
        return build_expression_ast(inner)

    # Nothing recognisable to combine: fall back to the first token.
    return AstNode(first)
|
||||
|
||||
|
||||
def _find_block_end(toks, start):
    """Return the index of the ``end`` line closing a block whose body starts at ``start``.

    Nested ``if``/``while`` lines raise the nesting depth and each lone ``end``
    lowers it, so the block is closed by its own ``end`` rather than by the
    first one encountered. Returns ``len(toks)`` when no closing ``end`` exists.
    """
    openers = (token.TokenType.IF, token.TokenType.WHILE)
    depth = 0
    for idx in range(start, len(toks)):
        line = toks[idx]
        if not line:
            continue
        head = line[0].type
        if head in openers:
            depth += 1
        elif len(line) == 1 and head == token.TokenType.END:
            if depth == 0:
                return idx
            depth -= 1
    return len(toks)


def build_ast(toks: list[list[token.Token]], line_offset: int = 0) -> AstNode:
    """Build the syntax tree for a sequence of tokenized lines.

    Args:
        toks: One token list per source line; empty lists are skipped.
        line_offset: Number of source lines preceding ``toks[0]``. Used only
            so that errors reported from nested block bodies carry the line
            number of the original input instead of one relative to the body.

    Returns:
        A ROOT node whose children are the statements found, in order.
        Syntax errors are printed and the offending statement is skipped.
    """
    root = AstNode(token.Token("", True))
    block_openers = (token.TokenType.IF, token.TokenType.WHILE)

    line_index = 0
    while line_index < len(toks):
        line_toks = toks[line_index]

        if not line_toks:
            line_index += 1
            continue

        first_tok = line_toks[0]
        source_line = line_offset + line_index + 1

        if first_tok.type == token.TokenType.LET:
            # Expected shape: let <variable> = <expression>
            if (
                len(line_toks) >= 4
                and line_toks[1].type == token.TokenType.VARIABLE
                and line_toks[2].type == token.TokenType.SET
            ):
                assignment_node = AstNode(line_toks[2])
                assignment_node.children.append(AstNode(line_toks[1]))
                expr_node = build_expression_ast(line_toks[3:])
                if expr_node:
                    assignment_node.children.append(expr_node)
                root.children.append(assignment_node)
            else:
                print(f"Syntax error on line {source_line}: Invalid assignment.")

        elif first_tok.type in block_openers:
            # Block statement: condition on this line, body up to its 'end'.
            block_node = AstNode(first_tok)
            condition_node = build_expression_ast(line_toks[1:])
            if condition_node:
                block_node.children.append(condition_node)

            body_start_index = line_index + 1
            body_end_index = _find_block_end(toks, body_start_index)
            if body_end_index == len(toks):
                keyword = "if" if first_tok.type == token.TokenType.IF else "while"
                print(f"Syntax error on line {source_line}: '{keyword}' statement without matching 'end'.")

            body_toks = toks[body_start_index:body_end_index]
            if body_toks:
                # The body is parsed as its own tree and its root relabelled.
                body_node = build_ast(body_toks, line_offset + body_start_index)
                body_node.type = AstType.BODY
                block_node.children.append(body_node)

            root.children.append(block_node)
            # Resume at the 'end' line; the increment below steps past it.
            line_index = body_end_index

        elif first_tok.type == token.TokenType.END:
            # A stray 'end' outside any block is ignored.
            pass

        else:
            expr_node = build_expression_ast(line_toks)
            if expr_node:
                root.children.append(expr_node)

        line_index += 1

    return root
|
6
main.py
6
main.py
@@ -1,5 +1,6 @@
|
||||
import sys
|
||||
import preprocessor
|
||||
import ast
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: hgc (file)")
|
||||
@@ -8,5 +9,6 @@ if len(sys.argv) < 2:
|
||||
# Tokenize every line of the input file.
with open(sys.argv[1], "r") as file:
    lines = [preprocessor.process_line(line) for line in file]

for line in lines:
    print(line)

# Keep the result under its own name: assigning it to ``ast`` would rebind
# the imported ``ast`` module and make it unusable afterwards.
tree = ast.build_ast(lines)

print(tree)
|
@@ -1,44 +1,66 @@
|
||||
import token
|
||||
|
||||
# Characters that end the token being accumulated in process_line.
# Each name used to be assigned twice in a row; only the second, effective
# value is kept.
delimiters = ["=", ">", "<", "+", "-", "*", "/", "(", ")"]

# Single characters that form a complete token on their own.
quick_tokens = [">", "<", "+", "-", "*", "/", "(", ")"]
|
||||
|
||||
def doNothing():
    """Explicit no-op; always returns ``None``."""
    return None
|
||||
|
||||
def process_line(process: str) -> list[token.Token]:
    """Split one source line into tokens.

    The line is scanned once, left to right:

    * a double-quoted string becomes a single token that keeps its quotes;
    * whitespace separates tokens and is otherwise dropped;
    * ``==`` is emitted as one token;
    * every character in ``delimiters`` is emitted as its own token;
    * any other run of characters is accumulated into one token.

    The superseded per-character loop that used to run before this scan has
    been removed: with both passes present every token was appended twice.

    Args:
        process: The raw text of the line.

    Returns:
        The tokens of the line, in source order.
    """
    tokens: list[token.Token] = []
    buf = ""
    length = len(process)

    def flush() -> None:
        """Emit the accumulated word, if any, and reset the buffer."""
        nonlocal buf
        if buf:
            tokens.append(token.Token(buf))
            buf = ""

    i = 0
    while i < length:
        char = process[i]

        if char == '"':
            flush()
            closing = process.find('"', i + 1)
            if closing == -1:
                # Unterminated string: take the rest of the line as content.
                closing = length
            # The token text is re-wrapped in quotes.
            tokens.append(token.Token(f'"{process[i + 1:closing]}"'))
            i = closing + 1
            continue

        if char.isspace():
            flush()
            i += 1
            continue

        # Two-character operator; must be tested before the single '='.
        if process.startswith("==", i):
            flush()
            tokens.append(token.Token("=="))
            i += 2
            continue

        if char in delimiters:
            flush()
            tokens.append(token.Token(char))
            i += 1
            continue

        buf += char
        i += 1

    flush()
    return tokens
|
@@ -1,7 +1,7 @@
|
||||
let dingus = 10
|
||||
|
||||
if "test" == "test"
|
||||
|
||||
print("Test is the same as test")
|
||||
end
|
||||
|
||||
print(dingus)
|
43
token.py
43
token.py
@@ -7,30 +7,37 @@ class TokenType(Enum):
|
||||
CHAR = 4
|
||||
BOOLEAN = 5
|
||||
|
||||
FUNCTION = 6
|
||||
VARIABLE = 7
|
||||
IF = 8
|
||||
ELSE = 9
|
||||
WHILE = 10
|
||||
LET = 11
|
||||
END = 12
|
||||
VARIABLE = 8
|
||||
IF = 9
|
||||
ELSE = 10
|
||||
WHILE = 11
|
||||
LET = 12
|
||||
END = 13
|
||||
|
||||
ADD = 13
|
||||
SUBTRACT = 14
|
||||
MULTIPLY = 15
|
||||
DIVIDE = 16
|
||||
ADD = 14
|
||||
SUBTRACT = 15
|
||||
MULTIPLY = 16
|
||||
DIVIDE = 17
|
||||
|
||||
SET = 17
|
||||
SET = 18
|
||||
|
||||
EQUAL = 18
|
||||
GREATER = 19
|
||||
LESSER = 20
|
||||
EQUAL = 19
|
||||
GREATER = 20
|
||||
LESSER = 21
|
||||
|
||||
PARAMOPEN = 22
|
||||
PARAMCLOSE = 23
|
||||
|
||||
ROOT = 24
|
||||
|
||||
UNKNOWN = 0
|
||||
|
||||
class Token:
|
||||
def __init__(self, tok: str):
|
||||
def __init__(self, tok: str, isroot = False):
|
||||
self.value = tok
|
||||
if isroot:
|
||||
self.type = TokenType.ROOT
|
||||
else:
|
||||
self.type = get_type(tok)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
@@ -69,6 +76,10 @@ def get_type(process: str) -> TokenType:
|
||||
return TokenType.GREATER
|
||||
case "<":
|
||||
return TokenType.LESSER
|
||||
case "(":
|
||||
return TokenType.PARAMOPEN
|
||||
case ")":
|
||||
return TokenType.PARAMCLOSE
|
||||
|
||||
# String/Char Literals
|
||||
if len(process) >= 2:
|
||||
|
Reference in New Issue
Block a user