working on generating an AST with scope and stuff

This commit is contained in:
SpookyDervish
2025-09-01 06:44:33 +10:00
parent 67fe809c57
commit eee9325ab8
6 changed files with 187 additions and 43 deletions

View File

@@ -1,4 +1,5 @@
from console import console from console import console
from sys import exit
def traceback(code: str, error_type: str, error_message: str, line: int | None = None, start_column: int | None = None, end_column: int | None = None): def traceback(code: str, error_type: str, error_message: str, line: int | None = None, start_column: int | None = None, end_column: int | None = None):
@@ -19,5 +20,5 @@ def traceback(code: str, error_type: str, error_message: str, line: int | None =
else: else:
console.print(f"[bold red]{error_type}: {error_message}") console.print(f"[bold red]{error_type}: {error_message}")
exit(1)

133
ground_ast.py Normal file
View File

@@ -0,0 +1,133 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Optional

from error import traceback
from tokenizer import Token, TokenType
# AST node definitions.
#
# ``parent`` fields are back-references to the enclosing scope. They are
# excluded from the generated ``__eq__`` (``compare=False``): a child points at
# its parent and the parent lists the child, so comparing two equal but
# distinct trees would otherwise recurse until ``RecursionError``.


@dataclass
class RootNode:
    """Top-level scope of a program."""

    statements: list[Any]  # nodes that belong directly to the root scope


@dataclass
class InstructionNode:
    """One instruction together with the argument nodes that follow it."""

    instruction: str  # instruction keyword, e.g. "set" or "endfun"
    parent: FunctionNode | RootNode = field(compare=False)  # enclosing scope
    arguments: list[Any]  # argument nodes in source order


@dataclass
class StringNode:
    """String argument of an instruction."""

    value: str


@dataclass
class NumberNode:
    """Numeric argument of an instruction (built from INTEGER or FLOAT tokens)."""

    value: float


@dataclass
class VarRefNode:
    """Variable-reference argument of an instruction."""

    var_name: str


@dataclass
class VarPointerNode:
    """Variable-pointer argument of an instruction."""

    var_name: str


@dataclass
class FunctionCallNode:
    """Function-reference argument of an instruction."""

    func_name: str


@dataclass
class TypeNode:
    """Type argument of an instruction."""

    value: str


@dataclass
class ArgNode:
    """One declared parameter of a function."""

    arg_type: str
    name: str | None  # None until the parameter's name token has been seen
    parent: FunctionNode = field(compare=False)  # function declaring the parameter


@dataclass
class FunctionNode:
    """A function declaration; also the scope holding the function's body."""

    args: list[ArgNode]  # declared parameters in source order
    statements: list[Any]  # nodes that make up the function body
    parent: FunctionNode | RootNode = field(compare=False)  # enclosing scope
    return_type: Optional[str] = None
    name: Optional[str] = None
def generate_ast(tokens: list[Token], code: str) -> RootNode:
    """Build a scoped AST from a flat token stream.

    Every INSTRUCTION token starts a new node: ``fun`` opens a FunctionNode,
    any other instruction becomes an InstructionNode. The tokens that follow
    are attached to that node until the next INSTRUCTION token. Once a
    function header is complete, later nodes are stored in that function's
    ``statements`` until ``endfun`` switches back to the enclosing scope.
    The ``endfun`` instruction itself is stored in the enclosing scope.

    Token types without a branch below are ignored, as are tokens that
    appear before the first instruction.

    Args:
        tokens: Tokens to parse, normally terminated by an EOF token.
        code: Source text, forwarded to ``traceback`` for error reporting.

    Returns:
        The root node of the generated tree.

    Syntax errors are reported through ``traceback``.
    NOTE(review): ``traceback`` is assumed to terminate the program -- confirm
    in error.py.
    """
    # Token types that are only valid as instruction arguments, mapped to the
    # node class to build and the wording used in the error message.
    simple_arguments = {
        TokenType.STRING: (StringNode, "string"),
        TokenType.INTEGER: (NumberNode, "number"),
        TokenType.FLOAT: (NumberNode, "number"),
        TokenType.VARIABLE_REFERENCE: (VarRefNode, "variable reference"),
    }

    root_node = RootNode([])
    scope = root_node  # node whose ``statements`` receives finished nodes
    current_node = None  # node currently collecting tokens
    current_node_type = None  # "inst" or "func", describes current_node
    last_token = None

    # TODO: this is a single hand-rolled pass over the tokens; replace it with
    # a proper parser once the grammar settles.
    for token in tokens:
        if token.type == TokenType.EOF:
            break

        if token.type == TokenType.INSTRUCTION:
            # A new instruction finishes the node that was being collected.
            if current_node is not None:
                scope.statements.append(current_node)
                if current_node_type == "func":
                    # The header is complete: whatever follows, including a
                    # nested ``fun``, belongs to this function's body.
                    scope = current_node

            if token.value == "fun":
                current_node = FunctionNode([], [], scope)
                current_node_type = "func"
            else:
                if token.value == "endfun":
                    if isinstance(scope, RootNode):
                        # The root has no parent scope to return to.
                        traceback(code, "SyntaxError", "\"endfun\" without a matching \"fun\".")
                    else:
                        scope = scope.parent  # go up one scope
                current_node = InstructionNode(token.value, scope, [])
                current_node_type = "inst"

        elif current_node is not None:
            is_inst = current_node_type == "inst"
            is_func = current_node_type == "func"
            after_type = last_token is not None and last_token.type == TokenType.TYPE

            if token.type in simple_arguments:
                node_class, label = simple_arguments[token.type]
                if is_inst:
                    current_node.arguments.append(node_class(token.value))
                else:
                    traceback(code, "SyntaxError", f"Expected instruction, not {label}.")

            elif token.type == TokenType.VARIABLE_POINTER:
                if is_inst:
                    current_node.arguments.append(VarPointerNode(token.value))
                elif is_func and after_type:
                    if current_node.args:
                        # Names the parameter whose type was just read.
                        current_node.args[-1].name = token.value
                    else:
                        # The preceding type was the return type, so there is
                        # no parameter to name yet.
                        traceback(code, "SyntaxError", "Expected function name, not variable pointer.")
                else:
                    traceback(code, "SyntaxError", "Expected instruction, not variable pointer.")

            elif token.type == TokenType.TYPE:
                if is_inst:
                    current_node.arguments.append(TypeNode(token.value))
                elif is_func:
                    after_name = (
                        last_token is not None
                        and last_token.type == TokenType.FUNCTION_REFERENCE
                    )
                    if after_name or current_node.return_type:
                        # Types after the return type declare parameters.
                        current_node.args.append(ArgNode(
                            arg_type=token.value,
                            name=None,
                            parent=current_node,
                        ))
                    else:
                        current_node.return_type = token.value
                else:
                    traceback(code, "SyntaxError", "Expected instruction, not type.")

            elif token.type == TokenType.FUNCTION_REFERENCE:
                if is_func and after_type:
                    current_node.name = token.value
                elif is_inst:
                    current_node.arguments.append(FunctionCallNode(token.value))
                else:
                    traceback(code, "SyntaxError", "Expected instruction or function return type, got function reference.")

        last_token = token

    # Flush the last node into the scope it was parsed in. Doing this after
    # the loop also covers token lists that lack a trailing EOF token.
    if current_node is not None:
        scope.statements.append(current_node)

    return root_node

3
ground_types.py Normal file
View File

@@ -0,0 +1,3 @@
class String:
    """Wrapper object holding a single string value."""

    def __init__(self, value: str) -> None:
        """Store ``value`` on the instance as ``self.value``."""
        self.value = value

View File

@@ -1,17 +1,19 @@
from tokenizer import tokenize from tokenizer import tokenize
from ground_ast import generate_ast
from rich import print from rich import print
from time import time from time import time
def main():
    """Tokenize and parse ``test2.grnd``, then print the AST and the elapsed time."""
    start = time()

    # The context manager closes the file even if reading raises.
    with open("test2.grnd", "r") as file:
        code = file.read()

    tokens = tokenize(code)
    ast = generate_ast(tokens, code)

    # Measured before printing so that output time is not included.
    compile_time = time() - start

    print(ast)
    print(f"Compiled in {compile_time} seconds.")

View File

@@ -1,4 +1,12 @@
set &x 0 set &myName "Nathaniel"
@loop set &myAge 10
add $x 1 &x
jump %loop fun -list !split -string &str -string &determiner
set &x 2
set &y 5
add $x $y &x
stdlnout $x
endfun
# should error
stdlnout $x

View File

@@ -3,7 +3,6 @@ from typing import Any
from enum import Enum from enum import Enum
from error import traceback from error import traceback
from string import ascii_letters, digits, whitespace from string import ascii_letters, digits, whitespace
import sys
class TokenType(Enum): class TokenType(Enum):
@@ -20,6 +19,7 @@ class TokenType(Enum):
COMMENT = 11 # example: # hi there COMMENT = 11 # example: # hi there
LINE_REFERENCE = 12 # example: %12 LINE_REFERENCE = 12 # example: %12
LABEL_REFERENCE = 13 # example: %myLabel LABEL_REFERENCE = 13 # example: %myLabel
EOF = 14
@dataclass @dataclass
class Token: class Token:
@@ -42,10 +42,6 @@ def tokenize(input_string: str):
"extern", "jump", "gettype", "exists", "setlist", "setlistat", "getlistat", "getlistsize", "extern", "jump", "gettype", "exists", "setlist", "setlistat", "getlistat", "getlistsize",
"listappend", "if" "listappend", "if"
] ]
types = [
"string", "bool", "list", "char", "int", "double"
]
while pos < len(input_string): while pos < len(input_string):
current_char = input_string[pos] current_char = input_string[pos]
@@ -73,17 +69,17 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char in digits or current_char == ".": if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column) traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n": elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column) traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
sys.exit(1)
while pos < len(input_string): while pos < len(input_string):
current_char = input_string[pos] current_char = input_string[pos]
@@ -113,17 +109,17 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char in digits or current_char == ".": if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column) traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n": elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column) traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
sys.exit(1)
while pos < len(input_string): while pos < len(input_string):
current_char = input_string[pos] current_char = input_string[pos]
@@ -152,17 +148,17 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a function name, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a function name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char in digits or current_char == ".": if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Function names can't start with numbers.", line, column, column) traceback(input_string, "SyntaxError", "Function names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n": elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a function name.", line, column, column) traceback(input_string, "SyntaxError", "Expected a function name.", line, column, column)
sys.exit(1)
while pos < len(input_string): while pos < len(input_string):
current_char = input_string[pos] current_char = input_string[pos]
@@ -191,17 +187,17 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a list reference, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a list reference, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char in digits or current_char == ".": if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "List references can't start with numbers.", line, column, column) traceback(input_string, "SyntaxError", "List references can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n": elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a list reference.", line, column, column) traceback(input_string, "SyntaxError", "Expected a list reference.", line, column, column)
sys.exit(1)
while pos < len(input_string): while pos < len(input_string):
current_char = input_string[pos] current_char = input_string[pos]
@@ -230,14 +226,14 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a type name, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a type name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char == "\n": if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a type", line, column, column) traceback(input_string, "SyntaxError", "Expected a type", line, column, column)
sys.exit(1)
start_col = column start_col = column
while pos < len(input_string): while pos < len(input_string):
@@ -255,14 +251,10 @@ def tokenize(input_string: str):
pos += 1 pos += 1
column += 1 column += 1
if current_token in types: tokens.append(Token(
tokens.append(Token( TokenType.TYPE,
TokenType.TYPE, value=current_token
value=current_token ))
))
else:
traceback(input_string, "SyntaxError", f"\"{current_token}\" is not a valid type.", line, start_col, column)
sys.exit(1)
current_token = "" current_token = ""
elif current_char == "@": elif current_char == "@":
@@ -271,14 +263,14 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a label decleration, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a label decleration, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char == "\n": if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a label decleration", line, column, column) traceback(input_string, "SyntaxError", "Expected a label decleration", line, column, column)
sys.exit(1)
start_col = column start_col = column
while pos < len(input_string): while pos < len(input_string):
@@ -308,12 +300,12 @@ def tokenize(input_string: str):
if len(input_string) == pos: if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a label or line reference, got <EOF>", line, column, column) traceback(input_string, "SyntaxError", "Expected a label or line reference, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
if current_char == "\n": if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a label or line reference", line, column, column) traceback(input_string, "SyntaxError", "Expected a label or line reference", line, column, column)
sys.exit(1)
start_col = column start_col = column
if current_char in digits or current_char == ".": # its a line number reference if current_char in digits or current_char == ".": # its a line number reference
@@ -329,7 +321,7 @@ def tokenize(input_string: str):
break break
if not current_char in digits: # random ass character in the middle of the line number if not current_char in digits: # random ass character in the middle of the line number
traceback(input_string, "SyntaxError", "Malformed line number.", line, start_col, column) traceback(input_string, "SyntaxError", "Malformed line number.", line, start_col, column)
sys.exit(1)
current_token += current_char current_token += current_char
pos += 1 pos += 1
@@ -374,7 +366,7 @@ def tokenize(input_string: str):
column += 1 column += 1
if pos > len(input_string)-1: if pos > len(input_string)-1:
traceback(input_string, "SyntaxError", f"String was never closed.", line, start_col, start_col) traceback(input_string, "SyntaxError", f"String was never closed.", line, start_col, start_col)
sys.exit(1)
current_char = input_string[pos] current_char = input_string[pos]
tokens.append(Token( tokens.append(Token(
@@ -407,7 +399,7 @@ def tokenize(input_string: str):
)) ))
else: else:
traceback(input_string, "SyntaxError", f"\"{current_token}\" isn't a valid instruction.", line, start_col, column) traceback(input_string, "SyntaxError", f"\"{current_token}\" isn't a valid instruction.", line, start_col, column)
sys.exit(1)
current_token = "" current_token = ""
elif current_char in digits: elif current_char in digits:
@@ -427,7 +419,7 @@ def tokenize(input_string: str):
if not current_char in digits: if not current_char in digits:
traceback(input_string, "SyntaxError", "Malformed number.", line, start_col, column) traceback(input_string, "SyntaxError", "Malformed number.", line, start_col, column)
sys.exit(1)
current_token += current_char current_token += current_char
@@ -451,9 +443,14 @@ def tokenize(input_string: str):
continue continue
else: else:
traceback(input_string, "SyntaxError", f"Unkown token \"{current_char}\"", line, column, column) traceback(input_string, "SyntaxError", f"Unkown token \"{current_char}\"", line, column, column)
sys.exit(1)
column += 1 column += 1
pos += 1 pos += 1
tokens.append(Token(
TokenType.EOF,
None
))
return tokens return tokens