working on generating an AST with scope and stuff
error.py (3 changed lines)

@@ -1,4 +1,5 @@
 from console import console
+from sys import exit


 def traceback(code: str, error_type: str, error_message: str, line: int | None = None, start_column: int | None = None, end_column: int | None = None):
@@ -19,5 +20,5 @@ def traceback(code: str, error_type: str, error_message: str, line: int | None =
     else:
         console.print(f"[bold red]{error_type}: {error_message}")

-
+    exit(1)

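As a quick illustrative aside (not part of the commit): with this change traceback() both prints the Rich-formatted message and terminates the process via exit(1), so callers no longer need their own sys.exit(1). A minimal sketch of a call, using the keyword names from the visible signature; the source text, error type, and positions below are made up:

    from error import traceback

    source = 'set &myAge "ten"\nadd $myAge 1 &myAge\n'
    # Hypothetical report for line 2: prints the message, then exits with status 1.
    traceback(source, "TypeError", "Cannot add a number to a string.",
              line=2, start_column=1, end_column=4)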
ground_ast.py (new file, 133 lines)

@@ -0,0 +1,133 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from tokenizer import Token, TokenType
+from typing import Optional, Any
+from error import traceback
+
+
+@dataclass
+class RootNode:
+    statements: list[Any]
+@dataclass
+class InstructionNode:
+    instruction: str
+    parent: FunctionNode | RootNode
+    arguments: list[Any]
+@dataclass
+class StringNode:
+    value: str
+@dataclass
+class NumberNode:
+    value: float
+@dataclass
+class VarRefNode:
+    var_name: str
+@dataclass
+class VarPointerNode:
+    var_name: str
+@dataclass
+class FunctionCallNode:
+    func_name: str
+@dataclass
+class TypeNode:
+    value: str
+@dataclass
+class ArgNode:
+    arg_type: str
+    name: str | None
+    parent: FunctionNode
+@dataclass
+class FunctionNode:
+    args: list[ArgNode]
+    statements: list[Any]
+    parent: FunctionNode | RootNode
+    return_type: Optional[str] = None
+    name: Optional[str] = None
+
+def generate_ast(tokens: list[Token], code: str) -> RootNode:
+    root_node = RootNode([])
+
+    current_node = None
+    last_token = None
+    current_node_type = None
+    scope = root_node
+
+    # todo: this is the absolute WORST way i could do this, but i could not care less lmao
+    # its not even performant......
+    for token in tokens:
+        print(token)
+        if token.type == TokenType.INSTRUCTION:
+            if current_node:
+                scope.statements.append(current_node)
+
+            if token.value != "fun":
+                if current_node_type == "func":
+                    scope = current_node
+
+                current_node = InstructionNode(token.value, scope, [])
+                current_node_type = "inst"
+
+                if current_node.instruction == "endfun":
+                    scope = scope.parent # go up one scope
+
+                    current_node.parent = scope
+            else:
+                current_node = FunctionNode([], [], scope)
+                current_node_type = "func"
+
+        if current_node:
+            if token.type == TokenType.STRING:
+                if current_node_type == "inst":
+                    current_node.arguments.append(StringNode(token.value))
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction, not string.")
+
+            elif token.type == TokenType.INTEGER or token.type == TokenType.FLOAT:
+                if current_node_type == "inst":
+                    current_node.arguments.append(NumberNode(token.value))
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction, not number.")
+
+            elif token.type == TokenType.VARIABLE_POINTER:
+                if current_node_type == "inst":
+                    current_node.arguments.append(VarPointerNode(token.value))
+                elif last_token and last_token.type == TokenType.TYPE and current_node_type == "func":
+                    print(current_node)
+                    current_node.args[-1].name = token.value
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction, not variable pointer.")
+
+            elif token.type == TokenType.VARIABLE_REFERENCE:
+                if current_node_type == "inst":
+                    current_node.arguments.append(VarRefNode(token.value))
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction, not variable reference.")
+
+            elif token.type == TokenType.TYPE:
+                if current_node_type == "inst":
+                    current_node.arguments.append(TypeNode(token.value))
+                elif current_node_type == "func":
+                    if last_token and last_token.type == TokenType.FUNCTION_REFERENCE or current_node.return_type:
+                        current_node.args.append(ArgNode(
+                            arg_type=token.value,
+                            name=None,
+                            parent=current_node
+                        ))
+                    else:
+                        current_node.return_type = token.value
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction, not type.")
+
+            elif token.type == TokenType.FUNCTION_REFERENCE:
+                if last_token and last_token.type == TokenType.TYPE and current_node_type == "func":
+                    current_node.name = token.value
+                elif current_node_type == "inst":
+                    current_node.arguments.append(FunctionCallNode(token.value))
+                else:
+                    traceback(code, "SyntaxError", "Expected instruction or function return type, got function reference.")
+
+            elif token.type == TokenType.EOF:
+                root_node.statements.append(current_node)
+        last_token = token
+
+    return root_node
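To make the intent of generate_ast() concrete, here is a small hand-built sketch (not part of the commit) of the node structure the fun -list !split ... endfun block in test2.grnd should roughly come out as. It assumes the repo's modules are importable; whether the tokenizer strips the -, ! and & sigils from token values is also an assumption, as are the literal argument values:

    from ground_ast import RootNode, FunctionNode, ArgNode, InstructionNode, VarPointerNode, NumberNode

    root = RootNode(statements=[])
    # fun -list !split -string &str -string &determiner
    split_fn = FunctionNode(args=[], statements=[], parent=root, return_type="list", name="split")
    split_fn.args.append(ArgNode(arg_type="string", name="str", parent=split_fn))
    split_fn.args.append(ArgNode(arg_type="string", name="determiner", parent=split_fn))
    # set &x 2 -- an instruction whose parent is the enclosing function scope
    split_fn.statements.append(InstructionNode(instruction="set", parent=split_fn,
                                               arguments=[VarPointerNode("x"), NumberNode(2.0)]))
    root.statements.append(split_fn)
    print(root)

Printing root shows the nesting the scope tracking is after: the FunctionNode sits in RootNode.statements, while the instructions between fun and endfun land in the function's own statements list rather than the root's.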
ground_types.py (new file, 3 lines)

@@ -0,0 +1,3 @@
+class String:
+    def __init__(self, value: str):
+        self.value = value
main.py (6 changed lines)

@@ -1,17 +1,19 @@
 from tokenizer import tokenize
+from ground_ast import generate_ast
 from rich import print
 from time import time


 def main():
     start = time()
-    file = open("test.grnd", "r")
+    file = open("test2.grnd", "r")
     code = file.read()
     file.close()

     tokens = tokenize(code)
+    ast = generate_ast(tokens, code)
     compile_time = time()-start
-    print(tokens)
+    print(ast)
     print(f"Compiled in {compile_time} seconds.")


test2.grnd (16 changed lines)

@@ -1,4 +1,12 @@
-set &x 0
-@loop
-add $x 1 &x
-jump %loop
+set &myName "Nathaniel"
+set &myAge 10
+
+fun -list !split -string &str -string &determiner
+set &x 2
+set &y 5
+add $x $y &x
+stdlnout $x
+endfun
+
+# should error
+stdlnout $x
tokenizer.py (61 changed lines)

@@ -3,7 +3,6 @@ from typing import Any
 from enum import Enum
 from error import traceback
 from string import ascii_letters, digits, whitespace
-import sys


 class TokenType(Enum):
@@ -20,6 +19,7 @@ class TokenType(Enum):
     COMMENT = 11 # example: # hi there
     LINE_REFERENCE = 12 # example: %12
     LABEL_REFERENCE = 13 # example: %myLabel
+    EOF = 14

 @dataclass
 class Token:
@@ -42,10 +42,6 @@ def tokenize(input_string: str):
         "extern", "jump", "gettype", "exists", "setlist", "setlistat", "getlistat", "getlistsize",
         "listappend", "if"
     ]
-    types = [
-        "string", "bool", "list", "char", "int", "double"
-    ]
-

     while pos < len(input_string):
         current_char = input_string[pos]
@@ -73,17 +69,17 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char in digits or current_char == ".":
                 traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
-                sys.exit(1)
+
             elif current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
-                sys.exit(1)
+

             while pos < len(input_string):
                 current_char = input_string[pos]
@@ -113,17 +109,17 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char in digits or current_char == ".":
                 traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
-                sys.exit(1)
+
             elif current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
-                sys.exit(1)
+

             while pos < len(input_string):
                 current_char = input_string[pos]
@@ -152,17 +148,17 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a function name, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char in digits or current_char == ".":
                 traceback(input_string, "SyntaxError", "Function names can't start with numbers.", line, column, column)
-                sys.exit(1)
+
             elif current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a function name.", line, column, column)
-                sys.exit(1)
+

             while pos < len(input_string):
                 current_char = input_string[pos]
@@ -191,17 +187,17 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a list reference, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char in digits or current_char == ".":
                 traceback(input_string, "SyntaxError", "List references can't start with numbers.", line, column, column)
-                sys.exit(1)
+
             elif current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a list reference.", line, column, column)
-                sys.exit(1)
+

             while pos < len(input_string):
                 current_char = input_string[pos]
@@ -230,14 +226,14 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a type name, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a type", line, column, column)
-                sys.exit(1)
+

             start_col = column
             while pos < len(input_string):
@@ -255,14 +251,10 @@ def tokenize(input_string: str):
             pos += 1
             column += 1

-            if current_token in types:
             tokens.append(Token(
                 TokenType.TYPE,
                 value=current_token
             ))
-            else:
-                traceback(input_string, "SyntaxError", f"\"{current_token}\" is not a valid type.", line, start_col, column)
-                sys.exit(1)

             current_token = ""
         elif current_char == "@":
@@ -271,14 +263,14 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a label decleration, got <EOF>", line, column, column)
-                sys.exit(1)
+


             current_char = input_string[pos]

             if current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a label decleration", line, column, column)
-                sys.exit(1)
+

             start_col = column
             while pos < len(input_string):
@@ -308,12 +300,12 @@ def tokenize(input_string: str):

             if len(input_string) == pos:
                 traceback(input_string, "SyntaxError", "Expected a label or line reference, got <EOF>", line, column, column)
-                sys.exit(1)
+
             current_char = input_string[pos]

             if current_char == "\n":
                 traceback(input_string, "SyntaxError", "Expected a label or line reference", line, column, column)
-                sys.exit(1)
+

             start_col = column
             if current_char in digits or current_char == ".": # its a line number reference
@@ -329,7 +321,7 @@ def tokenize(input_string: str):
                     break
                 if not current_char in digits: # random ass character in the middle of the line number
                     traceback(input_string, "SyntaxError", "Malformed line number.", line, start_col, column)
-                    sys.exit(1)
+

                 current_token += current_char
                 pos += 1
@@ -374,7 +366,7 @@ def tokenize(input_string: str):
                 column += 1
                 if pos > len(input_string)-1:
                     traceback(input_string, "SyntaxError", f"String was never closed.", line, start_col, start_col)
-                    sys.exit(1)
+
                 current_char = input_string[pos]

             tokens.append(Token(
@@ -407,7 +399,7 @@ def tokenize(input_string: str):
                 ))
             else:
                 traceback(input_string, "SyntaxError", f"\"{current_token}\" isn't a valid instruction.", line, start_col, column)
-                sys.exit(1)
+

             current_token = ""
         elif current_char in digits:
@@ -427,7 +419,7 @@ def tokenize(input_string: str):

                 if not current_char in digits:
                     traceback(input_string, "SyntaxError", "Malformed number.", line, start_col, column)
-                    sys.exit(1)
+

                 current_token += current_char

@@ -451,9 +443,14 @@ def tokenize(input_string: str):
             continue
         else:
             traceback(input_string, "SyntaxError", f"Unkown token \"{current_char}\"", line, column, column)
-            sys.exit(1)
+

         column += 1
         pos += 1

+    tokens.append(Token(
+        TokenType.EOF,
+        None
+    ))
+
     return tokens