working on generating an AST with scope handling

SpookyDervish
2025-09-01 06:44:33 +10:00
parent 67fe809c57
commit eee9325ab8
6 changed files with 187 additions and 43 deletions


@@ -3,7 +3,6 @@ from typing import Any
from enum import Enum
from error import traceback
from string import ascii_letters, digits, whitespace
import sys
class TokenType(Enum):
@@ -20,6 +19,7 @@ class TokenType(Enum):
COMMENT = 11 # example: # hi there
LINE_REFERENCE = 12 # example: %12
LABEL_REFERENCE = 13 # example: %myLabel
EOF = 14
@dataclass
class Token:
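
Note: the new EOF sentinel lets a consumer stop on a token value instead of bounds-checking the token list. A minimal sketch of how a parser might rely on it; Parser, peek, and at_end are hypothetical names, not part of this commit, and Token's first field is assumed to be named type:

    class Parser:
        def __init__(self, tokens):
            self.tokens = tokens
            self.pos = 0

        def peek(self):
            # Always valid: tokenize() now appends a trailing EOF token.
            return self.tokens[self.pos]

        def at_end(self):
            return self.peek().type == TokenType.EOF
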
@@ -42,10 +42,6 @@ def tokenize(input_string: str):
"extern", "jump", "gettype", "exists", "setlist", "setlistat", "getlistat", "getlistsize",
"listappend", "if"
]
types = [
"string", "bool", "list", "char", "int", "double"
]
while pos < len(input_string):
current_char = input_string[pos]
@@ -73,17 +69,17 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
sys.exit(1)
while pos < len(input_string):
current_char = input_string[pos]
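
Note: this EOF/newline guard repeats almost verbatim for the variable, function, list, type, and label scans below; it could be hoisted into a helper. A sketch under the same traceback/exit convention, assuming the module's traceback, digits, and sys imports; expect_name is a hypothetical helper, not in this commit:

    def expect_name(input_string, pos, kind, line, column):
        # kind is e.g. "variable name" or "function name"
        if pos == len(input_string):
            traceback(input_string, "SyntaxError", f"Expected a {kind}, got <EOF>", line, column, column)
            sys.exit(1)
        current_char = input_string[pos]
        if current_char in digits or current_char == ".":
            traceback(input_string, "SyntaxError", f"{kind.capitalize()}s can't start with numbers.", line, column, column)
            sys.exit(1)
        if current_char == "\n":
            traceback(input_string, "SyntaxError", f"Expected a {kind}", line, column, column)
            sys.exit(1)
        return current_char
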
@@ -113,17 +109,17 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
sys.exit(1)
while pos < len(input_string):
current_char = input_string[pos]
@@ -152,17 +148,17 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a function name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "Function names can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a function name.", line, column, column)
sys.exit(1)
while pos < len(input_string):
current_char = input_string[pos]
@@ -191,17 +187,17 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a list reference, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char in digits or current_char == ".":
traceback(input_string, "SyntaxError", "List references can't start with numbers.", line, column, column)
sys.exit(1)
elif current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a list reference.", line, column, column)
sys.exit(1)
while pos < len(input_string):
current_char = input_string[pos]
@@ -230,14 +226,14 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a type name, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a type", line, column, column)
sys.exit(1)
start_col = column
while pos < len(input_string):
@@ -255,14 +251,10 @@ def tokenize(input_string: str):
pos += 1
column += 1
if current_token in types:
tokens.append(Token(
TokenType.TYPE,
value=current_token
))
else:
traceback(input_string, "SyntaxError", f"\"{current_token}\" is not a valid type.", line, start_col, column)
sys.exit(1)
tokens.append(Token(
TokenType.TYPE,
value=current_token
))
current_token = ""
elif current_char == "@":
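
Note: with the types whitelist removed above, the tokenizer now emits any identifier in type position as a TYPE token; validating it presumably moves to the AST/scope pass this commit is building toward. A hedged sketch of such a later check, reusing the type names from the removed list; VALID_TYPES and check_type are hypothetical:

    VALID_TYPES = {"string", "bool", "list", "char", "int", "double"}

    def check_type(token):
        # Deferred validation: the tokenizer no longer rejects unknown type names.
        if token.value not in VALID_TYPES:
            raise SyntaxError(f'"{token.value}" is not a valid type.')
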
@@ -271,14 +263,14 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a label decleration, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a label decleration", line, column, column)
sys.exit(1)
start_col = column
while pos < len(input_string):
@@ -308,12 +300,12 @@ def tokenize(input_string: str):
if len(input_string) == pos:
traceback(input_string, "SyntaxError", "Expected a label or line reference, got <EOF>", line, column, column)
sys.exit(1)
current_char = input_string[pos]
if current_char == "\n":
traceback(input_string, "SyntaxError", "Expected a label or line reference", line, column, column)
sys.exit(1)
start_col = column
if current_char in digits or current_char == ".": # it's a line number reference
@@ -329,7 +321,7 @@ def tokenize(input_string: str):
break
if current_char not in digits: # stray character in the middle of the line number
traceback(input_string, "SyntaxError", "Malformed line number.", line, start_col, column)
sys.exit(1)
current_token += current_char
pos += 1
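
Note: the character right after % decides the reference kind, matching the TokenType comments above: a digit starts a LINE_REFERENCE, anything else a LABEL_REFERENCE. An illustrative sketch; the exact token values are an assumption, not verified against this diff:

    tokenize("jump %12")       # yields a LINE_REFERENCE token for "12"
    tokenize("jump %myLabel")  # yields a LABEL_REFERENCE token for "myLabel"
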
@@ -374,7 +366,7 @@ def tokenize(input_string: str):
column += 1
if pos > len(input_string)-1:
traceback(input_string, "SyntaxError", f"String was never closed.", line, start_col, start_col)
sys.exit(1)
current_char = input_string[pos]
tokens.append(Token(
@@ -407,7 +399,7 @@ def tokenize(input_string: str):
))
else:
traceback(input_string, "SyntaxError", f"\"{current_token}\" isn't a valid instruction.", line, start_col, column)
sys.exit(1)
current_token = ""
elif current_char in digits:
@@ -427,7 +419,7 @@ def tokenize(input_string: str):
if current_char not in digits:
traceback(input_string, "SyntaxError", "Malformed number.", line, start_col, column)
sys.exit(1)
current_token += current_char
@@ -451,9 +443,14 @@ def tokenize(input_string: str):
continue
else:
traceback(input_string, "SyntaxError", f"Unkown token \"{current_char}\"", line, column, column)
sys.exit(1)
column += 1
pos += 1
tokens.append(Token(
TokenType.EOF,
None
))
return tokens
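
Note: a minimal usage sketch of the new contract; the source string is illustrative only, and .type is the assumed name of Token's first field:

    tokens = tokenize("jump %myLabel")
    assert tokens[-1].type == TokenType.EOF  # the stream is now always EOF-terminated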