working on generating an AST with scope and stuff
This commit is contained in:
69
tokenizer.py
69
tokenizer.py
@@ -3,7 +3,6 @@ from typing import Any
|
||||
from enum import Enum
|
||||
from error import traceback
|
||||
from string import ascii_letters, digits, whitespace
|
||||
import sys
|
||||
|
||||
|
||||
class TokenType(Enum):
|
||||
@@ -20,6 +19,7 @@ class TokenType(Enum):
|
||||
COMMENT = 11 # example: # hi there
|
||||
LINE_REFERENCE = 12 # example: %12
|
||||
LABEL_REFERENCE = 13 # example: %myLabel
|
||||
EOF = 14
|
||||
|
||||
@dataclass
|
||||
class Token:
|
||||
@@ -42,10 +42,6 @@ def tokenize(input_string: str):
|
||||
"extern", "jump", "gettype", "exists", "setlist", "setlistat", "getlistat", "getlistsize",
|
||||
"listappend", "if"
|
||||
]
|
||||
types = [
|
||||
"string", "bool", "list", "char", "int", "double"
|
||||
]
|
||||
|
||||
|
||||
while pos < len(input_string):
|
||||
current_char = input_string[pos]
|
||||
@@ -73,17 +69,17 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char in digits or current_char == ".":
|
||||
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
elif current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
while pos < len(input_string):
|
||||
current_char = input_string[pos]
|
||||
@@ -113,17 +109,17 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a variable name, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char in digits or current_char == ".":
|
||||
traceback(input_string, "SyntaxError", "Variable names can't start with numbers.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
elif current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a variable name", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
while pos < len(input_string):
|
||||
current_char = input_string[pos]
|
||||
@@ -152,17 +148,17 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a function name, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char in digits or current_char == ".":
|
||||
traceback(input_string, "SyntaxError", "Function names can't start with numbers.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
elif current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a function name.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
while pos < len(input_string):
|
||||
current_char = input_string[pos]
|
||||
@@ -191,17 +187,17 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a list reference, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char in digits or current_char == ".":
|
||||
traceback(input_string, "SyntaxError", "List references can't start with numbers.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
elif current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a list reference.", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
while pos < len(input_string):
|
||||
current_char = input_string[pos]
|
||||
@@ -230,14 +226,14 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a type name, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a type", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
start_col = column
|
||||
while pos < len(input_string):
|
||||
@@ -255,14 +251,10 @@ def tokenize(input_string: str):
|
||||
pos += 1
|
||||
column += 1
|
||||
|
||||
if current_token in types:
|
||||
tokens.append(Token(
|
||||
TokenType.TYPE,
|
||||
value=current_token
|
||||
))
|
||||
else:
|
||||
traceback(input_string, "SyntaxError", f"\"{current_token}\" is not a valid type.", line, start_col, column)
|
||||
sys.exit(1)
|
||||
tokens.append(Token(
|
||||
TokenType.TYPE,
|
||||
value=current_token
|
||||
))
|
||||
|
||||
current_token = ""
|
||||
elif current_char == "@":
|
||||
@@ -271,14 +263,14 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a label decleration, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a label decleration", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
start_col = column
|
||||
while pos < len(input_string):
|
||||
@@ -308,12 +300,12 @@ def tokenize(input_string: str):
|
||||
|
||||
if len(input_string) == pos:
|
||||
traceback(input_string, "SyntaxError", "Expected a label or line reference, got <EOF>", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
if current_char == "\n":
|
||||
traceback(input_string, "SyntaxError", "Expected a label or line reference", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
start_col = column
|
||||
if current_char in digits or current_char == ".": # its a line number reference
|
||||
@@ -329,7 +321,7 @@ def tokenize(input_string: str):
|
||||
break
|
||||
if not current_char in digits: # random ass character in the middle of the line number
|
||||
traceback(input_string, "SyntaxError", "Malformed line number.", line, start_col, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
current_token += current_char
|
||||
pos += 1
|
||||
@@ -374,7 +366,7 @@ def tokenize(input_string: str):
|
||||
column += 1
|
||||
if pos > len(input_string)-1:
|
||||
traceback(input_string, "SyntaxError", f"String was never closed.", line, start_col, start_col)
|
||||
sys.exit(1)
|
||||
|
||||
current_char = input_string[pos]
|
||||
|
||||
tokens.append(Token(
|
||||
@@ -407,7 +399,7 @@ def tokenize(input_string: str):
|
||||
))
|
||||
else:
|
||||
traceback(input_string, "SyntaxError", f"\"{current_token}\" isn't a valid instruction.", line, start_col, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
current_token = ""
|
||||
elif current_char in digits:
|
||||
@@ -427,7 +419,7 @@ def tokenize(input_string: str):
|
||||
|
||||
if not current_char in digits:
|
||||
traceback(input_string, "SyntaxError", "Malformed number.", line, start_col, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
current_token += current_char
|
||||
|
||||
@@ -451,9 +443,14 @@ def tokenize(input_string: str):
|
||||
continue
|
||||
else:
|
||||
traceback(input_string, "SyntaxError", f"Unkown token \"{current_char}\"", line, column, column)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
column += 1
|
||||
pos += 1
|
||||
|
||||
tokens.append(Token(
|
||||
TokenType.EOF,
|
||||
None
|
||||
))
|
||||
|
||||
return tokens
|
Reference in New Issue
Block a user