AST is working!!!!

SpookyDervish
2025-10-13 17:41:07 +11:00
parent 40d695729f
commit 985af13d6a
6 changed files with 219 additions and 6 deletions

AST.py

@@ -40,5 +40,67 @@ class Program(Node):
     def json(self) -> dict:
         return {
             "type": self.type().value,
-            "statements": []
+            "statements": [{stmt.type().value: stmt.json()} for stmt in self.statements]
         }
+
+
+# region Statements
+class ExpressionStatement(Statement):
+    def __init__(self, expr: Expression = None) -> None:
+        self.expr: Expression = expr
+
+    def type(self) -> NodeType:
+        return NodeType.ExpressionStatement
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "expr": self.expr.json()
+        }
+# endregion
+
+
+# region Expressions
+class InfixExpression(Expression):
+    def __init__(self, left_node: Expression, operator: str, right_node: Expression = None) -> None:
+        self.left_node: Expression = left_node
+        self.operator: str = operator
+        self.right_node: Expression = right_node
+
+    def type(self) -> NodeType:
+        return NodeType.InfixExpression
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "left_node": self.left_node.json(),
+            "operator": self.operator,
+            "right_node": self.right_node.json()
+        }
+# endregion
+
+
+# region Literals
+class IntegerLiteral(Expression):
+    def __init__(self, value: int = None) -> None:
+        self.value: int = value
+
+    def type(self) -> NodeType:
+        return NodeType.IntegerLiteral
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "value": self.value
+        }
+
+class FloatLiteral(Expression):
+    def __init__(self, value: float = None) -> None:
+        self.value: float = value
+
+    def type(self) -> NodeType:
+        return NodeType.FloatLiteral
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "value": self.value
+        }
+# endregion
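A quick sanity check for the new node classes, as a minimal sketch: it hand-builds the AST for 5 + 5 and serializes it. It assumes Program() starts with an empty statements list, which is what the json() method above implies.

    # Hand-build the AST for "5 + 5" and serialize it with the new json() methods.
    from AST import Program, ExpressionStatement, InfixExpression, IntegerLiteral
    import json

    program = Program()
    program.statements.append(
        ExpressionStatement(expr=InfixExpression(IntegerLiteral(5), "+", IntegerLiteral(5)))
    )
    print(json.dumps(program.json(), indent=4))  # should match debug/ast.json below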

debug/ast.json (new file)

@@ -0,0 +1,22 @@
{
    "type": "Program",
    "statements": [
        {
            "ExpressionStatement": {
                "type": "ExpressionStatement",
                "expr": {
                    "type": "InfixExpression",
                    "left_node": {
                        "type": "IntegerLiteral",
                        "value": 5
                    },
                    "operator": "+",
                    "right_node": {
                        "type": "IntegerLiteral",
                        "value": 5
                    }
                }
            }
        }
    ]
}

lexer.py

@@ -91,6 +91,8 @@ class Lexer:
                 tok = self.__new_token(TokenType.LCURLY, self.current_char)
             case "}":
                 tok = self.__new_token(TokenType.RCURLY, self.current_char)
+            case ";":
+                tok = self.__new_token(TokenType.SEMICOLON, self.current_char)
             case ":":
                 tok = self.__new_token(TokenType.COLON, self.current_char)
             case None:

lexer_token.py

@@ -27,6 +27,7 @@ class TokenType(Enum):
     LCURLY = "LCURLY"
     RCURLY = "RCURLY"
     COLON = "COLON"
+    SEMICOLON = "SEMICOLON"
 
 class Token:
     def __init__(self, type: TokenType, literal: Any, line_no: int, position: int) -> None:
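A quick check that the new token actually comes out of the lexer, as a small sketch reusing the debug loop from main.py (module and attribute names as used there):

    from lexer import Lexer

    lex = Lexer(source="5 + 5;")
    while lex.current_char is not None:
        print(lex.next_token())  # the trailing ";" should now lex as a SEMICOLON token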

main.py

@@ -1,6 +1,10 @@
 from lexer import Lexer
+from plasma_parser import Parser
+from AST import Program
+import json
 
 
 LEXER_DEBUG: bool = True
+PARSER_DEBUG: bool = True
 
 if __name__ == "__main__":
@@ -11,3 +15,15 @@ if __name__ == "__main__":
         debug_lex: Lexer = Lexer(source=code)
         while debug_lex.current_char is not None:
             print(debug_lex.next_token())
+
+    l: Lexer = Lexer(source=code)
+    p: Parser = Parser(lexer=l)
+
+    if PARSER_DEBUG:
+        print("===== PARSER DEBUG =====")
+        program: Program = p.parse_program()
+
+        with open("debug/ast.json", "w") as f:
+            json.dump(program.json(), f, indent=4)
+
+        print("Wrote AST to debug/ast.json successfully.")

plasma_parser.py

@@ -3,6 +3,11 @@ from lexer_token import Token, TokenType
 from typing import Callable
 from enum import Enum, auto
 
+from AST import Statement, Expression, Program
+from AST import ExpressionStatement
+from AST import InfixExpression
+from AST import IntegerLiteral, FloatLiteral
+
 class PrecedenceType(Enum):
     P_LOWEST = 0
     P_EQUALS = auto()
@@ -32,8 +37,23 @@ class Parser:
         self.current_token: Token = None
         self.peek_token: Token = None
 
-        self.prefix_parse_functions: dict[Token, Callable] = {} # -1
-        self.infix_parse_functions: dict[Token, Callable] = {} # 5 + 5
+        self.prefix_parse_functions: dict[Token, Callable] = { # -1
+            TokenType.INT: self.__parse_int_literal,
+            TokenType.FLOAT: self.__parse_float_literal,
+            TokenType.LPAREN: self.__parse_grouped_expression
+        }
+        self.infix_parse_functions: dict[Token, Callable] = { # 5 + 5
+            TokenType.PLUS: self.__parse_infix_expression,
+            TokenType.MINUS: self.__parse_infix_expression,
+            TokenType.SLASH: self.__parse_infix_expression,
+            TokenType.ASTERISK: self.__parse_infix_expression,
+            TokenType.POW: self.__parse_infix_expression,
+            TokenType.MODULUS: self.__parse_infix_expression
+        }
 
         self.__next_token()
         self.__next_token()
@@ -74,4 +94,94 @@ class Parser:
     # endregion
 
     def parse_program(self) -> None:
-        pass
+        program: Program = Program()
+
+        while self.current_token.type != TokenType.EOF:
+            stmt: Statement = self.__parse_statement()
+            if stmt is not None:
+                program.statements.append(stmt)
+
+            self.__next_token()
+
+        return program
+
+    # region Statement Methods
+    def __parse_statement(self) -> Statement:
+        return self.__parse_expression_statement()
+
+    def __parse_expression_statement(self) -> ExpressionStatement:
+        expr = self.__parse_expression(PrecedenceType.P_LOWEST)
+
+        if self.__peek_token_is(TokenType.SEMICOLON):
+            self.__next_token()
+
+        stmt: ExpressionStatement = ExpressionStatement(expr=expr)
+
+        return stmt
+    # endregion
+
+    # region Expression Methods
+    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
+        prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
+        if prefix_func is None:
+            self.__no_prefix_parse_function_error(self.current_token.type)
+            return None
+
+        left_expr: Expression = prefix_func()
+        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
+            infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
+            if infix_func is None:
+                return left_expr
+
+            self.__next_token()
+
+            left_expr = infix_func(left_expr)
+
+        return left_expr
+
+    def __parse_infix_expression(self, left_node: Expression) -> Expression:
+        infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)
+
+        precedence = self.__current_precedence()
+
+        self.__next_token()
+
+        infix_expr.right_node = self.__parse_expression(precedence)
+
+        return infix_expr
+
+    def __parse_grouped_expression(self) -> Expression:
+        self.__next_token()
+
+        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)
+
+        if not self.__expect_peek(TokenType.RPAREN):
+            return None
+
+        return expr
+    # endregion
+
+    # region Prefix Methods
+    def __parse_int_literal(self) -> Expression:
+        int_lit: IntegerLiteral = IntegerLiteral()
+
+        try:
+            int_lit.value = int(self.current_token.literal)
+        except:
+            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
+            return None
+
+        return int_lit
+
+    def __parse_float_literal(self) -> Expression:
+        float_lit: FloatLiteral = FloatLiteral()
+
+        try:
+            float_lit.value = float(self.current_token.literal)
+        except:
+            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as a float.")
+            return None
+
+        return float_lit
+    # endregion
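End to end, __parse_expression only keeps folding infix operators while the peeked operator binds tighter than the precedence it was called with, which is what produces the usual grouping. A minimal sketch of driving it (module names as imported in main.py; it assumes the precedence map, which sits outside this diff, ranks "*" above "+"):

    from lexer import Lexer
    from plasma_parser import Parser
    import json

    parser = Parser(lexer=Lexer(source="5 + 5 * 2;"))
    program = parser.parse_program()
    print(json.dumps(program.json(), indent=4))
    # Expected shape: the "*" InfixExpression ends up as the right_node of the
    # "+" expression, i.e. 5 + (5 * 2), assuming "*" maps to a higher
    # PrecedenceType value than "+".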