from lexer import Lexer
from lexer_token import Token, TokenType

from typing import Callable
from enum import Enum, auto

from AST import Statement, Expression, Program
from AST import ExpressionStatement, AssignmentStatement, FunctionStatement, ReturnStatement, BlockStatement, ReassignStatement, IfStatement
from AST import InfixExpression
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral


class PrecedenceType(Enum):
    # Operator precedence levels: higher values bind tighter in __parse_expression.
    P_LOWEST = 0
    P_EQUALS = auto()
    P_LESSGREATER = auto()
    P_SUM = auto()
    P_PRODUCT = auto()
    P_EXPONENT = auto()
    P_PREFIX = auto()
    P_CALL = auto()
    P_INDEX = auto()


PRECEDENCES: dict[TokenType, PrecedenceType] = {
    TokenType.PLUS: PrecedenceType.P_SUM,
    TokenType.MINUS: PrecedenceType.P_SUM,
    TokenType.ASTERISK: PrecedenceType.P_PRODUCT,
    TokenType.SLASH: PrecedenceType.P_PRODUCT,
    TokenType.MODULUS: PrecedenceType.P_PRODUCT,
    TokenType.POW: PrecedenceType.P_EXPONENT,
    TokenType.EQ_EQ: PrecedenceType.P_EQUALS,
    TokenType.NOT_EQ: PrecedenceType.P_EQUALS,
    TokenType.LT: PrecedenceType.P_LESSGREATER,
    TokenType.GT: PrecedenceType.P_LESSGREATER,
    TokenType.LT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.GT_EQ: PrecedenceType.P_LESSGREATER,
}
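
# A worked consequence of these levels: in "a + b * c" the "*" (P_PRODUCT)
# outranks "+" (P_SUM), so the expression parses as a + (b * c); in "a + b < c"
# the "+" outranks "<" (P_LESSGREATER), so it parses as (a + b) < c.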


class Parser:
    def __init__(self, lexer: Lexer) -> None:
        self.lexer: Lexer = lexer

        self.errors: list[str] = []

        self.current_token: Token | None = None
        self.peek_token: Token | None = None

        # Prefix parse functions: called when a token can begin an expression
        # (identifiers, literals, grouped expressions, if-expressions, ...).
        self.prefix_parse_functions: dict[TokenType, Callable] = {
            TokenType.IDENT: self.__parse_identifier,
            TokenType.INT: self.__parse_int_literal,
            TokenType.FLOAT: self.__parse_float_literal,
            TokenType.LPAREN: self.__parse_grouped_expression,
            TokenType.IF: self.__parse_if_statement,
            TokenType.TRUE: self.__parse_boolean,
            TokenType.FALSE: self.__parse_boolean,
        }

        # Infix parse functions: called when a token appears between two
        # sub-expressions, e.g. the "+" in 5 + 5.
        self.infix_parse_functions: dict[TokenType, Callable] = {
            TokenType.PLUS: self.__parse_infix_expression,
            TokenType.MINUS: self.__parse_infix_expression,
            TokenType.SLASH: self.__parse_infix_expression,
            TokenType.ASTERISK: self.__parse_infix_expression,
            TokenType.POW: self.__parse_infix_expression,
            TokenType.MODULUS: self.__parse_infix_expression,
            TokenType.EQ_EQ: self.__parse_infix_expression,
            TokenType.NOT_EQ: self.__parse_infix_expression,
            TokenType.LT: self.__parse_infix_expression,
            TokenType.GT: self.__parse_infix_expression,
            TokenType.LT_EQ: self.__parse_infix_expression,
            TokenType.GT_EQ: self.__parse_infix_expression,
        }
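
        # Note on the two tables: an operator listed only in PRECEDENCES is
        # reached by the precedence check but has no handler, and one listed
        # only here is never entered by the precedence loop, so a new binary
        # operator needs an entry in both.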

        # Advance twice so that current_token and peek_token are both populated.
        self.__next_token()
        self.__next_token()

    # region Parser helpers
    def __next_token(self) -> None:
        self.current_token = self.peek_token
        self.peek_token = self.lexer.next_token()

    def __current_token_is(self, tt: TokenType) -> bool:
        return self.current_token.type == tt

    def __peek_token_is(self, tt: TokenType) -> bool:
        return self.peek_token.type == tt

    def __expect_peek(self, tt: TokenType) -> bool:
        if self.__peek_token_is(tt):
            self.__next_token()
            return True
        else:
            self.__peek_error(tt)
            return False

    def __current_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.current_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.peek_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_error(self, tt: TokenType) -> None:
        self.errors.append(f"Expected next token to be {tt}, got {self.peek_token.type} instead.")

    def __no_prefix_parse_function_error(self, tt: TokenType) -> None:
        self.errors.append(f"No prefix parse function for {tt} found.")
    # endregion

    def parse_program(self) -> Program:
        program: Program = Program()

        while self.current_token.type != TokenType.EOF:
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                program.statements.append(stmt)

            self.__next_token()

        return program

    # region Statement Methods
    def __parse_statement(self) -> Statement:
        match self.current_token.type:
            case TokenType.IDENT:
                return self.__parse_assignment_statement()
            case TokenType.RETURN:
                return self.__parse_return_statement()
            case _:
                return self.__parse_expression_statement()

    def __parse_expression_statement(self) -> ExpressionStatement:
        expr = self.__parse_expression(PrecedenceType.P_LOWEST)

        if self.__peek_token_is(TokenType.SEMICOLON):
            self.__next_token()

        stmt: ExpressionStatement = ExpressionStatement(expr=expr)

        return stmt

    def __parse_assignment_statement(self) -> Statement:
        # Handles three forms that all begin with an identifier:
        #   x: Int = 10;                      (typed declaration)
        #   x = Func(): Int { return 10; }    (function definition)
        #   x = 10;                           (reassignment)
        stmt: AssignmentStatement = AssignmentStatement(name=IdentifierLiteral(self.current_token.literal))

        if self.__peek_token_is(TokenType.EQ):  # no type annotation: function definition or reassignment
            self.__next_token()

            if self.__peek_token_is(TokenType.TYPE):  # function definition
                func_stmt: FunctionStatement = FunctionStatement(name=stmt.name)

                if self.peek_token.literal != "Func":
                    self.errors.append(f"Expected next token to be \"Func\", got {self.peek_token.literal} instead.")
                    return None

                self.__next_token()

                if not self.__expect_peek(TokenType.LPAREN):
                    return None

                func_stmt.parameters = []

                if not self.__expect_peek(TokenType.RPAREN):
                    return None

                if not self.__expect_peek(TokenType.COLON):
                    return None

                if not self.__expect_peek(TokenType.TYPE):
                    return None

                func_stmt.return_type = self.current_token.literal

                if not self.__expect_peek(TokenType.LBRACE):
                    return None

                func_stmt.body = self.__parse_block_statement()

                return func_stmt
            else:  # reassignment statement
                assign_stmt: ReassignStatement = ReassignStatement()

                self.__next_token()

                assign_stmt.ident = stmt.name
                assign_stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST)

                while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                    self.__next_token()

                return assign_stmt
        else:  # typed declaration: x: Int = 10;
            if not self.__expect_peek(TokenType.COLON):
                return None

            if not self.__expect_peek(TokenType.TYPE):
                return None

            stmt.value_type = self.current_token.literal

            if not self.__expect_peek(TokenType.EQ):
                return None

            self.__next_token()

            stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST)

            while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                self.__next_token()

            return stmt

    def __parse_return_statement(self) -> ReturnStatement:
        stmt: ReturnStatement = ReturnStatement()

        self.__next_token()

        stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.SEMICOLON):
            return None

        return stmt

    def __parse_block_statement(self) -> BlockStatement:
        block_stmt: BlockStatement = BlockStatement()

        self.__next_token()

        while not self.__current_token_is(TokenType.RBRACE) and not self.__current_token_is(TokenType.EOF):
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                block_stmt.statements.append(stmt)

            self.__next_token()

        return block_stmt

    def __parse_if_statement(self) -> IfStatement:
        condition: Expression | None = None
        consequence: BlockStatement | None = None
        alternative: BlockStatement | None = None

        self.__next_token()

        condition = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.LBRACE):
            return None

        consequence = self.__parse_block_statement()

        if self.__peek_token_is(TokenType.UNLESS):
            self.__next_token()

            if not self.__expect_peek(TokenType.LBRACE):
                return None

            alternative = self.__parse_block_statement()

        return IfStatement(condition, consequence, alternative)
    # endregion

    # region Expression Methods
    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
        prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
        if prefix_func is None:
            self.__no_prefix_parse_function_error(self.current_token.type)
            return None

        left_expr: Expression = prefix_func()

        # Keep folding infix operators into left_expr while the next operator
        # binds more tightly than the precedence this call was entered with.
        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
            infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
            if infix_func is None:
                return left_expr

            self.__next_token()

            left_expr = infix_func(left_expr)

        return left_expr
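
    # Trace of the loop above on "1 + 2 * 3;" starting at P_LOWEST: the INT
    # prefix function yields 1; "+" (P_SUM) outranks P_LOWEST, so the parser
    # recurses at P_SUM for the right-hand side; there "*" (P_PRODUCT) outranks
    # P_SUM, so 2 * 3 is folded first and the result is 1 + (2 * 3).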

    def __parse_infix_expression(self, left_node: Expression) -> Expression:
        infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)

        precedence = self.__current_precedence()

        self.__next_token()

        infix_expr.right_node = self.__parse_expression(precedence)

        return infix_expr

    def __parse_grouped_expression(self) -> Expression:
        self.__next_token()

        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.RPAREN):
            return None

        return expr
    # endregion

    # region Prefix Methods
    def __parse_identifier(self) -> IdentifierLiteral:
        return IdentifierLiteral(value=self.current_token.literal)

    def __parse_int_literal(self) -> Expression:
        int_lit: IntegerLiteral = IntegerLiteral()

        try:
            int_lit.value = int(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
            return None

        return int_lit

    def __parse_float_literal(self) -> Expression:
        float_lit: FloatLiteral = FloatLiteral()

        try:
            float_lit.value = float(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as a float.")
            return None

        return float_lit

    def __parse_boolean(self) -> BooleanLiteral:
        return BooleanLiteral(value=self.__current_token_is(TokenType.TRUE))
    # endregion
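

# A minimal usage sketch. It assumes Lexer accepts the source string via its
# constructor (adjust the call if its actual signature differs); Program's
# `statements` list and Parser's `errors` list are used exactly as above.
if __name__ == "__main__":
    source = "x: Int = 1 + 2 * 3;"

    lexer = Lexer(source)  # assumption: Lexer(source_code)
    parser = Parser(lexer)

    program: Program = parser.parse_program()

    if parser.errors:
        for error in parser.errors:
            print(f"Parser error: {error}")
    else:
        print(f"Parsed {len(program.statements)} statement(s).")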