Files
Plasma/plasma_parser.py
2025-10-15 07:16:15 +11:00

401 lines
11 KiB
Python

from lexer import Lexer
from lexer_token import Token, TokenType
from typing import Callable
from enum import Enum, auto
from AST import Statement, Expression, Program
from AST import ExpressionStatement, AssignmentStatement, FunctionStatement, ReturnStatement, BlockStatement, ReassignStatement, IfStatement
from AST import InfixExpression, CallExpression
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral
from AST import FunctionParameter
class PrecedenceType(Enum):
    """Operator binding strengths for the Pratt parser, weakest to strongest.

    Tokens map to these via PRECEDENCES; anything not listed there is
    treated as P_LOWEST by the parser's precedence lookups.
    """

    P_LOWEST = 0        # default for non-operator tokens
    P_EQUALS = 1        # == and !=
    P_LESSGREATER = 2   # < > <= >=
    P_SUM = 3           # + and -
    P_PRODUCT = 4       # * / %
    P_EXPONENT = 5      # power operator
    P_PREFIX = 6        # declared but not currently mapped in PRECEDENCES
    P_CALL = 7          # call '(' — tightest binding in use
    P_INDEX = 8         # declared but not currently mapped in PRECEDENCES
# Binding strength for each infix operator token. Consulted by the parser's
# __current_precedence()/__peek_precedence(); token types absent from this
# table fall back to P_LOWEST. LPAREN is registered here so a call
# expression binds tighter than any arithmetic/comparison operator.
PRECEDENCES: dict[TokenType, PrecedenceType] = {
    TokenType.PLUS: PrecedenceType.P_SUM,
    TokenType.MINUS: PrecedenceType.P_SUM,
    TokenType.ASTERISK: PrecedenceType.P_PRODUCT,
    TokenType.SLASH: PrecedenceType.P_PRODUCT,
    TokenType.MODULUS: PrecedenceType.P_PRODUCT,
    TokenType.POW: PrecedenceType.P_EXPONENT,
    TokenType.EQ_EQ: PrecedenceType.P_EQUALS,
    TokenType.NOT_EQ: PrecedenceType.P_EQUALS,
    TokenType.LT: PrecedenceType.P_LESSGREATER,
    TokenType.GT: PrecedenceType.P_LESSGREATER,
    TokenType.LT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.GT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.LPAREN: PrecedenceType.P_CALL
}
class Parser:
def __init__(self, lexer: Lexer) -> None:
self.lexer: Lexer = lexer
self.errors: list[str] = []
self.current_token: Token = None
self.peek_token: Token = None
self.prefix_parse_functions: dict[Token, Callable] = { # -1
TokenType.IDENT: self.__parse_identifier,
TokenType.INT: self.__parse_int_literal,
TokenType.FLOAT: self.__parse_float_literal,
TokenType.LPAREN: self.__parse_grouped_expression,
TokenType.IF: self.__parse_if_statement,
TokenType.TRUE: self.__parse_boolean,
TokenType.FALSE: self.__parse_boolean,
}
self.infix_parse_functions: dict[Token, Callable] = { # 5 + 5
TokenType.PLUS: self.__parse_infix_expression,
TokenType.MINUS: self.__parse_infix_expression,
TokenType.SLASH: self.__parse_infix_expression,
TokenType.ASTERISK: self.__parse_infix_expression,
TokenType.POW: self.__parse_infix_expression,
TokenType.MODULUS: self.__parse_infix_expression,
TokenType.EQ_EQ: self.__parse_infix_expression,
TokenType.NOT_EQ: self.__parse_infix_expression,
TokenType.LT: self.__parse_infix_expression,
TokenType.GT: self.__parse_infix_expression,
TokenType.LT_EQ: self.__parse_infix_expression,
TokenType.GT_EQ: self.__parse_infix_expression,
TokenType.LPAREN: self.__parse_call_expression
}
self.__next_token()
self.__next_token()
# region Parser helpers
def __next_token(self) -> None:
self.current_token = self.peek_token
self.peek_token = self.lexer.next_token()
def __current_token_is(self, tt: TokenType) -> bool:
return self.current_token.type == tt
def __peek_token_is(self, tt: TokenType) -> bool:
return self.peek_token.type == tt
def __expect_peek(self, tt: TokenType) -> bool:
if self.__peek_token_is(tt):
self.__next_token()
return True
else:
self.__peek_error(tt)
return False
def __current_precedence(self) -> PrecedenceType:
prec = PRECEDENCES.get(self.current_token.type)
if prec is None:
return PrecedenceType.P_LOWEST
return prec
def __peek_precedence(self) -> PrecedenceType:
prec = PRECEDENCES.get(self.peek_token.type)
if prec is None:
return PrecedenceType.P_LOWEST
return prec
def __peek_error(self, tt: TokenType):
self.errors.append(f"Expected next token to be {tt}, got {self.peek_token.type} instead.")
def __no_prefix_parse_function_error(self, tt: TokenType):
self.errors.append(f"No Prefix Parse Function for {tt} found.")
# endregion
def parse_program(self) -> None:
program: Program = Program()
while self.current_token.type != TokenType.EOF:
stmt: Statement = self.__parse_statement()
if stmt is not None:
program.statements.append(stmt)
self.__next_token()
return program
# region Statement Methods
def __parse_statement(self) -> Statement:
match self.current_token.type:
case TokenType.IDENT:
return self.__parse_assignment_statement()
case TokenType.RETURN:
return self.__parse_return_statement()
case _:
return self.__parse_expression_statement()
def __parse_expression_statement(self) -> ExpressionStatement:
expr = self.__parse_expression(PrecedenceType.P_LOWEST)
if self.__peek_token_is(TokenType.SEMICOLON):
self.__next_token()
stmt: ExpressionStatement = ExpressionStatement(expr=expr)
return stmt
def __parse_assignment_statement(self) -> AssignmentStatement:
# x: Int = 10;
stmt: AssignmentStatement = AssignmentStatement(name=IdentifierLiteral(self.current_token.literal))
if self.__peek_token_is(TokenType.EQ): # function definition
# x = Func(): Int { return 10; }
self.__next_token()
if self.__peek_token_is(TokenType.TYPE):
func_stmt: FunctionStatement = FunctionStatement(name=stmt.name)
if self.peek_token.literal != "Func":
self.errors.append(f"Expected next token to be \"Func\", got {self.current_token.literal} instead.")
return None
self.__next_token()
if not self.__expect_peek(TokenType.LPAREN):
return None
func_stmt.parameters = self.__parse_function_parameters()
if not self.__expect_peek(TokenType.COLON):
return None
if not self.__expect_peek(TokenType.TYPE):
return None
func_stmt.return_type = self.current_token.literal
if not self.__expect_peek(TokenType.LBRACE):
return None
func_stmt.body = self.__parse_block_statement()
return func_stmt
else: # reassignment statement
assign_stmt: ReassignStatement = ReassignStatement()
self.__next_token()
assign_stmt.ident = stmt.name
assign_stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST)
while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
self.__next_token()
return assign_stmt
else:
if not self.__expect_peek(TokenType.COLON):
return None
if not self.__expect_peek(TokenType.TYPE):
return None
stmt.value_type = self.current_token.literal
if not self.__expect_peek(TokenType.EQ):
return None
self.__next_token()
stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST)
while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
self.__next_token()
return stmt
def __parse_function_parameters(self) -> list[FunctionParameter]:
params: list[FunctionParameter] = []
if self.__peek_token_is(TokenType.RPAREN):
self.__next_token()
return params
self.__next_token()
first_param: FunctionParameter = FunctionParameter(name=self.current_token.literal)
if not self.__expect_peek(TokenType.COLON):
return None
self.__next_token()
first_param.value_type = self.current_token.literal
params.append(first_param)
while self.__peek_token_is(TokenType.COMMA):
self.__next_token()
self.__next_token()
param: FunctionParameter = FunctionParameter(name=self.current_token.literal)
if not self.__expect_peek(TokenType.COLON):
return None
self.__next_token()
param.value_type = self.current_token.literal
params.append(param)
if not self.__expect_peek(TokenType.RPAREN):
return None
return params
def __parse_return_statement(self) -> ReturnStatement:
stmt: ReturnStatement = ReturnStatement()
self.__next_token()
stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST)
if not self.__expect_peek(TokenType.SEMICOLON):
return None
return stmt
def __parse_block_statement(self) -> BlockStatement:
block_stmt: BlockStatement = BlockStatement()
self.__next_token()
while not self.__current_token_is(TokenType.RBRACE) and not self.__current_token_is(TokenType.EOF):
stmt: Statement = self.__parse_statement()
if stmt is not None:
block_stmt.statements.append(stmt)
self.__next_token()
return block_stmt
def __parse_if_statement(self) -> IfStatement:
condition: Expression = None
consequence: BlockStatement = None
alternative: BlockStatement = None
self.__next_token()
condition = self.__parse_expression(PrecedenceType.P_LOWEST)
if not self.__expect_peek(TokenType.LBRACE):
return None
consequence = self.__parse_block_statement()
if self.__peek_token_is(TokenType.UNLESS):
self.__next_token()
if not self.__expect_peek(TokenType.LBRACE):
return None
alternative = self.__parse_block_statement()
return IfStatement(condition, consequence, alternative)
# endregion
# region Expression Methods
def __parse_expression(self, precedence: PrecedenceType) -> Expression:
prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
if prefix_func is None:
self.__no_prefix_parse_function_error(self.current_token.type)
return None
left_expr: Expression = prefix_func()
while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
if infix_func is None:
return left_expr
self.__next_token()
left_expr = infix_func(left_expr)
return left_expr
def __parse_infix_expression(self, left_node: Expression) -> Expression:
infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)
precedence = self.__current_precedence()
self.__next_token()
infix_expr.right_node = self.__parse_expression(precedence)
return infix_expr
def __parse_grouped_expression(self) -> Expression:
self.__next_token()
expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)
if not self.__expect_peek(TokenType.RPAREN):
return None
return expr
def __parse_call_expression(self, function: Expression) -> CallExpression:
expr: CallExpression = CallExpression(function=function)
expr.arguments = self.__parse_expression_list(TokenType.RPAREN)
return expr
def __parse_expression_list(self, end: TokenType) -> list[Expression]:
e_list: list[Expression] = []
if self.__peek_token_is(end):
self.__next_token()
return e_list
self.__next_token()
e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST))
while self.__peek_token_is(TokenType.COMMA):
self.__next_token()
self.__next_token()
e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST))
if not self.__expect_peek(end):
return None
return e_list
# endregion
# region Prefix Methods
def __parse_identifier(self) -> IdentifierLiteral:
return IdentifierLiteral(value=self.current_token.literal)
def __parse_int_literal(self) -> Expression:
int_lit: IntegerLiteral = IntegerLiteral()
try:
int_lit.value = int(self.current_token.literal)
except:
self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
return None
return int_lit
def __parse_float_literal(self) -> Expression:
float_lit: FloatLiteral = FloatLiteral()
try:
float_lit.value = float(self.current_token.literal)
except:
self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an float.")
return None
return float_lit
def __parse_boolean(self) -> BooleanLiteral:
return BooleanLiteral(value=self.__current_token_is(TokenType.TRUE))
# endregion