# Plasma/plasma_parser.py
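#
# Recursive-descent / Pratt parser for the Plasma language. The statement and
# expression forms listed here are inferred from the __parse_* methods below,
# which remain the authoritative definition of the grammar:
#
#   x: Int = 10;                                  variable declaration
#   x = 20;                                       reassignment
#   add = Func(a: Int, b: Int): Int { ... }       function definition
#   return expr;                                  return
#   if cond { ... } unless { ... }                conditional ("unless" = else branch)
#   while cond { ... }                            while loop
#   for (i: Int = 0; i < 10; i = i + 1) { ... }   for loop
#   break; / continue;                            loop control
#   $name(arg1, arg2)                             call expression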
from lexer import Lexer
from lexer_token import Token, TokenType
from typing import Callable
from enum import Enum, auto
from AST import Statement, Expression, Program
from AST import ExpressionStatement, AssignmentStatement, FunctionStatement, ReturnStatement, BlockStatement, ReassignStatement, IfStatement, WhileStatement
from AST import InfixExpression, CallExpression, BreakStatement, ContinueStatement, ForStatement
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral, StringLiteral
from AST import FunctionParameter


class PrecedenceType(Enum):
    P_LOWEST = 0
    P_EQUALS = auto()
    P_LESSGREATER = auto()
    P_SUM = auto()
    P_PRODUCT = auto()
    P_EXPONENT = auto()
    P_PREFIX = auto()
    P_CALL = auto()
    P_INDEX = auto()


PRECEDENCES: dict[TokenType, PrecedenceType] = {
    TokenType.PLUS: PrecedenceType.P_SUM,
    TokenType.MINUS: PrecedenceType.P_SUM,
    TokenType.ASTERISK: PrecedenceType.P_PRODUCT,
    TokenType.SLASH: PrecedenceType.P_PRODUCT,
    TokenType.MODULUS: PrecedenceType.P_PRODUCT,
    TokenType.POW: PrecedenceType.P_EXPONENT,
    TokenType.EQ_EQ: PrecedenceType.P_EQUALS,
    TokenType.NOT_EQ: PrecedenceType.P_EQUALS,
    TokenType.LT: PrecedenceType.P_LESSGREATER,
    TokenType.GT: PrecedenceType.P_LESSGREATER,
    TokenType.LT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.GT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.DOLLARSIGN: PrecedenceType.P_CALL,
}
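

# Pratt-parser dispatch: prefix_parse_functions handles tokens that can start
# an expression, infix_parse_functions handles operators that sit between two
# expressions, and the PRECEDENCES table above decides how tightly each infix
# operator binds inside __parse_expression's loop.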
class Parser:
    def __init__(self, lexer: Lexer) -> None:
        self.lexer: Lexer = lexer
        self.errors: list[str] = []

        self.current_token: Token | None = None
        self.peek_token: Token | None = None

        # Parse functions for tokens in prefix position (the start of an expression).
        self.prefix_parse_functions: dict[TokenType, Callable] = {
            TokenType.IDENT: self.__parse_identifier,
            TokenType.INT: self.__parse_int_literal,
            TokenType.FLOAT: self.__parse_float_literal,
            TokenType.LPAREN: self.__parse_grouped_expression,
            TokenType.IF: self.__parse_if_statement,
            TokenType.TRUE: self.__parse_boolean,
            TokenType.FALSE: self.__parse_boolean,
            TokenType.STRING: self.__parse_string_literal,
            TokenType.DOLLARSIGN: self.__parse_call_expression,
        }

        # Parse functions for operators in infix position, e.g. the "+" in 5 + 5.
        self.infix_parse_functions: dict[TokenType, Callable] = {
            TokenType.PLUS: self.__parse_infix_expression,
            TokenType.MINUS: self.__parse_infix_expression,
            TokenType.SLASH: self.__parse_infix_expression,
            TokenType.ASTERISK: self.__parse_infix_expression,
            TokenType.POW: self.__parse_infix_expression,
            TokenType.MODULUS: self.__parse_infix_expression,
            TokenType.EQ_EQ: self.__parse_infix_expression,
            TokenType.NOT_EQ: self.__parse_infix_expression,
            TokenType.LT: self.__parse_infix_expression,
            TokenType.GT: self.__parse_infix_expression,
            TokenType.LT_EQ: self.__parse_infix_expression,
            TokenType.GT_EQ: self.__parse_infix_expression,
        }

        # Prime current_token and peek_token.
        self.__next_token()
        self.__next_token()

    # region Parser helpers
    def __next_token(self) -> None:
        self.current_token = self.peek_token
        self.peek_token = self.lexer.next_token()

    def __current_token_is(self, tt: TokenType) -> bool:
        return self.current_token.type == tt

    def __peek_token_is(self, tt: TokenType) -> bool:
        return self.peek_token.type == tt

    def __expect_peek(self, tt: TokenType) -> bool:
        if self.__peek_token_is(tt):
            self.__next_token()
            return True
        else:
            self.__peek_error(tt)
            return False

    def __current_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.current_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.peek_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_error(self, tt: TokenType):
        self.errors.append(f"Expected next token to be {tt}, got {self.peek_token.type} instead.")

    def __no_prefix_parse_function_error(self, tt: TokenType):
        self.errors.append(f"No Prefix Parse Function for {tt} found.")
    # endregion

    def parse_program(self) -> Program:
        program: Program = Program()

        while self.current_token.type != TokenType.EOF:
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                program.statements.append(stmt)

            self.__next_token()

        return program

    # region Statement Methods
    def __parse_statement(self) -> Statement:
        match self.current_token.type:
            case TokenType.IDENT:
                return self.__parse_assignment_statement()
            case TokenType.RETURN:
                return self.__parse_return_statement()
            case TokenType.WHILE:
                return self.__parse_while_statement()
            case TokenType.BREAK:
                return self.__parse_break_statement()
            case TokenType.CONTINUE:
                return self.__parse_continue_statement()
            case TokenType.FOR:
                return self.__parse_for_statement()
            case _:
                return self.__parse_expression_statement()

    def __parse_expression_statement(self) -> ExpressionStatement:
        expr = self.__parse_expression(PrecedenceType.P_LOWEST)

        if self.__peek_token_is(TokenType.SEMICOLON):
            self.__next_token()

        stmt: ExpressionStatement = ExpressionStatement(expr=expr)
        return stmt

    def __parse_assignment_statement(self) -> AssignmentStatement:
        # x: Int = 10;
        stmt: AssignmentStatement = AssignmentStatement(name=IdentifierLiteral(self.current_token.literal))

        if self.__peek_token_is(TokenType.EQ):
            # x = Func(): Int { return 10; }   (function definition)
            # x = <expression>;                (reassignment)
            self.__next_token()

            if self.__peek_token_is(TokenType.TYPE):  # function definition
                func_stmt: FunctionStatement = FunctionStatement(name=stmt.name)

                if self.peek_token.literal != "Func":
                    self.errors.append(f"Expected next token to be \"Func\", got {self.peek_token.literal} instead.")
                    return None

                self.__next_token()

                if not self.__expect_peek(TokenType.LPAREN):
                    return None

                func_stmt.parameters = self.__parse_function_parameters()

                if not self.__expect_peek(TokenType.COLON):
                    return None

                if not self.__expect_peek(TokenType.TYPE):
                    return None

                func_stmt.return_type = self.current_token.literal

                if not self.__expect_peek(TokenType.LBRACE):
                    return None

                func_stmt.body = self.__parse_block_statement()

                return func_stmt
            else:  # reassignment statement
                assign_stmt: ReassignStatement = ReassignStatement()
                self.__next_token()

                assign_stmt.ident = stmt.name
                assign_stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST)

                while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                    self.__next_token()

                return assign_stmt
        else:
            # Declaration with an explicit type: x: Int = 10;
            if not self.__expect_peek(TokenType.COLON):
                return None

            if not self.__expect_peek(TokenType.TYPE):
                return None

            stmt.value_type = self.current_token.literal

            if not self.__expect_peek(TokenType.EQ):
                return None

            self.__next_token()

            stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST)

            while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                self.__next_token()

            return stmt

    def __parse_function_parameters(self) -> list[FunctionParameter]:
        params: list[FunctionParameter] = []

        if self.__peek_token_is(TokenType.RPAREN):
            self.__next_token()
            return params

        self.__next_token()

        first_param: FunctionParameter = FunctionParameter(name=self.current_token.literal)

        if not self.__expect_peek(TokenType.COLON):
            return None

        self.__next_token()

        first_param.value_type = self.current_token.literal
        params.append(first_param)

        while self.__peek_token_is(TokenType.COMMA):
            self.__next_token()
            self.__next_token()

            param: FunctionParameter = FunctionParameter(name=self.current_token.literal)

            if not self.__expect_peek(TokenType.COLON):
                return None

            self.__next_token()

            param.value_type = self.current_token.literal
            params.append(param)

        if not self.__expect_peek(TokenType.RPAREN):
            return None

        return params

    def __parse_return_statement(self) -> ReturnStatement:
        stmt: ReturnStatement = ReturnStatement()

        self.__next_token()

        stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.SEMICOLON):
            return None

        return stmt

    def __parse_block_statement(self) -> BlockStatement:
        block_stmt: BlockStatement = BlockStatement()

        self.__next_token()

        while not self.__current_token_is(TokenType.RBRACE) and not self.__current_token_is(TokenType.EOF):
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                block_stmt.statements.append(stmt)

            self.__next_token()

        return block_stmt

    def __parse_if_statement(self) -> IfStatement:
        condition: Expression = None
        consequence: BlockStatement = None
        alternative: BlockStatement = None

        self.__next_token()

        condition = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.LBRACE):
            return None

        consequence = self.__parse_block_statement()

        # "unless" introduces the alternative (else) branch.
        if self.__peek_token_is(TokenType.UNLESS):
            self.__next_token()

            if not self.__expect_peek(TokenType.LBRACE):
                return None

            alternative = self.__parse_block_statement()

        return IfStatement(condition, consequence, alternative)

    def __parse_while_statement(self) -> WhileStatement:
        condition: Expression = None
        body: BlockStatement = None

        self.__next_token()

        condition = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.LBRACE):
            return None

        body = self.__parse_block_statement()

        return WhileStatement(condition, body)

    def __parse_break_statement(self) -> BreakStatement:
        self.__next_token()
        return BreakStatement()

    def __parse_continue_statement(self) -> ContinueStatement:
        self.__next_token()
        return ContinueStatement()

    def __parse_for_statement(self) -> ForStatement:
        # for (i: Int = 0; i < 10; i = i + 1) { ... }
        stmt: ForStatement = ForStatement()

        if not self.__expect_peek(TokenType.LPAREN):
            return None

        if not self.__expect_peek(TokenType.IDENT):
            return None

        stmt.var_declaration = self.__parse_assignment_statement()

        self.__next_token()  # skip ;

        stmt.condition = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.SEMICOLON):
            return None

        self.__next_token()  # skip ;

        stmt.action = self.__parse_assignment_statement()

        self.__next_token()

        if not self.__expect_peek(TokenType.LBRACE):
            return None

        stmt.body = self.__parse_block_statement()

        return stmt
    # endregion

    # region Expression Methods
    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
        # Pratt parsing: parse a prefix expression for the current token, then
        # keep folding it into infix expressions while the next operator binds
        # more tightly than the precedence this call was made with.
        prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
        if prefix_func is None:
            self.__no_prefix_parse_function_error(self.current_token.type)
            return None

        left_expr: Expression = prefix_func()
        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
            infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
            if infix_func is None:
                return left_expr

            self.__next_token()

            left_expr = infix_func(left_expr)

        return left_expr

    def __parse_infix_expression(self, left_node: Expression) -> Expression:
        infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)

        precedence = self.__current_precedence()

        self.__next_token()

        infix_expr.right_node = self.__parse_expression(precedence)

        return infix_expr

    def __parse_grouped_expression(self) -> Expression:
        self.__next_token()

        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.RPAREN):
            return None

        return expr

    def __parse_expression_list(self, end: TokenType) -> list[Expression]:
        e_list: list[Expression] = []

        if self.__peek_token_is(end):
            self.__next_token()
            return e_list

        self.__next_token()

        e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST))

        while self.__peek_token_is(TokenType.COMMA):
            self.__next_token()
            self.__next_token()

            e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST))

        if not self.__expect_peek(end):
            return None

        return e_list
    # endregion

    # region Prefix Methods
    def __parse_call_expression(self) -> CallExpression:
        # $foo(arg1, arg2)
        self.__next_token()

        expr: CallExpression = CallExpression(function=self.__parse_expression(PrecedenceType.P_LOWEST))

        self.__next_token()

        expr.arguments = self.__parse_expression_list(TokenType.RPAREN)

        return expr

    def __parse_identifier(self) -> IdentifierLiteral:
        return IdentifierLiteral(value=self.current_token.literal)

    def __parse_int_literal(self) -> Expression:
        int_lit: IntegerLiteral = IntegerLiteral()

        try:
            int_lit.value = int(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
            return None

        return int_lit

    def __parse_float_literal(self) -> Expression:
        float_lit: FloatLiteral = FloatLiteral()

        try:
            float_lit.value = float(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as a float.")
            return None

        return float_lit

    def __parse_boolean(self) -> BooleanLiteral:
        return BooleanLiteral(value=self.__current_token_is(TokenType.TRUE))

    def __parse_string_literal(self) -> StringLiteral:
        return StringLiteral(value=self.current_token.literal)
    # endregion
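

# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal driver showing how the Parser is meant to be fed by the Lexer.
# The Lexer constructor signature and the sample Plasma source below are
# assumptions inferred from this file, not guaranteed by lexer.py or AST.py.
if __name__ == "__main__":
    source = """
    add = Func(a: Int, b: Int): Int {
        return a + b;
    }
    x: Int = $add(2, 3) * 4;
    """

    lexer = Lexer(source)  # assumed: Lexer is constructed from the source text
    parser = Parser(lexer)
    program = parser.parse_program()

    if parser.errors:
        for err in parser.errors:
            print(err)
    else:
        print(f"Parsed {len(program.statements)} top-level statements.")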