from lexer import Lexer
from lexer_token import Token, TokenType
from typing import Callable
from enum import Enum, auto

from AST import Statement, Expression, Program
from AST import ExpressionStatement, AssignmentStatement, FunctionStatement, ReturnStatement, BlockStatement, ReassignStatement, IfStatement
from AST import InfixExpression, CallExpression
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral


class PrecedenceType(Enum):
    P_LOWEST = 0
    P_EQUALS = auto()
    P_LESSGREATER = auto()
    P_SUM = auto()
    P_PRODUCT = auto()
    P_EXPONENT = auto()
    P_PREFIX = auto()
    P_CALL = auto()
    P_INDEX = auto()


PRECEDENCES: dict[TokenType, PrecedenceType] = {
    TokenType.PLUS: PrecedenceType.P_SUM,
    TokenType.MINUS: PrecedenceType.P_SUM,
    TokenType.ASTERISK: PrecedenceType.P_PRODUCT,
    TokenType.SLASH: PrecedenceType.P_PRODUCT,
    TokenType.MODULUS: PrecedenceType.P_PRODUCT,
    TokenType.POW: PrecedenceType.P_EXPONENT,
    TokenType.EQ_EQ: PrecedenceType.P_EQUALS,
    TokenType.NOT_EQ: PrecedenceType.P_EQUALS,
    TokenType.LT: PrecedenceType.P_LESSGREATER,
    TokenType.GT: PrecedenceType.P_LESSGREATER,
    TokenType.LT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.GT_EQ: PrecedenceType.P_LESSGREATER,
    TokenType.LPAREN: PrecedenceType.P_CALL,
}


class Parser:
    def __init__(self, lexer: Lexer) -> None:
        self.lexer: Lexer = lexer
        self.errors: list[str] = []

        self.current_token: Token | None = None
        self.peek_token: Token | None = None

        # Prefix parse functions, e.g. -1, identifiers, literals
        self.prefix_parse_functions: dict[TokenType, Callable] = {
            TokenType.IDENT: self.__parse_identifier,
            TokenType.INT: self.__parse_int_literal,
            TokenType.FLOAT: self.__parse_float_literal,
            TokenType.LPAREN: self.__parse_grouped_expression,
            TokenType.IF: self.__parse_if_statement,
            TokenType.TRUE: self.__parse_boolean,
            TokenType.FALSE: self.__parse_boolean,
        }

        # Infix parse functions, e.g. 5 + 5
        self.infix_parse_functions: dict[TokenType, Callable] = {
            TokenType.PLUS: self.__parse_infix_expression,
            TokenType.MINUS: self.__parse_infix_expression,
            TokenType.SLASH: self.__parse_infix_expression,
            TokenType.ASTERISK: self.__parse_infix_expression,
            TokenType.POW: self.__parse_infix_expression,
            TokenType.MODULUS: self.__parse_infix_expression,
            TokenType.EQ_EQ: self.__parse_infix_expression,
            TokenType.NOT_EQ: self.__parse_infix_expression,
            TokenType.LT: self.__parse_infix_expression,
            TokenType.GT: self.__parse_infix_expression,
            TokenType.LT_EQ: self.__parse_infix_expression,
            TokenType.GT_EQ: self.__parse_infix_expression,
            TokenType.LPAREN: self.__parse_call_expression,
        }

        # Advance twice so current_token and peek_token are both populated
        self.__next_token()
        self.__next_token()

    # region Parser helpers
    def __next_token(self) -> None:
        self.current_token = self.peek_token
        self.peek_token = self.lexer.next_token()

    def __current_token_is(self, tt: TokenType) -> bool:
        return self.current_token.type == tt

    def __peek_token_is(self, tt: TokenType) -> bool:
        return self.peek_token.type == tt

    def __expect_peek(self, tt: TokenType) -> bool:
        if self.__peek_token_is(tt):
            self.__next_token()
            return True
        else:
            self.__peek_error(tt)
            return False

    def __current_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.current_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_precedence(self) -> PrecedenceType:
        prec = PRECEDENCES.get(self.peek_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_error(self, tt: TokenType) -> None:
        self.errors.append(f"Expected next token to be {tt}, got {self.peek_token.type} instead.")

    def __no_prefix_parse_function_error(self, tt: TokenType) -> None:
        self.errors.append(f"No prefix parse function for {tt} found.")
    # endregion
    def parse_program(self) -> Program:
        """Parse statements until EOF and collect them into a Program node."""
        program: Program = Program()

        while self.current_token.type != TokenType.EOF:
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                program.statements.append(stmt)

            self.__next_token()

        return program

    # region Statement Methods
    def __parse_statement(self) -> Statement:
        match self.current_token.type:
            case TokenType.IDENT:
                return self.__parse_assignment_statement()
            case TokenType.RETURN:
                return self.__parse_return_statement()
            case _:
                return self.__parse_expression_statement()

    def __parse_expression_statement(self) -> ExpressionStatement:
        expr = self.__parse_expression(PrecedenceType.P_LOWEST)

        if self.__peek_token_is(TokenType.SEMICOLON):
            self.__next_token()

        stmt: ExpressionStatement = ExpressionStatement(expr=expr)
        return stmt

    def __parse_assignment_statement(self) -> AssignmentStatement:
        # x: Int = 10;
        stmt: AssignmentStatement = AssignmentStatement(name=IdentifierLiteral(self.current_token.literal))

        if self.__peek_token_is(TokenType.EQ):
            self.__next_token()

            if self.__peek_token_is(TokenType.TYPE):
                # Function definition
                # x = Func(): Int { return 10; }
                func_stmt: FunctionStatement = FunctionStatement(name=stmt.name)

                if self.peek_token.literal != "Func":
                    self.errors.append(f"Expected next token to be \"Func\", got {self.peek_token.literal} instead.")
                    return None

                self.__next_token()

                if not self.__expect_peek(TokenType.LPAREN):
                    return None

                func_stmt.parameters = []

                if not self.__expect_peek(TokenType.RPAREN):
                    return None

                if not self.__expect_peek(TokenType.COLON):
                    return None

                if not self.__expect_peek(TokenType.TYPE):
                    return None

                func_stmt.return_type = self.current_token.literal

                if not self.__expect_peek(TokenType.LBRACE):
                    return None

                func_stmt.body = self.__parse_block_statement()

                return func_stmt
            else:
                # Reassignment statement
                assign_stmt: ReassignStatement = ReassignStatement()

                self.__next_token()

                assign_stmt.ident = stmt.name
                assign_stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST)

                while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                    self.__next_token()

                return assign_stmt
        else:
            # Typed declaration: IDENT COLON TYPE EQ expression SEMICOLON
            if not self.__expect_peek(TokenType.COLON):
                return None

            if not self.__expect_peek(TokenType.TYPE):
                return None

            stmt.value_type = self.current_token.literal

            if not self.__expect_peek(TokenType.EQ):
                return None

            self.__next_token()

            stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST)

            while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF):
                self.__next_token()

            return stmt

    def __parse_return_statement(self) -> ReturnStatement:
        stmt: ReturnStatement = ReturnStatement()

        self.__next_token()

        stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.SEMICOLON):
            return None

        return stmt

    def __parse_block_statement(self) -> BlockStatement:
        block_stmt: BlockStatement = BlockStatement()

        self.__next_token()

        while not self.__current_token_is(TokenType.RBRACE) and not self.__current_token_is(TokenType.EOF):
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                block_stmt.statements.append(stmt)

            self.__next_token()

        return block_stmt

    def __parse_if_statement(self) -> IfStatement:
        condition: Expression = None
        consequence: BlockStatement = None
        alternative: BlockStatement = None

        self.__next_token()

        condition = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.LBRACE):
            return None

        consequence = self.__parse_block_statement()

        # The UNLESS token introduces the alternative (else) branch
        if self.__peek_token_is(TokenType.UNLESS):
            self.__next_token()
            if not self.__expect_peek(TokenType.LBRACE):
                return None

            alternative = self.__parse_block_statement()

        return IfStatement(condition, consequence, alternative)
    # endregion

    # region Expression Methods
    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
        # Pratt parsing: parse a prefix expression, then keep consuming infix
        # operators while they bind more tightly than the given precedence.
        prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
        if prefix_func is None:
            self.__no_prefix_parse_function_error(self.current_token.type)
            return None

        left_expr: Expression = prefix_func()
        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
            infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
            if infix_func is None:
                return left_expr

            self.__next_token()

            left_expr = infix_func(left_expr)

        return left_expr

    def __parse_infix_expression(self, left_node: Expression) -> Expression:
        infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)

        precedence = self.__current_precedence()

        self.__next_token()

        infix_expr.right_node = self.__parse_expression(precedence)

        return infix_expr

    def __parse_grouped_expression(self) -> Expression:
        self.__next_token()

        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.RPAREN):
            return None

        return expr

    def __parse_call_expression(self, function: Expression) -> CallExpression:
        expr: CallExpression = CallExpression(function=function)
        expr.arguments = []  # TODO: parse call arguments

        if not self.__expect_peek(TokenType.RPAREN):
            return None

        return expr
    # endregion

    # region Prefix Methods
    def __parse_identifier(self) -> IdentifierLiteral:
        return IdentifierLiteral(value=self.current_token.literal)

    def __parse_int_literal(self) -> Expression:
        int_lit: IntegerLiteral = IntegerLiteral()

        try:
            int_lit.value = int(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
            return None

        return int_lit

    def __parse_float_literal(self) -> Expression:
        float_lit: FloatLiteral = FloatLiteral()

        try:
            float_lit.value = float(self.current_token.literal)
        except ValueError:
            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as a float.")
            return None

        return float_lit

    def __parse_boolean(self) -> BooleanLiteral:
        return BooleanLiteral(value=self.__current_token_is(TokenType.TRUE))
    # endregion
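

# Minimal usage sketch, not part of the parser itself. It assumes the Lexer is
# constructed from a source string (Lexer(source)); adjust to the actual Lexer
# constructor if it differs. The sample program follows the grammar this parser
# accepts (IDENT COLON TYPE EQ expression SEMICOLON).
if __name__ == "__main__":
    source: str = "a: Int = 1 + 2 * 3;"  # hypothetical sample program

    lexer: Lexer = Lexer(source)
    parser: Parser = Parser(lexer)

    program: Program = parser.parse_program()

    if parser.errors:
        for error in parser.errors:
            print(f"Parser error: {error}")
    else:
        print(f"Parsed {len(program.statements)} statement(s).")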