from lexer import Lexer from lexer_token import Token, TokenType from typing import Callable from enum import Enum, auto from AST import Statement, Expression, Program from AST import ExpressionStatement, AssignmentStatement, FunctionStatement, ReturnStatement, BlockStatement, ReassignStatement, IfStatement, WhileStatement from AST import InfixExpression, CallExpression, PrefixExpression, PostfixExpression from AST import BreakStatement, ContinueStatement, ForStatement, DependStatement from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral, StringLiteral from AST import FunctionParameter class PrecedenceType(Enum): P_LOWEST = 0 P_EQUALS = auto() P_LESSGREATER = auto() P_SUM = auto() P_PRODUCT = auto() P_EXPONENT = auto() P_PREFIX = auto() P_CALL = auto() P_INDEX = auto() PRECEDENCES: dict[TokenType, PrecedenceType] = { TokenType.PLUS: PrecedenceType.P_SUM, TokenType.MINUS: PrecedenceType.P_SUM, TokenType.ASTERISK: PrecedenceType.P_PRODUCT, TokenType.SLASH: PrecedenceType.P_PRODUCT, TokenType.MODULUS: PrecedenceType.P_PRODUCT, TokenType.POW: PrecedenceType.P_EXPONENT, TokenType.EQ_EQ: PrecedenceType.P_EQUALS, TokenType.NOT_EQ: PrecedenceType.P_EQUALS, TokenType.LT: PrecedenceType.P_LESSGREATER, TokenType.GT: PrecedenceType.P_LESSGREATER, TokenType.LT_EQ: PrecedenceType.P_LESSGREATER, TokenType.GT_EQ: PrecedenceType.P_LESSGREATER, TokenType.DOLLARSIGN: PrecedenceType.P_CALL, TokenType.PLUS_PLUS: PrecedenceType.P_INDEX, TokenType.MINUS_MINUS: PrecedenceType.P_INDEX } class Parser: def __init__(self, lexer: Lexer) -> None: self.lexer: Lexer = lexer self.errors: list[str] = [] self.current_token: Token = None self.peek_token: Token = None self.prefix_parse_functions: dict[Token, Callable] = { # -1 TokenType.IDENT: self.__parse_identifier, TokenType.INT: self.__parse_int_literal, TokenType.FLOAT: self.__parse_float_literal, TokenType.LPAREN: self.__parse_grouped_expression, TokenType.IF: self.__parse_if_statement, TokenType.TRUE: self.__parse_boolean, TokenType.FALSE: self.__parse_boolean, TokenType.STRING: self.__parse_string_literal, TokenType.DOLLARSIGN: self.__parse_call_expression, TokenType.MINUS: self.__parse_prefix_expression, TokenType.BANG: self.__parse_prefix_expression, } self.infix_parse_functions: dict[Token, Callable] = { # 5 + 5 TokenType.PLUS: self.__parse_infix_expression, TokenType.MINUS: self.__parse_infix_expression, TokenType.SLASH: self.__parse_infix_expression, TokenType.ASTERISK: self.__parse_infix_expression, TokenType.POW: self.__parse_infix_expression, TokenType.MODULUS: self.__parse_infix_expression, TokenType.EQ_EQ: self.__parse_infix_expression, TokenType.NOT_EQ: self.__parse_infix_expression, TokenType.LT: self.__parse_infix_expression, TokenType.GT: self.__parse_infix_expression, TokenType.LT_EQ: self.__parse_infix_expression, TokenType.GT_EQ: self.__parse_infix_expression, TokenType.PLUS_PLUS: self.__parse_postfix_expression, TokenType.MINUS_MINUS: self.__parse_postfix_expression } self.__next_token() self.__next_token() # region Parser helpers def __next_token(self) -> None: self.current_token = self.peek_token self.peek_token = self.lexer.next_token() def __current_token_is(self, tt: TokenType) -> bool: return self.current_token.type == tt def __peek_token_is(self, tt: TokenType) -> bool: return self.peek_token.type == tt def __peek_token_is_assignment(self) -> bool: assignment_operators: list[TokenType] = [ TokenType.EQ, TokenType.PLUS_EQ, TokenType.MINUS_EQ, TokenType.MUL_EQ, TokenType.DIV_EQ, ] return self.peek_token.type in assignment_operators def __expect_peek(self, tt: TokenType) -> bool: if self.__peek_token_is(tt): self.__next_token() return True else: self.__peek_error(tt) return False def __current_precedence(self) -> PrecedenceType: prec = PRECEDENCES.get(self.current_token.type) if prec is None: return PrecedenceType.P_LOWEST return prec def __peek_precedence(self) -> PrecedenceType: prec = PRECEDENCES.get(self.peek_token.type) if prec is None: return PrecedenceType.P_LOWEST return prec def __peek_error(self, tt: TokenType): self.errors.append(f"Expected next token to be {tt}, got {self.peek_token.type} instead.") def __no_prefix_parse_function_error(self, tt: TokenType): self.errors.append(f"No Prefix Parse Function for {tt} found.") # endregion def parse_program(self) -> None: program: Program = Program() while self.current_token.type != TokenType.EOF: stmt: Statement = self.__parse_statement() if stmt is not None: program.statements.append(stmt) self.__next_token() return program # region Statement Methods def __parse_statement(self) -> Statement: match self.current_token.type: case TokenType.IDENT: return self.__parse_assignment_statement() case TokenType.RETURN: return self.__parse_return_statement() case TokenType.WHILE: return self.__parse_while_statement() case TokenType.BREAK: return self.__parse_break_statement() case TokenType.CONTINUE: return self.__parse_continue_statement() case TokenType.FOR: return self.__parse_for_statement() case TokenType.DEPEND: return self.__parse_depend_statement() case _: return self.__parse_expression_statement() def __parse_expression_statement(self) -> ExpressionStatement: expr = self.__parse_expression(PrecedenceType.P_LOWEST) if self.__peek_token_is(TokenType.SEMICOLON): self.__next_token() stmt: ExpressionStatement = ExpressionStatement(expr=expr) return stmt def __parse_assignment_statement(self) -> AssignmentStatement: # x: Int = 10; stmt: AssignmentStatement = AssignmentStatement(name=IdentifierLiteral(self.current_token.literal)) if self.__peek_token_is_assignment(): # function definition # x = Func(): Int { return 10; } self.__next_token() if self.__current_token_is(TokenType.EQ) and self.__peek_token_is(TokenType.TYPE): func_stmt: FunctionStatement = FunctionStatement(name=stmt.name) if self.peek_token.literal != "Func": self.errors.append(f"Expected next token to be \"Func\", got {self.current_token.literal} instead.") return None self.__next_token() if not self.__expect_peek(TokenType.LPAREN): return None func_stmt.parameters = self.__parse_function_parameters() if not self.__expect_peek(TokenType.COLON): return None if not self.__expect_peek(TokenType.TYPE): return None func_stmt.return_type = self.current_token.literal if not self.__expect_peek(TokenType.LBRACE): return None func_stmt.body = self.__parse_block_statement() return func_stmt else: # reassignment statement assign_stmt: ReassignStatement = ReassignStatement() assign_stmt.operator = self.current_token.literal assign_stmt.ident = stmt.name self.__next_token() assign_stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST) while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF): self.__next_token() return assign_stmt else: if not self.__expect_peek(TokenType.COLON): return None if not self.__expect_peek(TokenType.TYPE): return None stmt.value_type = self.current_token.literal if not self.__expect_peek(TokenType.EQ): return None self.__next_token() stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST) while not self.__current_token_is(TokenType.SEMICOLON) and not self.__current_token_is(TokenType.EOF): self.__next_token() return stmt def __parse_function_parameters(self) -> list[FunctionParameter]: params: list[FunctionParameter] = [] if self.__peek_token_is(TokenType.RPAREN): self.__next_token() return params self.__next_token() first_param: FunctionParameter = FunctionParameter(name=self.current_token.literal) if not self.__expect_peek(TokenType.COLON): return None self.__next_token() first_param.value_type = self.current_token.literal params.append(first_param) while self.__peek_token_is(TokenType.COMMA): self.__next_token() self.__next_token() param: FunctionParameter = FunctionParameter(name=self.current_token.literal) if not self.__expect_peek(TokenType.COLON): return None self.__next_token() param.value_type = self.current_token.literal params.append(param) if not self.__expect_peek(TokenType.RPAREN): return None return params def __parse_return_statement(self) -> ReturnStatement: stmt: ReturnStatement = ReturnStatement() self.__next_token() stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST) if not self.__expect_peek(TokenType.SEMICOLON): return None return stmt def __parse_block_statement(self) -> BlockStatement: block_stmt: BlockStatement = BlockStatement() self.__next_token() while not self.__current_token_is(TokenType.RBRACE) and not self.__current_token_is(TokenType.EOF): stmt: Statement = self.__parse_statement() if stmt is not None: block_stmt.statements.append(stmt) self.__next_token() return block_stmt def __parse_if_statement(self) -> IfStatement: condition: Expression = None consequence: BlockStatement = None alternative: BlockStatement = None self.__next_token() condition = self.__parse_expression(PrecedenceType.P_LOWEST) if not self.__expect_peek(TokenType.LBRACE): return None consequence = self.__parse_block_statement() if self.__peek_token_is(TokenType.UNLESS): self.__next_token() if not self.__expect_peek(TokenType.LBRACE): return None alternative = self.__parse_block_statement() return IfStatement(condition, consequence, alternative) def __parse_while_statement(self) -> WhileStatement: condition: Expression = None body: BlockStatement = None self.__next_token() condition = self.__parse_expression(PrecedenceType.P_LOWEST) if not self.__expect_peek(TokenType.LBRACE): return None body = self.__parse_block_statement() return WhileStatement(condition, body) def __parse_break_statement(self) -> BreakStatement: self.__next_token() return BreakStatement() def __parse_continue_statement(self) -> ContinueStatement: self.__next_token() return ContinueStatement() def __parse_for_statement(self) -> ForStatement: stmt: ForStatement = ForStatement() if not self.__expect_peek(TokenType.LPAREN): return None if not self.__expect_peek(TokenType.IDENT): return None stmt.var_declaration = self.__parse_assignment_statement() self.__next_token() # skip ; stmt.condition = self.__parse_expression(PrecedenceType.P_LOWEST) if not self.__expect_peek(TokenType.SEMICOLON): return None self.__next_token() # skip ; stmt.action = self.__parse_assignment_statement() self.__next_token() if not self.__expect_peek(TokenType.LBRACE): return None stmt.body = self.__parse_block_statement() return stmt def __parse_depend_statement(self) -> DependStatement: if not self.__expect_peek(TokenType.STRING): return None stmt: DependStatement = DependStatement(self.current_token.literal) if not self.__expect_peek(TokenType.SEMICOLON): return None return stmt # endregion # region Expression Methods def __parse_expression(self, precedence: PrecedenceType) -> Expression: prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type) if prefix_func is None: self.__no_prefix_parse_function_error(self.current_token.type) return None left_expr: Expression = prefix_func() while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value: infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type) if infix_func is None: return left_expr self.__next_token() left_expr = infix_func(left_expr) return left_expr def __parse_infix_expression(self, left_node: Expression) -> Expression: infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal) precedence = self.__current_precedence() self.__next_token() infix_expr.right_node = self.__parse_expression(precedence) return infix_expr def __parse_grouped_expression(self) -> Expression: self.__next_token() expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST) if not self.__expect_peek(TokenType.RPAREN): return None return expr def __parse_expression_list(self, end: TokenType) -> list[Expression]: e_list: list[Expression] = [] if self.__peek_token_is(end): self.__next_token() return e_list self.__next_token() e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST)) while self.__peek_token_is(TokenType.COMMA): self.__next_token() self.__next_token() e_list.append(self.__parse_expression(PrecedenceType.P_LOWEST)) if not self.__expect_peek(end): return None return e_list def __parse_prefix_expression(self) -> PrefixExpression: prefix_expr: PrefixExpression = PrefixExpression(operator=self.current_token.literal) self.__next_token() prefix_expr.right_node = self.__parse_expression(PrecedenceType.P_PREFIX) return prefix_expr def __parse_postfix_expression(self, left_node: Expression) -> PostfixExpression: return PostfixExpression(left_node, self.current_token.literal) # endregion # region Prefix Methods def __parse_call_expression(self) -> CallExpression: self.__next_token() expr: CallExpression = CallExpression(function=self.__parse_expression(PrecedenceType.P_LOWEST)) self.__next_token() expr.arguments = self.__parse_expression_list(TokenType.RPAREN) return expr def __parse_identifier(self) -> IdentifierLiteral: return IdentifierLiteral(value=self.current_token.literal) def __parse_int_literal(self) -> Expression: int_lit: IntegerLiteral = IntegerLiteral() try: int_lit.value = int(self.current_token.literal) except: self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.") return None return int_lit def __parse_float_literal(self) -> Expression: float_lit: FloatLiteral = FloatLiteral() try: float_lit.value = float(self.current_token.literal) except: self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an float.") return None return float_lit def __parse_boolean(self) -> BooleanLiteral: return BooleanLiteral(value=self.__current_token_is(TokenType.TRUE)) def __parse_string_literal(self) -> StringLiteral: return StringLiteral(value=self.current_token.literal) # endregion