diff --git a/AST.py b/AST.py
index df5979c..d38cddc 100644
--- a/AST.py
+++ b/AST.py
@@ -40,5 +40,77 @@ class Program(Node):
     def json(self) -> dict:
         return {
             "type": self.type().value,
-            "statements": []
-        }
\ No newline at end of file
+            "statements": [{stmt.type().value: stmt.json()} for stmt in self.statements]
+        }
+
+# region Statements
+class ExpressionStatement(Statement):
+    """Statement consisting of a single expression."""
+
+    def __init__(self, expr: Expression = None) -> None:
+        self.expr: Expression = expr
+
+    def type(self) -> NodeType:
+        return NodeType.ExpressionStatement
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            # expr is None when the parser failed to build the expression.
+            "expr": self.expr.json() if self.expr is not None else None
+        }
+# endregion
+
+# region Expressions
+class InfixExpression(Expression):
+    """Binary expression: left operand, operator literal, right operand."""
+
+    def __init__(self, left_node: Expression, operator: str, right_node: Expression = None) -> None:
+        self.left_node: Expression = left_node
+        self.operator: str = operator
+        self.right_node: Expression = right_node
+
+    def type(self) -> NodeType:
+        return NodeType.InfixExpression
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            # Either operand may be None after a parse error; emit null instead of crashing.
+            "left_node": self.left_node.json() if self.left_node is not None else None,
+            "operator": self.operator,
+            "right_node": self.right_node.json() if self.right_node is not None else None
+        }
+# endregion
+
+# region Literals
+class IntegerLiteral(Expression):
+    """Integer literal node."""
+
+    def __init__(self, value: int = None) -> None:
+        self.value: int = value
+
+    def type(self) -> NodeType:
+        return NodeType.IntegerLiteral
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "value": self.value
+        }
+
+class FloatLiteral(Expression):
+    """Floating-point literal node."""
+
+    def __init__(self, value: float = None) -> None:
+        self.value: float = value
+
+    def type(self) -> NodeType:
+        return NodeType.FloatLiteral
+
+    def json(self) -> dict:
+        return {
+            "type": self.type().value,
+            "value": self.value
+        }
+# endregion
\ No newline at end of file
diff --git a/debug/ast.json b/debug/ast.json
new file mode 100644
index 0000000..d67aa72
--- /dev/null
+++ b/debug/ast.json
@@ -0,0 +1,22 @@
+{
+    "type": "Program",
+    "statements": [
+        {
+            "ExpressionStatement": {
+                "type": "ExpressionStatement",
+                "expr": {
+                    "type": "InfixExpression",
+                    "left_node": {
+                        "type": "IntegerLiteral",
+                        "value": 5
+                    },
+                    "operator": "+",
+                    "right_node": {
+                        "type": "IntegerLiteral",
+                        "value": 5
+                    }
+                }
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/lexer.py b/lexer.py
index 795b8ef..1e50bcb 100644
--- a/lexer.py
+++ b/lexer.py
@@ -91,6 +91,8 @@ class Lexer:
                 tok = self.__new_token(TokenType.LCURLY, self.current_char)
             case "}":
                 tok = self.__new_token(TokenType.RCURLY, self.current_char)
+            case ";":
+                tok = self.__new_token(TokenType.SEMICOLON, self.current_char)
             case ":":
                 tok = self.__new_token(TokenType.COLON, self.current_char)
             case None:
diff --git a/lexer_token.py b/lexer_token.py
index 8d1f9cf..66cb220 100644
--- a/lexer_token.py
+++ b/lexer_token.py
@@ -27,6 +27,7 @@ class TokenType(Enum):
     LCURLY = "LCURLY"
     RCURLY = "RCURLY"
     COLON = "COLON"
+    SEMICOLON = "SEMICOLON"
 
 class Token:
     def __init__(self, type: TokenType, literal: Any, line_no: int, position: int) -> None:
diff --git a/main.py b/main.py
index acf5139..b9abf12 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,10 @@
 from lexer import Lexer
+from plasma_parser import Parser
+from AST import Program
+import json
 
 LEXER_DEBUG: bool = True
+PARSER_DEBUG: bool = True
 
 
 if __name__ == "__main__":
@@ -10,4 +14,16 @@ if __name__ == "__main__":
     if LEXER_DEBUG:
         debug_lex: Lexer = Lexer(source=code)
         while debug_lex.current_char is not None:
-            print(debug_lex.next_token())
\ No newline at end of file
+            print(debug_lex.next_token())
+
+    l: Lexer = Lexer(source=code)
+    p: Parser = Parser(lexer=l)
+
+    if PARSER_DEBUG:
+        print("===== PARSER DEBUG =====")
+        program: Program = p.parse_program()
+
+        with open("debug/ast.json", "w") as f:
+            json.dump(program.json(), f, indent=4)
+
+        print("Wrote AST to debug/ast.json successfully.")
\ No newline at end of file
diff --git a/plasma_parser.py b/plasma_parser.py
index 8c228a7..20389fb 100644
--- a/plasma_parser.py
+++ b/plasma_parser.py
@@ -3,6 +3,11 @@ from lexer_token import Token, TokenType
 from typing import Callable
 from enum import Enum, auto
 
+from AST import Statement, Expression, Program
+from AST import ExpressionStatement
+from AST import InfixExpression
+from AST import IntegerLiteral, FloatLiteral
+
 class PrecedenceType(Enum):
     P_LOWEST = 0
     P_EQUALS = auto()
@@ -32,8 +37,23 @@ class Parser:
         self.current_token: Token = None
         self.peek_token: Token = None
 
-        self.prefix_parse_functions: dict[Token, Callable] = {} # -1
-        self.infix_parse_functions: dict[Token, Callable] = {} # 5 + 5
+        self.prefix_parse_functions: dict[TokenType, Callable] = { # -1
+
+            TokenType.INT: self.__parse_int_literal,
+            TokenType.FLOAT: self.__parse_float_literal,
+            TokenType.LPAREN: self.__parse_grouped_expression
+
+        }
+        self.infix_parse_functions: dict[TokenType, Callable] = { # 5 + 5
+
+            TokenType.PLUS: self.__parse_infix_expression,
+            TokenType.MINUS: self.__parse_infix_expression,
+            TokenType.SLASH: self.__parse_infix_expression,
+            TokenType.ASTERISK: self.__parse_infix_expression,
+            TokenType.POW: self.__parse_infix_expression,
+            TokenType.MODULUS: self.__parse_infix_expression
+
+        }
 
         self.__next_token()
         self.__next_token()
@@ -74,4 +94,105 @@ class Parser:
     # endregion
 
-    def parse_program(self) -> None:
-        pass
\ No newline at end of file
+    def parse_program(self) -> Program:
+        """Parse statements until EOF and return the populated Program node."""
+        program: Program = Program()
+
+        while self.current_token.type != TokenType.EOF:
+            stmt: Statement = self.__parse_statement()
+
+            if stmt is not None:
+                program.statements.append(stmt)
+
+            self.__next_token()
+
+        return program
+
+    # region Statement Methods
+    def __parse_statement(self) -> Statement:
+        """Parse one statement; every statement is currently an expression statement."""
+        return self.__parse_expression_statement()
+
+    def __parse_expression_statement(self) -> ExpressionStatement:
+        """Parse an expression and advance onto a trailing semicolon when one follows."""
+        expr = self.__parse_expression(PrecedenceType.P_LOWEST)
+
+        if self.__peek_token_is(TokenType.SEMICOLON):
+            self.__next_token()
+
+        stmt: ExpressionStatement = ExpressionStatement(expr=expr)
+
+        return stmt
+    # endregion
+
+    # region Expression Methods
+    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
+        """Parse an expression, applying infix parsers while the peeked precedence exceeds `precedence`.
+
+        Returns None when the current token has no registered prefix parser.
+        """
+        prefix_func: Callable | None = self.prefix_parse_functions.get(self.current_token.type)
+        if prefix_func is None:
+            self.__no_prefix_parse_function_error(self.current_token.type)
+            return None
+
+        left_expr: Expression = prefix_func()
+        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
+            infix_func: Callable | None = self.infix_parse_functions.get(self.peek_token.type)
+            if infix_func is None:
+                return left_expr
+
+            self.__next_token()
+
+            left_expr = infix_func(left_expr)
+
+        return left_expr
+
+    def __parse_infix_expression(self, left_node: Expression) -> Expression:
+        """Build an InfixExpression from `left_node`, the current operator token and the parsed right operand."""
+        infix_expr: Expression = InfixExpression(left_node=left_node, operator=self.current_token.literal)
+
+        precedence = self.__current_precedence()
+
+        self.__next_token()
+
+        infix_expr.right_node = self.__parse_expression(precedence)
+
+        return infix_expr
+
+    def __parse_grouped_expression(self) -> Expression:
+        """Parse the expression after the current token; return None if __expect_peek(RPAREN) fails."""
+        self.__next_token()
+
+        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)
+
+        if not self.__expect_peek(TokenType.RPAREN):
+            return None
+
+        return expr
+    # endregion
+
+    # region Prefix Methods
+    def __parse_int_literal(self) -> Expression:
+        """Convert the current token's literal to an IntegerLiteral, or record an error and return None."""
+        int_lit: IntegerLiteral = IntegerLiteral()
+
+        try:
+            int_lit.value = int(self.current_token.literal)
+        except (TypeError, ValueError):
+            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as an integer.")
+            return None
+
+        return int_lit
+
+    def __parse_float_literal(self) -> Expression:
+        """Convert the current token's literal to a FloatLiteral, or record an error and return None."""
+        float_lit: FloatLiteral = FloatLiteral()
+
+        try:
+            float_lit.value = float(self.current_token.literal)
+        except (TypeError, ValueError):
+            self.errors.append(f"Could not parse \"{self.current_token.literal}\" as a float.")
+            return None
+
+        return float_lit
+    # endregion
\ No newline at end of file