class AssignmentStatement(Statement):
    """AST node for a typed assignment statement.

    Attributes are stored exactly as given:
      * ``name``       - expression node naming the assignment target
      * ``value``      - expression node producing the assigned value
      * ``value_type`` - the declared type, kept as a plain string
    """

    def __init__(
        self,
        name: Expression = None,
        value: Expression = None,
        value_type: str = None,
    ) -> None:
        self.name, self.value, self.value_type = name, value, value_type

    def type(self) -> NodeType:
        """Return the node-type tag for this statement."""
        return NodeType.AssignmentStatement

    def json(self) -> dict:
        """Serialize the node (and its ``name``/``value`` children) to a dict.

        Both ``name`` and ``value`` must be set: their ``json()`` methods are
        called unconditionally.
        """
        serialized: dict = {"type": self.type().value}
        serialized["name"] = self.name.json()
        serialized["value"] = self.value.json()
        serialized["value_type"] = self.value_type
        return serialized
@@ -103,4 +122,17 @@ class FloatLiteral(Expression): "type": self.type().value, "value": self.value } + +class IdentifierLiteral(Expression): + def __init__(self, value: str = None) -> None: + self.value: str = value + + def type(self) -> NodeType: + return NodeType.IdentifierLiteral + + def json(self) -> dict: + return { + "type": self.type().value, + "value": self.value + } # endregion \ No newline at end of file diff --git a/compiler.py b/compiler.py new file mode 100644 index 0000000..875c286 --- /dev/null +++ b/compiler.py @@ -0,0 +1,152 @@ +from llvmlite import ir + +from AST import Node, NodeType, Program, Expression +from AST import ExpressionStatement, AssignmentStatement +from AST import InfixExpression +from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral + +from environment import Environment + + +class Compiler: + def __init__(self) -> None: + self.type_map: dict[str, ir.type] = { + "bool": ir.IntType(1), + "byte": ir.IntType(8), + "short": ir.IntType(16), + "int": ir.IntType(32), + "long": ir.IntType(64), + "float": ir.FloatType(), + "double": ir.DoubleType() + } + + self.module: ir.Module = ir.Module("main") + self.builder: ir.IRBuilder = ir.IRBuilder() + self.environment: Environment = Environment() + + def compile(self, node: Node) -> None: + match node.type(): + case NodeType.Program: + self.__visit_program(node) + + # Statements + case NodeType.ExpressionStatement: + self.__visit_expression_statement(node) + case NodeType.AssignmentStatement: + self.__visit_assignment_statement(node) + + # Expressions + case NodeType.InfixExpression: + self.__visit_infix_expression(node) + + # region Visit Methods + def __visit_program(self, node: Program) -> None: + func_main: str = "main" + param_types: list[ir.Type] = [] + return_type = ir.Type = self.type_map["int"] + + fnty = ir.FunctionType(return_type, param_types) + func = ir.Function(self.module, fnty, func_main) + + block = func.append_basic_block(f"{func_main}_entry") + + self.builder = 
ir.IRBuilder(block) + + for stmt in node.statements: + self.compile(stmt) + + return_value: ir.Constant = ir.Constant(self.type_map["int"], 123) + self.builder.ret(return_value) + + # region Statements + def __visit_expression_statement(self, node: ExpressionStatement) -> None: + self.compile(node.expr) + + def __visit_assignment_statement(self, node: AssignmentStatement) -> None: + name: str = node.name.value + value: Expression = node.value + value_type: str = node.value_type # TODO: implemented + + value, Type = self.__resolve_value(node=value) + + if self.environment.lookup(name) is None: + # Define and allocate the new variable + ptr = self.builder.alloca(Type) + + # Storing the value to the ptr + self.builder.store(value, ptr) + + # Add the variable to the environment + self.environment.define(name, value, Type) + else: + ptr, _ = self.environment.lookup(name) + self.builder.store(value, ptr) + # endregion + + # region Expressions + def __visit_infix_expression(self, node: InfixExpression) -> None: + operator: str = node.operator + + left_value, left_type = self.__resolve_value(node.left_node) + right_value, right_type = self.__resolve_value(node.right_node) + + value = None + Type = None + if isinstance(right_type, ir.IntType) and isinstance(left_type, ir.IntType): + Type = self.type_map["int"] + match operator: + case "+": + value = self.builder.add(left_value, right_value) + case "-": + value = self.builder.sub(left_value, right_value) + case "*": + value = self.builder.mul(left_value, right_value) + case "/": + value = self.builder.sdiv(left_value, right_value) + case "%": + value = self.builder.srem(left_value, right_value) + case "^": + # TODO + pass + elif isinstance(right_type, ir.FloatType) and isinstance(left_type, ir.FloatType): + Type = self.type_map["float"] + match operator: + case "+": + value = self.builder.fadd(left_value, right_value) + case "-": + value = self.builder.fsub(left_value, right_value) + case "*": + value = 
self.builder.fmul(left_value, right_value) + case "/": + value = self.builder.fdiv(left_value, right_value) + case "%": + value = self.builder.frem(left_value, right_value) + case "^": + # TODO + pass + + return value, Type + # endregion + + # endregion + + # region Helper Methods + def __resolve_value(self, node: Expression) -> tuple[ir.Value, ir.Type]: + match node.type(): + case NodeType.IntegerLiteral: + node: IntegerLiteral = node + value, Type = node.value, self.type_map['int'] + return ir.Constant(Type, value), Type + case NodeType.FloatLiteral: + node: FloatLiteral = node + value, Type = node.value, self.type_map['float'] + return ir.Constant(Type, value), Type + case NodeType.IdentifierLiteral: + node: IdentifierLiteral = node + ptr, Type = self.environment.lookup(node.value) + return self.builder.load(ptr), Type + + # expression value + case NodeType.InfixExpression: + return self.__visit_infix_expression(node) + # endregion \ No newline at end of file diff --git a/debug/ast.json b/debug/ast.json index 505760c..bedc05d 100644 --- a/debug/ast.json +++ b/debug/ast.json @@ -2,36 +2,17 @@ "type": "Program", "statements": [ { - "ExpressionStatement": { - "type": "ExpressionStatement", - "expr": { - "type": "InfixExpression", - "left_node": { - "type": "InfixExpression", - "left_node": { - "type": "InfixExpression", - "left_node": { - "type": "IntegerLiteral", - "value": 5 - }, - "operator": "+", - "right_node": { - "type": "IntegerLiteral", - "value": 5 - } - }, - "operator": "*", - "right_node": { - "type": "IntegerLiteral", - "value": 3 - } - }, - "operator": "+", - "right_node": { - "type": "IntegerLiteral", - "value": 2 - } - } + "AssignmentStatement": { + "type": "AssignmentStatement", + "name": { + "type": "IdentifierLiteral", + "value": "myVar" + }, + "value": { + "type": "IntegerLiteral", + "value": 1 + }, + "value_type": "Bool" } } ] diff --git a/debug/ir.ll b/debug/ir.ll new file mode 100644 index 0000000..d4e3968 --- /dev/null +++ b/debug/ir.ll @@ 
class Environment:
    """Symbol table mapping names to ``(value, type)`` records.

    Environments can be chained through ``parent``: a lookup that misses in
    this scope continues in the enclosing one.

    Annotations that mention llvmlite types or this class itself are quoted
    so they are not evaluated when the class is defined.
    """

    def __init__(
        self,
        records: "dict[str, tuple[ir.Value, ir.Type]] | None" = None,
        parent: "Environment | None" = None,
        name: str = "global",
    ) -> None:
        # Compare against None rather than truthiness: a caller-supplied dict
        # that happens to be empty must still be used (and shared) as-is.
        self.records: "dict[str, tuple[ir.Value, ir.Type]]" = (
            {} if records is None else records
        )
        self.parent = parent
        self.name: str = name

    def define(self, name: str, value: "ir.Value", _type: "ir.Type") -> "ir.Value":
        """Bind ``name`` to ``(value, _type)`` in this scope and return ``value``.

        An existing binding with the same name in this scope is replaced.
        """
        self.records[name] = (value, _type)
        return value

    def lookup(self, name: str) -> "tuple[ir.Value, ir.Type] | None":
        """Return the record for ``name``, or ``None`` if no scope defines it."""
        return self.__resolve(name)

    def __resolve(self, name: str) -> "tuple[ir.Value, ir.Type] | None":
        """Search this scope, then each enclosing scope, for ``name``."""
        scope = self
        while scope is not None:
            if name in scope.records:
                return scope.records[name]
            scope = scope.parent
        return None
def lookup_ident(ident: str) -> TokenType:
    """Classify an identifier-shaped lexeme.

    The keyword tables are consulted in order (``KEYWORDS`` first, then
    ``ALT_KEYWORDS``); the first hit wins. Otherwise the lexeme is a
    ``TYPE`` token when it appears in ``TYPE_KEYWORDS`` and a plain
    ``IDENT`` token in every other case.
    """
    for table in (KEYWORDS, ALT_KEYWORDS):
        keyword_type = table.get(ident)
        if keyword_type is not None:
            return keyword_type

    return TokenType.TYPE if ident in TYPE_KEYWORDS else TokenType.IDENT
def __parse_assignment_statement(self) -> AssignmentStatement:
    """Parse a typed assignment of the form ``x: Int = 10;``.

    Expects the current token to be the target identifier. Returns ``None``
    as soon as one of the expected ``:``, type or ``=`` tokens is missing;
    otherwise returns the populated statement, leaving the parser on the
    terminating ``;`` (or on EOF).
    """
    target = IdentifierLiteral(self.current_token.literal)
    assignment: AssignmentStatement = AssignmentStatement(name=target)

    # `and` short-circuits, so TYPE is only checked once COLON has matched.
    if not (self.__expect_peek(TokenType.COLON) and self.__expect_peek(TokenType.TYPE)):
        return None
    # The current token is now the type keyword.
    assignment.value_type = self.current_token.literal

    if not self.__expect_peek(TokenType.EQ):
        return None

    # Step past `=` onto the first token of the value expression.
    self.__next_token()
    assignment.value = self.__parse_expression(PrecedenceType.P_LOWEST)

    # Skip anything left over until the statement terminator.
    terminators = (TokenType.SEMICOLON, TokenType.EOF)
    while self.current_token.type not in terminators:
        self.__next_token()

    return assignment
b/tests/functions.pla index d560fd5..c520992 100644 --- a/tests/functions.pla +++ b/tests/functions.pla @@ -2,7 +2,7 @@ depend "io.pla" depend "string.pla" add = Func(a: Int, b: Int): Int { - return a + b + return a + b; } -print(String(add(1, 3))) \ No newline at end of file +print(String(add(1, 3))); \ No newline at end of file diff --git a/tests/helloWorld.pla b/tests/helloWorld.pla index 165f105..a32e625 100644 --- a/tests/helloWorld.pla +++ b/tests/helloWorld.pla @@ -1,2 +1,2 @@ depend "io.pla" -print("Hello, World!") \ No newline at end of file +print("Hello, World!"); \ No newline at end of file diff --git a/tests/lexer.pla b/tests/lexer.pla deleted file mode 100644 index 11747db..0000000 --- a/tests/lexer.pla +++ /dev/null @@ -1,6 +0,0 @@ -123 -0.456 -[] -{} -(1 + 3 * 2 ^ 4) % 2 -2 - 1 \ No newline at end of file diff --git a/tests/parser.pla b/tests/parser.pla deleted file mode 100644 index 7da8b56..0000000 --- a/tests/parser.pla +++ /dev/null @@ -1 +0,0 @@ -(5 + 5) * 3 + 2 \ No newline at end of file diff --git a/tests/test.pla b/tests/test.pla new file mode 100644 index 0000000..bcd41f3 --- /dev/null +++ b/tests/test.pla @@ -0,0 +1,2 @@ +a: Float = 1.23; +b: Int = 456; \ No newline at end of file diff --git a/tests/types.pla b/tests/types.pla index bc1a140..6e878a2 100644 --- a/tests/types.pla +++ b/tests/types.pla @@ -14,6 +14,6 @@ struct Person { speak = Func(sentence: String): Nil { print(sentence) -} +} -max: Person = {"Max", 17, } \ No newline at end of file +max: Person = {"Max", 17, speak, Gender.male}; \ No newline at end of file diff --git a/tests/variables.pla b/tests/variables.pla index cfcbef7..a51887d 100644 --- a/tests/variables.pla +++ b/tests/variables.pla @@ -1,16 +1,16 @@ depend "string.pla" depend "io.h" -myInt: Int = 123 -myDecimal: Float = 0.456 -myBoolean: Bool = true -myString: String = "Hello!\n" -myList: List = [1, "hi", true, [1, 2, 3], 0.789] +myInt: Int = 123; +myDecimal: Float = 0.456; +myBoolean: Bool = true; +myString: 
String = "Hello!\n"; +myList: List = [1, "hi", true, [1, 2, 3], 0.789]; -MY_CONSTANT: Const(String) = "foo bar" +MY_CONSTANT: Const(String) = "foo bar"; -print(String(myInt)) -print(String(myDecimal)) -print(String(myBoolean)) -print(myString) -print(String(myList)) \ No newline at end of file +print(String(myInt)); +print(String(myDecimal)); +print(String(myBoolean)); +print(myString); +print(String(myList)); \ No newline at end of file