2025-10-13 21:05:03 +11:00
|
|
|
from llvmlite import ir
|
|
|
|
|
|
|
|
|
|
from AST import Node, NodeType, Program, Expression
|
2025-10-14 21:23:11 +11:00
|
|
|
from AST import ExpressionStatement, AssignmentStatement, BlockStatement, ReturnStatement, FunctionStatement, ReassignStatement, IfStatement
|
2025-10-15 06:59:29 +11:00
|
|
|
from AST import InfixExpression, CallExpression
|
2025-10-14 21:23:11 +11:00
|
|
|
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral, BooleanLiteral
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
from environment import Environment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Compiler:
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self.type_map: dict[str, ir.type] = {
|
2025-10-14 07:46:54 +11:00
|
|
|
"Bool": ir.IntType(1),
|
|
|
|
|
"Byte": ir.IntType(8),
|
|
|
|
|
"Short": ir.IntType(16),
|
|
|
|
|
"Int": ir.IntType(32),
|
|
|
|
|
"Long": ir.IntType(64),
|
|
|
|
|
"Float": ir.FloatType(),
|
|
|
|
|
"Double": ir.DoubleType()
|
2025-10-13 21:05:03 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.module: ir.Module = ir.Module("main")
|
|
|
|
|
self.builder: ir.IRBuilder = ir.IRBuilder()
|
|
|
|
|
self.environment: Environment = Environment()
|
2025-10-14 20:02:22 +11:00
|
|
|
self.errors: list[str] = []
|
2025-10-13 21:05:03 +11:00
|
|
|
|
2025-10-14 21:23:11 +11:00
|
|
|
self.__initialize_builtins()
|
|
|
|
|
|
|
|
|
|
def __initialize_builtins(self) -> None:
|
|
|
|
|
def __init_booleans() -> tuple[ir.GlobalVariable, ir.GlobalVariable]:
|
|
|
|
|
bool_type: ir.Type = self.type_map["Bool"]
|
|
|
|
|
|
|
|
|
|
true_var = ir.GlobalVariable(self.module, bool_type, "true")
|
|
|
|
|
true_var.initializer = ir.Constant(bool_type, 1)
|
|
|
|
|
true_var.global_constant = True
|
|
|
|
|
|
|
|
|
|
false_var = ir.GlobalVariable(self.module, bool_type, "false")
|
|
|
|
|
false_var.initializer = ir.Constant(bool_type, 0)
|
|
|
|
|
false_var.global_constant = True
|
|
|
|
|
|
|
|
|
|
return true_var, false_var
|
|
|
|
|
|
|
|
|
|
true_var, false_var = __init_booleans()
|
|
|
|
|
self.environment.define("true", true_var, true_var.type)
|
|
|
|
|
self.environment.define("false", false_var, false_var.type)
|
|
|
|
|
|
2025-10-13 21:05:03 +11:00
|
|
|
def compile(self, node: Node) -> None:
|
|
|
|
|
match node.type():
|
|
|
|
|
case NodeType.Program:
|
|
|
|
|
self.__visit_program(node)
|
|
|
|
|
|
|
|
|
|
# Statements
|
|
|
|
|
case NodeType.ExpressionStatement:
|
|
|
|
|
self.__visit_expression_statement(node)
|
|
|
|
|
case NodeType.AssignmentStatement:
|
|
|
|
|
self.__visit_assignment_statement(node)
|
2025-10-14 07:46:54 +11:00
|
|
|
case NodeType.FunctionStatement:
|
|
|
|
|
self.__visit_function_statement(node)
|
|
|
|
|
case NodeType.BlockStatement:
|
|
|
|
|
self.__visit_block_statement(node)
|
|
|
|
|
case NodeType.ReturnStatement:
|
|
|
|
|
self.__visit_return_statement(node)
|
2025-10-14 20:02:22 +11:00
|
|
|
case NodeType.ReassignStatement:
|
|
|
|
|
self.__visit_reassign_statement(node)
|
2025-10-14 21:23:11 +11:00
|
|
|
case NodeType.IfStatement:
|
|
|
|
|
self.__visit_if_statement(node)
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
# Expressions
|
|
|
|
|
case NodeType.InfixExpression:
|
|
|
|
|
self.__visit_infix_expression(node)
|
2025-10-15 06:59:29 +11:00
|
|
|
case NodeType.CallExpression:
|
|
|
|
|
self.__visit_call_expression(node)
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
# region Visit Methods
|
|
|
|
|
def __visit_program(self, node: Program) -> None:
|
|
|
|
|
for stmt in node.statements:
|
|
|
|
|
self.compile(stmt)
|
|
|
|
|
|
|
|
|
|
# region Statements
|
|
|
|
|
def __visit_expression_statement(self, node: ExpressionStatement) -> None:
|
|
|
|
|
self.compile(node.expr)
|
|
|
|
|
|
|
|
|
|
def __visit_assignment_statement(self, node: AssignmentStatement) -> None:
|
|
|
|
|
name: str = node.name.value
|
|
|
|
|
value: Expression = node.value
|
|
|
|
|
value_type: str = node.value_type # TODO: implemented
|
|
|
|
|
|
|
|
|
|
value, Type = self.__resolve_value(node=value)
|
|
|
|
|
|
|
|
|
|
if self.environment.lookup(name) is None:
|
|
|
|
|
# Define and allocate the new variable
|
|
|
|
|
ptr = self.builder.alloca(Type)
|
|
|
|
|
|
|
|
|
|
# Storing the value to the ptr
|
|
|
|
|
self.builder.store(value, ptr)
|
|
|
|
|
|
|
|
|
|
# Add the variable to the environment
|
2025-10-14 07:46:54 +11:00
|
|
|
self.environment.define(name, ptr, Type)
|
2025-10-13 21:05:03 +11:00
|
|
|
else:
|
|
|
|
|
ptr, _ = self.environment.lookup(name)
|
|
|
|
|
self.builder.store(value, ptr)
|
2025-10-14 07:46:54 +11:00
|
|
|
|
|
|
|
|
def __visit_block_statement(self, node: BlockStatement) -> None:
|
|
|
|
|
for stmt in node.statements:
|
|
|
|
|
self.compile(stmt)
|
|
|
|
|
|
|
|
|
|
def __visit_return_statement(self, node: ReturnStatement) -> None:
|
|
|
|
|
value: Expression = node.return_value
|
2025-10-14 19:22:59 +11:00
|
|
|
|
|
|
|
|
value, Type = self.__resolve_value(value)
|
2025-10-14 07:46:54 +11:00
|
|
|
|
|
|
|
|
self.builder.ret(value)
|
|
|
|
|
|
|
|
|
|
def __visit_function_statement(self, node: FunctionStatement) -> None:
|
|
|
|
|
name: str = node.name.value
|
|
|
|
|
body: BlockStatement = node.body
|
|
|
|
|
|
|
|
|
|
params: list[IdentifierLiteral] = node.parameters
|
|
|
|
|
param_types: list[ir.Type] = [] # TODO
|
|
|
|
|
|
|
|
|
|
return_type: ir.Type = self.type_map[node.return_type]
|
|
|
|
|
|
|
|
|
|
fnty: ir.FunctionType = ir.FunctionType(return_type, param_types)
|
|
|
|
|
func: ir.Function = ir.Function(self.module, fnty, name)
|
|
|
|
|
|
|
|
|
|
block: ir.Block = func.append_basic_block(f"{name}_entry")
|
|
|
|
|
|
|
|
|
|
previous_builder = self.builder
|
|
|
|
|
|
|
|
|
|
self.builder = ir.IRBuilder(block)
|
|
|
|
|
|
|
|
|
|
previous_env = self.environment
|
|
|
|
|
|
|
|
|
|
self.environment = Environment(parent=self.environment)
|
|
|
|
|
self.environment.define(name, func, return_type)
|
|
|
|
|
|
|
|
|
|
self.compile(body)
|
|
|
|
|
|
|
|
|
|
self.environment = previous_env
|
|
|
|
|
self.environment.define(name, func, return_type)
|
|
|
|
|
|
|
|
|
|
self.builder = previous_builder
|
|
|
|
|
|
2025-10-14 20:02:22 +11:00
|
|
|
def __visit_reassign_statement(self, node: ReassignStatement) -> None:
|
|
|
|
|
name: str = node.ident.value
|
|
|
|
|
value: Expression = node.right_value
|
|
|
|
|
|
|
|
|
|
value, Type = self.__resolve_value(value)
|
|
|
|
|
|
|
|
|
|
if self.environment.lookup(name) is None:
|
|
|
|
|
self.errors.append(f"Identifier {name} has not been declared before it was re-assigned.")
|
|
|
|
|
else:
|
|
|
|
|
ptr, _ = self.environment.lookup(name)
|
|
|
|
|
self.builder.store(value, ptr)
|
2025-10-14 21:23:11 +11:00
|
|
|
|
|
|
|
|
def __visit_if_statement(self, node: IfStatement) -> None:
|
|
|
|
|
condition = node.condition
|
|
|
|
|
consequence = node.consequence
|
|
|
|
|
alternative = node.alternative
|
|
|
|
|
|
|
|
|
|
test, _ = self.__resolve_value(condition)
|
|
|
|
|
|
|
|
|
|
if alternative is None:
|
|
|
|
|
with self.builder.if_then(test):
|
|
|
|
|
self.compile(consequence)
|
|
|
|
|
else:
|
|
|
|
|
with self.builder.if_else(test) as (true, otherwise):
|
|
|
|
|
# Creating a condition branch
|
|
|
|
|
# condition
|
|
|
|
|
# / \
|
|
|
|
|
# true false
|
|
|
|
|
# / \
|
|
|
|
|
# / \
|
|
|
|
|
# if block else block
|
|
|
|
|
with true:
|
|
|
|
|
self.compile(consequence)
|
|
|
|
|
|
|
|
|
|
with otherwise:
|
|
|
|
|
self.compile(alternative)
|
2025-10-14 07:46:54 +11:00
|
|
|
# endregion
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
# region Expressions
|
|
|
|
|
def __visit_infix_expression(self, node: InfixExpression) -> None:
|
|
|
|
|
operator: str = node.operator
|
|
|
|
|
|
|
|
|
|
left_value, left_type = self.__resolve_value(node.left_node)
|
|
|
|
|
right_value, right_type = self.__resolve_value(node.right_node)
|
|
|
|
|
|
|
|
|
|
value = None
|
|
|
|
|
Type = None
|
|
|
|
|
if isinstance(right_type, ir.IntType) and isinstance(left_type, ir.IntType):
|
2025-10-14 07:46:54 +11:00
|
|
|
Type = self.type_map["Int"]
|
2025-10-13 21:05:03 +11:00
|
|
|
match operator:
|
|
|
|
|
case "+":
|
|
|
|
|
value = self.builder.add(left_value, right_value)
|
|
|
|
|
case "-":
|
|
|
|
|
value = self.builder.sub(left_value, right_value)
|
|
|
|
|
case "*":
|
|
|
|
|
value = self.builder.mul(left_value, right_value)
|
|
|
|
|
case "/":
|
|
|
|
|
value = self.builder.sdiv(left_value, right_value)
|
|
|
|
|
case "%":
|
|
|
|
|
value = self.builder.srem(left_value, right_value)
|
|
|
|
|
case "^":
|
|
|
|
|
# TODO
|
|
|
|
|
pass
|
2025-10-14 21:23:11 +11:00
|
|
|
case "<":
|
|
|
|
|
value = self.builder.icmp_signed('<', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case "<=":
|
|
|
|
|
value = self.builder.icmp_signed('<=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case ">":
|
|
|
|
|
value = self.builder.icmp_signed('>', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case ">=":
|
|
|
|
|
value = self.builder.icmp_signed('>=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case "==":
|
|
|
|
|
value = self.builder.icmp_signed('==', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
2025-10-14 21:30:56 +11:00
|
|
|
case "!=":
|
|
|
|
|
value = self.builder.icmp_signed('!=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
2025-10-13 21:05:03 +11:00
|
|
|
elif isinstance(right_type, ir.FloatType) and isinstance(left_type, ir.FloatType):
|
2025-10-14 07:46:54 +11:00
|
|
|
Type = self.type_map["Float"]
|
2025-10-13 21:05:03 +11:00
|
|
|
match operator:
|
|
|
|
|
case "+":
|
|
|
|
|
value = self.builder.fadd(left_value, right_value)
|
|
|
|
|
case "-":
|
|
|
|
|
value = self.builder.fsub(left_value, right_value)
|
|
|
|
|
case "*":
|
|
|
|
|
value = self.builder.fmul(left_value, right_value)
|
|
|
|
|
case "/":
|
|
|
|
|
value = self.builder.fdiv(left_value, right_value)
|
|
|
|
|
case "%":
|
|
|
|
|
value = self.builder.frem(left_value, right_value)
|
|
|
|
|
case "^":
|
|
|
|
|
# TODO
|
|
|
|
|
pass
|
2025-10-14 21:23:11 +11:00
|
|
|
case "<":
|
|
|
|
|
value = self.builder.fcmp_ordered('<', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case "<=":
|
|
|
|
|
value = self.builder.fcmp_ordered('<=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case ">":
|
|
|
|
|
value = self.builder.fcmp_ordered('>', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case ">=":
|
|
|
|
|
value = self.builder.fcmp_ordered('>=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
|
|
|
|
case "==":
|
|
|
|
|
value = self.builder.fcmp_ordered('==', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
2025-10-14 21:30:56 +11:00
|
|
|
case "!=":
|
|
|
|
|
value = self.builder.fcmp_ordered('!=', left_value, right_value)
|
|
|
|
|
Type = ir.IntType(1)
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
return value, Type
|
2025-10-15 06:59:29 +11:00
|
|
|
|
|
|
|
|
def __visit_call_expression(self, node: CallExpression) -> None:
|
|
|
|
|
name: str = node.function.value
|
|
|
|
|
params: list[Expression] = node.arguments
|
|
|
|
|
|
|
|
|
|
args = []
|
|
|
|
|
types = []
|
|
|
|
|
# TODO
|
|
|
|
|
|
|
|
|
|
match name:
|
|
|
|
|
case _:
|
|
|
|
|
func, ret_type = self.environment.lookup(name)
|
|
|
|
|
ret = self.builder.call(func, args)
|
|
|
|
|
|
|
|
|
|
return ret, ret_type
|
2025-10-13 21:05:03 +11:00
|
|
|
# endregion
|
|
|
|
|
|
|
|
|
|
# endregion
|
|
|
|
|
|
|
|
|
|
# region Helper Methods
|
|
|
|
|
def __resolve_value(self, node: Expression) -> tuple[ir.Value, ir.Type]:
|
|
|
|
|
match node.type():
|
|
|
|
|
case NodeType.IntegerLiteral:
|
|
|
|
|
node: IntegerLiteral = node
|
2025-10-14 07:46:54 +11:00
|
|
|
value, Type = node.value, self.type_map['Int']
|
2025-10-13 21:05:03 +11:00
|
|
|
return ir.Constant(Type, value), Type
|
|
|
|
|
case NodeType.FloatLiteral:
|
|
|
|
|
node: FloatLiteral = node
|
2025-10-14 07:46:54 +11:00
|
|
|
value, Type = node.value, self.type_map['Float']
|
2025-10-13 21:05:03 +11:00
|
|
|
return ir.Constant(Type, value), Type
|
|
|
|
|
case NodeType.IdentifierLiteral:
|
|
|
|
|
node: IdentifierLiteral = node
|
|
|
|
|
ptr, Type = self.environment.lookup(node.value)
|
|
|
|
|
return self.builder.load(ptr), Type
|
2025-10-14 21:23:11 +11:00
|
|
|
case NodeType.BooleanLiteral:
|
|
|
|
|
node: BooleanLiteral = node
|
|
|
|
|
return ir.Constant(ir.IntType(1), 1 if node.value else 0), ir.IntType(1)
|
2025-10-13 21:05:03 +11:00
|
|
|
|
|
|
|
|
# expression value
|
|
|
|
|
case NodeType.InfixExpression:
|
|
|
|
|
return self.__visit_infix_expression(node)
|
2025-10-15 06:59:29 +11:00
|
|
|
case NodeType.CallExpression:
|
|
|
|
|
return self.__visit_call_expression(node)
|
2025-10-13 21:05:03 +11:00
|
|
|
# endregion
|