from generators.generator import Generator, SymbolTable
from ground_ast import *
from error import traceback
from optimizers.x86_64 import X86_64Optimizer


class X86_64Generator(Generator):
    """Generate NASM-style x86_64 Linux assembly from a Ground AST.

    Variables live on the machine stack. ``stack_size`` counts the 8-byte
    slots this generator believes are currently pushed, and every variable
    records the slot index (``stack_loc``) it was created at, so its address
    is always computed relative to the current ``rsp``.

    Top-level code is accumulated in ``self.lines``; function bodies are
    accumulated separately in ``self.function_lines`` and written after it.
    Every emitted instruction string ends with ``"\\n\\t"``.
    """

    # Registers that receive the first six call arguments, in order.
    _ARG_REGISTERS = ("rdi", "rsi", "rdx", "rcx", "r8", "r9")

    def __init__(self, ast, code, output_path):
        super().__init__(ast, code, output_path)
        self.stack_size = 0  # number of 8-byte slots currently tracked
        self.function_lines = []  # assembly emitted for function bodies

    def init(self):
        """Emit the program entry point, generate all code and write it."""
        self.lines.append("global _start\n")
        self.lines.append("_start:\n\t")
        self.generate()
        self.write()

    def push(self, reg: str, lines):
        """Emit ``push reg`` and account for the new stack slot."""
        lines.append("push " + reg + "\n\t")
        self.stack_size += 1

    def pop(self, reg: str, lines):
        """Emit ``pop reg`` and account for the released stack slot."""
        lines.append("pop " + reg + "\n\t")
        self.stack_size -= 1

    def get_variable(self, lines, var_name: str, reg: str, float: bool = False,
                     offset: int = 0, no_stack_pop: bool = True):
        """Emit code loading variable ``var_name`` and return its node type.

        Args:
            lines: List the generated assembly is appended to.
            var_name: Name of the variable to read.
            reg: Destination general-purpose register. Float variables are
                loaded into the XMM register mapped from ``reg`` instead
                (rax/rdi -> xmm0, rbx -> xmm1).
            float: Unused by this method; kept so existing positional
                callers keep working.
            offset: Byte offset added to the variable's stack position
                (``-8`` reads the length slot pushed after a string pointer).
            no_stack_pop: When true a plain ``mov`` is emitted; otherwise the
                value is copied with a ``push``/``pop`` pair.

        Returns:
            The node class stored as the variable's type.
        """
        var = self.current_var_scope.lookup(var_name)
        var_pos = self.get_var_pos(var_name)

        try:
            if var["type"] == FloatNode:
                conversion = {
                    "rax": "xmm0",
                    "rbx": "xmm1",
                    "rdi": "xmm0",
                }
                lines.append(f"movsd {conversion[reg]}, [rsp + {var_pos + offset}]\n\t")
                # NOTE(review): reading a float moves rsp up while *increasing*
                # stack_size; create_variable's VarRefNode branch relies on
                # this pairing. Kept as-is -- confirm the intended layout.
                lines.append("add rsp, 8\n\t")
                self.stack_size += 1
            elif var["type"] in [IntNode, StringNode, BoolNode]:
                if no_stack_pop:
                    lines.append(f"mov {reg}, [rsp + {var_pos + offset}]\n\t")
                else:
                    self.push(f"QWORD [rsp + {var_pos + offset}]", lines)
                    self.pop(reg, lines)
        except TypeError:  # lookup returned None: the variable doesn't exist
            traceback(self.code, "NameError", f"\"{var_name}\" is not defined.")

        return var["type"]

    def get_var_pos(self, var_name: str):
        """Return the byte offset of ``var_name`` relative to the current rsp.

        Reports a NameError through ``traceback`` when the variable is not
        defined (the scope lookup yields ``None``).
        """
        try:
            stack_loc = self.current_var_scope.lookup(var_name)['stack_loc']
            return (self.stack_size - stack_loc - 1) * 8
        except TypeError:  # not defined
            # Same error kind get_variable reports for this condition.
            traceback(self.code, "NameError", f"\"{var_name}\" is not defined.")

    def create_variable(self, lines, var_name: str, starting_value, var_type: Any = None):
        """Emit code that pushes a new variable and register it in the scope.

        Args:
            lines: List the generated assembly is appended to.
            var_name: Name of the variable being defined.
            starting_value: An Int/Float/String/Bool node, a ``VarRefNode``,
                or a register name (``str``) holding the initial value.
            var_type: Node class to record as the type; defaults to
                ``type(starting_value)`` (or the referenced variable's type).
        """
        if var_type is None:
            var_type = type(starting_value)

        # Slot index the variable will occupy once pushed.
        stack_location = self.stack_size
        value_kind = type(starting_value)

        if value_kind == IntNode:
            lines.append(f"mov rax, {starting_value.value}\n\t")
            self.push("rax", lines)
        elif value_kind == VarRefNode:
            var_type = self.get_variable(lines, starting_value.var_name, "rax")
            if var_type == FloatNode:
                # stack_size was already bumped by get_variable's float path.
                lines.append("sub rsp, 8\n\t")
                lines.append("movsd [rsp], xmm0\n\t")
            else:
                self.push("rax", lines)
        elif value_kind == FloatNode:
            name = self.add_constant(starting_value.value)
            lines.append("sub rsp, 8\n\t")  # make space on the stack
            lines.append(f"movsd xmm0, {name}\n\t")
            lines.append("movsd [rsp], xmm0\n\t")
            self.stack_size += 1
        elif value_kind == StringNode:
            # A string occupies two slots: pointer first, then length.
            string_pointer = self.add_constant(starting_value.value)
            string_len = self.add_constant(f"equ $ - {string_pointer[1:-1]}", no_string=True)
            lines.append(f"lea rax, {string_pointer}\n\t")
            self.push("rax", lines)
            lines.append(f"mov rax, {string_len[1:-1]}\n\t")
            self.push("rax", lines)
        elif value_kind == BoolNode:
            self.push("1" if starting_value.value else "0", lines)
        elif value_kind == str:
            if starting_value.startswith("xmm"):
                # XMM registers cannot be pushed; store through rsp instead.
                lines.append("sub rsp, 8\n\t")
                lines.append(f"movsd [rsp], {starting_value}\n\t")
                self.stack_size += 1
            else:
                self.push(starting_value, lines)

        self.current_var_scope.define(var_name, {"stack_loc": stack_location, "type": var_type})

    def change_variable(self, lines, var_name: str, new_value):
        """Emit code overwriting an existing variable and update its type.

        ``new_value`` may be an Int/Float/String/Bool node, a ``VarRefNode``
        or a register name. Strings are re-pushed as a fresh pointer/length
        pair and the variable's ``stack_loc`` is moved to the new slots.
        """
        var_pos = self.get_var_pos(var_name)
        entry = self.current_var_scope.table[var_name]
        value_kind = type(new_value)

        if value_kind == IntNode:
            lines.append(f"mov QWORD [rsp + {var_pos}], {new_value.value}\n\t")
            entry["type"] = IntNode
        elif value_kind == FloatNode:
            # Float literals must go through a data constant and an XMM
            # register; there is no immediate form.
            constant_name = self.add_constant(new_value.value)
            lines.append(f"movsd xmm0, {constant_name}\n\t")
            lines.append(f"movsd [rsp + {var_pos}], xmm0\n\t")
            entry["type"] = FloatNode
        elif value_kind == VarRefNode:
            var_type = self.get_variable(lines, new_value.var_name, "rax")
            lines.append(f"mov QWORD [rsp + {var_pos}], rax\n\t")
            entry["type"] = var_type
        elif value_kind == StringNode:
            # Clear the old slot, then push a new pointer/length pair.
            lines.append(f"mov QWORD [rsp + {var_pos}], 0\n\t")
            string_pointer = self.add_constant(new_value.value)
            string_len = self.add_constant(f"equ $ - {string_pointer[1:-1]}", no_string=True)
            entry["stack_loc"] = self.stack_size
            lines.append(f"lea rax, {string_pointer}\n\t")
            self.push("rax", lines)
            lines.append(f"mov rax, {string_len[1:-1]}\n\t")
            self.push("rax", lines)
            entry["type"] = StringNode
        elif value_kind == BoolNode:
            lines.append(f"mov QWORD [rsp + {var_pos}], {'1' if new_value.value else '0'}\n\t")
            entry["type"] = BoolNode
        elif value_kind == str:
            # The new value is held in a register.
            if new_value.startswith("xmm"):
                # Mirror create_variable: XMM sources need movsd and are floats.
                lines.append(f"movsd [rsp + {var_pos}], {new_value}\n\t")
                entry["type"] = FloatNode
            else:
                lines.append(f"mov QWORD [rsp + {var_pos}], {new_value}\n\t")
                entry["type"] = IntNode

    def generate_LabelDecNode(self, node: LabelDecNode, lines):
        """Record the label name and emit it as a local (dot-prefixed) label."""
        self.labels.append(node.name)
        lines.append("." + node.name + ":\n\t")

    def generate_FunctionNode(self, node: FunctionNode, lines):
        """Emit a function into ``self.function_lines`` inside a new scope."""
        if node.return_type is None:
            traceback(self.code, "SyntaxError", "Functions require a return type.")
        if node.name is None:
            traceback(self.code, "SyntaxError", "Functions require a name.")

        self.current_var_scope = SymbolTable(self.current_var_scope)

        # function boilerplate
        self.function_lines.append(node.name + ":")
        self.push("rbp", self.function_lines)
        self.function_lines.append("mov rbp, rsp\n\t")

        for inst in node.statements:
            self.generate_InstructionNode(inst, self.function_lines)

        self.add_function(node)

    def generate_InstructionNode(self, node: InstructionNode, lines=None):
        """Emit assembly for one instruction.

        Args:
            node: The instruction to translate.
            lines: Output list; defaults to ``self.lines`` (top-level code).

        Raises:
            NotImplementedError: If the instruction has no generator.
        """
        if lines is None:
            lines = self.lines

        instruction = node.instruction
        if instruction == "endfun":
            return

        handlers = {
            "end": self._instr_end,
            "set": self._instr_set,
            "add": self._instr_arithmetic,
            "subtract": self._instr_arithmetic,
            "multiply": self._instr_arithmetic,
            "divide": self._instr_divide,
            "stdout": self._instr_stdout,
            "jump": self._instr_jump,
            "if": self._instr_if,
            "equal": self._instr_compare,
            "inequal": self._instr_compare,
            "greater": self._instr_compare,
            "lesser": self._instr_compare,
            "return": self._instr_return,
            "call": self._instr_call,
            "pusharg": self._instr_pusharg,
        }
        handler = handlers.get(instruction)
        if handler is None:
            raise NotImplementedError(f"A generate method hasn't been made for the \"{node.instruction}\" instruction.")
        handler(node, lines)

    def _instr_end(self, node, lines):
        """``end <code>``: exit the process via syscall 60 with the given code."""
        self.clamp_instruction_args(node, 1, 1)
        exit_code = node.arguments[0]
        if not type(exit_code) in [IntNode, VarRefNode]:  # example: "end true"
            traceback(self.code, "TypeError", f"end expects an integer, not {exit_code}")

        lines.append("mov rax, 60\n\t")
        if type(exit_code) in [IntNode, BoolNode]:
            lines.append("mov rdi, " + str(exit_code.value) + "\n\t")
        elif isinstance(exit_code, VarRefNode):
            var_type = self.get_variable(lines, exit_code.var_name, "rdi", no_stack_pop=True)
            if var_type == FloatNode:
                # Float variables were loaded into xmm0; truncate to an int.
                lines.append("cvttsd2si rdi, xmm0\n\t")
            elif var_type not in [IntNode, BoolNode]:
                traceback(self.code, "TypeError", f"end expects an integer, not \"{var_type}\"")
        lines.append("syscall\n\t")

    def _instr_set(self, node, lines):
        """``set &var value``: create the variable or overwrite an existing one."""
        self.clamp_instruction_args(node, 2, 2)
        if not isinstance(node.arguments[0], VarPointerNode):
            traceback(self.code, "TypeError", f"the first argument of set should be a variable pointer, not \"{node.arguments[0]}\"")
        if type(node.arguments[1]) not in [IntNode, VarRefNode, FloatNode, StringNode, BoolNode]:
            traceback(self.code, "TypeError", f"variables can't be of type \"{type(node.arguments[1])}\"")

        target_name = node.arguments[0].var_name
        if self.current_var_scope.lookup(target_name) is None:
            self.create_variable(lines, target_name, node.arguments[1])
        else:
            self.change_variable(lines, target_name, node.arguments[1])

    def _store_result(self, lines, var_name, register, new_type):
        """Store ``register`` into ``var_name``, creating the variable if needed."""
        if self.current_var_scope.lookup(var_name) is None:
            self.create_variable(lines, var_name, register, new_type)
        else:
            self.change_variable(lines, var_name, register)

    def _instr_arithmetic(self, node, lines):
        """``add``/``subtract``/``multiply`` ``a b &dest``.

        Integer operands are combined in rax (second operand in rbx or as an
        immediate); as soon as either operand is a float the operation is
        done in xmm0/xmm1 and integer operands are converted first.
        """
        self.clamp_instruction_args(node, 3, 3)
        if type(node.arguments[2]) != VarPointerNode:
            traceback(self.code, "TypeError", f"the destination of the {node.instruction} command must be a variable pointer, not \"{node.arguments[2]}\"")

        left, right = node.arguments[0], node.arguments[1]
        number1_type = None
        number2_type = None
        arg2 = "rbx"

        if isinstance(left, VarRefNode):
            number1_type = self.get_variable(lines, left.var_name, "rax")
        elif isinstance(left, FloatNode) or (isinstance(left, IntNode) and isinstance(right, FloatNode)):
            # A numeric literal paired with a float literal is stored as a
            # float constant so the whole operation runs in XMM registers.
            number1_type = FloatNode
            constant_name = self.add_constant(left.value)
            lines.append(f"movsd xmm0, {constant_name}\n\t")
        elif isinstance(left, IntNode):
            number1_type = IntNode
            lines.append(f"mov rax, {left.value}\n\t")
        else:
            traceback(self.code, "TypeError", f"expected a variable reference or number for argument 1 of {node.instruction}, got {left}")

        if isinstance(right, VarRefNode):
            number2_type = self.get_variable(lines, right.var_name, "rbx")
        elif isinstance(right, FloatNode) or (number1_type == FloatNode and isinstance(right, IntNode)):
            number2_type = FloatNode
            constant_name = self.add_constant(right.value)
            lines.append(f"movsd xmm1, {constant_name}\n\t")
        elif isinstance(right, IntNode):
            number2_type = IntNode
            arg2 = right.value  # used directly as an immediate
        else:
            traceback(self.code, "TypeError", f"expected a variable reference or number for argument 2 of {node.instruction}, got {right}")

        # TODO: numbers can be added to numbers, but numbers cant be added to
        # strings. but strings can be added to strings, etc...
        if number1_type not in [IntNode, FloatNode] or number2_type not in [IntNode, FloatNode]:
            traceback(self.code, "TypeError", f"Unsupported operation \"{node.instruction}\" for \"{left}\" and \"{right}\".")

        is_integer = number1_type == IntNode and number2_type == IntNode
        if is_integer:
            mnemonic = {"add": "add", "subtract": "sub", "multiply": "imul"}[node.instruction]
            lines.append(f"{mnemonic} rax, {arg2}\n\t")
        else:
            # Mixed operands: an integer operand sits in rax/rbx and must be
            # converted into the XMM register the float instruction reads.
            if number1_type == IntNode:
                lines.append("cvtsi2sd xmm0, rax\n\t")
            if number2_type == IntNode:
                lines.append("cvtsi2sd xmm1, rbx\n\t")
            mnemonic = {"add": "addsd", "subtract": "subsd", "multiply": "mulsd"}[node.instruction]
            lines.append(f"{mnemonic} xmm0, xmm1\n\t")

        starting_reg = "rax" if is_integer else "xmm0"
        self._store_result(lines, node.arguments[2].var_name, starting_reg,
                           IntNode if is_integer else FloatNode)

    def _instr_divide(self, node, lines):
        """``divide a b &dest``: always a floating point division in xmm0."""
        self.clamp_instruction_args(node, 3, 3)
        if type(node.arguments[2]) != VarPointerNode:
            traceback(self.code, "TypeError", f"the destination of the divide command must be a variable pointer, not \"{node.arguments[2]}\"")

        left, right = node.arguments[0], node.arguments[1]
        number1_type = None
        number2_type = None

        if isinstance(left, VarRefNode):
            number1_type = self.get_variable(lines, left.var_name, "rax", True)
        elif type(left) in [IntNode, FloatNode]:
            number1_type = FloatNode
            constant_name = self.add_constant(left.value)
            lines.append(f"movsd xmm0, {constant_name}\n\t")
        else:
            traceback(self.code, "TypeError", f"expected a variable reference or number for argument 1 of divide, got {left}")

        if isinstance(right, VarRefNode):
            number2_type = self.get_variable(lines, right.var_name, "rbx", True)
        elif type(right) in [IntNode, FloatNode]:
            number2_type = FloatNode
            constant_name = self.add_constant(right.value)
            lines.append(f"movsd xmm1, {constant_name}\n\t")
        else:
            traceback(self.code, "TypeError", f"expected a variable reference or number for argument 2 of divide, got {right}")

        # TODO: numbers can be added to numbers, but numbers cant be added to
        # strings. but strings can be added to strings, etc...
        if number1_type not in [IntNode, FloatNode] or number2_type not in [IntNode, FloatNode]:
            traceback(self.code, "TypeError", f"Unsupported operation \"divide\" for \"{left}\" and \"{right}\".")

        # Integer variables were loaded into rax/rbx; divsd needs them in XMM.
        if number1_type == IntNode:
            lines.append("cvtsi2sd xmm0, rax\n\t")
        if number2_type == IntNode:
            lines.append("cvtsi2sd xmm1, rbx\n\t")

        lines.append("divsd xmm0, xmm1\n\t")
        self._store_result(lines, node.arguments[2].var_name, "xmm0", FloatNode)

    def _instr_stdout(self, node, lines):
        """``stdout value``: write a string to file descriptor 1 (sys_write)."""
        self.clamp_instruction_args(node, 1, 1)
        arg = node.arguments[0]
        printed_value = arg.__str__()

        if isinstance(arg, VarRefNode):
            var_type = self.get_variable(lines, arg.var_name, "rsi", offset=0, no_stack_pop=True)
            if var_type == StringNode:
                # The length lives in the slot pushed right after the pointer.
                self.get_variable(lines, arg.var_name, "rdx", offset=-8, no_stack_pop=True)
            else:
                traceback(self.code, "TypeError", f"You can't print \"{var_type(None).__repr__()}\", try converting it to a string first.")
        else:
            string_pointer = self.add_constant(printed_value)[1:-1]
            string_len = self.add_constant(f"equ $ - {string_pointer}", True)[1:-1]
            lines.append(f"mov rsi, {string_pointer}\n\t")
            lines.append(f"mov rdx, {string_len}\n\t")  # length

        lines.append("mov rax, 1\n\t")  # sys_write syscall
        lines.append("mov rdi, 1\n\t")  # a file descriptor of 1 is stdout
        lines.append("syscall\n\t")

    def _instr_jump(self, node, lines):
        """``jump %label``: unconditional jump to a local label."""
        self.clamp_instruction_args(node, 1, 1)
        if not isinstance(node.arguments[0], LabelRefNode):
            traceback(self.code, "TypeError", f"jump expects a label reference as the first argument, not \"{node.arguments[0]}\"")
        lines.append(f"jmp .{node.arguments[0].name}\n\t")

    def _instr_if(self, node, lines):
        """``if cond %label``: jump when ``cond`` is truthy.

        Literal conditions are resolved at compile time (a jump is emitted or
        nothing at all); variables are tested at run time.
        """
        self.clamp_instruction_args(node, 2, 2)
        condition = node.arguments[0]
        if not type(condition) in [VarRefNode, BoolNode, StringNode, FloatNode, IntNode]:
            traceback(self.code, "TypeError", f"if expects a value or variable refernce as the first argument, not \"{node.arguments[0]}\"")
        elif not isinstance(node.arguments[1], LabelRefNode):
            traceback(self.code, "TypeError", f"if expects a label reference as the second argument, not \"{node.arguments[1]}\"")

        if isinstance(condition, BoolNode):
            if condition.value:
                lines.append(f"jmp .{node.arguments[1].name}\n\t")
        elif type(condition) in [IntNode, FloatNode]:
            if condition.value != 0:
                lines.append(f"jmp .{node.arguments[1].name}\n\t")
        elif isinstance(condition, VarRefNode):
            self.get_variable(lines, condition.var_name, "eax")
            lines.append("test eax, eax\n\t")
            lines.append(f"jnz .{node.arguments[1].name}\n\t")

    def _load_compare_operand(self, operand, lines, int_reg, xmm_reg):
        """Load one comparison operand and return the register holding it."""
        if isinstance(operand, BoolNode):
            lines.append(f"mov {int_reg}, {int(operand.value)}\n\t")
            return int_reg
        if isinstance(operand, IntNode):
            lines.append(f"mov {int_reg}, {operand.value}\n\t")
            return int_reg
        if isinstance(operand, FloatNode):
            const_name = self.add_constant(operand.value)
            lines.append(f"movsd {xmm_reg}, {const_name}\n\t")
            return xmm_reg
        if isinstance(operand, VarRefNode):
            self.get_variable(lines, operand.var_name, int_reg)
            return int_reg
        return None

    def _instr_compare(self, node, lines):
        """``equal``/``inequal``/``greater``/``lesser`` ``a b &dest``.

        Stores 1 or 0 in ``dest`` depending on the comparison result.
        """
        self.clamp_instruction_args(node, 3, 3)
        if not type(node.arguments[0]) in [VarRefNode, BoolNode, FloatNode, IntNode]:
            traceback(self.code, "TypeError", f"{node.instruction} expects a value or variable refernce as the first argument, not \"{node.arguments[0].__repr__()}\"")
        elif not type(node.arguments[1]) in [VarRefNode, BoolNode, FloatNode, IntNode]:
            traceback(self.code, "TypeError", f"{node.instruction} expects a value or variable refernce as the second argument, not \"{node.arguments[1].__repr__()}\"")
        elif not isinstance(node.arguments[2], VarPointerNode):
            traceback(self.code, "TypeError", f"the third argument of {node.instruction} should be a variable pointer, not \"{node.arguments[2].__repr__()}\"")

        arg1 = self._load_compare_operand(node.arguments[0], lines, "rax", "xmm0")
        arg2 = self._load_compare_operand(node.arguments[1], lines, "rbx", "xmm1")

        # NOTE(review): for float operands this emits `cmp` on XMM registers;
        # confirm whether float comparisons are meant to be supported here.
        lines.append(f"cmp {arg1}, {arg2}\n\t")

        instructions = {
            "equal": "sete",
            "inequal": "setne",
            "greater": "setg",
            "lesser": "setl",
        }
        lines.append(f"{instructions[node.instruction]} al\n\t")
        lines.append("movzx rax, al\n\t")

        self._store_result(lines, node.arguments[2].var_name, "rax", BoolNode)

    def _instr_return(self, node, lines):
        """``return [value]``: load the result, restore rbp and leave the scope.

        Int/Bool literals are returned in rax and float literals in xmm0; a
        return without a loadable literal returns 0 in rax.
        """
        self.clamp_instruction_args(node, 0, 1)
        value = node.arguments[0] if len(node.arguments) == 1 else None

        if isinstance(value, IntNode):
            lines.append(f"mov rax, {value.value}\n\t")
        elif isinstance(value, BoolNode):
            lines.append(f"mov rax, {int(value.value)}\n\t")
        elif isinstance(value, FloatNode):
            # XMM registers take no immediates: go through a data constant.
            const_name = self.add_constant(value.value)
            lines.append(f"movsd xmm0, {const_name}\n\t")
        else:
            lines.append("mov rax, 0\n\t")

        self.pop("rbp", lines)
        lines.append("ret\n\t")
        self.current_var_scope = self.current_var_scope.parent

    def _instr_call(self, node, lines):
        """``call !func [&dest]``: pass queued ``pusharg`` values and call.

        Arguments are placed in rdi, rsi, rdx, rcx, r8, r9 in that order; the
        value left in rax is optionally stored in ``dest``.
        """
        self.clamp_instruction_args(node, 1, 2)
        if not isinstance(node.arguments[0], FunctionCallNode):
            traceback(self.code, "TypeError", "Argument 1 of call needs to be a function reference.")

        func_name = node.arguments[0].func_name
        func = self.functions.get(func_name, None)
        if not func:
            traceback(self.code, "TypeError", f"Function \"{func_name}\" is not defined.")
        if len(self.arg_list) != len(func.args):
            traceback(self.code, "TypeError", f"Function \"{func_name}\" takes {len(func.args)} arguments, but got {len(self.arg_list)}")

        # NOTE(review): this padding is not tracked in stack_size and is never
        # undone separately from the cleanup below -- confirm the intended
        # alignment scheme.
        if self.stack_size % 2 == 0:
            lines.append("sub rsp, 8\n\t")  # align the stack to 16 bytes

        for i, arg in enumerate(self.arg_list):
            load = "mov"
            value = ""
            if isinstance(arg, IntNode):
                value = arg.value
            elif isinstance(arg, StringNode):
                # Pass the string's address, not the bytes stored at it.
                load = "lea"
                value = self.add_constant(arg.value)
            else:
                traceback(self.code, "CallError", f"Can't pass {arg} to function.")

            if i >= len(self._ARG_REGISTERS):
                traceback(self.code, "CallError", "Functions with more than 6 args aren't supported yet, sorry...")
                continue
            lines.append(f"{load} {self._ARG_REGISTERS[i]}, {value}\n\t")

        lines.append(f"call {func_name}\n\t")
        # Emitted into the active output list so calls made inside a function
        # body are cleaned up in that body rather than in top-level code.
        lines.append(f"add rsp, {len(self.arg_list) * 8}\n\t")
        self.arg_list.clear()

        if len(node.arguments) == 2:
            if not isinstance(node.arguments[1], VarPointerNode):
                traceback(self.code, "TypeError", "Argument 2 of call needs to be a variable pointer.")
            dest_name = node.arguments[1].var_name
            if self.current_var_scope.lookup(dest_name):
                self.change_variable(lines, dest_name, "rax")
            else:
                self.create_variable(lines, dest_name, "rax",
                                     self.ground_type_to_node(self.functions.get(func_name).return_type))

    def _instr_pusharg(self, node, lines):
        """``pusharg value``: queue an argument for the next ``call``."""
        self.clamp_instruction_args(node, 1, 1)
        self.arg_list.append(node.arguments[0])

    def write(self):
        """Write the data section, then the optimized code, to ``<output_path>.asm``."""
        with open(self.output_path + ".asm", "w") as f:
            f.write("; ~~~ Auto generated by the GroundPY compiler for Linux x86_64 targets. ~~~\n\n")
            f.write("section .data\n")
            for name, const in self.constants.items():
                value = const["value"]
                f.write("." + name + ": ")
                value_type = type(value)
                if value_type == str:
                    if not const["no_string"]:
                        # Escaped "\n" sequences become a literal byte 10
                        # between quoted chunks; the string is NUL-terminated.
                        final = 'db "' + value.replace("\\n", '", 10, "') + '", 0'
                        # Drop empty chunks produced by a leading/trailing "\n".
                        final = final.replace(', "", ', ", ")
                        f.write(final)
                    else:
                        # Raw assembler text (e.g. an `equ` length expression).
                        f.write(value)
                elif value_type == float or value_type == int:
                    f.write(f"dq {float(value)}")
                f.write("\n")

            f.write("section .text\n")
            optimizer = X86_64Optimizer(self.lines + self.function_lines)
            f.writelines(optimizer.optimize_until_stable())