From 9d345724ea2ee7a9e67778cf16c3f9330fc5b494 Mon Sep 17 00:00:00 2001 From: Maxwell Jeffress Date: Sun, 16 Nov 2025 18:45:13 +1100 Subject: [PATCH] Add simple C++ compiler --- src/main.cpp | 465 ++++++++++++++++++++++++++++++++++++++++++-- tests/compileme.ppl | 7 + 2 files changed, 459 insertions(+), 13 deletions(-) create mode 100644 tests/compileme.ppl diff --git a/src/main.cpp b/src/main.cpp index 7c418bf..286a3b8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,11 +8,12 @@ #include #include #include +#include #include "../libs/linenoise.hpp" enum class ValueTypes { - Identifier, Int, Double, String, List, Function, Type, Nil + Identifier, Int, Double, String, List, Function, Type, Nil, Auto }; enum class ParserErrorType { @@ -31,6 +32,41 @@ enum class InterpreterErrorType { UnknownEnvironment }; +enum class CTranspilerErrorType { + UnknownInstruction, + UnexpectedToken, + IncorrectTokenType, + UnknownEnvironment +}; + +class CTranspilerError : public std::exception { + std::string message; + CTranspilerErrorType type; + public: + explicit CTranspilerError(CTranspilerErrorType type) : type(type) { + std::stringstream ss; + ss << "Compiling error: "; + switch (type) { + case CTranspilerErrorType::UnknownInstruction: + ss << "UnknownInstruction"; + break; + case CTranspilerErrorType::UnexpectedToken: + ss << "UnexpectedToken"; + break; + case CTranspilerErrorType::IncorrectTokenType: + ss << "IncorrectTokenType"; + break; + case CTranspilerErrorType::UnknownEnvironment: + ss << "UnknownEnvironment"; + break; + } + message = ss.str(); + } + [[nodiscard]] const char *what() const noexcept override { + return message.c_str(); + } +}; + class InterpretingError : public std::exception { std::string message; InterpreterErrorType errorType; @@ -121,43 +157,43 @@ class Value { std::variant, Function, ValueTypes> value; public: ValueTypes type; - std::optional getInt() { + [[nodiscard]] std::optional getInt() const { if (std::holds_alternative(value)) { return std::get(value); } return {}; } - std::optional getDouble() { + [[nodiscard]] std::optional getDouble() const { if (std::holds_alternative(value)) { return std::get(value); } return {}; } - std::optional getString() { + [[nodiscard]] std::optional getString() const { if (std::holds_alternative(value)) { return std::get(value); } return {}; } - std::optional> getList() { + [[nodiscard]] std::optional> getList() const { if (std::holds_alternative>(value)) { return std::get>(value); } return {}; } - std::optional getFunction() { + [[nodiscard]] std::optional getFunction() const { if (std::holds_alternative(value)) { return std::get(value); } return {}; } - std::optional getType() { + [[nodiscard]] std::optional getType() const { if (std::holds_alternative(value)) { return std::get(value); } return {}; } - void print() { + void print() const { switch (type) { case ValueTypes::String: { std::cout << getString().value(); @@ -182,6 +218,7 @@ class Value { } listElement.print(); } + break; } case ValueTypes::Function: { std::cout << ""; @@ -199,7 +236,7 @@ class Value { } - bool operator==(Value &otherValue) { + bool operator==(Value &otherValue) const { if (type != otherValue.type) { return false; } @@ -240,6 +277,49 @@ class Value { } } + friend std::ostream& operator<<(std::ostream &os, const Value &obj) { + switch (obj.type) { + case ValueTypes::String: { + os << obj.getString().value(); + break; + } + case ValueTypes::Int: { + os << obj.getInt().value(); + break; + } + case ValueTypes::Double: { + os << obj.getDouble().value(); + break; + } + case ValueTypes::List: { + auto list = obj.getList().value(); + bool first = true; + for (auto& listElement : list) { + if (!first) { + os << ", "; + } else { + first = false; + } + listElement.print(); + } + break; + } + case ValueTypes::Function: { + os << ""; + break; + } + case ValueTypes::Type: { + os << ""; + break; + } + default: + case ValueTypes::Nil: { + os << "\033[2;3;96m" << "nil" << "\033[0m"; + } + } + return os; + } + explicit Value() : value(nullptr), type(ValueTypes::Nil) {} explicit Value(int in) : value(in), type(ValueTypes::Int) {} explicit Value(double in) : value(in), type(ValueTypes::Double) {} @@ -929,12 +1009,361 @@ class Interpreter { Interpreter() = default; }; +class CppTranspiler { + std::vector instructions; + std::map functions; + std::map environment; + std::vector headers; + std::vector body; + std::vector tail; + int fnAmount = 0; + + void validateFunctionCall(const std::string& name, const std::vector>& args) { + // Check if function exists + if (functions.find(name) == functions.end()) { + throw CTranspilerError(CTranspilerErrorType::UnknownEnvironment); + } + + Function& func = functions[name]; + + // Check argument count + if (args.size() != func.arguments.size()) { + std::cerr << "Function '" << name << "' expects " + << func.arguments.size() << " arguments but got " + << args.size() << std::endl; + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + + // Check argument types (if we know them at compile time) + for (size_t i = 0; i < args.size(); i++) { + if (std::holds_alternative(args[i])) { + Value argVal = std::get(args[i]); + ValueTypes expectedType = func.arguments[i].second; + + // Only validate if it's a literal value (not an identifier or expression) + if (argVal.type != ValueTypes::Identifier && argVal.type != expectedType) { + std::cerr << "Function '" << name << "' argument " << i + << " ('" << func.arguments[i].first << "') expects type " + << static_cast(expectedType) << " but got " + << static_cast(argVal.type) << std::endl; + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + } + } + } + + std::string compileFunction(const Instruction &instruction) { + std::stringstream functionss; + Function function; + + auto returnTypeValue = std::get(instruction.args[0]); + if (returnTypeValue.type != ValueTypes::Type) { + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + ValueTypes returnType = returnTypeValue.getType().value(); + std::vector> args; + + int argAmount = 0; + if (std::holds_alternative(instruction.args[1]) && + std::get(instruction.args[1]).type == ValueTypes::Identifier && + std::get(instruction.args[1]).getString().value() != "[") { + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + + // Parse arguments + for (int i = 2; i < instruction.args.size(); i += 2) { + Value argTypeVal = std::get(instruction.args[i]); + if (argTypeVal.type != ValueTypes::Type) { + if (argTypeVal.type == ValueTypes::Identifier && + argTypeVal.getString().value() == "]") { + break; + } + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + ValueTypes argType = argTypeVal.getType().value(); + + if (std::holds_alternative(instruction.args[i + 1])) { + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + Value argNameVal = std::get(instruction.args[i + 1]); + if (argNameVal.type != ValueTypes::Identifier) { + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + std::string argName = argNameVal.getString().value(); + + args.emplace_back(argName, argType); + argAmount += 2; + } + + std::vector body; + for (int i = argAmount + 3; i < instruction.args.size(); i++) { + if (std::holds_alternative(instruction.args[i])) { + body.push_back(std::get(instruction.args[i])); + } else { + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + } + + // Store in function object + function.returnType = returnType; + function.arguments = args; + function.code = body; + + // Generate C++ code + std::string returntype; + switch (returnType) { + case ValueTypes::Int: + returntype = "int"; + break; + case ValueTypes::Double: + returntype = "double"; + break; + case ValueTypes::String: + returntype = "std::string"; + break; + default: + std::cout << "that's not currently supported" << std::endl; + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + + bool first = true; + std::stringstream argss; + for (const auto& arg : function.arguments) { + if (!first) { + argss << ", "; + } + first = false; + + switch (arg.second) { + case ValueTypes::Int: + argss << "int " << arg.first; + break; + case ValueTypes::Double: + argss << "double " << arg.first; + break; + case ValueTypes::String: + argss << "std::string " << arg.first; + break; + default: + std::cout << "that's not currently supported" << std::endl; + throw InterpretingError(InterpreterErrorType::IncorrectTokenType); + } + } + + functionss << "[](" << argss.str() << ") -> " << returntype << " {\n"; + for (const auto& inst : function.code) { + functionss << " " << compileInstruction(inst) << ";\n"; + } + functionss << "}"; + + return functionss.str(); + } + std::string compileInstruction(const Instruction &instruction) { + // the value is for values being held, the std::string is for compiled instructions + std::vector> args; + for (const auto &arg : instruction.args) { + if (std::holds_alternative(arg)) { + args.emplace_back("(" + compileInstruction(std::get(arg)) + ")"); + } + if (std::holds_alternative(arg)) { + args.emplace_back(std::get(arg)); + } + } + if (functions.find(instruction.instruction) != functions.end()) { + // Validate the call + validateFunctionCall(instruction.instruction, args); + + // Compile the call + std::stringstream call; + call << instruction.instruction << "("; + bool first = true; + for (const auto& arg : args) { + if (!first) call << ", "; + first = false; + + if (std::holds_alternative(arg)) { + call << std::get(arg); + } else { + Value v = std::get(arg); + if (v.type == ValueTypes::String) { + call << "\"" << v.getString().value() << "\""; + } else if (v.type == ValueTypes::Int) { + call << v.getInt().value(); + } else if (v.type == ValueTypes::Double) { + call << v.getDouble().value(); + } else if (v.type == ValueTypes::Identifier) { + call << v.getString().value(); + } + } + } + call << ")"; + return call.str(); + } + if (instruction.instruction == "let") { + if (args.size() < 2) { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + if (!std::holds_alternative(args[0])) { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + Value nameVal = std::get(args[0]); + if (nameVal.type != ValueTypes::Identifier) { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + std::string varName = nameVal.getString().value(); + + // Check if it's a function definition + if (std::holds_alternative(args[1])) { + std::string compiled = std::get(args[1]); + // Check if the original instruction.args[1] was a function + if (std::holds_alternative(instruction.args[1])) { + Instruction funcInst = std::get(instruction.args[1]); + if (funcInst.instruction == "function") { + // Extract the function signature and store it + Function func; + auto returnTypeValue = std::get(funcInst.args[0]); + func.returnType = returnTypeValue.getType().value(); + + // Parse arguments + int argAmount = 0; + for (int i = 2; i < funcInst.args.size(); i += 2) { + Value argTypeVal = std::get(funcInst.args[i]); + if (argTypeVal.type != ValueTypes::Type) { + if (argTypeVal.type == ValueTypes::Identifier && + argTypeVal.getString().value() == "]") { + break; + } + break; + } + ValueTypes argType = argTypeVal.getType().value(); + Value argNameVal = std::get(funcInst.args[i + 1]); + std::string argName = argNameVal.getString().value(); + + func.arguments.emplace_back(argName, argType); + argAmount += 2; + } + + // Store the function signature + functions[varName] = func; + } + } + + return "auto " + varName + " = " + compiled; + } + + std::stringstream code; + + code << "auto "; + if (std::holds_alternative(args[0]) && std::get(args[0]).type == ValueTypes::Identifier) { + code << std::get(args[0]).getString().value() << " = "; + } else { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + + if (std::holds_alternative(args[1])) { + code << std::get(args[1]); + } + if (std::holds_alternative(args[1])) { + code << std::get(args[1]); + } + + return code.str(); + } + if (instruction.instruction == "function") { + if (std::ranges::find(headers, "functional") == headers.end()) { + headers.emplace_back("functional"); + } + return compileFunction(instruction); + } + if (instruction.instruction == "return") { + if (!args.empty()) { + if (std::holds_alternative(args[0])) { + Value value = std::get(args[0]); + if (value.type == ValueTypes::Int) { + return "return " + std::to_string(value.getInt().value()); + } else { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + } + if (std::holds_alternative(args[0])) { + return "return(" + std::get(args[0]) + ")"; + } + } else { + throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType); + } + } + if (instruction.instruction == "print") { + std::stringstream retval; + if (std::ranges::find(headers, "iostream") == headers.end()) { + headers.emplace_back("iostream"); + } + for (const auto &arg : args) { + if (std::holds_alternative(arg)) { + retval << "std::cout << " << std::get(arg) << " << \"\\n\""; + } + if (std::holds_alternative(arg)) { + if (std::get(arg).type == ValueTypes::String) { + retval << "std::cout << \"" << std::get(arg) << "\" << \"\\n\""; + } else { + retval << "std::cout << " << std::get(arg) << " << \"\\n\""; + } + } + } + return retval.str(); + } + throw InterpretingError(InterpreterErrorType::UnknownInstruction); + } + + public: + std::string createCppFile() const { + std::stringstream cpp; + cpp << "// Autogenerated by the Pipple compiler.\n// Headers\n"; + for (const auto &header : headers) { + cpp << "#include <" << header << ">\n"; + } + cpp << "\n// Code\n"; + for (const auto &body : body) { + cpp << body << "\n"; + } + for (const auto &tail : tail) { + cpp << tail << "\n"; + } + return cpp.str(); + + } + void addInstruction(const Instruction &instruction) { + body.emplace_back(compileInstruction(instruction) + ";"); + } + explicit CppTranspiler(std::vector in) : instructions(std::move(in)) { + for (Instruction &instruction : instructions) { + body.push_back(compileInstruction(instruction)); + } + std::cout << createCppFile() << std::endl; + } + CppTranspiler() { + headers = {}; + body = { + "void* nil = nullptr;", + "int main(int argc, char** argv) {", + }; + tail = { + "}" + }; + } +}; + int main(int argc, char** argv) { bool isInteractive = true; + bool compiling = false; std::string program; if (argc > 1) { + int fileNum = 1; + if (strcmp(argv[1], "-c") == 0) { + fileNum = 2; + compiling = true; + } isInteractive = false; - std::ifstream ifs(argv[1]); + std::ifstream ifs(argv[fileNum]); std::stringstream buffer; if (ifs.is_open()) { buffer << ifs.rdbuf(); @@ -945,6 +1374,7 @@ int main(int argc, char** argv) { linenoise::SetHistoryMaxLen(50); Parser parser; Interpreter interpreter; + CppTranspiler transpiler; while (true) { if (isInteractive) { if (linenoise::Readline("pipple> ", program)) { @@ -1000,10 +1430,16 @@ int main(int argc, char** argv) { if (!isInteractive) return 1; } try { - for (auto instruction : parser.instructions) { - interpreter.interpretInstruction(instruction); - } + if (compiling) { + for (const auto &instruction : parser.instructions) { + transpiler.addInstruction(instruction); + } + } else { + for (auto instruction : parser.instructions) { + interpreter.interpretInstruction(instruction); + } + } } catch (const InterpretingError& e) { std::cerr << e.what() << std::endl; if (!isInteractive) return 1; @@ -1012,5 +1448,8 @@ int main(int argc, char** argv) { break; } } + if (compiling) { + std::cout << transpiler.createCppFile() << std::endl; + } return 0; } diff --git a/tests/compileme.ppl b/tests/compileme.ppl new file mode 100644 index 0000000..d3c4e30 --- /dev/null +++ b/tests/compileme.ppl @@ -0,0 +1,7 @@ +(print "dingus") +(let x (function int [int x int y] + (print "dingus, but this time we're in a function") + (return 5) +)) +(print (x 5 6)) +(return 69)