Add simple C++ compiler

This commit is contained in:
2025-11-16 18:45:13 +11:00
parent c9c00e219d
commit 9d345724ea
2 changed files with 459 additions and 13 deletions

View File

@@ -8,11 +8,12 @@
#include <sstream>
#include <map>
#include <memory>
#include <algorithm>
#include "../libs/linenoise.hpp"
/// Tag describing what a Value currently represents.
///
/// Enumerator order is significant: diagnostics print these as integers via
/// static_cast<int>, so new entries must be appended at the end.
enum class ValueTypes {
    Identifier, ///< A bare name; bracket tokens such as "[" and "]" also arrive as identifiers.
    Int,
    Double,
    String,
    List,
    Function,
    Type,       ///< A value that itself names one of these types.
    Nil,        ///< Tag of a default-constructed Value.
    Auto,       // NOTE(review): no use of Auto is visible in this part of the file — confirm intended meaning.
};
enum class ParserErrorType {
@@ -31,6 +32,41 @@ enum class InterpreterErrorType {
UnknownEnvironment
};
/// Failure categories reported by CTranspilerError.
enum class CTranspilerErrorType {
    /// Reserved for instructions the transpiler does not recognise.
    UnknownInstruction,
    /// Reserved for tokens appearing where they are not allowed.
    UnexpectedToken,
    /// An argument has the wrong kind, type, or count for the instruction.
    IncorrectTokenType,
    /// A referenced name (for example a function) is not known to the transpiler.
    UnknownEnvironment,
};
class CTranspilerError : public std::exception {
std::string message;
CTranspilerErrorType type;
public:
explicit CTranspilerError(CTranspilerErrorType type) : type(type) {
std::stringstream ss;
ss << "Compiling error: ";
switch (type) {
case CTranspilerErrorType::UnknownInstruction:
ss << "UnknownInstruction";
break;
case CTranspilerErrorType::UnexpectedToken:
ss << "UnexpectedToken";
break;
case CTranspilerErrorType::IncorrectTokenType:
ss << "IncorrectTokenType";
break;
case CTranspilerErrorType::UnknownEnvironment:
ss << "UnknownEnvironment";
break;
}
message = ss.str();
}
[[nodiscard]] const char *what() const noexcept override {
return message.c_str();
}
};
class InterpretingError : public std::exception {
std::string message;
InterpreterErrorType errorType;
@@ -121,43 +157,43 @@ class Value {
std::variant<int, double, std::string, void*, std::vector<Value>, Function, ValueTypes> value;
public:
ValueTypes type;
std::optional<int> getInt() {
/// Returns the stored int, or an empty optional when the variant currently
/// holds a different alternative.
[[nodiscard]] std::optional<int> getInt() const {
    if (const auto *held = std::get_if<int>(&value)) {
        return *held;
    }
    return std::nullopt;
}
std::optional<double> getDouble() {
/// Returns the stored double, or an empty optional when the variant currently
/// holds a different alternative.
[[nodiscard]] std::optional<double> getDouble() const {
    if (const auto *held = std::get_if<double>(&value)) {
        return *held;
    }
    return std::nullopt;
}
std::optional<std::string> getString() {
/// Returns a copy of the stored string, or an empty optional when the variant
/// currently holds a different alternative.
[[nodiscard]] std::optional<std::string> getString() const {
    if (const auto *held = std::get_if<std::string>(&value)) {
        return *held;
    }
    return std::nullopt;
}
std::optional<std::vector<Value>> getList() {
[[nodiscard]] std::optional<std::vector<Value>> getList() const {
if (std::holds_alternative<std::vector<Value>>(value)) {
return std::get<std::vector<Value>>(value);
}
return {};
}
std::optional<Function> getFunction() {
[[nodiscard]] std::optional<Function> getFunction() const {
if (std::holds_alternative<Function>(value)) {
return std::get<Function>(value);
}
return {};
}
std::optional<ValueTypes> getType() {
[[nodiscard]] std::optional<ValueTypes> getType() const {
if (std::holds_alternative<ValueTypes>(value)) {
return std::get<ValueTypes>(value);
}
return {};
}
void print() {
void print() const {
switch (type) {
case ValueTypes::String: {
std::cout << getString().value();
@@ -182,6 +218,7 @@ class Value {
}
listElement.print();
}
break;
}
case ValueTypes::Function: {
std::cout << "<function>";
@@ -199,7 +236,7 @@ class Value {
}
bool operator==(Value &otherValue) {
bool operator==(Value &otherValue) const {
if (type != otherValue.type) {
return false;
}
@@ -240,6 +277,49 @@ class Value {
}
}
/// Writes a textual rendering of obj to os and returns os.
///
/// Strings, ints and doubles are written as their raw value; list elements are
/// separated by ", "; functions and types are written as the placeholders
/// "<function>" and "<type>"; every other tag is written as an ANSI-styled "nil".
friend std::ostream& operator<<(std::ostream &os, const Value &obj) {
    switch (obj.type) {
        case ValueTypes::String:
            os << obj.getString().value();
            break;
        case ValueTypes::Int:
            os << obj.getInt().value();
            break;
        case ValueTypes::Double:
            os << obj.getDouble().value();
            break;
        case ValueTypes::List: {
            const auto elements = obj.getList().value();
            bool first = true;
            for (const auto &element : elements) {
                if (!first) {
                    os << ", ";
                }
                first = false;
                // Recurse through this operator so elements land on `os`;
                // calling print() here would send them to std::cout instead.
                os << element;
            }
            break;
        }
        case ValueTypes::Function:
            os << "<function>";
            break;
        case ValueTypes::Type:
            os << "<type>";
            break;
        case ValueTypes::Nil:
        default:
            os << "\033[2;3;96m" << "nil" << "\033[0m";
            break;
    }
    return os;
}
explicit Value() : value(nullptr), type(ValueTypes::Nil) {}
explicit Value(int in) : value(in), type(ValueTypes::Int) {}
explicit Value(double in) : value(in), type(ValueTypes::Double) {}
@@ -929,12 +1009,361 @@ class Interpreter {
Interpreter() = default;
};
class CppTranspiler {
std::vector<Instruction> instructions;
std::map<std::string, Function> functions;
std::map<std::string, Value> environment;
std::vector<std::string> headers;
std::vector<std::string> body;
std::vector<std::string> tail;
int fnAmount = 0;
void validateFunctionCall(const std::string& name, const std::vector<std::variant<Value, std::string>>& args) {
// Check if function exists
if (functions.find(name) == functions.end()) {
throw CTranspilerError(CTranspilerErrorType::UnknownEnvironment);
}
Function& func = functions[name];
// Check argument count
if (args.size() != func.arguments.size()) {
std::cerr << "Function '" << name << "' expects "
<< func.arguments.size() << " arguments but got "
<< args.size() << std::endl;
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
// Check argument types (if we know them at compile time)
for (size_t i = 0; i < args.size(); i++) {
if (std::holds_alternative<Value>(args[i])) {
Value argVal = std::get<Value>(args[i]);
ValueTypes expectedType = func.arguments[i].second;
// Only validate if it's a literal value (not an identifier or expression)
if (argVal.type != ValueTypes::Identifier && argVal.type != expectedType) {
std::cerr << "Function '" << name << "' argument " << i
<< " ('" << func.arguments[i].first << "') expects type "
<< static_cast<int>(expectedType) << " but got "
<< static_cast<int>(argVal.type) << std::endl;
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
}
}
}
std::string compileFunction(const Instruction &instruction) {
std::stringstream functionss;
Function function;
auto returnTypeValue = std::get<Value>(instruction.args[0]);
if (returnTypeValue.type != ValueTypes::Type) {
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
ValueTypes returnType = returnTypeValue.getType().value();
std::vector<std::pair<std::string, ValueTypes>> args;
int argAmount = 0;
if (std::holds_alternative<Value>(instruction.args[1]) &&
std::get<Value>(instruction.args[1]).type == ValueTypes::Identifier &&
std::get<Value>(instruction.args[1]).getString().value() != "[") {
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
// Parse arguments
for (int i = 2; i < instruction.args.size(); i += 2) {
Value argTypeVal = std::get<Value>(instruction.args[i]);
if (argTypeVal.type != ValueTypes::Type) {
if (argTypeVal.type == ValueTypes::Identifier &&
argTypeVal.getString().value() == "]") {
break;
}
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
ValueTypes argType = argTypeVal.getType().value();
if (std::holds_alternative<Instruction>(instruction.args[i + 1])) {
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
Value argNameVal = std::get<Value>(instruction.args[i + 1]);
if (argNameVal.type != ValueTypes::Identifier) {
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
std::string argName = argNameVal.getString().value();
args.emplace_back(argName, argType);
argAmount += 2;
}
std::vector<Instruction> body;
for (int i = argAmount + 3; i < instruction.args.size(); i++) {
if (std::holds_alternative<Instruction>(instruction.args[i])) {
body.push_back(std::get<Instruction>(instruction.args[i]));
} else {
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
}
// Store in function object
function.returnType = returnType;
function.arguments = args;
function.code = body;
// Generate C++ code
std::string returntype;
switch (returnType) {
case ValueTypes::Int:
returntype = "int";
break;
case ValueTypes::Double:
returntype = "double";
break;
case ValueTypes::String:
returntype = "std::string";
break;
default:
std::cout << "that's not currently supported" << std::endl;
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
bool first = true;
std::stringstream argss;
for (const auto& arg : function.arguments) {
if (!first) {
argss << ", ";
}
first = false;
switch (arg.second) {
case ValueTypes::Int:
argss << "int " << arg.first;
break;
case ValueTypes::Double:
argss << "double " << arg.first;
break;
case ValueTypes::String:
argss << "std::string " << arg.first;
break;
default:
std::cout << "that's not currently supported" << std::endl;
throw InterpretingError(InterpreterErrorType::IncorrectTokenType);
}
}
functionss << "[](" << argss.str() << ") -> " << returntype << " {\n";
for (const auto& inst : function.code) {
functionss << " " << compileInstruction(inst) << ";\n";
}
functionss << "}";
return functionss.str();
}
std::string compileInstruction(const Instruction &instruction) {
// the value is for values being held, the std::string is for compiled instructions
std::vector<std::variant<Value, std::string>> args;
for (const auto &arg : instruction.args) {
if (std::holds_alternative<Instruction>(arg)) {
args.emplace_back("(" + compileInstruction(std::get<Instruction>(arg)) + ")");
}
if (std::holds_alternative<Value>(arg)) {
args.emplace_back(std::get<Value>(arg));
}
}
if (functions.find(instruction.instruction) != functions.end()) {
// Validate the call
validateFunctionCall(instruction.instruction, args);
// Compile the call
std::stringstream call;
call << instruction.instruction << "(";
bool first = true;
for (const auto& arg : args) {
if (!first) call << ", ";
first = false;
if (std::holds_alternative<std::string>(arg)) {
call << std::get<std::string>(arg);
} else {
Value v = std::get<Value>(arg);
if (v.type == ValueTypes::String) {
call << "\"" << v.getString().value() << "\"";
} else if (v.type == ValueTypes::Int) {
call << v.getInt().value();
} else if (v.type == ValueTypes::Double) {
call << v.getDouble().value();
} else if (v.type == ValueTypes::Identifier) {
call << v.getString().value();
}
}
}
call << ")";
return call.str();
}
if (instruction.instruction == "let") {
if (args.size() < 2) {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
if (!std::holds_alternative<Value>(args[0])) {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
Value nameVal = std::get<Value>(args[0]);
if (nameVal.type != ValueTypes::Identifier) {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
std::string varName = nameVal.getString().value();
// Check if it's a function definition
if (std::holds_alternative<std::string>(args[1])) {
std::string compiled = std::get<std::string>(args[1]);
// Check if the original instruction.args[1] was a function
if (std::holds_alternative<Instruction>(instruction.args[1])) {
Instruction funcInst = std::get<Instruction>(instruction.args[1]);
if (funcInst.instruction == "function") {
// Extract the function signature and store it
Function func;
auto returnTypeValue = std::get<Value>(funcInst.args[0]);
func.returnType = returnTypeValue.getType().value();
// Parse arguments
int argAmount = 0;
for (int i = 2; i < funcInst.args.size(); i += 2) {
Value argTypeVal = std::get<Value>(funcInst.args[i]);
if (argTypeVal.type != ValueTypes::Type) {
if (argTypeVal.type == ValueTypes::Identifier &&
argTypeVal.getString().value() == "]") {
break;
}
break;
}
ValueTypes argType = argTypeVal.getType().value();
Value argNameVal = std::get<Value>(funcInst.args[i + 1]);
std::string argName = argNameVal.getString().value();
func.arguments.emplace_back(argName, argType);
argAmount += 2;
}
// Store the function signature
functions[varName] = func;
}
}
return "auto " + varName + " = " + compiled;
}
std::stringstream code;
code << "auto ";
if (std::holds_alternative<Value>(args[0]) && std::get<Value>(args[0]).type == ValueTypes::Identifier) {
code << std::get<Value>(args[0]).getString().value() << " = ";
} else {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
if (std::holds_alternative<Value>(args[1])) {
code << std::get<Value>(args[1]);
}
if (std::holds_alternative<std::string>(args[1])) {
code << std::get<std::string>(args[1]);
}
return code.str();
}
if (instruction.instruction == "function") {
if (std::ranges::find(headers, "functional") == headers.end()) {
headers.emplace_back("functional");
}
return compileFunction(instruction);
}
if (instruction.instruction == "return") {
if (!args.empty()) {
if (std::holds_alternative<Value>(args[0])) {
Value value = std::get<Value>(args[0]);
if (value.type == ValueTypes::Int) {
return "return " + std::to_string(value.getInt().value());
} else {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
}
if (std::holds_alternative<std::string>(args[0])) {
return "return(" + std::get<std::string>(args[0]) + ")";
}
} else {
throw CTranspilerError(CTranspilerErrorType::IncorrectTokenType);
}
}
if (instruction.instruction == "print") {
std::stringstream retval;
if (std::ranges::find(headers, "iostream") == headers.end()) {
headers.emplace_back("iostream");
}
for (const auto &arg : args) {
if (std::holds_alternative<std::string>(arg)) {
retval << "std::cout << " << std::get<std::string>(arg) << " << \"\\n\"";
}
if (std::holds_alternative<Value>(arg)) {
if (std::get<Value>(arg).type == ValueTypes::String) {
retval << "std::cout << \"" << std::get<Value>(arg) << "\" << \"\\n\"";
} else {
retval << "std::cout << " << std::get<Value>(arg) << " << \"\\n\"";
}
}
}
return retval.str();
}
throw InterpretingError(InterpreterErrorType::UnknownInstruction);
}
public:
std::string createCppFile() const {
std::stringstream cpp;
cpp << "// Autogenerated by the Pipple compiler.\n// Headers\n";
for (const auto &header : headers) {
cpp << "#include <" << header << ">\n";
}
cpp << "\n// Code\n";
for (const auto &body : body) {
cpp << body << "\n";
}
for (const auto &tail : tail) {
cpp << tail << "\n";
}
return cpp.str();
}
/// Compiles `instruction` and appends it to `body` as one statement ending in
/// a semicolon. Nothing is appended if compilation throws.
void addInstruction(const Instruction &instruction) {
    std::string statement = compileInstruction(instruction);
    statement.push_back(';');
    body.push_back(statement);
}
explicit CppTranspiler(std::vector<Instruction> in) : instructions(std::move(in)) {
for (Instruction &instruction : instructions) {
body.push_back(compileInstruction(instruction));
}
std::cout << createCppFile() << std::endl;
}
CppTranspiler() {
headers = {};
body = {
"void* nil = nullptr;",
"int main(int argc, char** argv) {",
};
tail = {
"}"
};
}
};
int main(int argc, char** argv) {
bool isInteractive = true;
bool compiling = false;
std::string program;
if (argc > 1) {
int fileNum = 1;
if (strcmp(argv[1], "-c") == 0) {
fileNum = 2;
compiling = true;
}
isInteractive = false;
std::ifstream ifs(argv[1]);
std::ifstream ifs(argv[fileNum]);
std::stringstream buffer;
if (ifs.is_open()) {
buffer << ifs.rdbuf();
@@ -945,6 +1374,7 @@ int main(int argc, char** argv) {
linenoise::SetHistoryMaxLen(50);
Parser parser;
Interpreter interpreter;
CppTranspiler transpiler;
while (true) {
if (isInteractive) {
if (linenoise::Readline("pipple> ", program)) {
@@ -1000,10 +1430,16 @@ int main(int argc, char** argv) {
if (!isInteractive) return 1;
}
try {
for (auto instruction : parser.instructions) {
interpreter.interpretInstruction(instruction);
}
if (compiling) {
for (const auto &instruction : parser.instructions) {
transpiler.addInstruction(instruction);
}
} else {
for (auto instruction : parser.instructions) {
interpreter.interpretInstruction(instruction);
}
}
} catch (const InterpretingError& e) {
std::cerr << e.what() << std::endl;
if (!isInteractive) return 1;
@@ -1012,5 +1448,8 @@ int main(int argc, char** argv) {
break;
}
}
if (compiling) {
std::cout << transpiler.createCppFile() << std::endl;
}
return 0;
}