Initial commit

This commit is contained in:
2025-10-25 21:28:16 +11:00
commit 9e76fca977
9 changed files with 881 additions and 0 deletions

70
src/lexer/lexer.cpp Normal file
View File

@@ -0,0 +1,70 @@
#include "lexer.h"
#include <string>
#include <algorithm>
#include <iostream>
#include <optional>
#include <ostream>
#include <utility>
#include <vector>
/**
 * @brief Moves the cursor forward by one character and returns the character it lands on.
 *
 * The cursor is advanced before the bounds check, so it keeps moving even
 * when the end of `file` has already been reached.
 *
 * @return The character at the new cursor position, or an empty optional
 *         when that position is outside `file`.
 */
std::optional<char> Lexer::consume() {
    ++incrementor;
    if (incrementor >= file.size()) {
        return std::nullopt;
    }
    return file[incrementor];
}
/**
 * @brief Looks at a character relative to the cursor without moving the cursor.
 *
 * @param ahead Offset from the current cursor position.
 * @return The character at `incrementor + ahead`, or an empty optional when
 *         that index is outside `file`.
 */
std::optional<char> Lexer::peek(int ahead) {
    // Unsigned arithmetic, exactly as the bounds check requires.
    const auto target = incrementor + ahead;
    if (target >= file.size()) {
        return std::nullopt;
    }
    return file[target];
}
/**
 * @brief Tells whether a character is one of the configured delimiters.
 *
 * @param c Character to test.
 * @return true when `c` occurs in `delimiters`, false otherwise.
 */
bool Lexer::isDelimiter(char c) {
    const auto last = delimiters.end();
    return std::find(delimiters.begin(), last, c) != last;
}
/**
 * @brief Constructs a Lexer and tokenizes the provided input string.
 *
 * The input is scanned character by character and the resulting tokens are
 * appended to `content`:
 * - Text between double quotes becomes a single token that keeps both quote
 *   characters; delimiters inside the quotes are not treated as separators.
 * - Every delimiter ends the token being accumulated. Delimiters other than
 *   a space are additionally emitted as one-character tokens.
 * - A token still being accumulated when the input ends is emitted as well,
 *   so input that does not finish with a delimiter keeps its last token.
 * - An unterminated string literal (opening quote without a closing one) is
 *   discarded.
 *
 * @param in The input string to be tokenized.
 */
Lexer::Lexer(std::string in) : file(std::move(in)) {
    std::string pending;
    bool inString = false;
    while (const std::optional<char> next = consume()) {
        const char c = *next;
        if (c == '"') {
            inString = !inString;
            if (!inString) {
                // Closing quote: the opening quote is already in `pending`.
                content.push_back(pending + '"');
                pending.clear();
                continue;
            }
            // Opening quote: falls through and is appended to `pending` below.
        }
        if (!inString && isDelimiter(c)) {
            if (!pending.empty()) {
                content.push_back(pending);
            }
            if (c != ' ') {
                content.emplace_back(1, c);
            }
            pending.clear();
        } else {
            pending += c;
        }
    }
    // Flush the trailing token; previously it was lost unless the input
    // happened to end with a delimiter.
    if (!inString && !pending.empty()) {
        content.push_back(pending);
    }
}

30
src/lexer/lexer.h Normal file
View File

@@ -0,0 +1,30 @@
#pragma once
#include <string>
#include <vector>
#include <optional>
/**
 * @class Lexer
 * @brief Splits an input string into a flat list of string tokens.
 *
 * The constructor scans the whole input once and stores the tokens in
 * `content`. The private helpers implement the character cursor
 * (consume / peek) and the delimiter test used during that scan.
 */
class Lexer {
private:
// Characters that end the token being accumulated. Note that '.' is a
// delimiter, so text such as `1.5` is split into three tokens.
std::vector<char> delimiters = {
'(', ')', '{', '}', '.', '\n', ' '
};
// The complete input text being tokenized.
std::string file;
// Cursor into `file`. It starts at the maximum size_t value so that the
// pre-increment in consume() wraps around to index 0 on the first call.
size_t incrementor = -1;
// Advances the cursor and returns the character it lands on, if any.
std::optional<char> consume();
// Returns the character `ahead` positions from the cursor without moving it.
std::optional<char> peek(int ahead = 1);
// True when `c` is listed in `delimiters`.
bool isDelimiter(char c);
public:
// Tokens produced by the constructor, in input order.
std::vector<std::string> content;
// Tokenizes `in` and fills `content`.
explicit Lexer(std::string in);
};

28
src/main.cpp Normal file
View File

@@ -0,0 +1,28 @@
#include <iostream>
#include <fstream>
#include "lexer/lexer.h"
#include "parser/parser.h"
#include "runner/runner.h"
/**
 * @brief Program entry point: reads a source file, tokenizes, parses and runs it.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments; argv[1] is the path of the file to run.
 * @return 1 when no file was given or the file could not be opened, 0 otherwise.
 */
int main(int argc, char** argv) {
    if (argc <= 1) {
        // argc may be 0, in which case argv[0] is a null pointer; streaming a
        // null char* is undefined behaviour, so fall back to a placeholder.
        const char* programName = (argc > 0 && argv[0] != nullptr) ? argv[0] : "<program>";
        std::cout << "Usage: " << programName << " (file)" << std::endl;
        return 1;
    }
    std::string fileContent;
    {
        std::ifstream file(argv[1]);
        if (!file) {
            std::cout << "Could not open file" << std::endl;
            return 1;
        }
        std::string line;
        while (std::getline(file, line)) {
            // Every line is terminated with '\n', including the last one, so
            // the lexer always sees a trailing delimiter.
            fileContent += line;
            fileContent += '\n';
        }
    }
    Lexer lexer(fileContent);
    ASTCodeBlock codeBlock(lexer.content);
    // `true` marks the top-level run, which ends by executing "main".
    Executor executor(codeBlock, true);
    return 0;
}

292
src/parser/parser.cpp Normal file
View File

@@ -0,0 +1,292 @@
#include "parser.h"
#include <iostream>
#include <utility>
#include <optional>
#include <ostream>
#include <variant>
#include <string>
#include <vector>
// Constructors of ASTValue. Each one stores the payload in `value` and sets
// the matching `type` tag. Initializers are listed in declaration order
// (`value` before `type`), which is the order in which members are actually
// initialized.

/// Creates an empty value: `type` is None and `value` holds its default alternative.
ASTValue::ASTValue() : type(ValueType::None) {}

/// Creates a string value; the argument is moved into the payload instead of copied.
ASTValue::ASTValue(std::string in) : value(std::move(in)), type(ValueType::String) {}

/// Creates a boolean value.
ASTValue::ASTValue(bool in) : value(in), type(ValueType::Bool) {}

/// Creates an integer value.
ASTValue::ASTValue(long long in) : value(in), type(ValueType::Int) {}

/// Creates a floating-point value.
ASTValue::ASTValue(double in) : value(in), type(ValueType::Float) {}
/**
 * @brief Classifies the text of a token as a literal kind.
 *
 * Rules, checked in this order:
 * - empty text                                   -> None
 * - text starting and ending with a double quote -> String
 * - "true" or "false"                            -> Bool
 * - only the digits 0-9                          -> Int
 * - digits with exactly one '.'                  -> Float (a lone "." is None)
 * - anything else                                -> None
 *
 * The function does not read or modify the object it is called on.
 *
 * @param in Token text to classify.
 * @return The literal kind, or ValueType::None when the text is not a literal.
 */
ValueType ASTValue::getValueType(std::string in) {
    if (in.empty()) {
        return ValueType::None;
    }
    if (in.front() == '"' && in.back() == '"') {
        return ValueType::String;
    }
    if (in == "true" || in == "false") {
        return ValueType::Bool;
    }
    bool seenDot = false;
    for (const char c : in) {
        // Explicit range check instead of std::isdigit: passing a plain char
        // with a negative value to std::isdigit is undefined behaviour.
        if (c >= '0' && c <= '9') {
            continue;
        }
        if (c == '.' && !seenDot) {
            seenDot = true;
            continue;
        }
        // A second '.' or any other character: not a numeric literal.
        return ValueType::None;
    }
    if (!seenDot) {
        return ValueType::Int;
    }
    return in == "." ? ValueType::None : ValueType::Float;
}
/// Creates a function with an empty body.
ASTFunction::ASTFunction() = default;

/// Creates a function that owns the given, already parsed, body.
ASTFunction::ASTFunction(ASTCodeBlock body)
    : body(std::move(body)) {}

/// Creates a call to `func` with the given argument nodes.
ASTFunctionCall::ASTFunctionCall(std::string func, std::vector<ASTNode> args)
    : func(std::move(func)),
      args(std::move(args)) {}

/// Creates an identifier node carrying the given name.
ASTIdentifier::ASTIdentifier(std::string in)
    : name(std::move(in)) {}
/**
 * @brief Returns the stored string.
 *
 * @return The string payload when `type` is String and the payload really
 *         is a string; an empty optional otherwise.
 */
std::optional<std::string> ASTValue::getString() {
    const bool holdsString =
        type == ValueType::String && std::holds_alternative<std::string>(value);
    if (!holdsString) {
        return std::nullopt;
    }
    return std::get<std::string>(value);
}
/**
 * @brief Returns the stored integer.
 *
 * The payload is stored as `long long` but returned as `int`, so the value
 * is converted to `int` on the way out.
 *
 * @return The integer payload when `type` is Int and the payload really is
 *         a `long long`; an empty optional otherwise.
 */
std::optional<int> ASTValue::getInt() {
    const bool holdsInt =
        type == ValueType::Int && std::holds_alternative<long long>(value);
    if (!holdsInt) {
        return std::nullopt;
    }
    return std::get<long long>(value);
}
/**
 * @brief Returns the stored floating-point number.
 *
 * @return The double payload when `type` is Float and the payload really is
 *         a double; an empty optional otherwise.
 */
std::optional<double> ASTValue::getFloat() {
    const bool holdsFloat =
        type == ValueType::Float && std::holds_alternative<double>(value);
    if (!holdsFloat) {
        return std::nullopt;
    }
    return std::get<double>(value);
}
/**
 * @brief Returns the stored boolean.
 *
 * @return The bool payload when `type` is Bool and the payload really is a
 *         bool; an empty optional otherwise.
 */
std::optional<bool> ASTValue::getBool() {
    const bool holdsBool =
        type == ValueType::Bool && std::holds_alternative<bool>(value);
    if (!holdsBool) {
        return std::nullopt;
    }
    return std::get<bool>(value);
}
/**
 * @brief Returns the current token and moves on to the next one.
 *
 * @return The token at the current position, or an empty optional when all
 *         tokens have been consumed (the position is then left unchanged).
 */
std::optional<std::string> ASTCodeBlock::consume() {
    if (iterator >= content.size()) {
        return std::nullopt;
    }
    const auto current = iterator;
    ++iterator;
    return content[current];
}
/**
 * @brief Looks at a token relative to the current position without consuming it.
 *
 * @param ahead Offset from the current position; 0 is the token that the
 *              next consume() would return.
 * @return The token at `iterator + ahead`, or an empty optional when that
 *         index is outside `content`.
 */
std::optional<std::string> ASTCodeBlock::peek(int ahead) {
    // Unsigned arithmetic, exactly as the bounds check requires.
    const auto target = iterator + ahead;
    if (target >= content.size()) {
        return std::nullopt;
    }
    return content[target];
}
/**
 * @brief Classifies the token at the current position without consuming it.
 *
 * Checks are made in this order: literal value, the structural tokens
 * `{ } ( )` and newline, the keyword `func` directly followed by `{`, any
 * token directly followed by `(` (a function call), and finally a plain
 * identifier.
 *
 * @return The token's category, or TokenType::None when no token is left.
 */
TokenType ASTCodeBlock::getTokenType() {
    const std::optional<std::string> current = peek(0);
    if (!current.has_value()) {
        return TokenType::None;
    }
    const std::string &token = current.value();

    // Literals take precedence over everything else.
    if (ASTValue().getValueType(token) != ValueType::None) {
        return TokenType::Value;
    }

    // Structural single-character tokens.
    if (token == "{") return TokenType::CodeBlockStart;
    if (token == "}") return TokenType::CodeBlockEnd;
    if (token == "(") return TokenType::OpenParen;
    if (token == ")") return TokenType::CloseParen;
    if (token == "\n") return TokenType::NewLine;

    // The remaining categories depend on the token that follows.
    const std::optional<std::string> following = peek(1);
    const bool braceFollows = following.has_value() && following.value() == "{";
    const bool parenFollows = following.has_value() && following.value() == "(";

    if (token == "func" && braceFollows) {
        return TokenType::Function;
    }
    if (parenFollows) {
        return TokenType::FunctionCallStart;
    }
    return TokenType::Identifier;
}
/**
* @brief Parses a block of code and creates an Abstract Syntax Tree (AST) representation.
*
* This method iterates through the tokens of a given block of code, evaluates the type
* of each token, and adds the corresponding node to the AST.
*
* Supported token types include:
* - Values (e.g., strings, booleans, integers, floats)
* - Identifiers
* - Functions with nested code blocks
* - Function calls with arguments
* - Nested code blocks
*
* The method utilizes a `switch` statement based on the `TokenType` to determine the
* appropriate handling of each token. Each token is either consumed, parsed, and converted
* to an appropriate AST node type or processed for special structures (e.g., functions,
* code blocks, or function calls).
*
* @remarks This method assumes valid tokenized input. If an end-of-file condition
* occurs while parsing functions or function calls, the program exits with an error.
*/
void ASTCodeBlock::parseBlock() {
while (true) {
std::optional<std::string> token = peek(0);
if (token.has_value() == false) {
return;
}
TokenType tokenType = getTokenType();
ValueType valueType = ASTValue().getValueType(token.value());
std::optional<std::string> currentToken = consume();
if (currentToken.has_value()) {
switch (tokenType) {
case TokenType::Value: {
switch (valueType) {
case ValueType::String:
nodes.emplace_back(std::make_shared<ASTValue>(currentToken.value().substr(1, currentToken.value().size() - 2)));
break;
case ValueType::Bool:
nodes.emplace_back(std::make_shared<ASTValue>(currentToken.value() == "true"));
break;
case ValueType::Int:
nodes.emplace_back(std::make_shared<ASTValue>(std::stoll(currentToken.value())));
break;
case ValueType::Float:
nodes.emplace_back(std::make_shared<ASTValue>(std::stod(currentToken.value())));
break;
default:
break;
}
break;
}
case TokenType::Identifier:
nodes.emplace_back(std::make_shared<ASTIdentifier>(currentToken.value()));
break;
case TokenType::Function: {
std::vector<std::string> body;
consume();
int depth = 1;
while (depth > 0) {
std::optional<std::string> token = consume();
if (token.has_value()) {
if (token.value() == "{") {
depth++;
} else if (token.value() == "}") {
depth--;
if (depth == 0) {
break;
}
}
body.push_back(token.value());
} else {
std::cout << "Reached end of file while parsing function" << std::endl;
exit(1);
}
}
consume();
nodes.emplace_back(std::make_shared<ASTFunction>(ASTCodeBlock(body)));
break;
}
case TokenType::FunctionCallStart: {
std::vector<std::string> args;
std::optional<std::string> fnName = peek(-1);
std::string fnNameStr;
if (fnName.has_value()) {
fnNameStr = fnName.value();
} else {
std::cout << "Reached end of file while parsing function call" << std::endl;
exit(1);
}
while (getTokenType() != TokenType::CloseParen) {
std::optional<std::string> token = consume();
if (token.has_value()) {
args.push_back(token.value());
} else {
std::cout << "Reached end of file while parsing function call" << std::endl;
exit(1);
}
}
consume();
nodes.emplace_back(std::make_shared<ASTFunctionCall>(fnNameStr, ASTCodeBlock(args).nodes));
break;
}
case TokenType::CodeBlockStart: {
std::vector<std::string> body;
int depth = 1;
while (depth > 0) {
std::optional<std::string> token = consume();
if (token.has_value()) {
if (token.value() == "{") {
depth++;
} else if (token.value() == "}") {
depth--;
if (depth == 0) {
break;
}
}
body.push_back(token.value());
} else {
std::cout << "Reached end of file while parsing code block" << std::endl;
exit(1);
}
}
nodes.emplace_back(std::make_shared<ASTCodeBlock>(body));
break;
}
default:
break;
}
} else {
break;
}
}
}
/// Creates an empty block: no tokens and no nodes.
ASTCodeBlock::ASTCodeBlock() = default;

/// Takes ownership of the tokens and parses them into `nodes` right away.
ASTCodeBlock::ASTCodeBlock(std::vector<std::string> in)
    : content(std::move(in)) {
    parseBlock();
}

98
src/parser/parser.h Normal file
View File

@@ -0,0 +1,98 @@
#pragma once
#include <memory>
#include <optional>
#include <vector>
#include <string>
#include <variant>
// Forward declarations so the node variant below can refer to every node
// class before the classes themselves are defined.
class ASTValue;
class ASTFunction;
class ASTFunctionCall;
class ASTCodeBlock;
class ASTIdentifier;

/// One node of the syntax tree: a shared pointer to exactly one node class.
using ASTNode = std::variant<
    std::shared_ptr<ASTValue>,
    std::shared_ptr<ASTFunction>,
    std::shared_ptr<ASTFunctionCall>,
    std::shared_ptr<ASTCodeBlock>,
    std::shared_ptr<ASTIdentifier>>;

/// Payload of a literal value.
using RealValue = std::variant<long long, double, std::string, bool>;
/// Kind of literal a value holds; None means "not a literal / no value".
enum class ValueType {
    Int,
    Float,
    String,
    Bool,
    None
};
/// Category the parser assigns to a token.
enum class TokenType {
    Identifier,         ///< Any token that matches none of the other categories.
    Value,              ///< A literal (string, bool, int or float).
    Function,           ///< The keyword `func` directly followed by `{`.
    FunctionCallStart,  ///< A token directly followed by `(`.
    OpenParen,          ///< `(`
    CloseParen,         ///< `)`
    CodeBlockStart,     ///< `{`
    CodeBlockEnd,       ///< `}`
    NewLine,            ///< A line break token.
    None                ///< No token available.
};
/**
 * @class ASTValue
 * @brief Represents a literal value in the Abstract Syntax Tree (AST).
 *
 * An ASTValue pairs a payload (integer, floating-point number, string or
 * boolean) with a `type` tag saying which one it is. The getters return an
 * empty optional when the requested kind does not match.
 */
class ASTValue {
private:
// The payload; which alternative is meaningful is given by `type`.
RealValue value;
public:
// Tag describing the payload; None for a default-constructed value.
ValueType type;
// Classifies token text as a literal kind (None when it is not a literal).
// Works on `in` only; it does not use this object's own payload.
ValueType getValueType(std::string in);
// Payload as a string, or empty when this is not a string value.
std::optional<std::string> getString();
// Payload as an integer, or empty when this is not an integer value.
// NOTE(review): the payload is stored as long long but returned as int, so
// large values are narrowed — confirm whether that is intended.
std::optional<int> getInt();
// Payload as a double, or empty when this is not a float value.
std::optional<double> getFloat();
// Payload as a bool, or empty when this is not a boolean value.
std::optional<bool> getBool();
// Each constructor stores its argument and sets the matching `type`.
explicit ASTValue(std::string in);
explicit ASTValue(long long in);
explicit ASTValue(double in);
explicit ASTValue(bool in);
// Creates a value whose `type` is None.
ASTValue();
};
/**
 * @class ASTCodeBlock
 * @brief Represents a block of code in the Abstract Syntax Tree (AST).
 *
 * An ASTCodeBlock is built from a sequence of string tokens. The constructor
 * that takes tokens parses them immediately and stores the result in `nodes`.
 * The private helpers implement the token cursor (consume / peek) and the
 * token classification used while parsing.
 */
class ASTCodeBlock {
private:
// The raw tokens this block was built from.
std::vector<std::string> content;
// Index into `content` of the token the next consume() returns.
size_t iterator = 0;
// Turns `content` into `nodes`; called by the token-taking constructor.
void parseBlock();
// Returns the current token and advances, or empty when none is left.
std::optional<std::string> consume();
// Returns the token `ahead` positions from the current one without advancing.
std::optional<std::string> peek(int ahead = 1);
// Classifies the current token (also looks at the token after it).
TokenType getTokenType();
public:
// The parsed nodes, in source order.
std::vector<ASTNode> nodes;
// Takes the tokens and parses them right away.
explicit ASTCodeBlock(std::vector<std::string>);
// Creates an empty block with no tokens and no nodes.
ASTCodeBlock();
};
// A function definition: nothing but its parsed body. The name a function is
// bound to is not stored here.
class ASTFunction {
public:
// The parsed body of the function.
ASTCodeBlock body;
// Creates a function that owns the given body.
explicit ASTFunction(ASTCodeBlock body);
// Creates a function with an empty body.
ASTFunction();
};
// A call expression: the name of the callee plus its parsed argument nodes.
class ASTFunctionCall {
public:
// Name of the function being called.
std::string func;
// Argument nodes, in the order they were written.
std::vector<ASTNode> args;
// Stores the callee name and the argument nodes.
ASTFunctionCall(std::string func, std::vector<ASTNode> args);
};
// A bare name in the source. The parser produces one for every token that
// fits no other category.
class ASTIdentifier {
public:
// The identifier text exactly as it appeared in the token stream.
std::string name;
// Stores the given text as the identifier name.
explicit ASTIdentifier(std::string in);
};
// Free function taking a token list and returning a single node.
// NOTE(review): no definition or caller of this function is visible in the
// sources reviewed here — confirm it is implemented somewhere or remove it.
ASTNode parser(std::vector<std::string> in);

249
src/runner/runner.cpp Normal file
View File

@@ -0,0 +1,249 @@
#include "runner.h"
#include <iostream>
#include <ostream>
#include <utility>
#include "../parser/parser.h"
std::optional<ASTNode> Executor::consume() {
if (iterator < code.nodes.size()) {
return code.nodes[iterator++];
}
return {};
}
std::optional<ASTNode> Executor::peek(int ahead) {
if (iterator + ahead < code.nodes.size()) {
return code.nodes[iterator + ahead];
}
return {};
}
/**
* Constructs an `Executor` object.
*
* This constructor initializes the `Executor` by taking an abstract syntax tree (AST) code block as input,
* setting up the execution context, including variables, functions, and arguments, and continuously parsing
* and executing the AST nodes until the end of the block is reached.
*
* @param in The abstract syntax tree (AST) code block to execute.
* @param isInitCall A boolean flag to determine if this is the initial entry point to execution.
* If true, it runs the "main" function after setting up the context.
* @param scopeVals A map of variable names (strings) to their corresponding `ASTValue` objects used as
* the current scope of variables.
* @param scopeFns A map of function names (strings) to their corresponding `ASTFunction` objects used as
* the current set of functions within scope.
* @param args A vector of `ASTValue` objects passed as arguments for the context of this execution.
*
* @details
* - If arguments are provided, they are assigned to variables named `arg0`, `arg1`, etc., in the local scope.
* - Functions can be defined dynamically within the block and will be stored in the `functions` map.
* - Variable assignments and supported operators (e.g., '=', '==', '!=', etc.) are processed if encountered.
* - If a function call is encountered, the corresponding function body is executed in a new `Executor` context,
* passing along argument values and maintaining the state of variables and functions.
* - Special support for `import()` calls is provided when encountered in the root function.
* - If `isInitCall` is true, ensures the "main" function is executed after parsing and executing the AST.
* - The constructor uses recursive execution for nested function calls.
*
* @note Exits the process if critical execution errors occur (e.g., unexpected nodes or missing values).
*/
Executor::Executor(ASTCodeBlock in, bool isInitCall, std::map<std::string, ASTValue> scopeVals, std::map<std::string, ASTFunction> scopeFns, std::vector<ASTValue> args) : code(std::move(in)), variables(std::move(scopeVals)), functions(std::move(scopeFns)) {
for (size_t i = 0; i < args.size(); i++) {
variables["arg" + std::to_string(i)] = args[i];
}
while (true) {
std::optional<ASTNode> node = consume();
if (node.has_value()) {
// for if we see an identifier
if (std::holds_alternative<std::shared_ptr<ASTIdentifier>>(node.value())) {
std::optional<ASTNode> next = consume();
if (next.has_value()) {
// function assignment
// eg: main func { ... }
// eg: dingus func { ... }
if (std::holds_alternative<std::shared_ptr<ASTFunction>>(next.value())) {
functions[std::get<std::shared_ptr<ASTIdentifier>>(node.value())->name] = *std::get<std::shared_ptr<ASTFunction>>(next.value());
} else if (std::holds_alternative<std::shared_ptr<ASTIdentifier>>(next.value())) {
std::string id = std::get<std::shared_ptr<ASTIdentifier>>(next.value())->name;
if (id == "=") {
// setting a variable
std::optional<ASTNode> valueNode = consume();
if (valueNode.has_value()) {
ASTValue next;
if (std::holds_alternative<std::shared_ptr<ASTValue>>(valueNode.value())) {
variables[std::get<std::shared_ptr<ASTIdentifier>>(node.value())->name] = *std::get<std::shared_ptr<ASTValue>>(valueNode.value());
} else if (std::holds_alternative<std::shared_ptr<ASTFunction>>(valueNode.value())) {
functions[std::get<std::shared_ptr<ASTIdentifier>>(node.value())->name] = *std::get<std::shared_ptr<ASTFunction>>(valueNode.value());
} else {
std::cout << "Expected value or function after = sign" << std::endl;
exit(1);
}
} else {
std::cout << "Expected value after = sign" << std::endl;
exit(1);
}
} else if (id == "==") {
} else if (id == "!=") {
} else if (id == ">") {
} else if (id == ">=") {
} else if (id == "<") {
} else if (id == "<=") {
}
} else {
std::cout << "Expected function or operator after identifier" << std::endl;
exit(1);
}
}
}
// if we see a function call
// note: we only accept calls to import() in the root function
if (std::holds_alternative<std::shared_ptr<ASTFunctionCall>>(node.value())) {
std::string fnName = std::get<std::shared_ptr<ASTFunctionCall>>(node.value())->func;
std::vector<ASTNode> callArgNodes = std::get<std::shared_ptr<ASTFunctionCall>>(node.value())->args;
std::vector<ASTValue> callArgs;
for (auto &callArgNode : callArgNodes) {
if (std::holds_alternative<std::shared_ptr<ASTValue>>(callArgNode)) {
callArgs.push_back(*std::get<std::shared_ptr<ASTValue>>(callArgNode));
}
}
if (fnName == "import") {
// work on importing modules later
continue;
}
if (isInitCall) {
std::cout << "Function " << fnName << " not allowed in root function" << std::endl;
exit(1);
}
if (fnName == "print") {
for (ASTValue &arg : callArgs) {
if (arg.type == ValueType::String) {
std::optional<std::string> argString = arg.getString();
if (argString.has_value()) {
std::cout << argString.value();
} else {
std::cout << "Type mismatch - expecting string but got something else" << std::endl;
}
} else if (arg.type == ValueType::Int) {
std::optional<int> argInt = arg.getInt();
if (argInt.has_value()) {
std::cout << argInt.value();
} else {
std::cout << "Type mismatch - expecting int but got something else" << std::endl;
}
} else if (arg.type == ValueType::Float) {
std::optional<double> argFloat = arg.getFloat();
if (argFloat.has_value()) {
std::cout << argFloat.value();
} else {
std::cout << "Type mismatch - expecting float but got something else" << std::endl;
}
} else if (arg.type == ValueType::Bool) {
std::optional<bool> argBool = arg.getBool();
if (argBool.has_value()) {
std::cout << argBool.value();
} else {
std::cout << "Type mismatch - expecting bool but got something else" << std::endl;
}
} else {
std::cout << "Type mismatch - expecting string, int, float, or bool but got something else" << std::endl;
}
}
} else if (fnName == "println") {
for (ASTValue &arg : callArgs) {
if (arg.type == ValueType::String) {
std::optional<std::string> argString = arg.getString();
if (argString.has_value()) {
std::cout << argString.value() << std::endl;
} else {
std::cout << "Type mismatch - expecting string but got something else" << std::endl;
}
} else if (arg.type == ValueType::Int) {
std::optional<int> argInt = arg.getInt();
if (argInt.has_value()) {
std::cout << argInt.value() << std::endl;
} else {
std::cout << "Type mismatch - expecting int but got something else" << std::endl;
}
} else if (arg.type == ValueType::Float) {
std::optional<double> argFloat = arg.getFloat();
if (argFloat.has_value()) {
std::cout << argFloat.value() << std::endl;
} else {
std::cout << "Type mismatch - expecting float but got something else" << std::endl;
}
} else if (arg.type == ValueType::Bool) {
std::optional<bool> argBool = arg.getBool();
if (argBool.has_value()) {
std::cout << argBool.value() << std::endl;
} else {
std::cout << "Type mismatch - expecting bool but got something else" << std::endl;
}
} else {
std::cout << "Type mismatch - expecting string, int, float, or bool but got something else" << std::endl;
}
}
} else if (fnName == "if") {
if (callArgs.empty()) {
std::cout << "Expected at least one argument to if statement" << std::endl;
exit(1);
}
if (callArgs[0].type != ValueType::Bool) {
std::cout << "Expected first argument to if statement to be a boolean" << std::endl;
exit(1);
}
std::optional<ASTNode> block = consume();
if (!block.has_value()) {
std::cout << "If statement expects a body" << std::endl;
exit(1);
}
if (callArgs[0].getBool().value()) {
if (std::holds_alternative<std::shared_ptr<ASTCodeBlock>>(block.value())) {
Executor(*std::get<std::shared_ptr<ASTCodeBlock>>(block.value()), false, variables, functions);
}
}
} else if (fnName == "while") {
if (callArgs.empty()) {
std::cout << "Expected at least one argument to if statement" << std::endl;
exit(1);
}
if (callArgs[0].type != ValueType::Bool) {
std::cout << "Expected first argument to if statement to be a boolean" << std::endl;
exit(1);
}
std::optional<ASTNode> block = consume();
if (!block.has_value()) {
std::cout << "If statement expects a body" << std::endl;
exit(1);
}
if (callArgs[0].getBool().value()) {
while (callArgs[0].getBool().value()) {
if (std::holds_alternative<std::shared_ptr<ASTCodeBlock>>(block.value())) {
Executor(*std::get<std::shared_ptr<ASTCodeBlock>>(block.value()), false, variables, functions);
}
}
}
} else {
if (functions.find(fnName) != functions.end()) {
Executor(functions[fnName].body, false, variables, functions, callArgs);
} else {
std::cout << "Function " << fnName << " not found" << std::endl;
exit(1);
}
}
}
} else {
break;
}
}
if (isInitCall) {
Executor(functions["main"].body, false, variables, functions);
}
}

27
src/runner/runner.h Normal file
View File

@@ -0,0 +1,27 @@
#pragma once
#include <map>
#include "../parser/parser.h"
/**
 * @class Executor
 * @brief Executes the nodes of a parsed ASTCodeBlock.
 *
 * All of the work happens in the constructor: it walks the block's nodes,
 * records function and variable definitions, and runs function calls.
 * Nested blocks and user-defined functions are run by constructing further
 * Executor objects that receive copies of the current variables and functions.
 */
class Executor {
private:
// Functions known in this scope, keyed by name.
std::map<std::string, ASTFunction> functions;
// Variables known in this scope, keyed by name.
std::map<std::string, ASTValue> variables;
// The block being executed.
ASTCodeBlock code;
// Index into `code.nodes` of the node the next consume() returns.
size_t iterator = 0;
// Returns the current node and advances, or empty when none is left.
std::optional<ASTNode> consume();
// Returns the node `ahead` positions from the current one without advancing.
std::optional<ASTNode> peek(int ahead = 1);
public:
// Executes `in` immediately. `isInitCall` marks the top-level run, which
// finishes by executing the function named "main". `scopeVals` / `scopeFns`
// seed the scope, and `args` are exposed as variables arg0, arg1, ...
explicit Executor(ASTCodeBlock in, bool isInitCall = false, std::map<std::string, ASTValue> scopeVals = {}, std::map<std::string, ASTFunction> scopeFns = {}, std::vector<ASTValue> args = {});
};