Files
highground-fork/src/main.cpp

641 lines
26 KiB
C++
Raw Normal View History

2025-12-14 14:21:19 +11:00
#include <cctype>
#include <cstdint>
2025-12-13 18:07:26 +11:00
#include <groundvm.h>
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include <sstream>
#include <optional>
2025-12-14 14:21:19 +11:00
#include <variant>
2025-12-13 18:07:26 +11:00
2025-12-14 16:07:58 +11:00
// Parses the single token held by the optional `token` and yields the first
// node of the resulting tree.
// Preconditions (not checked here): `token` must hold a value, and that value
// must parse to at least one node -- otherwise `.value()` throws or
// `children[0]` indexes an empty vector.
// The argument is parenthesized so that any expression can be passed safely.
#define parseOneToken(token) Parser({(token).value()}).parse().children[0]
2025-12-13 18:07:26 +11:00
namespace HighGround {
2025-12-14 14:21:19 +11:00
// Running counter used to build unique "tmp_<n>" names for intermediate
// results during code generation; incremented each time a name is handed out.
int tmpIdIterator = 0;
2025-12-15 11:37:27 +11:00
// Running counter used to build unique "if_<n>" label names during code
// generation; incremented each time a label is handed out.
int labelIterator = 0;
2025-12-14 14:21:19 +11:00
2025-12-13 18:07:26 +11:00
namespace Parser {
// Kinds of syntax-tree node. The enumerator order (and therefore the
// underlying values) is kept stable.
enum class HGNodeType {
    Add,            // "+" expression
    Subtract,
    Equal,          // "==" expression
    Set,
    While,
    If,             // "if" statement: condition followed by a code block
    Value,          // literal value
    Identifier,
    None,           // unrecognised token / default-constructed node
    Root,           // top-level node returned by a parse
    CodeBlock,      // statements that were enclosed in "{" ... "}"
    CodeBlockStart, // the "{" token
    CodeBlockEnd,   // the "}" token
    Puts            // "puts" statement
};
// Type tag for a literal value. The enumerator order (and therefore the
// underlying values) is kept stable.
enum class HGDataType {
    Int,
    String,
    Double,
    Bool,
    Char,
    None // the token is not a literal
};
// Forward declaration of the syntax-tree node class defined further down.
class HGNode;
2025-12-14 14:21:19 +11:00
// A run of Ground instructions produced by one code-generation step.
struct HGGroundCodeBlock {
    // Instructions in the order they were appended.
    std::vector<GroundInstruction> code;

    HGGroundCodeBlock() = default;
};
class HGData {
2025-12-14 14:49:32 +11:00
typedef std::variant<int64_t, std::string, double, bool, char> varData;
varData data;
2025-12-14 14:21:19 +11:00
public:
HGDataType type = HGDataType::Int;
HGData() = default;
2025-12-14 14:49:32 +11:00
HGData(int64_t in) : data(in), type(HGDataType::Int) {}
HGData(double in) : data(in), type(HGDataType::Double) {}
HGData(std::string in) : data(in), type(HGDataType::String) {}
HGData(char in) : data(in), type(HGDataType::Char) {}
HGData(bool in) : data(in), type(HGDataType::Bool) {}
2025-12-14 14:21:19 +11:00
std::optional<int64_t> getInt() {
if (type == HGDataType::Int) {
return std::get<int64_t>(data);
} else {
return {};
}
}
2025-12-14 14:49:32 +11:00
std::optional<double> getDouble() {
if (type == HGDataType::Double) {
return std::get<double>(data);
} else {
return {};
}
}
std::optional<std::string> getString() {
if (type == HGDataType::String) {
return std::get<std::string>(data);
} else {
return {};
}
}
std::optional<char> getChar() {
if (type == HGDataType::Char) {
return std::get<char>(data);
} else {
return {};
}
}
std::optional<bool> getBool() {
if (type == HGDataType::Bool) {
return std::get<bool>(data);
} else {
return {};
}
}
2025-12-14 14:21:19 +11:00
};
2025-12-13 18:07:26 +11:00
class HGNode {
HGNodeType nodeType = HGNodeType::None;
2025-12-14 14:21:19 +11:00
HGData data;
2025-12-13 18:07:26 +11:00
public:
2025-12-14 15:38:36 +11:00
std::vector<HGNode> children;
2025-12-14 14:21:19 +11:00
std::string outputId;
2025-12-13 18:07:26 +11:00
HGNode(HGNodeType nodeType) : nodeType(nodeType) {}
2025-12-14 14:21:19 +11:00
HGNode(HGNodeType nodeType, HGData data) : nodeType(nodeType), data(data) {}
2025-12-13 18:07:26 +11:00
HGNode() = default;
void addNode(HGNode in) {
children.push_back(in);
}
2025-12-14 14:21:19 +11:00
void setValue(HGData in) {
data = in;
}
const std::vector<HGGroundCodeBlock> generateCode() {
std::vector<HGGroundCodeBlock> code;
2025-12-15 11:37:27 +11:00
if (nodeType != HGNodeType::If) for (auto& child : children) {
2025-12-14 14:21:19 +11:00
auto childCode = child.generateCode();
code.insert(code.end(), childCode.begin(), childCode.end());
}
switch (nodeType) {
case HGNodeType::Value: {
outputId = "tmp_" + std::to_string(tmpIdIterator++);
HGGroundCodeBlock codeBlock;
GroundInstruction gi = groundCreateInstruction(SET);
groundAddReferenceToInstruction(&gi, groundCreateReference(DIRREF, outputId.data()));
switch (data.type) {
case HGDataType::Int: {
auto dataopt = data.getInt();
if (dataopt) {
groundAddValueToInstruction(&gi, groundCreateValue(INT, dataopt.value()));
}
2025-12-14 14:49:32 +11:00
break;
}
case HGDataType::Double: {
auto dataopt = data.getDouble();
if (dataopt) {
groundAddValueToInstruction(&gi, groundCreateValue(DOUBLE, dataopt.value()));
}
break;
}
case HGDataType::String: {
auto dataopt = data.getString();
if (dataopt) {
groundAddValueToInstruction(&gi, groundCreateValue(STRING, dataopt.value().c_str()));
}
break;
}
case HGDataType::Char: {
auto dataopt = data.getChar();
if (dataopt) {
groundAddValueToInstruction(&gi, groundCreateValue(CHAR, dataopt.value()));
}
break;
}
case HGDataType::Bool: {
auto dataopt = data.getBool();
if (dataopt) {
groundAddValueToInstruction(&gi, groundCreateValue(BOOL, dataopt.value()));
}
break;
2025-12-14 14:21:19 +11:00
}
}
codeBlock.code.push_back(gi);
code.push_back(codeBlock);
break;
}
2025-12-14 15:38:36 +11:00
case HGNodeType::Add: {
HGGroundCodeBlock codeBlock;
outputId = "tmp_" + std::to_string(tmpIdIterator++);
GroundInstruction gi = groundCreateInstruction(ADD);
if (children.size() < 2) {
std::cout << "Need more stuff to add\n";
}
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[0].outputId.data()));
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[1].outputId.data()));
groundAddReferenceToInstruction(&gi, groundCreateReference(DIRREF, outputId.data()));
codeBlock.code.push_back(gi);
code.push_back(codeBlock);
break;
}
2025-12-19 16:17:57 +11:00
case HGNodeType::Equal: {
std::cout << "Equal node children: " << children.size() << "\n";
std::cout << "Left outputId: '" << children[0].outputId << "'\n";
std::cout << "Right outputId: '" << children[1].outputId << "'\n";
HGGroundCodeBlock codeBlock;
outputId = "tmp_" + std::to_string(tmpIdIterator++);
GroundInstruction gi = groundCreateInstruction(EQUAL);
if (children.size() < 2) {
std::cout << "Need more stuff to equal\n";
}
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[0].outputId.data()));
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[1].outputId.data()));
groundAddReferenceToInstruction(&gi, groundCreateReference(DIRREF, outputId.data()));
codeBlock.code.push_back(gi);
code.push_back(codeBlock);
break;
}
2025-12-14 16:07:58 +11:00
case HGNodeType::Puts: {
HGGroundCodeBlock codeBlock;
GroundInstruction gi = groundCreateInstruction(PRINTLN);
if (children.size() < 1) {
std::cout << "Need more stuff to puts\n";
}
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[0].outputId.data()));
codeBlock.code.push_back(gi);
code.push_back(codeBlock);
break;
2025-12-14 14:21:19 +11:00
}
2025-12-15 11:37:27 +11:00
case HGNodeType::If: {
auto conditionCode = children[0].generateCode();
code.insert(code.end(), conditionCode.begin(), conditionCode.end());
outputId = "tmp_" + std::to_string(tmpIdIterator++);
HGGroundCodeBlock codeBlock;
GroundInstruction gi = groundCreateInstruction(NOT);
groundAddReferenceToInstruction(&gi, groundCreateReference(VALREF, children[0].outputId.data()));
groundAddReferenceToInstruction(&gi, groundCreateReference(DIRREF, outputId.data()));
codeBlock.code.push_back(gi);
std::string labelId = "if_" + std::to_string(labelIterator++);
GroundInstruction gi2 = groundCreateInstruction(IF);
groundAddReferenceToInstruction(&gi2, groundCreateReference(VALREF, outputId.data()));
groundAddReferenceToInstruction(&gi2, groundCreateReference(LINEREF, labelId.data()));
codeBlock.code.push_back(gi2);
code.push_back(codeBlock);
for (size_t i = 1; i < children.size(); i++) {
auto childCode = children[i].generateCode();
code.insert(code.end(), childCode.begin(), childCode.end());
}
codeBlock.code.clear();
GroundInstruction gi3 = groundCreateInstruction(CREATELABEL);
groundAddReferenceToInstruction(&gi3, groundCreateReference(LABEL, labelId.data()));
codeBlock.code.push_back(gi3);
2025-12-19 16:17:57 +11:00
codeBlock.code.push_back(groundCreateInstruction(PAUSE));
2025-12-15 11:37:27 +11:00
code.push_back(codeBlock);
break;
}
2025-12-14 16:07:58 +11:00
default: {}
2025-12-14 14:21:19 +11:00
}
2025-12-13 18:07:26 +11:00
return code;
}
2025-12-14 14:21:19 +11:00
2025-12-13 18:07:26 +11:00
};
2025-12-14 14:21:19 +11:00
class Parser {
std::vector<std::string> tokensToParse;
size_t current;
size_t size;
std::optional<std::string> peek(int ahead = 1) {
if (current + ahead < size) {
return tokensToParse[current + ahead];
} else {
return {};
}
}
std::optional<std::string> consume() {
if (current < size) {
return tokensToParse[current++];
} else {
return {};
}
}
bool isInt(std::string in) {
for (const char& c : in) {
if (!std::isdigit(c)) {
return false;
}
}
return true;
}
bool isDouble(std::string in) {
bool foundDot = false;
for (const char& c : in) {
if (!std::isdigit(c)) {
if (!foundDot && c == '.') {
foundDot = true;
continue;
}
return false;
}
}
return true;
}
2025-12-14 14:49:32 +11:00
bool isString(std::string in) {
if (in.size() > 1 && in[0] == '"' && in.back() == '"') {
return true;
}
return false;
}
bool isChar(std::string in) {
if (in.size() == 3 && in[0] == '\'' && in.back() == '\'') {
return true;
}
return false;
}
bool isBool(std::string in) {
if (in == "true" || in == "false") {
return true;
}
return false;
}
2025-12-14 14:21:19 +11:00
HGDataType getDataType(std::string in) {
if (isInt(in)) {
return HGDataType::Int;
}
if (isDouble(in)) {
return HGDataType::Double;
}
2025-12-14 14:49:32 +11:00
if (isString(in)) {
return HGDataType::String;
}
if (isChar(in)) {
return HGDataType::Char;
}
if (isBool(in)) {
return HGDataType::Bool;
}
2025-12-14 14:21:19 +11:00
return HGDataType::None;
}
HGNodeType getNodeType(std::string in) {
if (getDataType(in) != HGDataType::None) {
return HGNodeType::Value;
}
2025-12-14 15:38:36 +11:00
if (in == "+") {
return HGNodeType::Add;
}
2025-12-19 16:17:57 +11:00
if (in == "==") {
return HGNodeType::Equal;
}
2025-12-14 16:07:58 +11:00
if (in == "puts") {
return HGNodeType::Puts;
}
2025-12-14 17:17:19 +11:00
if (in == "if") {
return HGNodeType::If;
}
if (in == "{") {
return HGNodeType::CodeBlockStart;
}
if (in == "}") {
return HGNodeType::CodeBlockEnd;
}
2025-12-14 14:21:19 +11:00
return HGNodeType::None;
}
public:
Parser(std::vector<std::string> in) : tokensToParse(in) {}
HGNode parse() {
current = 0;
size = tokensToParse.size();
HGNode rootNode(HGNodeType::Root);
while (auto tokenopt = consume()) {
std::string token = tokenopt.value();
switch (getNodeType(token)) {
case HGNodeType::Value: {
switch (getDataType(token)) {
case HGDataType::Int: {
HGNode intNode(HGNodeType::Value);
2025-12-14 14:49:32 +11:00
intNode.setValue((int64_t) std::stoll(token));
2025-12-14 14:21:19 +11:00
rootNode.addNode(intNode);
break;
}
2025-12-14 14:49:32 +11:00
case HGDataType::Double: {
HGNode doubleNode(HGNodeType::Value);
doubleNode.setValue(std::stod(token));
rootNode.addNode(doubleNode);
break;
}
case HGDataType::String: {
HGNode stringNode(HGNodeType::Value);
stringNode.setValue(token.substr(1, token.size() - 2));
rootNode.addNode(stringNode);
break;
}
case HGDataType::Char: {
HGNode charNode(HGNodeType::Value);
charNode.setValue(token[1]);
rootNode.addNode(charNode);
break;
}
case HGDataType::Bool: {
HGNode boolNode(HGNodeType::Value);
boolNode.setValue(token == "true");
rootNode.addNode(boolNode);
break;
}
2025-12-14 14:21:19 +11:00
}
break;
}
2025-12-14 15:38:36 +11:00
case HGNodeType::Add: {
HGNode addNode(HGNodeType::Add);
addNode.addNode(rootNode.children.back());
rootNode.children.pop_back();
auto tokenopt = consume();
if (tokenopt) {
2025-12-14 16:07:58 +11:00
addNode.addNode(parseOneToken(tokenopt));
2025-12-14 15:38:36 +11:00
} else {
std::cout << "FEED ME MORE TOKENS\n";
2025-12-14 17:17:19 +11:00
exit(1);
2025-12-14 15:38:36 +11:00
}
rootNode.addNode(addNode);
break;
}
2025-12-19 16:17:57 +11:00
case HGNodeType::Equal: {
HGNode addNode(HGNodeType::Equal);
addNode.addNode(rootNode.children.back());
rootNode.children.pop_back();
auto tokenopt = consume();
if (tokenopt) {
addNode.addNode(parseOneToken(tokenopt));
} else {
std::cout << "FEED ME MORE TOKENS\n";
exit(1);
}
rootNode.addNode(addNode);
break;
}
2025-12-14 16:07:58 +11:00
case HGNodeType::Puts: {
HGNode putsNode(HGNodeType::Puts);
std::vector<std::string> tokens;
while (auto tokenopt = consume()) {
if (tokenopt.value() == "\n") {
break;
}
tokens.push_back(tokenopt.value());
}
auto children = Parser(tokens).parse();
for (auto& child : children.children) {
putsNode.addNode(child);
}
rootNode.addNode(putsNode);
break;
}
2025-12-14 17:17:19 +11:00
case HGNodeType::If: {
HGNode ifNode(HGNodeType::If);
std::vector<std::string> tokens;
while (auto tokenopt = consume()) {
if (tokenopt.value() == "\n") {
break;
}
tokens.push_back(tokenopt.value());
}
auto children = Parser(tokens).parse();
ifNode.addNode(children.children[0]);
tokens.clear();
size_t brackets = 1;
auto tokenopt = consume();
if (tokenopt) {
if (tokenopt.value() == "{") {
tokens.push_back(tokenopt.value());
while (auto tokenopt = consume()) {
tokens.push_back(tokenopt.value());
if (tokenopt.value() == "{") {
brackets++;
}
if (tokenopt.value() == "}") {
brackets--;
}
if (brackets == 0) {
break;
}
}
} else {
2025-12-15 11:37:27 +11:00
std::cout << "I want a code block instead of a " + tokenopt.value() + "\n";
2025-12-14 17:17:19 +11:00
exit(1);
}
} else {
std::cout << "FEED ME MORE TOKENSSSSS\n";
exit(1);
}
2025-12-15 11:37:27 +11:00
auto childCodeBlock = Parser(tokens).parse();
ifNode.addNode(childCodeBlock.children[0]);
rootNode.addNode(ifNode);
2025-12-14 17:17:19 +11:00
break;
}
case HGNodeType::CodeBlockStart: {
HGNode codeBlockNode(HGNodeType::CodeBlock);
2025-12-15 11:37:27 +11:00
size_t brackets = 1;
std::vector<std::string> tokens;
while (auto tokenopt = consume()) {
if (tokenopt.value() == "{") {
brackets++;
}
if (tokenopt.value() == "}") {
brackets--;
}
if (brackets == 0) {
break;
}
tokens.push_back(tokenopt.value());
}
codeBlockNode.children = Parser(tokens).parse().children;
rootNode.addNode(codeBlockNode);
2025-12-14 17:17:19 +11:00
// WIP
break;
}
2025-12-14 14:21:19 +11:00
}
}
return rootNode;
}
};
/**
 * Generates code for `rootNode` and appends every instruction, in order, to a
 * freshly created Ground program.
 *
 * @param rootNode tree to lower; generateCode() updates the nodes' outputIds.
 * @return the assembled program.
 */
GroundProgram assembleProgram(HGNode& rootNode) {
    GroundProgram gp = groundCreateProgram();
    const auto blocks = rootNode.generateCode();
    // Range-for avoids the signed/unsigned comparison of an int index
    // against size().
    for (const auto& block : blocks) {
        for (const auto& inst : block.code) {
            groundAddInstructionToProgram(&gp, inst);
        }
    }
    return gp;
}
2025-12-13 18:07:26 +11:00
} // namespace Parser
/**
 * Splits HighGround source text into string tokens.
 *
 * - Newline, "(", ")" and "}" are emitted as single-character tokens.
 * - "+" and "-" absorb a following identical character or "=" ("++", "+=").
 * - "*", "/" and "=" absorb a following "=" ("==", "*=").
 * - "{" is emitted preceded by a synthetic "\n" token so that whatever comes
 *   before a block is terminated like a line.
 * - Spaces, tabs and carriage returns separate tokens and are dropped.
 * - A quoted literal ("..." or '...') is kept as part of one token, including
 *   any spaces or operator characters inside it. Escapes are not interpreted.
 * - Any other run of characters forms a token.
 */
class Lexer {
    std::string input;
    size_t size;
    size_t current = 0;

    // Returns the character `ahead` positions past the next unread one,
    // without consuming it. The default of 0 is the next unread character,
    // which is what the operator cases need after consume() has already
    // stepped past the current one.
    std::optional<char> peek(size_t ahead = 0) const {
        if (current + ahead < size) {
            return input[current + ahead];
        }
        return {};
    }

    // Returns the next unread character and advances, or nothing at the end.
    std::optional<char> consume() {
        if (current < size) {
            return input[current++];
        }
        return {};
    }

public:
    Lexer(std::string in) : input(in), size(in.size()) {}

    /**
     * Tokenizes the whole input from the start.
     *
     * @return the tokens in source order.
     */
    std::vector<std::string> lex() {
        current = 0;
        std::vector<std::string> tokens;
        std::string buf;

        // Emits the pending word, if any.
        const auto flush = [&tokens, &buf]() {
            if (!buf.empty()) {
                tokens.push_back(buf);
                buf.clear();
            }
        };

        while (auto copt = consume()) {
            const char c = copt.value();
            switch (c) {
                // quoted literals: copy through the closing quote verbatim
                case '"':
                case '\'':
                {
                    buf += c;
                    while (auto inner = consume()) {
                        buf += inner.value();
                        if (inner.value() == c) {
                            break;
                        }
                    }
                    break;
                }
                // tokens which are not followed by anything
                case '\n':
                case '(':
                case ')':
                case '}':
                {
                    flush();
                    tokens.push_back(std::string(1, c));
                    break;
                }
                // tokens which may be followed by either themselves
                // or an equals sign
                case '+':
                case '-':
                {
                    std::string newToken(1, c);
                    if (auto next = peek(); next && (next.value() == c || next.value() == '=')) {
                        newToken += next.value();
                        consume();
                    }
                    flush();
                    tokens.push_back(newToken);
                    break;
                }
                // tokens which may be followed by an equals sign
                case '*':
                case '/':
                case '=':
                {
                    std::string newToken(1, c);
                    if (auto next = peek(); next && next.value() == '=') {
                        newToken += next.value();
                        consume();
                    }
                    flush();
                    tokens.push_back(newToken);
                    break;
                }
                // tokens which need a newline inserted for them
                case '{':
                {
                    flush();
                    tokens.push_back("\n");
                    tokens.push_back(std::string(1, c));
                    break;
                }
                // separators which do not need to be included
                case ' ':
                case '\t':
                case '\r':
                {
                    flush();
                    break;
                }
                default:
                {
                    buf += c;
                }
            }
        }
        flush();
        return tokens;
    }
};
} // namespace HighGround
/**
 * Entry point: reads the HighGround source file named by argv[1], lexes and
 * parses it, assembles a Ground program and runs it.
 *
 * @return 0 after the program has run, 1 when no file was given or the file
 *         could not be opened.
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " (file)\n";
        return 1;
    }
    std::ifstream file(argv[1]);
    if (!file) {
        // Without this check an unreadable file would be treated as empty
        // source and silently run an empty program.
        std::cerr << "Could not open " << argv[1] << "\n";
        return 1;
    }
    std::ostringstream ss;
    ss << file.rdbuf();
    auto lexed = HighGround::Lexer(ss.str()).lex();
    auto parsed = HighGround::Parser::Parser(lexed).parse();
    GroundProgram program = HighGround::Parser::assembleProgram(parsed);
    groundRunProgram(&program);
    return 0;
}