From ede2f06ef867cca97ceb4a45b73fd234960ffca9 Mon Sep 17 00:00:00 2001
From: Maxwell Jeffress
Date: Wed, 4 Mar 2026 10:40:54 +1100
Subject: [PATCH] .grbc file serialization/deserialization

---
Review notes (this section is not part of the commit message):

 * This copy was rebuilt from an export that had lost its line breaks,
   its indentation and all text inside angle brackets. Restored by best
   guess -- TODO confirm against the tree before applying:
     - 4-space indentation and blank-line placement in src/main.c context;
     - "#include <stdio.h>" (context) and "#include <stdlib.h>" (added)
       in src/main.c;
     - the "<file>" / "<output>" placeholders in the usage strings,
       including the two removed ("-") usage lines;
     - the author e-mail on the From: line is still missing.
   "git apply --ignore-whitespace" may help if only indentation differs.
 * The post-image blob ids on the "index" lines were not recomputed.
 * Code changes relative to the original submission:
     - main: exit non-zero when loading or writing bytecode fails, add the
       missing newlines to messages, spell --writebytecode consistently;
     - deserializeProgramFromFile: no read of uninitialized magic/version,
       instruction count checked for size overflow before malloc;
     - serializeReadInstruction: no leak when the arg count cannot be read;
     - serializeWriteString: over-long strings rejected, not truncated;
     - serializeProgramToFile: fclose() result is checked;
     - serialize.h: includes every standard header it relies on.

 src/main.c      |  49 +++++-
 src/serialize.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/serialize.h |  77 +++++++++
 3 files changed, 467 insertions(+), 5 deletions(-)
 create mode 100644 src/serialize.c
 create mode 100644 src/serialize.h

diff --git a/src/main.c b/src/main.c
index 582fca6..0829091 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,7 +2,9 @@
 #include "interpreter.h"
 #include "compiler.h"
 #include "types.h"
+#include "serialize.h"
 #include <stdio.h>
+#include <stdlib.h>
 
 char* getFileContents(const char* filename) {
     // https://stackoverflow.com/questions/3747086/reading-the-whole-text-file-into-a-char-array-in-c
@@ -47,19 +49,42 @@ int main(int argc, char** argv) {
     }
 
     bool compile = false;
+    bool writeBytecode = false;
+    bool readBytecode = false;
     char* fileName = NULL;
+    char* outFileName = NULL;
     List groundArgs = createList();
 
     for (int i = 1; i < argc; i++) {
         if (strcmp("--compile", argv[i]) == 0 || strcmp("-c", argv[i]) == 0) {
+            if (writeBytecode) {
+                printf("Cannot choose both bytecode and compilation\n");
+                exit(1);
+            }
             compile = true;
         } else if (strcmp("--help", argv[i]) == 0 || strcmp("-h", argv[i]) == 0) {
             printf("GroundVM help\n");
-            printf("Usage: %s <file> [-c] [--compile] [-h] [--help]\n", argv[0]);
+            printf("Usage: %s <file> [-c] [--compile] [-h] [--help] [-w <output>] [--writebytecode <output>] [-b] [--bytecode]\n", argv[0]);
             printf("Options:\n");
             printf(" -c or --compile: Outputs Linux x86_64 assembly instead of interpreting (WIP)\n");
             printf(" -h or --help: Shows this help message\n");
+            printf(" -w <output> or --writebytecode <output>: Outputs binary Ground bytecode\n");
+            printf(" -b or --bytecode: Inputs binary Ground bytecode\n");
             exit(0);
+        } else if (strcmp("--writebytecode", argv[i]) == 0 || strcmp("-w", argv[i]) == 0) {
+            if (compile) {
+                printf("Cannot choose both bytecode and compilation\n");
+                exit(1);
+            }
+            writeBytecode = true;
+            if (i + 1 >= argc) {
+                printf("Usage: %s %s <output>\n", argv[0], argv[i]);
+                exit(1);
+            }
+            i++;
+            outFileName = argv[i];
+        } else if (strcmp("--bytecode", argv[i]) == 0 || strcmp("-b", argv[i]) == 0) {
+            readBytecode = true;
         } else {
             if (fileName == NULL) {
                 fileName = argv[i];
@@ -70,18 +95,32 @@ int main(int argc, char** argv) {
     }
 
     if (fileName == NULL) {
-        printf("Usage: %s <file> [-c] [--compile] [-h] [--help]\n", argv[0]);
+        printf("Usage: %s <file> [-c] [--compile] [-h] [--help] [-w <output>] [--writebytecode <output>] [-b] [--bytecode]\n", argv[0]);
         printf("Error: No file name provided\n");
         exit(1);
     }
 
-    char* file = getFileContents(fileName);
-    GroundProgram program = parseFile(file);
-    free(file);
+    GroundProgram program;
+
+    if (readBytecode) {
+        // The loader prints the reason itself; don't go on to run an empty program.
+        if (!deserializeProgramFromFile(fileName, &program)) {
+            exit(1);
+        }
+    } else {
+        char* file = getFileContents(fileName);
+        program = parseFile(file);
+        free(file);
+    }
 
     if (compile) {
         char* compiled = compileGroundProgram(&program);
         printf("%s\n", compiled);
+    } else if (writeBytecode) {
+        // Report a failed write through the exit status.
+        if (!serializeProgramToFile(outFileName, &program)) {
+            exit(1);
+        }
     } else {
         GroundVariable* variables = NULL;
         GroundLabel* labels = NULL;
diff --git a/src/serialize.c b/src/serialize.c
new file mode 100644
index 0000000..7dbd588
--- /dev/null
+++ b/src/serialize.c
@@ -0,0 +1,346 @@
+#include "serialize.h"
+#include "types.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Writes exactly n bytes from data to f; true only if all were written. */
+static bool writeBytes(FILE* f, const void* data, size_t n) {
+    return fwrite(data, 1, n, f) == n;
+}
+
+/* Reads exactly n bytes from f into data; false on EOF or a short read. */
+static bool readBytes(FILE* f, void* data, size_t n) {
+    return fread(data, 1, n, f) == n;
+}
+
+/* Convenience macros for writing/reading a single typed value.
+ * val must be an lvalue; its in-memory bytes are copied verbatim, so the
+ * file format inherits the host's byte order and type sizes. */
+#define WRITE(f, val) writeBytes((f), &(val), sizeof(val))
+#define READ(f, val) readBytes((f), &(val), sizeof(val))
+
+/* -----------------------------------------------------------------------
+ * Strings
+ *
+ * Format:
+ *   uint32_t length   (UINT32_MAX encodes a NULL pointer)
+ *   length bytes      (no terminating NUL)
+ * -----------------------------------------------------------------------
+ */
+
+bool serializeWriteString(FILE* f, const char* s) {
+    if (s == NULL) {
+        uint32_t sentinel = UINT32_MAX;
+        return WRITE(f, sentinel);
+    }
+
+    size_t rawLen = strlen(s);
+    /* UINT32_MAX is reserved for NULL, so a string that long (or longer)
+     * cannot be encoded; refuse it rather than silently truncating. */
+    if (rawLen >= UINT32_MAX) {
+        fprintf(stderr, "serializeWriteString: string too long\n");
+        return false;
+    }
+
+    uint32_t len = (uint32_t)rawLen;
+    if (!WRITE(f, len)) return false;
+    if (len > 0 && !writeBytes(f, s, len)) return false;
+    return true;
+}
+
+char* serializeReadString(FILE* f) {
+    uint32_t len;
+    if (!READ(f, len)) return NULL;
+    if (len == UINT32_MAX) return NULL;
+
+    /* Widen before adding 1 so the size is computed in size_t. */
+    char* s = malloc((size_t)len + 1);
+    if (!s) return NULL;
+    if (len > 0 && !readBytes(f, s, len)) {
+        free(s);
+        return NULL;
+    }
+    s[len] = '\0';
+    return s;
+}
+
+/* -----------------------------------------------------------------------
+ * GroundValue
+ *
+ * Format:
+ *   uint32_t type
+ *   payload, depending on type (nothing for NONE)
+ *
+ * Only INT, DOUBLE, CHAR, BOOL, STRING, NONE are expected here.
+ * Any other type is treated as a serialization error.
+ * -----------------------------------------------------------------------
+ */
+
+bool serializeWriteValue(FILE* f, const GroundValue* gv) {
+    uint32_t type = (uint32_t)gv->type;
+    if (!WRITE(f, type)) return false;
+
+    switch (gv->type) {
+        case INT:
+            return WRITE(f, gv->data.intVal);
+        case DOUBLE:
+            return WRITE(f, gv->data.doubleVal);
+        case CHAR:
+            return WRITE(f, gv->data.charVal);
+        case BOOL:
+            return WRITE(f, gv->data.boolVal);
+        case STRING:
+            return serializeWriteString(f, gv->data.stringVal);
+        case NONE:
+            return true;
+        default:
+            /* LIST, FUNCTION, STRUCTVAL, CUSTOM, ERROR:
+             * These don't exist at serialization time. If you're hitting
+             * this, something has gone wrong well before we got here. */
+            fprintf(stderr, "serializeWriteValue: unexpected type %d\n", (int)gv->type);
+            return false;
+    }
+}
+
+bool serializeReadValue(FILE* f, GroundValue* out) {
+    memset(out, 0, sizeof(*out));
+
+    uint32_t type;
+    if (!READ(f, type)) return false;
+    out->type = (GroundValueType)type;
+
+    switch (out->type) {
+        case INT:
+            return READ(f, out->data.intVal);
+        case DOUBLE:
+            return READ(f, out->data.doubleVal);
+        case CHAR:
+            return READ(f, out->data.charVal);
+        case BOOL:
+            return READ(f, out->data.boolVal);
+        case STRING: {
+            char* s = serializeReadString(f);
+            /* NULL is a valid encoded value (the sentinel case), but
+             * we only wrote non-NULL strings, so treat NULL-read as error. */
+            if (!s) return false;
+            out->data.stringVal = s;
+            return true;
+        }
+        case NONE:
+            return true;
+        default:
+            fprintf(stderr, "serializeReadValue: unexpected type %d\n", (int)out->type);
+            return false;
+    }
+}
+
+/* -----------------------------------------------------------------------
+ * GroundArg
+ *
+ * Format:
+ *   uint32_t argType
+ *   VALUE:           serialized GroundValue
+ *   any other type:  length-prefixed refName string
+ * -----------------------------------------------------------------------
+ */
+
+bool serializeWriteArg(FILE* f, const GroundArg* ga) {
+    uint32_t type = (uint32_t)ga->type;
+    if (!WRITE(f, type)) return false;
+
+    if (ga->type == VALUE) {
+        return serializeWriteValue(f, &ga->value.value);
+    } else {
+        /* VALREF, DIRREF, LINEREF, LABEL, FNREF, TYPEREF — all carry a refName */
+        return serializeWriteString(f, ga->value.refName);
+    }
+}
+
+bool serializeReadArg(FILE* f, GroundArg* out) {
+    memset(out, 0, sizeof(*out));
+
+    uint32_t type;
+    if (!READ(f, type)) return false;
+    out->type = (GroundArgType)type;
+
+    if (out->type == VALUE) {
+        return serializeReadValue(f, &out->value.value);
+    } else {
+        char* ref = serializeReadString(f);
+        if (!ref) return false;
+        out->value.refName = ref;
+        return true;
+    }
+}
+
+/* -----------------------------------------------------------------------
+ * GroundInstruction
+ *
+ * Format:
+ *   uint32_t instType
+ *   uint64_t argCount
+ *   argCount serialized GroundArgs
+ * -----------------------------------------------------------------------
+ */
+
+bool serializeWriteInstruction(FILE* f, const GroundInstruction* gi) {
+    uint32_t type = (uint32_t)gi->type;
+    if (!WRITE(f, type)) return false;
+
+    uint64_t argc = (uint64_t)gi->args.length;
+    if (!WRITE(f, argc)) return false;
+
+    for (size_t i = 0; i < gi->args.length; i++) {
+        if (!serializeWriteArg(f, &gi->args.args[i])) return false;
+    }
+    return true;
+}
+
+bool serializeReadInstruction(FILE* f, GroundInstruction* out) {
+    uint32_t type;
+    if (!READ(f, type)) return false;
+
+    *out = createGroundInstruction((GroundInstType)type);
+
+    uint64_t argc;
+    if (!READ(f, argc)) {
+        /* *out is already constructed; release it like the arg-failure
+         * path below so every post-construction failure cleans up. */
+        freeGroundInstruction(out);
+        return false;
+    }
+
+    for (uint64_t i = 0; i < argc; i++) {
+        GroundArg arg;
+        if (!serializeReadArg(f, &arg)) {
+            /* Free whatever args we've already read before bailing. */
+            freeGroundInstruction(out);
+            return false;
+        }
+        addArgToInstruction(out, arg);
+    }
+    return true;
+}
+
+/* -----------------------------------------------------------------------
+ * GroundProgram — top-level
+ *
+ * File layout:
+ *   uint32_t magic        (GROUND_MAGIC)
+ *   uint32_t version      (GROUND_VERSION)
+ *   uint64_t instrCount
+ *   instrCount serialized GroundInstructions
+ * -----------------------------------------------------------------------
+ */
+
+bool serializeProgramToFile(const char* path, const GroundProgram* prog) {
+    FILE* f = fopen(path, "wb");
+    if (!f) {
+        perror("serializeProgramToFile: fopen");
+        return false;
+    }
+
+    bool ok = true;
+
+    /* Header */
+    uint32_t magic = GROUND_MAGIC;
+    uint32_t version = GROUND_VERSION;
+    ok = ok && WRITE(f, magic);
+    ok = ok && WRITE(f, version);
+
+    /* Instruction count, then instructions */
+    uint64_t count = (uint64_t)prog->size;
+    ok = ok && WRITE(f, count);
+
+    for (size_t i = 0; i < prog->size && ok; i++) {
+        ok = serializeWriteInstruction(f, &prog->instructions[i]);
+    }
+
+    if (!ok) {
+        fprintf(stderr, "serializeProgramToFile: write error\n");
+    }
+
+    /* fclose flushes buffered data, so a failure here means the file on
+     * disk is incomplete even if every fwrite above succeeded. */
+    if (fclose(f) != 0) {
+        perror("serializeProgramToFile: fclose");
+        ok = false;
+    }
+    return ok;
+}
+
+bool deserializeProgramFromFile(const char* path, GroundProgram* out) {
+    memset(out, 0, sizeof(*out));
+
+    FILE* f = fopen(path, "rb");
+    if (!f) {
+        perror("deserializeProgramFromFile: fopen");
+        return false;
+    }
+
+    /* Validate header. Both fields start at 0 so that a short read never
+     * leaves an indeterminate value to be compared or printed. */
+    uint32_t magic = 0;
+    uint32_t version = 0;
+    if (!READ(f, magic) || !READ(f, version)) {
+        fprintf(stderr, "deserializeProgramFromFile: truncated header\n");
+        fclose(f);
+        return false;
+    }
+    if (magic != GROUND_MAGIC) {
+        fprintf(stderr, "deserializeProgramFromFile: bad magic (got 0x%08X)\n", (unsigned)magic);
+        fclose(f);
+        return false;
+    }
+    if (version != GROUND_VERSION) {
+        fprintf(stderr, "deserializeProgramFromFile: unsupported version %u\n", (unsigned)version);
+        fclose(f);
+        return false;
+    }
+
+    /* Read instruction count */
+    uint64_t count;
+    if (!READ(f, count)) {
+        fprintf(stderr, "deserializeProgramFromFile: missing instruction count\n");
+        fclose(f);
+        return false;
+    }
+
+    /* count comes straight from the file: reject values whose byte size
+     * would wrap around instead of under-allocating the array. */
+    if (count > SIZE_MAX / sizeof(GroundInstruction)) {
+        fprintf(stderr, "deserializeProgramFromFile: instruction count too large\n");
+        fclose(f);
+        return false;
+    }
+
+    out->instructions = malloc(sizeof(GroundInstruction) * (size_t)count);
+    if (!out->instructions && count > 0) {
+        fprintf(stderr, "deserializeProgramFromFile: malloc failed\n");
+        fclose(f);
+        return false;
+    }
+    out->size = 0; /* incremented as we go so partial frees are safe */
+
+    for (uint64_t i = 0; i < count; i++) {
+        if (!serializeReadInstruction(f, &out->instructions[i])) {
+            fprintf(stderr, "deserializeProgramFromFile: failed at instruction %llu\n",
+                    (unsigned long long)i);
+            /* Free everything successfully read so far. */
+            for (size_t j = 0; j < out->size; j++)
+                freeGroundInstruction(&out->instructions[j]);
+            free(out->instructions);
+            memset(out, 0, sizeof(*out));
+            fclose(f);
+            return false;
+        }
+        out->size++;
+    }
+
+    fclose(f);
+    return true;
+}
diff --git a/src/serialize.h b/src/serialize.h
new file mode 100644
index 0000000..a0f2ca9
--- /dev/null
+++ b/src/serialize.h
@@ -0,0 +1,77 @@
+#ifndef SERIALIZE_H
+#define SERIALIZE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "types.h"
+
+/*
+ * Magic number and version for Ground bytecode files.
+ * The magic value is 'GRND' in ASCII (0x47 0x52 0x4E 0x44). It is written
+ * in host byte order, so on a little-endian machine the file starts with
+ * the bytes "DNRG".
+ */
+#define GROUND_MAGIC 0x47524E44u
+#define GROUND_VERSION 1u
+
+/*
+ * File header written at the start of every .grbc file.
+ */
+typedef struct GroundBytecodeHeader {
+    uint32_t magic;
+    uint32_t version;
+} GroundBytecodeHeader;
+
+/*
+ * Writes a length-prefixed UTF-8 string to (f).
+ * NULL is encoded as a sentinel length (UINT32_MAX).
+ * Returns true on success, false on a write error or if (s) is too long
+ * for a 32-bit length.
+ */
+bool serializeWriteString(FILE* f, const char* s);
+
+/*
+ * Reads a string written by serializeWriteString.
+ * Returns a malloc'd, NUL-terminated copy that the caller must free().
+ * Returns NULL for the NULL sentinel and also on read/allocation failure;
+ * the two cases are not distinguishable from the return value.
+ */
+char* serializeReadString(FILE* f);
+
+/*
+ * Write/read one GroundValue (type tag followed by its payload).
+ * Only INT, DOUBLE, CHAR, BOOL, STRING and NONE are supported; any other
+ * type makes the call fail. Both return true on success.
+ */
+bool serializeWriteValue(FILE* f, const GroundValue* gv);
+bool serializeReadValue(FILE* f, GroundValue* out);
+
+/*
+ * Write/read one GroundArg: a VALUE arg carries a GroundValue, every other
+ * arg type carries its refName string. Both return true on success.
+ */
+bool serializeWriteArg(FILE* f, const GroundArg* ga);
+bool serializeReadArg(FILE* f, GroundArg* out);
+
+/*
+ * Write/read one GroundInstruction (type, argument count, arguments).
+ * On a failed read, (out) must not be used or freed by the caller.
+ */
+bool serializeWriteInstruction(FILE* f, const GroundInstruction* gi);
+bool serializeReadInstruction(FILE* f, GroundInstruction* out);
+
+/*
+ * Writes (prog) to the file at (path), replacing any existing file.
+ * Returns true only if every write and the final fclose succeeded.
+ */
+bool serializeProgramToFile(const char* path, const GroundProgram* prog);
+
+/*
+ * Loads a program previously written by serializeProgramToFile.
+ * On failure a message is printed to stderr, (out) is left zeroed and
+ * false is returned.
+ */
+bool deserializeProgramFromFile(const char* path, GroundProgram* out);
+
+#endif