Add lexer to the build, harden the lexer, and implement the parser

2025-11-23 15:54:50 +11:00
parent d1711accde
commit 451de0affd
5 changed files with 253 additions and 17 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,5 +9,7 @@ add_executable(groundc
        src/main.c
        src/parser.c
        src/parser.h
+        src/lexer.c
+        src/lexer.h
        src/types.c
        src/types.h)
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,14 +1,13 @@
 #include "lexer.h"
-#include <stdlib.h>
-#include <stdio.h>
 #include <stdbool.h>
 #include <string.h>
+#include <stdio.h>

 void addTokenToLine(TokenLine* line, Token tok) {
    line->count++;
    Token* newTokens = realloc(line->tokens, line->count * sizeof(Token));
    if (!newTokens) {
-        perror("Failed to allocate token");
+        perror("Failed to allocate memory for token");
        exit(EXIT_FAILURE);
    }
    line->tokens = newTokens;
@@ -19,13 +18,30 @@ void addLineToLexed(LexedFile* lf, TokenLine line) {
    lf->lineCount++;
    TokenLine* newLines = realloc(lf->lines, lf->lineCount * sizeof(TokenLine));
    if (!newLines) {
-        perror("Failed to allocate line");
+        perror("Failed to allocate memory for line");
        exit(EXIT_FAILURE);
    }
    lf->lines = newLines;
    lf->lines[lf->lineCount - 1] = line;
 }

+void freeTokenLine(TokenLine* line) {  // Frees every token's text, then the token array.
+    for (size_t left = line->count; left > 0; left--) {
+        free(line->tokens[left - 1].text);
+    }
+    free(line->tokens);
+}
+
+void freeLexedFile(LexedFile* lf) {  // NULL-safe: frees every line, then leaves *lf empty.
+    if (!lf) return;
+    for (size_t left = lf->lineCount; left > 0; left--) {
+        freeTokenLine(&lf->lines[left - 1]);
+    }
+    free(lf->lines);
+    lf->lineCount = 0;
+    lf->lines = NULL;
+}
+
 LexedFile lexFile(const char* fileContents) {
    LexedFile result = {0};
    result.lines = NULL;
@@ -35,7 +51,7 @@ LexedFile lexFile(const char* fileContents) {
    currentLine.tokens = NULL;
    currentLine.count = 0;
    
-    char buf[1024] = {0};
+    char buf[4096] = {0}; 
    size_t bufLen = 0;
    bool inString = false;
    bool inChar = false;
@@ -44,6 +60,12 @@ LexedFile lexFile(const char* fileContents) {
    for (size_t i = 0; fileContents[i] != '\0'; i++) {
        char c = fileContents[i];
        
+        // Safety check: prevent buffer overflow
+        if (bufLen >= sizeof(buf) - 1) {
+            fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
+            exit(EXIT_FAILURE);
+        }
+        
        switch (c) {
            case '"':
                if (!isComment) {
@@ -74,11 +96,15 @@ LexedFile lexFile(const char* fileContents) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
-                        // Add tok to currentLine (need helper function)
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                    }
-                    // Add line to result (need helper function)
+                    // Add line to result
                    addLineToLexed(&result, currentLine);
                    // Reset for next line
                    currentLine.tokens = NULL;
@@ -96,8 +122,13 @@ LexedFile lexFile(const char* fileContents) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                    }
                    addLineToLexed(&result, currentLine);
                    currentLine.tokens = NULL;
@@ -108,19 +139,29 @@ LexedFile lexFile(const char* fileContents) {
                break;
                
            case ' ':
+            case '\t':  // Also handle tabs as whitespace
                if (!inString && !inChar) {
                    if (bufLen > 0 && !isComment) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                    }
                } else {
                    buf[bufLen++] = c;
                }
                break;
                
+            case '\r':  // Handle Windows line endings
+                // Just skip carriage returns
+                break;
+                
            default:
                if (!isComment) {
                    buf[bufLen++] = c;
@@ -129,13 +170,19 @@ LexedFile lexFile(const char* fileContents) {
        }
    }
    
-    // Handle any remaining content
-    if (bufLen > 0) {
+    // Handle any remaining content at end of file
+    if (bufLen > 0 && !isComment) {
        buf[bufLen] = '\0';
        Token tok;
        tok.text = strdup(buf);
+        if (!tok.text) {
+            perror("Failed to duplicate token string");
+            exit(EXIT_FAILURE);
+        }
        addTokenToLine(&currentLine, tok);
    }
+    
+    // Add final line if it has content
    if (currentLine.count > 0) {
        addLineToLexed(&result, currentLine);
    }
--- a/src/main.c
+++ b/src/main.c
@@ -1,4 +1,3 @@
-#include "types.h"
 #include "parser.h"
 #include <stdio.h>

--- a/src/parser.c
+++ b/src/parser.c
@@ -1,11 +1,14 @@
 #include "parser.h"
-#include "types.h"
 #include "lexer.h"
+#include "types.h"
 #include <stdio.h>
+#include <string.h>
+#include <ctype.h>

 GroundProgram createGroundProgram() {
    GroundProgram gp;
    gp.size = 0;
+    gp.instructions = NULL;  // no buffer yet; addInstructionToProgram's realloc() grows it from NULL
    return gp;
 }

@@ -14,19 +17,204 @@ void addInstructionToProgram(GroundProgram* gp, GroundInstruction instruction) {
    GroundInstruction* ptr = realloc(gp->instructions, gp->size * sizeof(GroundInstruction));
    if (ptr == NULL) {
        perror("Couldn't allocate memory for instruction");
-        exit(1);
+        exit(EXIT_FAILURE);
    }
    gp->instructions = ptr;
    gp->instructions[gp->size - 1] = instruction;
 }

 void freeGroundProgram(GroundProgram* gp) {
-    for (int i = 0; i < gp->size; i++) {
+    for (size_t i = 0; i < gp->size; i++) {
        freeGroundInstruction(&gp->instructions[i]);
    }
+    free(gp->instructions);  // release the array itself, not only its elements
+    gp->instructions = NULL;  // with size reset too, a second call finds nothing to free
+    gp->size = 0;
 }

-GroundProgram parseFile(LexedFile file) {
-    GroundProgram gp;
-    return gp;
+// Helper: true iff str is an optional '+'/'-' sign followed by one or more decimal digits.
+static bool isInteger(const char* str) {
+    if (str == NULL || *str == '\0') return false;
+
+    size_t i = 0;
+    if (str[0] == '-' || str[0] == '+') i++;
+    if (str[i] == '\0') return false;  // a lone sign is not a number
+
+    for (; str[i] != '\0'; i++) {
+        // <ctype.h> classifiers need an unsigned char value; a negative plain char is UB.
+        if (!isdigit((unsigned char)str[i])) return false;
+    }
+    return true;
+}
+
+// Helper: true iff str is an optional sign plus digits with exactly one '.' (e.g. "-1.5", ".5", "2.").
+static bool isDouble(const char* str) {
+    if (str == NULL || *str == '\0') return false;
+    bool hasDecimal = false;
+    bool hasDigit = false;
+    size_t i = 0;
+    if (str[0] == '-' || str[0] == '+') i++;
+
+    for (; str[i] != '\0'; i++) {
+        if (str[i] == '.') {
+            if (hasDecimal) return false;
+            hasDecimal = true;
+        } else if (isdigit((unsigned char)str[i])) {  // cast: a negative plain char is UB for <ctype.h>
+            hasDigit = true;
+        } else {
+            return false;
+        }
+    }
+    // Requiring a digit rejects ".", "+." and a lone sign, none of which is a number.
+    return hasDecimal && hasDigit;
+}
+
+// Helper: Parse an argument token into a GroundArg (literal value or reference).
+static GroundArg parseArgument(const char* token) {
+    if (token[0] == '"') {
+        // String literal: drop the opening quote and, only if present, the closing one
+        // (a plain len - 2 wraps around for a lone '"' and eats a char of an unterminated literal).
+        size_t len = strlen(token + 1);
+        if (len > 0 && token[len] == '"') len--;
+        char* strVal = strndup(token + 1, len);
+        if (!strVal) {
+            perror("Failed to duplicate string literal");
+            exit(EXIT_FAILURE);
+        }
+        GroundValue val = createStringGroundValue(strVal);
+        free(strVal);
+        return createValueGroundArg(val);
+    }
+    else if (token[0] == '\'') {
+        // Char literal: the character right after the opening quote
+        return createValueGroundArg(createCharGroundValue(token[1]));
+    }
+    else if (strcmp(token, "true") == 0) {
+        return createValueGroundArg(createBoolGroundValue(true));
+    }
+    else if (strcmp(token, "false") == 0) {
+        return createValueGroundArg(createBoolGroundValue(false));
+    }
+    else if (token[0] == '$') {  // Value reference
+        return createRefGroundArg(VALREF, token + 1);
+    }
+    else if (token[0] == '&') {  // Direct reference
+        return createRefGroundArg(DIRREF, token + 1);
+    }
+    else if (token[0] == '%') {  // Line reference or label
+        return createRefGroundArg(LINEREF, token + 1);
+    }
+    else if (token[0] == '!') {  // Function reference
+        return createRefGroundArg(FNREF, token + 1);
+    }
+    else if (token[0] == '-' && token[1] != '\0' && token[1] != '.'
+             && !isdigit((unsigned char)token[1])) {
+        // Type reference (e.g., -int, -string); LABEL is reused for type refs.
+        // "-5" and "-.5" skip this branch and are parsed as numbers below.
+        return createRefGroundArg(LABEL, token + 1);
+    }
+
+    // Try to parse as number. strtoll/strtod clamp out-of-range input, whereas
+    // atoll/atof leave that case undefined.
+    if (isInteger(token)) {
+        int64_t intVal = strtoll(token, NULL, 10);
+        return createValueGroundArg(createIntGroundValue(intVal));
+    }
+    else if (isDouble(token)) {
+        double dblVal = strtod(token, NULL);
+        return createValueGroundArg(createDoubleGroundValue(dblVal));
+    }
+
+    // Unknown token type: warn and treat the whole token as a value reference
+    fprintf(stderr, "Warning: Unknown argument type for token: %s\n", token);
+    return createRefGroundArg(VALREF, token);
+}
+
+// Helper: Map an instruction mnemonic to its GroundInstType; an unknown name is fatal.
+static GroundInstType getInstructionType(const char* inst) {
+    // Mnemonic -> instruction type table; a few instructions have an alias spelling.
+    static const struct {
+        const char* name;
+        GroundInstType type;
+    } mnemonics[] = {
+        {"if", IF}, {"jump", JUMP}, {"end", END},
+        {"input", INPUT}, {"stdin", INPUT},
+        {"print", PRINT}, {"stdout", PRINT},
+        {"println", PRINTLN}, {"stdlnout", PRINTLN},
+        {"set", SET}, {"gettype", GETTYPE}, {"exists", EXISTS},
+        {"setlist", SETLIST},
+        {"setlistat", SETLISTAT},
+        {"getlistat", GETLISTAT},
+        {"getlistsize", GETLISTSIZE},
+        {"listappend", LISTAPPEND},
+        {"getstrsize", GETSTRSIZE},
+        {"getstrcharat", GETSTRCHARAT},
+        {"add", ADD},
+        {"subtract", SUBTRACT},
+        {"multiply", MULTIPLY},
+        {"divide", DIVIDE},
+        {"equal", EQUAL},
+        {"inequal", INEQUAL},
+        {"not", NOT},
+        {"greater", GREATER},
+        {"lesser", LESSER},
+        {"stoi", STOI}, {"stod", STOD}, {"tostring", TOSTRING},
+        {"fun", FUN}, {"return", RETURN}, {"endfun", ENDFUN},
+        {"pusharg", PUSHARG}, {"call", CALL},
+        {"struct", STRUCT}, {"endstruct", ENDSTRUCT}, {"init", INIT},
+        {"use", USE}, {"extern", EXTERN},
+    };
+    const size_t total = sizeof mnemonics / sizeof mnemonics[0];
+
+    // Linear scan is fine: the table is tiny and every name appears once.
+    for (size_t k = 0; k < total; k++) {
+        if (strcmp(inst, mnemonics[k].name) == 0) return mnemonics[k].type;
+    }
+
+    fprintf(stderr, "Error: Unknown instruction: %s\n", inst);
+    exit(EXIT_FAILURE);
+}
+
+// Parses Ground source text into a GroundProgram: lexes it, then turns every
+// non-empty token line into one instruction. Release the result with freeGroundProgram().
+GroundProgram parseFile(const char* file) {
+    // `file` is the source text itself; it is handed straight to the lexer.
+    LexedFile lexed = lexFile(file);
+    GroundProgram program = createGroundProgram();
+
+    // Expected line shape (the label is optional):
+    //   [@label] <instruction> <arg>...
+    for (size_t lineNum = 0; lineNum < lexed.lineCount; lineNum++) {
+        const TokenLine* current = &lexed.lines[lineNum];
+        if (current->count == 0) {
+            continue;  // blank line
+        }
+
+        // TODO: Handle labels - you might want to store them separately.
+        // For now a leading "@..." token is skipped; a label-only line yields nothing.
+        size_t instIndex = (current->tokens[0].text[0] == '@') ? 1 : 0;
+        if (instIndex >= current->count) {
+            continue;
+        }
+
+        // The first non-label token names the instruction.
+        GroundInstruction inst =
+            createGroundInstruction(getInstructionType(current->tokens[instIndex].text));
+
+        // Everything after it is an argument.
+        // NOTE(review): the tokens are freed below, so this presumes the argument
+        // constructors copy any text they keep - confirm against types.c.
+        size_t argIndex = instIndex + 1;
+        while (argIndex < current->count) {
+            addArgToInstruction(&inst, parseArgument(current->tokens[argIndex].text));
+            argIndex++;
+        }
+
+        addInstructionToProgram(&program, inst);
+    }
+
+    // The program no longer needs the token storage.
+    freeLexedFile(&lexed);
+
+    return program;
 }
--- a/src/parser.h
+++ b/src/parser.h
@@ -22,6 +22,6 @@ void addInstructionToProgram(GroundProgram* gp, GroundInstruction instruction);
 void freeGroundProgram(GroundProgram* gp);

 // Parses the file into a GroundProgram
-GroundProgram parseFile(LexedFile file);
+GroundProgram parseFile(const char* file);  // file: the program's source text (it is passed to lexFile), not a path

 #endif