cground/src/lexer.c

#include "lexer.h"
#include <stdbool.h>
#include <string.h>
#include <stdio.h>

/* Append a token to a line, growing the token array by one element. */
void addTokenToLine(TokenLine* line, Token tok) {
    line->count++;
    Token* newTokens = realloc(line->tokens, line->count * sizeof(Token));
    if (!newTokens) {
        perror("Failed to allocate memory for token");
        exit(EXIT_FAILURE);
    }
    line->tokens = newTokens;
    line->tokens[line->count - 1] = tok;
}

/* Append a completed line of tokens to the lexed file. */
void addLineToLexed(LexedFile* lf, TokenLine line) {
    lf->lineCount++;
    TokenLine* newLines = realloc(lf->lines, lf->lineCount * sizeof(TokenLine));
    if (!newLines) {
        perror("Failed to allocate memory for line");
        exit(EXIT_FAILURE);
    }
    lf->lines = newLines;
    lf->lines[lf->lineCount - 1] = line;
}

void freeTokenLine(TokenLine* line) {
    for (size_t i = 0; i < line->count; i++) {
        free(line->tokens[i].text);
    }
    free(line->tokens);
}

void freeLexedFile(LexedFile* lf) {
    if (lf == NULL) return;
    for (size_t i = 0; i < lf->lineCount; i++) {
        freeTokenLine(&lf->lines[i]);
    }
    free(lf->lines);
    lf->lines = NULL;
    lf->lineCount = 0;
}

/*
 * Split fileContents into lines of whitespace-separated tokens. Whitespace
 * inside double or single quotes does not split a token, and an unquoted
 * '#' starts a comment that runs to the end of the line.
 */
LexedFile lexFile(const char* fileContents) {
    LexedFile result = {0};       // zero-initialized: lines = NULL, lineCount = 0
    TokenLine currentLine = {0};  // tokens = NULL, count = 0
    char buf[4096] = {0};         // accumulates the characters of the current token
    size_t bufLen = 0;
    bool inString = false;        // inside a "..." literal
    bool inChar = false;          // inside a '...' literal
    bool isComment = false;       // the rest of the current line is a comment
    for (size_t i = 0; fileContents[i] != '\0'; i++) {
        char c = fileContents[i];
        // Safety check: prevent buffer overflow (leave room for the terminator)
        if (bufLen >= sizeof(buf) - 1) {
            fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
            exit(EXIT_FAILURE);
        }
        switch (c) {
            case '"':
                // Toggle string mode unless inside a comment or a character
                // literal. Escape sequences (e.g. \") are not recognized.
                if (!isComment) {
                    if (inChar) {
                        buf[bufLen++] = c;
                    } else {
                        inString = !inString;
                        buf[bufLen++] = c;
                    }
                }
                break;
            case '\'':
                // Toggle character-literal mode unless inside a comment or a string.
                if (!isComment) {
                    if (inString) {
                        buf[bufLen++] = c;
                    } else {
                        inChar = !inChar;
                        buf[bufLen++] = c;
                    }
                }
                break;
            case '\n':
                if (!inString && !inChar) {
                    // Flush the pending token, if any, onto the current line.
                    if (bufLen > 0) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                    // Emit the completed line and reset state for the next one.
                    addLineToLexed(&result, currentLine);
                    currentLine.tokens = NULL;
                    currentLine.count = 0;
                    isComment = false;
                } else if (!isComment) {
                    // Inside a string or character literal the newline is literal text.
                    buf[bufLen++] = c;
                }
                break;
            case '#':
                if (!inString && !inChar) {
                    // Start of a comment: flush the pending token and skip the rest
                    // of the line. The line itself is emitted when '\n' is reached,
                    // so each source line produces exactly one TokenLine.
                    isComment = true;
                    if (bufLen > 0) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                } else {
                    // '#' inside a string or character literal is literal text.
                    buf[bufLen++] = c;
                }
                break;
            case ' ':
            case '\t': // Tabs are treated as whitespace too
                if (!inString && !inChar) {
                    // Whitespace ends the current token (unless inside a comment).
                    if (bufLen > 0 && !isComment) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                } else {
                    // Whitespace inside quotes is part of the token.
                    buf[bufLen++] = c;
                }
                break;
            case '\r': // Handle Windows line endings: carriage returns are skipped
                break;
            default:
                if (!isComment) {
                    buf[bufLen++] = c;
                }
                break;
        }
    }
    // Handle any remaining content at end of file
    if (bufLen > 0 && !isComment) {
        buf[bufLen] = '\0';
        Token tok;
        tok.text = strdup(buf);
        if (!tok.text) {
            perror("Failed to duplicate token string");
            exit(EXIT_FAILURE);
        }
        addTokenToLine(&currentLine, tok);
    }
    // Add final line if it has content
    if (currentLine.count > 0) {
        addLineToLexed(&result, currentLine);
    }
    return result;
}
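
/*
 * Minimal usage sketch (not part of the original file): it relies only on the
 * struct fields already used above (LexedFile.lines/lineCount,
 * TokenLine.tokens/count, Token.text). The LEXER_DEMO guard and the sample
 * input are hypothetical, e.g. compile with `cc -DLEXER_DEMO lexer.c`.
 */
#ifdef LEXER_DEMO
int main(void) {
    // Two lines: one with a trailing comment, one with a quoted token
    // containing a space and a '#'.
    const char* source = "set x 42   # trailing comment\nprint \"hello # world\"\n";
    LexedFile lf = lexFile(source);
    for (size_t i = 0; i < lf.lineCount; i++) {
        printf("line %zu:", i);
        for (size_t j = 0; j < lf.lines[i].count; j++) {
            printf(" [%s]", lf.lines[i].tokens[j].text);
        }
        printf("\n");
    }
    freeLexedFile(&lf); // frees every token string, each token array, and the line array
    return 0;
}
#endif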