#include "lexer.h"

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Append tok to the end of line, growing the token array by one slot.
 * Ownership of tok.text transfers to the line. On allocation failure the
 * process is terminated (matches the file-wide abort-on-OOM policy).
 */
void addTokenToLine(TokenLine* line, Token tok) {
    size_t grownCount = line->count + 1;
    Token* grown = realloc(line->tokens, grownCount * sizeof(Token));
    if (grown == NULL) {
        perror("Failed to allocate memory for token");
        exit(EXIT_FAILURE);
    }
    grown[grownCount - 1] = tok;
    line->tokens = grown;
    line->count = grownCount;
}
/*
 * Append line to lf, growing the line array by one slot.
 * Ownership of line's token storage transfers to lf. On allocation
 * failure the process is terminated (abort-on-OOM policy).
 */
void addLineToLexed(LexedFile* lf, TokenLine line) {
    size_t grownCount = lf->lineCount + 1;
    TokenLine* grown = realloc(lf->lines, grownCount * sizeof(TokenLine));
    if (grown == NULL) {
        perror("Failed to allocate memory for line");
        exit(EXIT_FAILURE);
    }
    grown[grownCount - 1] = line;
    lf->lines = grown;
    lf->lineCount = grownCount;
}
/*
 * Release every token's text and the token array owned by line, then
 * reset the line to a valid empty state (tokens = NULL, count = 0) so a
 * stale pointer cannot cause a double-free — mirrors freeLexedFile,
 * which resets its fields the same way. NULL-safe, also for consistency
 * with freeLexedFile.
 */
void freeTokenLine(TokenLine* line) {
    if (line == NULL) return;
    for (size_t i = 0; i < line->count; i++) {
        free(line->tokens[i].text);
    }
    free(line->tokens);
    line->tokens = NULL;
    line->count = 0;
}
/*
 * Free every line held by lf (including each token's text) and leave lf
 * in a valid empty state. A NULL lf is a no-op.
 */
void freeLexedFile(LexedFile* lf) {
    if (!lf) {
        return;
    }
    TokenLine* lines = lf->lines;
    size_t lineTotal = lf->lineCount;
    for (size_t i = 0; i < lineTotal; i++) {
        freeTokenLine(&lines[i]);
    }
    free(lines);
    lf->lines = NULL;
    lf->lineCount = 0;
}
LexedFile lexFile(const char* fileContents) {
|
|
|
|
|
LexedFile result = {0};
|
|
|
|
|
result.lines = NULL;
|
|
|
|
|
result.lineCount = 0;
|
|
|
|
|
|
|
|
|
|
TokenLine currentLine = {0};
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
char buf[4096] = {0};
|
2025-11-23 13:37:08 +11:00
|
|
|
size_t bufLen = 0;
|
|
|
|
|
bool inString = false;
|
|
|
|
|
bool inChar = false;
|
|
|
|
|
bool isComment = false;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; fileContents[i] != '\0'; i++) {
|
|
|
|
|
char c = fileContents[i];
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
// Safety check: prevent buffer overflow
|
|
|
|
|
if (bufLen >= sizeof(buf) - 1) {
|
|
|
|
|
fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-23 13:37:08 +11:00
|
|
|
switch (c) {
|
|
|
|
|
case '"':
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
if (inChar) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
} else {
|
|
|
|
|
inString = !inString;
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
if (inString) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
} else {
|
|
|
|
|
inChar = !inChar;
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
// Add current token to line if exists
|
|
|
|
|
if (bufLen > 0) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
2025-11-23 15:54:50 +11:00
|
|
|
// Add line to result
|
2025-11-23 13:37:08 +11:00
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
// Reset for next line
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
isComment = false;
|
|
|
|
|
} else if (!isComment) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '#':
|
|
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
isComment = true;
|
|
|
|
|
if (bufLen > 0) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
|
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
} else {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ' ':
|
2025-11-23 15:54:50 +11:00
|
|
|
case '\t': // Also handle tabs as whitespace
|
2025-11-23 13:37:08 +11:00
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
if (bufLen > 0 && !isComment) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
case '\r': // Handle Windows line endings
|
|
|
|
|
// Just skip carriage returns
|
|
|
|
|
break;
|
|
|
|
|
|
2025-11-23 13:37:08 +11:00
|
|
|
default:
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
// Handle any remaining content at end of file
|
|
|
|
|
if (bufLen > 0 && !isComment) {
|
2025-11-23 13:37:08 +11:00
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
}
|
2025-11-23 15:54:50 +11:00
|
|
|
|
|
|
|
|
// Add final line if it has content
|
2025-11-23 13:37:08 +11:00
|
|
|
if (currentLine.count > 0) {
|
|
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|