cground/src/lexer.c

#include "lexer.h"
#include <stdbool.h>
#include <string.h>
#include <stdio.h>

/* Append a token to a line, growing the token array by one element. */
void addTokenToLine(TokenLine* line, Token tok) {
    line->count++;
    Token* newTokens = realloc(line->tokens, line->count * sizeof(Token));
    if (!newTokens) {
        perror("Failed to allocate memory for token");
        exit(EXIT_FAILURE);
    }
    line->tokens = newTokens;
    line->tokens[line->count - 1] = tok;
}

/* Append a completed line of tokens to the lexed file. */
void addLineToLexed(LexedFile* lf, TokenLine line) {
    lf->lineCount++;
    TokenLine* newLines = realloc(lf->lines, lf->lineCount * sizeof(TokenLine));
    if (!newLines) {
        perror("Failed to allocate memory for line");
        exit(EXIT_FAILURE);
    }
    lf->lines = newLines;
    lf->lines[lf->lineCount - 1] = line;
}

void freeTokenLine(TokenLine* line) {
    for (size_t i = 0; i < line->count; i++) {
        free(line->tokens[i].text);
    }
    free(line->tokens);
}

void freeLexedFile(LexedFile* lf) {
    if (lf == NULL) return;
    for (size_t i = 0; i < lf->lineCount; i++) {
        freeTokenLine(&lf->lines[i]);
    }
    free(lf->lines);
    lf->lines = NULL;
    lf->lineCount = 0;
}

/*
 * Split fileContents into lines of whitespace-separated tokens. Whitespace
 * inside double or single quotes does not split a token, and an unquoted
 * '#' starts a comment that runs to the end of the line.
 */
LexedFile lexFile(const char* fileContents) {
    LexedFile result = {0};       // zero-initialized: lines = NULL, lineCount = 0
    TokenLine currentLine = {0};  // tokens = NULL, count = 0
    char buf[4096] = {0};         // accumulates the characters of the current token
    size_t bufLen = 0;
    bool inString = false;        // inside a "..." literal
    bool inChar = false;          // inside a '...' literal
    bool isComment = false;       // the rest of the current line is a comment
    for (size_t i = 0; fileContents[i] != '\0'; i++) {
        char c = fileContents[i];
        // Safety check: prevent buffer overflow (leave room for the terminator)
        if (bufLen >= sizeof(buf) - 1) {
            fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
            exit(EXIT_FAILURE);
        }
        switch (c) {
            case '"':
                // Toggle string mode unless inside a comment or a character
                // literal. Escape sequences (e.g. \") are not recognized.
                if (!isComment) {
                    if (inChar) {
                        buf[bufLen++] = c;
                    } else {
                        inString = !inString;
                        buf[bufLen++] = c;
                    }
                }
                break;
            case '\'':
                // Toggle character-literal mode unless inside a comment or a string.
                if (!isComment) {
                    if (inString) {
                        buf[bufLen++] = c;
                    } else {
                        inChar = !inChar;
                        buf[bufLen++] = c;
                    }
                }
                break;
            case '\n':
                if (!inString && !inChar) {
                    // Flush the pending token, if any, onto the current line.
                    if (bufLen > 0) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                    // Emit the completed line and reset state for the next one.
                    addLineToLexed(&result, currentLine);
                    currentLine.tokens = NULL;
                    currentLine.count = 0;
                    isComment = false;
                } else if (!isComment) {
                    // Inside a string or character literal the newline is literal text.
                    buf[bufLen++] = c;
                }
                break;
            case '#':
                if (!inString && !inChar) {
                    // Start of a comment: flush the pending token and skip the rest
                    // of the line. The line itself is emitted when '\n' is reached,
                    // so each source line produces exactly one TokenLine.
                    isComment = true;
                    if (bufLen > 0) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                } else {
                    // '#' inside a string or character literal is literal text.
                    buf[bufLen++] = c;
                }
                break;
            case ' ':
            case '\t': // Tabs are treated as whitespace too
                if (!inString && !inChar) {
                    // Whitespace ends the current token (unless inside a comment).
                    if (bufLen > 0 && !isComment) {
                        buf[bufLen] = '\0';
                        Token tok;
                        tok.text = strdup(buf);
                        if (!tok.text) {
                            perror("Failed to duplicate token string");
                            exit(EXIT_FAILURE);
                        }
                        addTokenToLine(&currentLine, tok);
                        bufLen = 0;
                        memset(buf, 0, sizeof(buf));
                    }
                } else {
                    // Whitespace inside quotes is part of the token.
                    buf[bufLen++] = c;
                }
                break;
            case '\r': // Handle Windows line endings: carriage returns are skipped
                break;
            default:
                if (!isComment) {
                    buf[bufLen++] = c;
                }
                break;
        }
    }
    // Handle any remaining content at end of file
    if (bufLen > 0 && !isComment) {
        buf[bufLen] = '\0';
        Token tok;
        tok.text = strdup(buf);
        if (!tok.text) {
            perror("Failed to duplicate token string");
            exit(EXIT_FAILURE);
        }
        addTokenToLine(&currentLine, tok);
    }
    // Add final line if it has content
    if (currentLine.count > 0) {
        addLineToLexed(&result, currentLine);
    }
    return result;
}
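
/*
 * Minimal usage sketch (not part of the original file): it relies only on the
 * struct fields already used above (LexedFile.lines/lineCount,
 * TokenLine.tokens/count, Token.text). The LEXER_DEMO guard and the sample
 * input are hypothetical, e.g. compile with `cc -DLEXER_DEMO lexer.c`.
 */
#ifdef LEXER_DEMO
int main(void) {
    // Two lines: one with a trailing comment, one with a quoted token
    // containing a space and a '#'.
    const char* source = "set x 42   # trailing comment\nprint \"hello # world\"\n";
    LexedFile lf = lexFile(source);
    for (size_t i = 0; i < lf.lineCount; i++) {
        printf("line %zu:", i);
        for (size_t j = 0; j < lf.lines[i].count; j++) {
            printf(" [%s]", lf.lines[i].tokens[j].text);
        }
        printf("\n");
    }
    freeLexedFile(&lf); // frees every token string, each token array, and the line array
    return 0;
}
#endif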