#include "lexer.h"

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Append tok to the end of line, growing the token array by one slot.
 * Ownership of tok.text transfers to the line. On allocation failure the
 * process is terminated (matches the file-wide abort-on-OOM policy).
 */
void addTokenToLine(TokenLine* line, Token tok) {
    size_t grownCount = line->count + 1;
    Token* grown = realloc(line->tokens, grownCount * sizeof(Token));
    if (grown == NULL) {
        perror("Failed to allocate memory for token");
        exit(EXIT_FAILURE);
    }
    grown[grownCount - 1] = tok;
    line->tokens = grown;
    line->count = grownCount;
}
/*
 * Append line to lf, growing the line array by one slot.
 * Ownership of line's token storage transfers to lf. On allocation
 * failure the process is terminated (abort-on-OOM policy).
 */
void addLineToLexed(LexedFile* lf, TokenLine line) {
    size_t grownCount = lf->lineCount + 1;
    TokenLine* grown = realloc(lf->lines, grownCount * sizeof(TokenLine));
    if (grown == NULL) {
        perror("Failed to allocate memory for line");
        exit(EXIT_FAILURE);
    }
    grown[grownCount - 1] = line;
    lf->lines = grown;
    lf->lineCount = grownCount;
}
/*
 * Release every token's text and the token array owned by line, then
 * reset the line to a valid empty state (tokens = NULL, count = 0) so a
 * stale pointer cannot cause a double-free — mirrors freeLexedFile,
 * which resets its fields the same way. NULL-safe, also for consistency
 * with freeLexedFile.
 */
void freeTokenLine(TokenLine* line) {
    if (line == NULL) return;
    for (size_t i = 0; i < line->count; i++) {
        free(line->tokens[i].text);
    }
    free(line->tokens);
    line->tokens = NULL;
    line->count = 0;
}
/*
 * Free every line held by lf (including each token's text) and leave lf
 * in a valid empty state. A NULL lf is a no-op.
 */
void freeLexedFile(LexedFile* lf) {
    if (!lf) {
        return;
    }
    TokenLine* lines = lf->lines;
    size_t lineTotal = lf->lineCount;
    for (size_t i = 0; i < lineTotal; i++) {
        freeTokenLine(&lines[i]);
    }
    free(lines);
    lf->lines = NULL;
    lf->lineCount = 0;
}
LexedFile lexFile(const char* fileContents) {
|
|
|
|
|
LexedFile result = {0};
|
|
|
|
|
result.lines = NULL;
|
|
|
|
|
result.lineCount = 0;
|
|
|
|
|
|
|
|
|
|
TokenLine currentLine = {0};
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
char buf[4096] = {0};
|
2025-11-23 13:37:08 +11:00
|
|
|
size_t bufLen = 0;
|
|
|
|
|
bool inString = false;
|
|
|
|
|
bool inChar = false;
|
|
|
|
|
bool isComment = false;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; fileContents[i] != '\0'; i++) {
|
|
|
|
|
char c = fileContents[i];
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
// Safety check: prevent buffer overflow
|
|
|
|
|
if (bufLen >= sizeof(buf) - 1) {
|
|
|
|
|
fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-23 13:37:08 +11:00
|
|
|
switch (c) {
|
|
|
|
|
case '"':
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
if (inChar) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
} else {
|
|
|
|
|
inString = !inString;
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
if (inString) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
} else {
|
|
|
|
|
inChar = !inChar;
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
// Add current token to line if exists
|
|
|
|
|
if (bufLen > 0) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
2025-11-23 15:54:50 +11:00
|
|
|
// Add line to result
|
2025-11-23 13:37:08 +11:00
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
// Reset for next line
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
isComment = false;
|
|
|
|
|
} else if (!isComment) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '#':
|
|
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
isComment = true;
|
|
|
|
|
if (bufLen > 0) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
|
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
currentLine.tokens = NULL;
|
|
|
|
|
currentLine.count = 0;
|
|
|
|
|
} else {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ' ':
|
2025-11-23 15:54:50 +11:00
|
|
|
case '\t': // Also handle tabs as whitespace
|
2025-11-23 13:37:08 +11:00
|
|
|
if (!inString && !inChar) {
|
|
|
|
|
if (bufLen > 0 && !isComment) {
|
|
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
bufLen = 0;
|
2025-11-23 15:54:50 +11:00
|
|
|
memset(buf, 0, sizeof(buf));
|
2025-11-23 13:37:08 +11:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
case '\r': // Handle Windows line endings
|
|
|
|
|
// Just skip carriage returns
|
|
|
|
|
break;
|
|
|
|
|
|
2025-11-23 13:37:08 +11:00
|
|
|
default:
|
|
|
|
|
if (!isComment) {
|
|
|
|
|
buf[bufLen++] = c;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-23 15:54:50 +11:00
|
|
|
// Handle any remaining content at end of file
|
|
|
|
|
if (bufLen > 0 && !isComment) {
|
2025-11-23 13:37:08 +11:00
|
|
|
buf[bufLen] = '\0';
|
|
|
|
|
Token tok;
|
|
|
|
|
tok.text = strdup(buf);
|
2025-11-23 15:54:50 +11:00
|
|
|
if (!tok.text) {
|
|
|
|
|
perror("Failed to duplicate token string");
|
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
}
|
2025-11-23 13:37:08 +11:00
|
|
|
addTokenToLine(¤tLine, tok);
|
|
|
|
|
}
|
2025-11-23 15:54:50 +11:00
|
|
|
|
|
|
|
|
// Add final line if it has content
|
2025-11-23 13:37:08 +11:00
|
|
|
if (currentLine.count > 0) {
|
|
|
|
|
addLineToLexed(&result, currentLine);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|