2025-12-26 12:28:45 +11:00
|
|
|
#include "lexer.h"
|
2025-12-26 18:56:51 +11:00
|
|
|
#include "exception.h"
|
2025-12-26 12:28:45 +11:00
|
|
|
#include <string.h>
|
2025-12-26 18:56:51 +11:00
|
|
|
#include <ctype.h>
|
2025-12-26 12:28:45 +11:00
|
|
|
#include <stdlib.h>
|
2025-12-26 18:56:51 +11:00
|
|
|
#include <stdio.h>
|
2025-12-26 12:28:45 +11:00
|
|
|
|
|
|
|
|
Lexer initLexer(char *source) {
|
|
|
|
|
Lexer newLexer = {
|
|
|
|
|
.source = source,
|
|
|
|
|
.position = -1,
|
|
|
|
|
.readPosition = 0,
|
|
|
|
|
.lineNumber = 1,
|
|
|
|
|
.currentChar = '\0'
|
|
|
|
|
};
|
2025-12-26 18:56:51 +11:00
|
|
|
readChar(&newLexer);
|
2025-12-26 12:28:45 +11:00
|
|
|
return newLexer;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Advance the lexer by one character.
 *
 * currentChar becomes the character at readPosition, or '\0' once
 * readPosition has reached the end of the source string. position is then
 * set to the index just read and readPosition moves one past it; both keep
 * increasing even after the end of input has been reached.
 *
 * NOTE(review): strlen() walks the whole source on every call; caching the
 * length in the Lexer would avoid that, but requires a struct change.
 */
void readChar(Lexer *lexer) {
    lexer->currentChar = (lexer->readPosition >= strlen(lexer->source))
                             ? '\0'
                             : lexer->source[lexer->readPosition];

    lexer->position = lexer->readPosition++;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Consume consecutive whitespace (space, tab, newline, carriage return)
 * starting at the current character, bumping lineNumber for every '\n'
 * consumed. On return, currentChar is the first non-whitespace character
 * (which may be '\0' at end of input).
 */
void skipWhitespace(Lexer *lexer) {
    for (;;) {
        if (lexer->currentChar != ' ' && lexer->currentChar != '\t' &&
            lexer->currentChar != '\n' && lexer->currentChar != '\r') {
            return;
        }

        if (lexer->currentChar == '\n') {
            lexer->lineNumber++;
        }

        readChar(lexer);
    }
}
|
|
|
|
|
|
|
|
|
|
/*
 * Append `c` to the growable heap string *buf, keeping it NUL-terminated.
 * Capacity is doubled whenever there is no room for `c` plus the terminator.
 * Reports an allocation failure through exceptionMessage() and exits.
 */
static void appendNumberChar(Lexer *lexer, char **buf, size_t *len, size_t *cap, char c) {
    if (*len + 2 > *cap) {
        size_t grown = *cap * 2;
        char *temp = realloc(*buf, grown);

        if (!temp) {
            exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE, "Lexer failed to allocate memory.", lexer->lineNumber, lexer->position);
            exit(1);
        }

        *buf = temp;
        *cap = grown;
    }

    (*buf)[(*len)++] = c;
    (*buf)[*len] = '\0';
}

/*
 * Lex an integer or decimal literal starting at the current character.
 *
 * Consumes digits and at most one '.', collecting them into a heap-allocated,
 * NUL-terminated string that is handed to NEW_TOKEN as the literal:
 *   - no '.' seen  -> TOKEN_INT
 *   - one '.' seen -> TOKEN_FLOAT
 *   - a second '.' -> EXCEPTION_MALFORMED_NUMBER is reported and a
 *     TOKEN_ILLEGAL is returned whose literal is the text read so far plus
 *     the offending '.'. The lexer is left positioned ON that '.', exactly
 *     as before, so the caller decides how to skip it.
 *
 * On normal return currentChar is the first character after the number.
 *
 * The literal is always the heap buffer. Previously the malformed path
 * returned a pointer to a stack array (dangling after return), wrote one
 * byte past its end, and leaked the heap buffer.
 * NOTE(review): who frees the literal depends on NEW_TOKEN / Token, which
 * are defined elsewhere - confirm ownership with the token consumer.
 */
Token readNumber(Lexer *lexer) {
    int dotCount = 0;
    size_t length = 0;
    size_t capacity = 8;
    char *output = malloc(capacity);

    if (!output) {
        exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE, "Lexer failed to allocate memory.", lexer->lineNumber, lexer->position);
        exit(1);
    }
    output[0] = '\0';

    /* Cast to unsigned char: isdigit() is undefined for negative char values. */
    while (isdigit((unsigned char)lexer->currentChar) || lexer->currentChar == '.') {
        if (lexer->currentChar == '.' && ++dotCount > 1) {
            exceptionMessage(EXCEPTION_MALFORMED_NUMBER, "Too many dots in decimal.", lexer->lineNumber, lexer->position);

            /* Include the offending dot in the reported literal. */
            appendNumberChar(lexer, &output, &length, &capacity, lexer->currentChar);
            return NEW_TOKEN(lexer, TOKEN_ILLEGAL, output);
        }

        appendNumberChar(lexer, &output, &length, &capacity, lexer->currentChar);
        readChar(lexer);
    }

    if (dotCount == 0)
        return NEW_TOKEN(lexer, TOKEN_INT, output);
    else
        return NEW_TOKEN(lexer, TOKEN_FLOAT, output);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Return the next token from the input, skipping leading whitespace.
 *
 * Single-character operators and parentheses map to their token types with
 * a string-literal text; '\0' yields TOKEN_EOF; a digit starts a number
 * (delegated to readNumber); anything else yields TOKEN_ILLEGAL carrying
 * the offending character as a one-character string.
 *
 * After a single-character token the lexer is advanced by one character.
 * After a number it is NOT advanced again: readNumber has already moved
 * past the literal, and the extra readChar() used to swallow the character
 * that followed every number (e.g. the '+' in "1+2").
 */
Token nextToken(Lexer *lexer) {
    /*
     * Stable, NUL-terminated one-character strings for TOKEN_ILLEGAL.
     * The previous literal, &lexer->currentChar, was not NUL-terminated and
     * was overwritten by the readChar() below. Zero-initialised static
     * storage keeps byte [1] as the terminator; byte [0] is filled on use.
     */
    enum { ILLEGAL_LITERAL_SLOTS = 256 };
    static char illegalLiterals[ILLEGAL_LITERAL_SLOTS][2];

    Token tok;

    skipWhitespace(lexer);

    switch (lexer->currentChar) {
    case '+':
        tok = NEW_TOKEN(lexer, TOKEN_PLUS, "+");
        break;
    case '-':
        tok = NEW_TOKEN(lexer, TOKEN_MINUS, "-");
        break;
    case '*':
        tok = NEW_TOKEN(lexer, TOKEN_ASTERISK, "*");
        break;
    case '/':
        tok = NEW_TOKEN(lexer, TOKEN_SLASH, "/");
        break;
    case '%':
        tok = NEW_TOKEN(lexer, TOKEN_MODULUS, "%");
        break;
    case '^':
        tok = NEW_TOKEN(lexer, TOKEN_POW, "^");
        break;
    case '(':
        tok = NEW_TOKEN(lexer, TOKEN_LPAREN, "(");
        break;
    case ')':
        tok = NEW_TOKEN(lexer, TOKEN_RPAREN, ")");
        break;

    case '\0': // EOF
        tok = NEW_TOKEN(lexer, TOKEN_EOF, "EOF");
        break;

    default:
        /* Cast to unsigned char: isdigit() is undefined for negative char values. */
        if (isdigit((unsigned char)lexer->currentChar)) {
            tok = readNumber(lexer);

            /*
             * readNumber stops on a digit/'.' only when it bailed out on a
             * second '.', leaving the lexer on that dot; skip it so it is
             * not lexed again. Otherwise the lexer is already on the first
             * character after the number and must not be advanced.
             */
            if (lexer->currentChar == '.')
                readChar(lexer);

            return tok;
        } else {
            char *literal = illegalLiterals[(unsigned char)lexer->currentChar % ILLEGAL_LITERAL_SLOTS];

            literal[0] = lexer->currentChar;
            tok = NEW_TOKEN(lexer, TOKEN_ILLEGAL, literal);
        }
        break;
    }

    readChar(lexer);
    return tok;
}
|