Files
VMBL/src/sylt/lexer.c

161 lines
3.1 KiB
C
Raw Normal View History

#include "lexer.h"
2025-12-26 18:56:51 +11:00
#include "exception.h"
#include <string.h>
2025-12-26 18:56:51 +11:00
#include <ctype.h>
#include <stdlib.h>
2025-12-26 18:56:51 +11:00
#include <stdio.h>
Lexer initLexer(char *source) {
Lexer newLexer = {
.source = source,
.position = -1,
.readPosition = 0,
.lineNumber = 1,
.currentChar = '\0'
};
2025-12-26 18:56:51 +11:00
readChar(&newLexer);
return newLexer;
}
/*
 * Advances the lexer by one character.
 *
 * currentChar becomes the character at readPosition, or '\0' once
 * readPosition has reached the end of the source string. position is set to
 * the index just read and readPosition moves one step further (it keeps
 * increasing even after the end of input has been reached).
 */
void readChar(Lexer *lexer) {
    lexer->currentChar = (lexer->readPosition >= strlen(lexer->source))
        ? '\0'
        : lexer->source[lexer->readPosition];

    lexer->position = lexer->readPosition;
    lexer->readPosition += 1;
}
/*
 * Consumes consecutive spaces, tabs, carriage returns and newlines, leaving
 * the lexer on the first non-whitespace character (or '\0' at end of input).
 * Each newline consumed increments the lexer's line number.
 */
void skipWhitespace(Lexer *lexer) {
    while (lexer->currentChar == ' '
        || lexer->currentChar == '\t'
        || lexer->currentChar == '\n'
        || lexer->currentChar == '\r') {
        if (lexer->currentChar == '\n') {
            lexer->lineNumber++;
        }
        readChar(lexer);
    }
}
/*
 * Scans a numeric literal beginning at the lexer's current character.
 *
 * Digits and at most one '.' are collected into a heap-allocated,
 * NUL-terminated string which becomes the token's literal.
 *
 * Returns:
 *   - TOKEN_INT   when the literal contains no '.'
 *   - TOKEN_FLOAT when the literal contains exactly one '.'
 *   - TOKEN_ILLEGAL when a second '.' is met; the literal then holds the text
 *     scanned so far including the offending '.', and an
 *     EXCEPTION_MALFORMED_NUMBER message is reported.
 *
 * On a normal return the lexer is left on the first character that is not
 * part of the number. On the malformed path it is left on the offending '.'.
 *
 * Allocation failure is reported and terminates the process.
 *
 * NOTE(review): the literal is handed to NEW_TOKEN as a heap pointer on every
 * path; this assumes the token (or its consumer) takes ownership -- confirm
 * against the NEW_TOKEN definition.
 */
Token readNumber(Lexer *lexer) {
    size_t length = 0;
    size_t capacity = 8;
    int dotCount = 0;

    char *output = malloc(capacity);
    if (!output) {
        exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE, "Lexer failed to allocate memory.", lexer->lineNumber, lexer->position);
        exit(1);
    }

    /* The cast keeps isdigit() defined for negative char values. */
    while (isdigit((unsigned char)lexer->currentChar) || lexer->currentChar == '.') {
        int malformed = 0;

        if (lexer->currentChar == '.') {
            dotCount++;
            malformed = (dotCount > 1);
        }

        /* Always keep room for this character plus the terminating NUL. */
        if (length + 2 > capacity) {
            char *grown = realloc(output, capacity * 2);
            if (!grown) {
                free(output);
                exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE, "Lexer failed to allocate memory.", lexer->lineNumber, lexer->position);
                exit(1);
            }
            output = grown;
            capacity *= 2;
        }

        output[length++] = lexer->currentChar;

        if (malformed) {
            /*
             * Report the error and return the text scanned so far. The heap
             * buffer is reused so the literal outlives this call and nothing
             * is leaked or written out of bounds.
             */
            exceptionMessage(EXCEPTION_MALFORMED_NUMBER, "Too many dots in decimal.", lexer->lineNumber, lexer->position);
            output[length] = '\0';
            return NEW_TOKEN(lexer, TOKEN_ILLEGAL, output);
        }

        readChar(lexer);
    }

    output[length] = '\0';

    if (dotCount == 0) {
        return NEW_TOKEN(lexer, TOKEN_INT, output);
    }
    return NEW_TOKEN(lexer, TOKEN_FLOAT, output);
}
/*
 * Produces the next token from the input.
 *
 * Leading whitespace is skipped first. Single-character operators and
 * parentheses map to their token types, '\0' yields TOKEN_EOF, a digit starts
 * a number (see readNumber), and anything else yields TOKEN_ILLEGAL whose
 * literal is that single character.
 *
 * After a single-character token the lexer is advanced past it. After a
 * number the lexer is NOT advanced again: readNumber already stops on the
 * first character following the number, so an extra advance would silently
 * drop that character (e.g. the '+' in "1+2").
 *
 * NOTE(review): the ILLEGAL literal is heap-allocated so that it is a proper
 * NUL-terminated string independent of lexer state; this assumes the token
 * consumer owns/frees literals as it would for number tokens -- confirm.
 */
Token nextToken(Lexer *lexer) {
    Token tok;

    skipWhitespace(lexer);

    switch (lexer->currentChar) {
    case '+':
        tok = NEW_TOKEN(lexer, TOKEN_PLUS, "+");
        break;
    case '-':
        tok = NEW_TOKEN(lexer, TOKEN_MINUS, "-");
        break;
    case '*':
        tok = NEW_TOKEN(lexer, TOKEN_ASTERISK, "*");
        break;
    case '/':
        tok = NEW_TOKEN(lexer, TOKEN_SLASH, "/");
        break;
    case '%':
        tok = NEW_TOKEN(lexer, TOKEN_MODULUS, "%");
        break;
    case '^':
        tok = NEW_TOKEN(lexer, TOKEN_POW, "^");
        break;
    case '(':
        tok = NEW_TOKEN(lexer, TOKEN_LPAREN, "(");
        break;
    case ')':
        tok = NEW_TOKEN(lexer, TOKEN_RPAREN, ")");
        break;
    case '\0': // EOF
        tok = NEW_TOKEN(lexer, TOKEN_EOF, "EOF");
        break;
    default:
        /* The cast keeps isdigit() defined for negative char values. */
        if (isdigit((unsigned char)lexer->currentChar)) {
            tok = readNumber(lexer);
            /*
             * readNumber only stops on a '.' when it rejected the number for
             * having too many dots; step over that dot so it is not reported
             * a second time. Otherwise the lexer is already positioned on
             * the next unread character.
             */
            if (lexer->currentChar == '.') {
                readChar(lexer);
            }
            return tok;
        } else {
            /* Copy the character into its own NUL-terminated string. */
            char *literal = malloc(2);
            if (!literal) {
                exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE, "Lexer failed to allocate memory.", lexer->lineNumber, lexer->position);
                exit(1);
            }
            literal[0] = lexer->currentChar;
            literal[1] = '\0';
            tok = NEW_TOKEN(lexer, TOKEN_ILLEGAL, literal);
        }
        break;
    }

    /* Step past the single character that formed this token. */
    readChar(lexer);
    return tok;
}