#include "lexer.h"
#include "exception.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/* Initial capacity, in bytes, of the buffer that collects a numeric literal. */
enum { NUMBER_BUFFER_INITIAL_CAP = 8 };

/*
 * Reports an allocation failure at the lexer's current line/position and
 * terminates the process with exit status 1.
 */
static void failAllocation(const Lexer *lexer)
{
    exceptionMessage(EXCEPTION_MEMORY_ALLOCATION_FAILURE,
                     "Lexer failed to allocate memory.",
                     lexer->lineNumber, lexer->position);
    exit(1);
}

/**
 * Creates a lexer over `source` and loads its first character.
 *
 * @param source NUL-terminated input text. The lexer keeps the pointer and
 *               does not copy the text.
 * @return A lexer whose currentChar is the first character of `source`
 *         (or '\0' when `source` is empty), with lineNumber set to 1.
 */
Lexer initLexer(char *source)
{
    Lexer newLexer = {
        .source = source,
        .position = -1,
        .readPosition = 0,
        .lineNumber = 1,
        .currentChar = '\0'
    };

    readChar(&newLexer);
    return newLexer;
}

/**
 * Advances the lexer by one character.
 *
 * Sets currentChar to the character at readPosition, or to '\0' once
 * readPosition is at or beyond the end of the source. position becomes the
 * old readPosition and readPosition is incremented, also after the end has
 * been reached.
 */
void readChar(Lexer *lexer)
{
    /* readPosition starts at 0 and only grows, so the cast cannot wrap. */
    if ((size_t)lexer->readPosition >= strlen(lexer->source)) {
        lexer->currentChar = '\0';
    } else {
        lexer->currentChar = lexer->source[lexer->readPosition];
    }

    lexer->position = lexer->readPosition;
    lexer->readPosition++;
}

/**
 * Skips spaces, tabs, carriage returns and newlines, incrementing lineNumber
 * for every newline consumed. On return currentChar is the first
 * non-whitespace character (possibly '\0').
 */
void skipWhitespace(Lexer *lexer)
{
    for (;;) {
        switch (lexer->currentChar) {
        case '\n':
            lexer->lineNumber++;
            /* fallthrough */
        case ' ':
        case '\t':
        case '\r':
            readChar(lexer);
            break;
        default:
            return;
        }
    }
}

/**
 * Reads a run of digits and dots starting at the current character.
 *
 * On success the lexer is left on the first character AFTER the number, so
 * the caller must not advance again.
 *
 * When a second '.' is met, EXCEPTION_MALFORMED_NUMBER is reported and a
 * TOKEN_ILLEGAL is returned whose literal is the text read so far including
 * the offending dot; that dot is consumed.
 *
 * @return TOKEN_INT when the literal has no dot, TOKEN_FLOAT when it has
 *         exactly one, TOKEN_ILLEGAL when it has more.
 *
 * The literal is a heap-allocated, NUL-terminated string.
 * NOTE(review): this function never frees it; who releases it depends on how
 * NEW_TOKEN and the token consumers treat the literal - confirm.
 */
Token readNumber(Lexer *lexer)
{
    size_t length = 0;
    size_t capacity = NUMBER_BUFFER_INITIAL_CAP;
    int dotCount = 0;

    char *output = malloc(capacity);
    if (!output) {
        failAllocation(lexer);
    }

    /* <ctype.h> functions require a value representable as unsigned char. */
    while (isdigit((unsigned char)lexer->currentChar) || lexer->currentChar == '.') {
        bool extraDot = false;

        if (lexer->currentChar == '.') {
            dotCount++;
            extraDot = dotCount > 1;
        }

        /* Keep room for this character plus the terminating NUL. */
        if (length + 1 >= capacity) {
            capacity *= 2;
            char *grown = realloc(output, capacity);
            if (!grown) {
                failAllocation(lexer);
            }
            output = grown;
        }

        output[length++] = lexer->currentChar;

        if (extraDot) {
            exceptionMessage(EXCEPTION_MALFORMED_NUMBER,
                             "Too many dots in decimal.",
                             lexer->lineNumber, lexer->position);
            output[length] = '\0';

            /*
             * Build the token while the lexer is still on the offending dot,
             * then step past it so the next token starts after it.
             */
            Token illegal = NEW_TOKEN(lexer, TOKEN_ILLEGAL, output);
            readChar(lexer);
            return illegal;
        }

        readChar(lexer);
    }

    output[length] = '\0';

    if (dotCount == 0) {
        return NEW_TOKEN(lexer, TOKEN_INT, output);
    }
    return NEW_TOKEN(lexer, TOKEN_FLOAT, output);
}

/**
 * Skips leading whitespace and returns the next token.
 *
 * Single-character operators and parentheses yield their matching token type,
 * the end of input yields TOKEN_EOF with the literal "EOF", a digit starts a
 * number (see readNumber), and any other character yields TOKEN_ILLEGAL with
 * that character as a one-character literal.
 *
 * The lexer is left on the first character after the returned token.
 */
Token nextToken(Lexer *lexer)
{
    Token tok;

    skipWhitespace(lexer);

    switch (lexer->currentChar) {
    case '+':
        tok = NEW_TOKEN(lexer, TOKEN_PLUS, "+");
        break;
    case '-':
        tok = NEW_TOKEN(lexer, TOKEN_MINUS, "-");
        break;
    case '*':
        tok = NEW_TOKEN(lexer, TOKEN_ASTERISK, "*");
        break;
    case '/':
        tok = NEW_TOKEN(lexer, TOKEN_SLASH, "/");
        break;
    case '%':
        tok = NEW_TOKEN(lexer, TOKEN_MODULUS, "%");
        break;
    case '^':
        tok = NEW_TOKEN(lexer, TOKEN_POW, "^");
        break;
    case '(':
        tok = NEW_TOKEN(lexer, TOKEN_LPAREN, "(");
        break;
    case ')':
        tok = NEW_TOKEN(lexer, TOKEN_RPAREN, ")");
        break;
    case '\0': // EOF
        tok = NEW_TOKEN(lexer, TOKEN_EOF, "EOF");
        break;
    default: {
        if (isdigit((unsigned char)lexer->currentChar)) {
            /*
             * readNumber has already moved past the number; advancing again
             * here would drop the character that follows it.
             */
            return readNumber(lexer);
        }

        /*
         * The literal must be a NUL-terminated string that stays valid after
         * the lexer moves on, so copy the character instead of pointing at
         * lexer->currentChar.
         * NOTE(review): heap-allocated like number literals; confirm who
         * frees token literals.
         */
        char *literal = malloc(2);
        if (!literal) {
            failAllocation(lexer);
        }
        literal[0] = lexer->currentChar;
        literal[1] = '\0';

        tok = NEW_TOKEN(lexer, TOKEN_ILLEGAL, literal);
        break;
    }
    }

    readChar(lexer);
    return tok;
}