From 14e3fd82a5af5997135c96f13b2cfac29ced0f9c Mon Sep 17 00:00:00 2001
From: Maxwell Jeffress
Date: Sat, 21 Feb 2026 14:46:03 +1100
Subject: [PATCH] Refactor token matching logic

---
 src/lexer/lexer.c | 147 +++++++++++++++++++++++++++-------------------
 src/lexer/lexer.h | 13 ++++
 2 files changed, 99 insertions(+), 61 deletions(-)

diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index fdde2b8..158ed92 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -6,19 +6,89 @@
 #include "../include/ansii.h"
 #include
 
+#define SUPER_SILLY_MODE
+
+struct _SolsTokenTypeMap SolsTokenTypeMap[] = {
+    {"puts", STT_KW_PUTS},
+    {"if", STT_KW_IF},
+    {"while", STT_KW_WHILE},
+    {"def", STT_KW_DEF},
+    {"struct", STT_KW_STRUCT},
+    {"{", STT_OPEN_CURLY},
+    {"}", STT_CLOSE_CURLY},
+    {"(", STT_OPEN_PAREN},
+    {")", STT_CLOSE_PAREN},
+    {"+", STT_OP_ADD},
+    {"-", STT_OP_SUB},
+    {"*", STT_OP_MUL},
+    {"/", STT_OP_DIV},
+    {"=", STT_OP_SET},
+    {"+=", STT_OP_ADDTO},
+    {"-=", STT_OP_SUBTO},
+    {"*=", STT_OP_MULTO},
+    {"/=", STT_OP_DIVTO},
+    {"++", STT_OP_INCREMENT},
+    {"--", STT_OP_DECREMENT},
+    {"==", STT_OP_EQUAL},
+    {"!=", STT_OP_INEQUAL},
+    {">", STT_OP_GREATER},
+    {"<", STT_OP_LESSER},
+    {">=", STT_OP_EQGREATER},
+    {"<=", STT_OP_EQLESSER},
+    // Shh, this is our little secret
+    // Your reward for actually reading the source code
+    // Gated on SUPER_SILLY_MODE (not recommended for production). It is
+    // #defined at the top of this file; drop that to use -DSUPER_SILLY_MODE
+    #ifdef SUPER_SILLY_MODE
+    {"plus", STT_OP_ADD},
+    {"minus", STT_OP_SUB},
+    {"times", STT_OP_MUL},
+    {"dividedby", STT_OP_DIV},
+    {"then", STT_OPEN_CURLY},
+    {"do", STT_OPEN_CURLY},
+    {"end", STT_CLOSE_CURLY},
+    {"is", STT_OP_SET},
+    {"equals", STT_OP_EQUAL},
+    {"greaterthan", STT_OP_GREATER},
+    {"lesserthan", STT_OP_LESSER},
+    {"increment", STT_OP_INCREMENT},
+    {"decrement", STT_OP_DECREMENT},
+    {"adds", STT_OP_ADDTO},
+    {"subtracts", STT_OP_SUBTO},
+    {"multiplies", STT_OP_MULTO},
+    {"divides", STT_OP_DIVTO},
+    #endif
+};
+
+ResultType(SolsTokenType, Nothing) getTokenType(const char* input) {
+    size_t mapsize = sizeof(SolsTokenTypeMap) / sizeof(struct _SolsTokenTypeMap);
+    for (size_t i = 0; i < mapsize; i++) {
+        if (strcmp(input, SolsTokenTypeMap[i].str) == 0) {
+            return Success(SolsTokenType, Nothing, SolsTokenTypeMap[i].type);
+        }
+    }
+    return Error(SolsTokenType, Nothing, {});
+}
+
 ResultType(SolsLexer, charptr) createLexer(char* input) {
+
+    // Copy input into the new lexer struct
     char* inputcopy = malloc(strlen(input) + 1);
     if (inputcopy == NULL) {
         return Error(SolsLexer, charptr, "Couldn't copy string into lexer (in createLexer() function)");
     }
     strcpy(inputcopy, input);
+
+    // Create SolsTokens
     ResultType(SolsTokens, charptr) tokens = createSolsTokens();
     if (tokens.error) {
         Estr e = CREATE_ESTR(tokens.as.error);
         APPEND_ESTR(e, " (in createLexer() function)");
         return Error(SolsLexer, charptr, e.str);
     }
+
+    // Construct and return lexer
     SolsLexer lexer = {
         .input = inputcopy,
         .inputsize = strlen(inputcopy),
@@ -29,23 +99,31 @@ ResultType(SolsLexer, charptr) createLexer(char* input) {
 }
 
 ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) {
+
+    // Bounds and null checking
     if (lexer->input == NULL) {
         return Error(char, Nothing, {});
     }
     if (lexer->current + ahead > lexer->inputsize) {
         return Error(char, Nothing, {});
     }
+
+    // Char is within bounds, return it
     return Success(char, Nothing, lexer->input[lexer->current + ahead]);
 }
 
 ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) {
+
+    // Bounds and null checking
     if (lexer->input == NULL) {
         return Error(char, Nothing, {});
     }
     if (lexer->current + 1 > lexer->inputsize) {
         return Error(char, Nothing, {});
     }
+
+    // Char is within bounds, return and increment
     return Success(char, Nothing, lexer->input[lexer->current++]);
 }
 
@@ -58,13 +136,17 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
         strncpy(tokencopy, token + 1, strlen(token) - 2);
         tokencopy[strlen(token) - 2] = '\0';
 
+        // Create a literal
         ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, tokencopy);
+        // Free our copy of the string, createSolsLiteral creates a copy
         free(tokencopy);
         if (literal.error) {
             Estr str = CREATE_ESTR(literal.as.error);
             APPEND_ESTR(str, " (in identifyToken() function)");
             return Error(SolsToken, charptr, str.str);
         }
+
+        // Construct and return the token
         SolsToken tok = {
             .type = STT_LITERAL,
             .as.literal = literal.as.success
@@ -242,67 +324,10 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
         return Success(SolsToken, charptr, tok);
     }
 
-    // FIXME do tihs better sometime
-    if (strcmp(token, "puts") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_PUTS});
-    }
-    if (strcmp(token, "if") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_IF});
-    }
-    if (strcmp(token, "while") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_WHILE});
-    }
-    if (strcmp(token, "def") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_DEF});
-    }
-    if (strcmp(token, "struct") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_STRUCT});
-    }
-    // Shh, this is our little secret now
-    if (strcmp(token, "{") == 0 || strcmp(token, "then") == 0) {
-        return Success(SolsToken, charptr, {STT_OPEN_CURLY});
-    }
-    if (strcmp(token, "}") == 0 || strcmp(token, "end") == 0) {
-        return Success(SolsToken, charptr, {STT_CLOSE_CURLY});
-    }
-    if (strcmp(token, "(") == 0) {
-        return Success(SolsToken, charptr, {STT_OPEN_PAREN});
-    }
-    if (strcmp(token, ")") == 0) {
-        return Success(SolsToken, charptr, {STT_CLOSE_PAREN});
-    }
-    if (strcmp(token, "+") == 0 || strcmp(token, "plus") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_ADD});
-    }
-    if (strcmp(token, "-") == 0 || strcmp(token, "minus") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SUB});
-    }
-    if (strcmp(token, "*") == 0 || strcmp(token, "times") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_MUL});
-    }
-    if (strcmp(token, "/") == 0 || strcmp(token, "dividedby") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DIV});
-    }
-    if (strcmp(token, "=") == 0 || strcmp(token, "is") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SET});
-    }
-    if (strcmp(token, "+=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_ADDTO});
-    }
-    if (strcmp(token, "-=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SUBTO});
-    }
-    if (strcmp(token, "*=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_MULTO});
-    }
-    if (strcmp(token, "/=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DIVTO});
-    }
-    if (strcmp(token, "++") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_INCREMENT});
-    }
-    if (strcmp(token, "--") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DECREMENT});
+    // Find if it's a reserved keyword/operator
+    ResultType(SolsTokenType, Nothing) result = getTokenType(token);
+    if (!result.error) {
+        return Success(SolsToken, charptr, {result.as.success});
     }
 
     // No appropriate token found, it's an identifier (I hope)
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index 96d5f8e..6efa42b 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -12,6 +12,10 @@
 #include "SolsToken.h"
 #include "SolsLiteral.h"
 
+// A map containing all corresponding strs and token types.
+// Use the getTokenType() function to search this
+extern struct _SolsTokenTypeMap {char* str; SolsTokenType type;} SolsTokenTypeMap[];
+
 // Represents the current state of the lexer.
 // .input is the Solstice program as written by the user.
 // .output is the lexed Solstice program, which is constructed by the lex() function.
@@ -56,8 +60,17 @@ ResultType(char, Nothing) lexerConsume(SolsLexer* lexer);
 // Failure: char* detailing what went wrong (usually memory failure)
 ResultType(SolsToken, charptr) identifyToken(const char* token);
 
+Result(SolsTokenType, Nothing);
+
+// Helper function to convert a char* into a SolsTokenType using the SolsTokenTypeMap.
+// Returns:
+// Success: The corresponding SolsTokenType
+// Failure: Nothing (meaning the token is likely an identifier)
+ResultType(SolsTokenType, Nothing) getTokenType(const char* input);
+
 // Helper function to lex type signatures into tokens
 // FIXME this function is a work in progress
 ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer);
 
+
 #endif