From 14e3fd82a5af5997135c96f13b2cfac29ced0f9c Mon Sep 17 00:00:00 2001
From: Maxwell Jeffress <maxwelljeffress@proton.me>
Date: Sat, 21 Feb 2026 14:46:03 +1100
Subject: [PATCH] Refactor token matching logic

---
 src/lexer/lexer.c | 147 +++++++++++++++++++++++++++-------------------
 src/lexer/lexer.h |  13 ++++
 2 files changed, 99 insertions(+), 61 deletions(-)

diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index fdde2b8..158ed92 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -6,19 +6,89 @@
 #include "../include/ansii.h"
 #include <ctype.h>
 
+#define SUPER_SILLY_MODE
+
+struct _SolsTokenTypeMap SolsTokenTypeMap[] = { // spelling -> token type table, searched by getTokenType()
+    {"puts", STT_KW_PUTS},
+    {"if", STT_KW_IF},
+    {"while", STT_KW_WHILE},
+    {"def", STT_KW_DEF},
+    {"struct", STT_KW_STRUCT},
+    {"{", STT_OPEN_CURLY},
+    {"}", STT_CLOSE_CURLY},
+    {"(", STT_OPEN_PAREN},
+    {")", STT_CLOSE_PAREN},
+    {"+", STT_OP_ADD},
+    {"-", STT_OP_SUB},
+    {"*", STT_OP_MUL},
+    {"/", STT_OP_DIV},
+    {"=", STT_OP_SET},
+    {"+=", STT_OP_ADDTO},
+    {"-=", STT_OP_SUBTO},
+    {"*=", STT_OP_MULTO},
+    {"/=", STT_OP_DIVTO},
+    {"++", STT_OP_INCREMENT},
+    {"--", STT_OP_DECREMENT},
+    {"==", STT_OP_EQUAL},
+    {"!=", STT_OP_INEQUAL},
+    {">", STT_OP_GREATER},
+    {"<", STT_OP_LESSER},
+    {">=", STT_OP_EQGREATER},
+    {"<=", STT_OP_EQLESSER},
+    // Shh, this is our little secret
+    // Your reward for actually reading the source code
+    // Word aliases, compiled in when SUPER_SILLY_MODE is defined (e.g. via -DSUPER_SILLY_MODE)
+    // NOTE(review): it is #defined unconditionally above, so these are currently always on
+    #ifdef SUPER_SILLY_MODE
+    {"plus", STT_OP_ADD},
+    {"minus", STT_OP_SUB},
+    {"times", STT_OP_MUL},
+    {"dividedby", STT_OP_DIV},
+    {"then", STT_OPEN_CURLY},
+    {"do", STT_OPEN_CURLY},
+    {"end", STT_CLOSE_CURLY},
+    {"is", STT_OP_SET},
+    {"equals", STT_OP_EQUAL},
+    {"greaterthan", STT_OP_GREATER},
+    {"lesserthan", STT_OP_LESSER},
+    {"increment", STT_OP_INCREMENT},
+    {"decrement", STT_OP_DECREMENT},
+    {"adds", STT_OP_ADDTO},
+    {"subtracts", STT_OP_SUBTO},
+    {"multiplies", STT_OP_MULTO},
+    {"divides", STT_OP_DIVTO},
+    #endif
+};
+
+ResultType(SolsTokenType, Nothing) getTokenType(const char* input) {
+    const size_t count = sizeof(SolsTokenTypeMap) / sizeof(SolsTokenTypeMap[0]); // number of table entries
+    for (const struct _SolsTokenTypeMap* entry = SolsTokenTypeMap; entry != SolsTokenTypeMap + count; entry++) {
+        if (strcmp(entry->str, input) == 0) { // exact spelling match
+            return Success(SolsTokenType, Nothing, entry->type);
+        }
+    }
+    return Error(SolsTokenType, Nothing, {}); // no table entry has this spelling
+}
+
 
 ResultType(SolsLexer, charptr) createLexer(char* input) {
+
+    // Copy input into the new lexer struct
     char* inputcopy = malloc(strlen(input) + 1);
     if (inputcopy == NULL) {
         return Error(SolsLexer, charptr, "Couldn't copy string into lexer (in createLexer() function)");
     }
     strcpy(inputcopy, input);
+
+    // Create SolsTokens
     ResultType(SolsTokens, charptr) tokens = createSolsTokens();
     if (tokens.error) {
         Estr e = CREATE_ESTR(tokens.as.error);
         APPEND_ESTR(e, " (in createLexer() function)");
         return Error(SolsLexer, charptr, e.str);
     }
+
+    // Construct and return lexer
     SolsLexer lexer = {
         .input = inputcopy,
         .inputsize = strlen(inputcopy),
@@ -29,23 +99,31 @@ ResultType(SolsLexer, charptr) createLexer(char* input) {
 }
 
 ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) {
+
+    // Fail if there is no input or the requested offset lies beyond inputsize
     if (lexer->input == NULL) {
         return Error(char, Nothing, {});
     }
     if (lexer->current + ahead > lexer->inputsize) {
         return Error(char, Nothing, {});
     }
+
+    // NOTE(review): offset == inputsize passes the check above (unlike lexerConsume()) - confirm intended
     return Success(char, Nothing, lexer->input[lexer->current + ahead]);
 }
 
 
 ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) {
+
+    // Fail if there is no input or the cursor has already reached inputsize
     if (lexer->input == NULL) {
         return Error(char, Nothing, {});
     }
     if (lexer->current + 1 > lexer->inputsize) {
         return Error(char, Nothing, {});
     }
+
+    // Return the char at the cursor, then advance the cursor by one
     return Success(char, Nothing, lexer->input[lexer->current++]);
 }
 
@@ -58,13 +136,17 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
             strncpy(tokencopy, token + 1, strlen(token) - 2);
             tokencopy[strlen(token) - 2] = '\0';
 
+            // Create a literal
             ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, tokencopy);
+            // Free our copy of the string, createSolsLiteral creates a copy
             free(tokencopy);
             if (literal.error) {
                 Estr str = CREATE_ESTR(literal.as.error);
                 APPEND_ESTR(str, " (in identifyToken() function)");
                 return Error(SolsToken, charptr, str.str);
             }
+
+            // Construct and return the token
             SolsToken tok = {
                 .type = STT_LITERAL,
                 .as.literal = literal.as.success
@@ -242,67 +324,10 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
         return Success(SolsToken, charptr, tok);
     }
 
-    // FIXME do tihs better sometime
-    if (strcmp(token, "puts") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_PUTS});
-    }
-    if (strcmp(token, "if") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_IF});
-    }
-    if (strcmp(token, "while") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_WHILE});
-    }
-    if (strcmp(token, "def") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_DEF});
-    }
-    if (strcmp(token, "struct") == 0) {
-        return Success(SolsToken, charptr, {STT_KW_STRUCT});
-    }
-    // Shh, this is our little secret now
-    if (strcmp(token, "{") == 0 || strcmp(token, "then") == 0) {
-        return Success(SolsToken, charptr, {STT_OPEN_CURLY});
-    }
-    if (strcmp(token, "}") == 0 || strcmp(token, "end") == 0) {
-        return Success(SolsToken, charptr, {STT_CLOSE_CURLY});
-    }
-    if (strcmp(token, "(") == 0) {
-        return Success(SolsToken, charptr, {STT_OPEN_PAREN});
-    }
-    if (strcmp(token, ")") == 0) {
-        return Success(SolsToken, charptr, {STT_CLOSE_PAREN});
-    }
-    if (strcmp(token, "+") == 0 || strcmp(token, "plus") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_ADD});
-    }
-    if (strcmp(token, "-") == 0 || strcmp(token, "minus") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SUB});
-    }
-    if (strcmp(token, "*") == 0 || strcmp(token, "times") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_MUL});
-    }
-    if (strcmp(token, "/") == 0 || strcmp(token, "dividedby") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DIV});
-    }
-    if (strcmp(token, "=") == 0 || strcmp(token, "is") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SET});
-    }
-    if (strcmp(token, "+=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_ADDTO});
-    }
-    if (strcmp(token, "-=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_SUBTO});
-    }
-    if (strcmp(token, "*=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_MULTO});
-    }
-    if (strcmp(token, "/=") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DIVTO});
-    }
-    if (strcmp(token, "++") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_INCREMENT});
-    }
-    if (strcmp(token, "--") == 0) {
-        return Success(SolsToken, charptr, {STT_OP_DECREMENT});
+    // Find if it's a reserved keyword/operator
+    ResultType(SolsTokenType, Nothing) result = getTokenType(token);
+    if (!result.error) {
+        return Success(SolsToken, charptr, {result.as.success});
     }
 
     // No appropriate token found, it's an identifier (I hope)
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index 96d5f8e..6efa42b 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -12,6 +12,10 @@
 #include "SolsToken.h"
 #include "SolsLiteral.h"
 
+// Maps each reserved keyword/operator spelling (.str) to its token type (.type).
+// The table itself is defined in lexer.c; use the getTokenType() function to search it
+extern struct _SolsTokenTypeMap {const char* str; SolsTokenType type;} SolsTokenTypeMap[];
+
 // Represents the current state of the lexer.
 // .input is the Solstice program as written by the user.
 // .output is the lexed Solstice program, which is constructed by the lex() function.
@@ -56,8 +60,17 @@ ResultType(char, Nothing) lexerConsume(SolsLexer* lexer);
 //    Failure: char* detailing what went wrong (usually memory failure)
 ResultType(SolsToken, charptr) identifyToken(const char* token);
 
+Result(SolsTokenType, Nothing);
+
+// Helper function that looks up a token spelling in the SolsTokenTypeMap (exact strcmp match).
+// Returns:
+//    Success: The SolsTokenType mapped to that spelling
+//    Failure: Nothing (no entry matched, meaning the token is likely an identifier)
+ResultType(SolsTokenType, Nothing) getTokenType(const char* input);
+
 // Helper function to lex type signatures into tokens
 // FIXME this function is a work in progress
 ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer);
 
+
 #endif