Refactor token matching logic

This commit is contained in:
2026-02-21 14:46:03 +11:00
parent f325b8deef
commit 14e3fd82a5
2 changed files with 99 additions and 61 deletions

View File

@@ -6,19 +6,89 @@
#include "../include/ansii.h"
#include <ctype.h>
#define SUPER_SILLY_MODE
struct _SolsTokenTypeMap SolsTokenTypeMap[] = {
{"puts", STT_KW_STRUCT},
{"if", STT_KW_IF},
{"while", STT_KW_WHILE},
{"def", STT_KW_DEF},
{"struct", STT_KW_STRUCT},
{"{", STT_OPEN_CURLY},
{"}", STT_CLOSE_CURLY},
{"(", STT_OPEN_PAREN},
{")", STT_CLOSE_PAREN},
{"+", STT_OP_ADD},
{"-", STT_OP_DIV},
{"*", STT_OP_MUL},
{"/", STT_OP_DIV},
{"=", STT_OP_SET},
{"+=", STT_OP_ADDTO},
{"-=", STT_OP_SUBTO},
{"*=", STT_OP_MULTO},
{"/=", STT_OP_DIVTO},
{"++", STT_OP_INCREMENT},
{"--", STT_OP_DECREMENT},
{"==", STT_OP_EQUAL},
{"!=", STT_OP_INEQUAL},
{">", STT_OP_GREATER},
{"<", STT_OP_LESSER},
{">=", STT_OP_EQGREATER},
{"<=", STT_OP_EQLESSER},
// Shh, this is our little secret
// Your reward for actually reading the source code
// Enable this by adding -DSUPER_SILLY_MODE to your
// compile flags (not recommended for production)
#ifdef SUPER_SILLY_MODE
{"plus", STT_OP_ADD},
{"minus", STT_OP_SUB},
{"times", STT_OP_MUL},
{"dividedby", STT_OP_DIV},
{"then", STT_OPEN_CURLY},
{"do", STT_OPEN_CURLY},
{"end", STT_CLOSE_CURLY},
{"is", STT_OP_SET},
{"equals", STT_OP_EQUAL},
{"greaterthan", STT_OP_GREATER},
{"lesserthan", STT_OP_LESSER},
{"increment", STT_OP_INCREMENT},
{"decrement", STT_OP_DECREMENT},
{"adds", STT_OP_ADDTO},
{"subtracts", STT_OP_SUBTO},
{"multiplies", STT_OP_MULTO},
{"divides", STT_OP_DIVTO},
#endif
};
// Linearly scans SolsTokenTypeMap for an entry whose string matches input.
// Returns:
//   Success: the SolsTokenType paired with the matching string
//   Failure: Nothing (no entry matched; the token is likely an identifier)
ResultType(SolsTokenType, Nothing) getTokenType(const char* input) {
    const size_t count = sizeof(SolsTokenTypeMap) / sizeof(SolsTokenTypeMap[0]);
    size_t idx = 0;
    while (idx < count) {
        if (strcmp(SolsTokenTypeMap[idx].str, input) == 0) {
            return Success(SolsTokenType, Nothing, SolsTokenTypeMap[idx].type);
        }
        idx++;
    }
    return Error(SolsTokenType, Nothing, {});
}
ResultType(SolsLexer, charptr) createLexer(char* input) {
// Copy input into the new lexer struct
char* inputcopy = malloc(strlen(input) + 1);
if (inputcopy == NULL) {
return Error(SolsLexer, charptr, "Couldn't copy string into lexer (in createLexer() function)");
}
strcpy(inputcopy, input);
// Create SolsTokens
ResultType(SolsTokens, charptr) tokens = createSolsTokens();
if (tokens.error) {
Estr e = CREATE_ESTR(tokens.as.error);
APPEND_ESTR(e, " (in createLexer() function)");
return Error(SolsLexer, charptr, e.str);
}
// Construct and return lexer
SolsLexer lexer = {
.input = inputcopy,
.inputsize = strlen(inputcopy),
@@ -29,23 +99,31 @@ ResultType(SolsLexer, charptr) createLexer(char* input) {
}
// Looks `ahead` characters past the lexer's current position without
// advancing it.
// Returns:
//   Success: the character at position current + ahead
//   Failure: Nothing (no input buffer, or position out of bounds)
// NOTE(review): the bound uses `>` rather than `>=`, so an index equal to
// inputsize is accepted — since inputsize is strlen(input), that reads the
// terminating '\0'. Confirm this is intentional before tightening.
ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) {
    // Bail out early if there is nothing to read from
    if (!lexer->input) {
        return Error(char, Nothing, {});
    }
    size_t pos = lexer->current + ahead;
    if (pos > lexer->inputsize) {
        return Error(char, Nothing, {});
    }
    // In bounds: hand the character back without moving the cursor
    return Success(char, Nothing, lexer->input[pos]);
}
// Returns the character at the lexer's current position and advances the
// position by one.
// Returns:
//   Success: the consumed character
//   Failure: Nothing (no input buffer, or already past the last character)
ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) {
    // Bail out early if there is nothing to read from
    if (!lexer->input) {
        return Error(char, Nothing, {});
    }
    // current + 1 > inputsize means current is already at or past the end
    if (lexer->current + 1 > lexer->inputsize) {
        return Error(char, Nothing, {});
    }
    // In bounds: grab the character, then advance the cursor
    char consumed = lexer->input[lexer->current];
    lexer->current++;
    return Success(char, Nothing, consumed);
}
@@ -58,13 +136,17 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
strncpy(tokencopy, token + 1, strlen(token) - 2);
tokencopy[strlen(token) - 2] = '\0';
// Create a literal
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, tokencopy);
// Free our copy of the string, createSolsLiteral creates a copy
free(tokencopy);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
// Construct and return the token
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
@@ -242,67 +324,10 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) {
return Success(SolsToken, charptr, tok);
}
// FIXME: do this better sometime
if (strcmp(token, "puts") == 0) {
return Success(SolsToken, charptr, {STT_KW_PUTS});
}
if (strcmp(token, "if") == 0) {
return Success(SolsToken, charptr, {STT_KW_IF});
}
if (strcmp(token, "while") == 0) {
return Success(SolsToken, charptr, {STT_KW_WHILE});
}
if (strcmp(token, "def") == 0) {
return Success(SolsToken, charptr, {STT_KW_DEF});
}
if (strcmp(token, "struct") == 0) {
return Success(SolsToken, charptr, {STT_KW_STRUCT});
}
// Shh, this is our little secret now
if (strcmp(token, "{") == 0 || strcmp(token, "then") == 0) {
return Success(SolsToken, charptr, {STT_OPEN_CURLY});
}
if (strcmp(token, "}") == 0 || strcmp(token, "end") == 0) {
return Success(SolsToken, charptr, {STT_CLOSE_CURLY});
}
if (strcmp(token, "(") == 0) {
return Success(SolsToken, charptr, {STT_OPEN_PAREN});
}
if (strcmp(token, ")") == 0) {
return Success(SolsToken, charptr, {STT_CLOSE_PAREN});
}
if (strcmp(token, "+") == 0 || strcmp(token, "plus") == 0) {
return Success(SolsToken, charptr, {STT_OP_ADD});
}
if (strcmp(token, "-") == 0 || strcmp(token, "minus") == 0) {
return Success(SolsToken, charptr, {STT_OP_SUB});
}
if (strcmp(token, "*") == 0 || strcmp(token, "times") == 0) {
return Success(SolsToken, charptr, {STT_OP_MUL});
}
if (strcmp(token, "/") == 0 || strcmp(token, "dividedby") == 0) {
return Success(SolsToken, charptr, {STT_OP_DIV});
}
if (strcmp(token, "=") == 0 || strcmp(token, "is") == 0) {
return Success(SolsToken, charptr, {STT_OP_SET});
}
if (strcmp(token, "+=") == 0) {
return Success(SolsToken, charptr, {STT_OP_ADDTO});
}
if (strcmp(token, "-=") == 0) {
return Success(SolsToken, charptr, {STT_OP_SUBTO});
}
if (strcmp(token, "*=") == 0) {
return Success(SolsToken, charptr, {STT_OP_MULTO});
}
if (strcmp(token, "/=") == 0) {
return Success(SolsToken, charptr, {STT_OP_DIVTO});
}
if (strcmp(token, "++") == 0) {
return Success(SolsToken, charptr, {STT_OP_INCREMENT});
}
if (strcmp(token, "--") == 0) {
return Success(SolsToken, charptr, {STT_OP_DECREMENT});
// Find if it's a reserved keyword/operator
ResultType(SolsTokenType, Nothing) result = getTokenType(token);
if (!result.error) {
return Success(SolsToken, charptr, {result.as.success});
}
// No appropriate token found, it's an identifier (I hope)

View File

@@ -12,6 +12,10 @@
#include "SolsToken.h"
#include "SolsLiteral.h"
// A map containing all corresponding strs and token types.
// Use the getTokenType() function to search this
extern struct _SolsTokenTypeMap {char* str; SolsTokenType type;} SolsTokenTypeMap[];
// Represents the current state of the lexer.
// .input is the Solstice program as written by the user.
// .output is the lexed Solstice program, which is constructed by the lex() function.
@@ -56,8 +60,17 @@ ResultType(char, Nothing) lexerConsume(SolsLexer* lexer);
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsToken, charptr) identifyToken(const char* token);
Result(SolsTokenType, Nothing);
// Helper function to convert a char* into a SolsTokenType using the SolsTokenTypeMap.
// Returns:
// Success: The corresponding SolsTokenType
// Failure: Nothing (meaning the token is likely an identifier)
ResultType(SolsTokenType, Nothing) getTokenType(const char* input);
// Helper function to lex type signatures into tokens
// FIXME this function is a work in progress
ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer);
#endif