diff --git a/src/lexer/SolsToken.h b/src/lexer/SolsToken.h index d5c9f12..3de61e4 100644 --- a/src/lexer/SolsToken.h +++ b/src/lexer/SolsToken.h @@ -7,7 +7,7 @@ #include typedef enum SolsTokenType { - STT_IDENTIFIER, STT_LITERAL, STT_TYPE, STT_OPEN_CURLY, STT_CLOSE_CURLY, STT_OPEN_PAREN, STT_CLOSE_PAREN, STT_OP_ADD, STT_OP_SUB, STT_OP_MUL, STT_OP_DIV, STT_OP_SET, STT_OP_GREATER, STT_OP_LESSER, STT_OP_EQUAL, STT_OP_INEQUAL, STT_OP_EQGREATER, STT_OP_EQLESSER, STT_KW_DEF, STT_KW_STRUCT, STT_KW_PUTS, STT_KW_GROUND + STT_IDENTIFIER, STT_LITERAL, STT_TYPE, STT_OPEN_CURLY, STT_CLOSE_CURLY, STT_OPEN_PAREN, STT_CLOSE_PAREN, STT_OP_ADD, STT_OP_SUB, STT_OP_MUL, STT_OP_DIV, STT_OP_SET, STT_OP_GREATER, STT_OP_LESSER, STT_OP_EQUAL, STT_OP_INEQUAL, STT_OP_EQGREATER, STT_OP_EQLESSER, STT_KW_DEF, STT_KW_STRUCT, STT_KW_PUTS, STT_KW_IF, STT_KW_WHILE, STT_KW_NEW, STT_KW_GROUND } SolsTokenType; typedef char* charptr; diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 6c063ba..89488f2 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -1,8 +1,10 @@ #include "lexer.h" +#include "SolsLiteral.h" #include "SolsToken.h" #include "../include/error.h" #include "../include/estr.h" + ResultType(SolsLexer, charptr) createLexer(char* input) { char* inputcopy = malloc(strlen(input) + 1); if (inputcopy == NULL) { @@ -17,18 +19,183 @@ ResultType(SolsLexer, charptr) createLexer(char* input) { } SolsLexer lexer = { .input = inputcopy, + .inputsize = strlen(inputcopy), .output = tokens.as.success, .current = 0, }; return Success(SolsLexer, charptr, lexer); } +ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) { + if (lexer->input == NULL) { + return Error(char, Nothing, {}); + } + if (lexer->current + ahead > lexer->inputsize) { + return Error(char, Nothing, {}); + } + return Success(char, Nothing, lexer->input[lexer->current + ahead]); +} + + +ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) { + if (lexer->input == NULL) { + return Error(char, Nothing, {}); + } + if (lexer->current + 1 > lexer->inputsize) { + return Error(char, Nothing, {}); + } + return Success(char, Nothing, lexer->input[lexer->current++]); +} + +ResultType(SolsToken, charptr) identifyToken(const char* token) { + if (token[0] == '"') { + if (token[strlen(token) - 1] == '"') { + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, token); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + }; + return Success(SolsToken, charptr, {STT_LITERAL}); + } + return Error(SolsToken, charptr, "Unterminated string ()"); + } + + // FIXME do tihs better sometime + if (strcmp(token, "puts") == 0) { + return Success(SolsToken, charptr, {STT_KW_PUTS}); + } + if (strcmp(token, "if") == 0) { + return Success(SolsToken, charptr, {STT_KW_IF}); + } + if (strcmp(token, "while") == 0) { + return Success(SolsToken, charptr, {STT_KW_WHILE}); + } + if (strcmp(token, "def") == 0) { + return Success(SolsToken, charptr, {STT_KW_DEF}); + } + if (strcmp(token, "struct") == 0) { + return Success(SolsToken, charptr, {STT_KW_DEF}); + } + // Shh, this is our little secret now + if (strcmp(token, "{") == 0 || strcmp(token, "then")) { + return Success(SolsToken, charptr, {STT_OPEN_CURLY}); + } + if (strcmp(token, "}") == 0 || strcmp(token, "end") == 0) { + return Success(SolsToken, charptr, {STT_CLOSE_CURLY}); + } + if (strcmp(token, "(") == 0) { + return Success(SolsToken, charptr, {STT_OPEN_PAREN}); + } + if (strcmp(token, ")") == 0) { + return Success(SolsToken, charptr, {STT_CLOSE_PAREN}); + } + if (strcmp(token, "+") == 0 || strcmp(token, "plus") == 0) { + return Success(SolsToken, charptr, {STT_OP_ADD}); + } + if (strcmp(token, "-") == 0 || strcmp(token, "minus") == 0) { + return Success(SolsToken, charptr, {STT_OP_SUB}); + } + if (strcmp(token, "*") == 0 || strcmp(token, "times") == 0) { + return Success(SolsToken, charptr, {STT_OP_MUL}); + } + if (strcmp(token, "/") == 0 || strcmp(token, "dividedby") == 0) { + return Success(SolsToken, charptr, {STT_OP_DIV}); + } + if (strcmp(token, "=") == 0 || strcmp(token, "is") == 0) { + return Success(SolsToken, charptr, {STT_OP_SET}); + } + + return Success(SolsToken, charptr, {STT_IDENTIFIER}); + +} + ResultType(voidptr, charptr) lex(SolsLexer* lexer) { if (lexer->input == NULL) { return Error(voidptr, charptr, "Lexer is not initialised"); } + ResultType(SolsTokens, charptr) tokens = createSolsTokens(); + if (tokens.error) { + Estr e = CREATE_ESTR(tokens.as.error); + APPEND_ESTR(e, " (in createSolsTokens() function)"); + return Error(voidptr, charptr, e.str); + } + + lexer->output = tokens.as.success; lexer->current = 0; + Estr buf = CREATE_ESTR(""); + bool inString = false; + + size_t lineNum = 1; + size_t lineStart = 0; + Estr currentLine = CREATE_ESTR(""); + + for (; lineStart < lexer->inputsize; lineStart++) { + if (lexer->input[lineStart] == '\n') { + break; + } + char buf[] = {lexer->input[lineStart], '\0'}; + APPEND_ESTR(currentLine, buf); + } + + for (;;) { + ResultType(char, Nothing) chr = lexerConsume(lexer); + + if (chr.error) { + break; + } + + if (chr.as.success == '\n') { + for (; lineStart < lexer->inputsize; lineStart++) { + if (lexer->input[lineStart] == '\n') { + break; + } + char buf[] = {lexer->input[lineStart], '\0'}; + APPEND_ESTR(currentLine, buf); + } + lineNum ++; + } + + if (inString) { + char str[2] = { chr.as.success, '\0' }; + APPEND_ESTR(buf, str); + if (chr.as.success == '"') { + inString = false; + } + } + + switch (chr.as.success) { + case '"': { + inString = true; + APPEND_ESTR(buf, "\""); + break; + } + case ' ': { + if (!buf.destroyed) { + ResultType(SolsToken, charptr) result = identifyToken(buf.str); + if (result.error) { + Estr error = CREATE_ESTR("Parsing Error:\n"); + APPEND_ESTR(error, "On line "); + char buf[256]; + snprintf(buf, sizeof(buf), "%zu", lineNum); + APPEND_ESTR(error, buf); + APPEND_ESTR(error, "\n"); + APPEND_ESTR(error, currentLine.str); + APPEND_ESTR(error, "\n"); + APPEND_ESTR(error, "Why: "); + APPEND_ESTR(error, result.as.error); + APPEND_ESTR(error, "\n"); + return Error(voidptr, charptr, error.str); + } + } + } + } + } + return Success(voidptr, charptr, NULL); } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index abe5850..8cc6874 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -17,15 +17,31 @@ // .current represents the current character from .input being lexed. typedef struct SolsLexer { char* input; + size_t inputsize; SolsTokens output; size_t current; } SolsLexer; -// Creates a lexer for use by the lex() function. +// An empty struct lmao +typedef struct Nothing {} Nothing; + Result(SolsLexer, charptr); + +// Creates a lexer for use by the lex() function. ResultType(SolsLexer, charptr) createLexer(char* input); // Uses the provided lexer to scan the code, and create tokens. ResultType(voidptr, charptr) lex(SolsLexer* lexer); +Result(char, Nothing); + +// Peeks at the next token in the lexer. +ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead); + +// Consumes the next token in the lexer. +ResultType(char, Nothing) lexerConsume(SolsLexer* lexer); + +// Helper function to classify tokens +ResultType(SolsToken, charptr) identifyToken(const char* token); + #endif diff --git a/src/main.c b/src/main.c index 71fa585..fe5e574 100644 --- a/src/main.c +++ b/src/main.c @@ -2,10 +2,14 @@ #include int main() { - ResultType(SolsLexer, charptr) lexer = createLexer("puts \"dingus\""); + ResultType(SolsLexer, charptr) lexer = createLexer("puts \"dingus\"\n"); if (lexer.error) { printf("Error while creating lexer: %s", lexer.as.error); } - lex(&lexer.as.success); + ResultType(voidptr, charptr) lexed = lex(&lexer.as.success); + if (lexed.error) { + printf("%s", lexed.as.error); + } + return 0; }