From 9e35c323d3502b87d961c39ef39303d56fab12a0 Mon Sep 17 00:00:00 2001 From: Maxwell Jeffress Date: Sun, 15 Feb 2026 14:55:02 +1100 Subject: [PATCH] progress --- build.c | 4 +- src/include/ansii.h | 67 ++++++++++++++ src/lexer/SolsLiteral.c | 2 +- src/lexer/SolsToken.c | 1 + src/lexer/SolsToken.h | 2 +- src/lexer/lexer.c | 192 +++++++++++++++++++++++++++++++++++++--- src/main.c | 2 +- 7 files changed, 254 insertions(+), 16 deletions(-) create mode 100644 src/include/ansii.h diff --git a/build.c b/build.c index f113e1d..b5949a6 100755 --- a/build.c +++ b/build.c @@ -7,10 +7,10 @@ if [[ ! -v OUTPUT ]]; then fi if [[ ! -v CFLAGS ]]; then - CFLAGS="" + CFLAGS="-O3 -Wall -Wextra -pedantic" fi -gcc "$0" -o "$OUTPUT" +gcc "$0" -o "$OUTPUT" $CFLAGS if not [ $# -lt 1 ]; then exec "./$OUTPUT" "$@" diff --git a/src/include/ansii.h b/src/include/ansii.h new file mode 100644 index 0000000..fc40739 --- /dev/null +++ b/src/include/ansii.h @@ -0,0 +1,67 @@ +// ansii.h - made by SpookyDervish +// version 1.0.0 +// do with this whatever you want +// +// example usage with printf: printf(ESC_BOLD ESC_RED_FG "hi\n"); + +#ifndef ANSII_H +#define ANSII_H + +#define ESC_RESET "\x1b[0m" +#define ESC_BOLD "\x1b[1m" +#define ESC_DIM "\x1b[2m" +#define ESC_ITALIC "\x1b[3m" +#define ESC_UNDERLINE "\x1b[4m" +#define ESC_BLINKING "\x1b[5m" +#define ESC_REVERSE "\x1b[7m" +#define ESC_HIDDEN "\x1b[8m" +#define ESC_STRIKETHROUGH "\x1b[8m" + +#define ESC_TERMINAL_BELL "\a" + +#define ESC_BLACK_FG "\x1b[30m" +#define ESC_RED_FG "\x1b[31m" +#define ESC_GREEN_FG "\x1b[32m" +#define ESC_YELLOW_FG "\x1b[33m" +#define ESC_BLUE_FG "\x1b[34m" +#define ESC_MAGENTA_FG "\x1b[35m" +#define ESC_CYAN_FG "\x1b[36m" +#define ESC_WHITE_FG "\x1b[37m" + +#define ESC_BLACK_FG "\x1b[30m" +#define ESC_RED_FG "\x1b[31m" +#define ESC_GREEN_FG "\x1b[32m" +#define ESC_YELLOW_FG "\x1b[33m" +#define ESC_BLUE_FG "\x1b[34m" +#define ESC_MAGENTA_FG "\x1b[35m" +#define ESC_CYAN_FG "\x1b[36m" +#define ESC_WHITE_FG "\x1b[37m" +#define ESC_BRIGHT_BLACK_FG "\x1b[90m" +#define ESC_BRIGHT_RED_FG "\x1b[91m" +#define ESC_BRIGHT_GREEN_FG "\x1b[92m" +#define ESC_BRIGHT_YELLOW_FG "\x1b[93m" +#define ESC_BRIGHT_BLUE_FG "\x1b[94m" +#define ESC_BRIGHT_MAGENTA_FG "\x1b[95m" +#define ESC_BRIGHT_CYAN_FG "\x1b[96m" +#define ESC_BRIGHT_WHITE_FG "\x1b[97m" + +#define ESC_BLACK_BG "\x1b[40m" +#define ESC_RED_BG "\x1b[41m" +#define ESC_GREEN_BG "\x1b[42m" +#define ESC_YELLOW_BG "\x1b[43m" +#define ESC_BLUE_BG "\x1b[44m" +#define ESC_MAGENTA_BG "\x1b[45m" +#define ESC_CYAN_BG "\x1b[46m" +#define ESC_WHITE_BG "\x1b[47m" +#define ESC_BRIGHT_BLACK_BG "\x1b[100m" +#define ESC_BRIGHT_RED_BG "\x1b[101m" +#define ESC_BRIGHT_GREEN_BG "\x1b[102m" +#define ESC_BRIGHT_YELLOW_BG "\x1b[103m" +#define ESC_BRIGHT_BLUE_BG "\x1b[104m" +#define ESC_BRIGHT_MAGENTA_BG "\x1b[105m" +#define ESC_BRIGHT_CYAN_BG "\x1b[106m" +#define ESC_BRIGHT_WHITE_BG "\x1b[107m" + +#define ESC_DEFAULT_FG "\x1b[39m" + +#endif // !ANSII_H \ No newline at end of file diff --git a/src/lexer/SolsLiteral.c b/src/lexer/SolsLiteral.c index 5ba3c54..372c141 100644 --- a/src/lexer/SolsLiteral.c +++ b/src/lexer/SolsLiteral.c @@ -37,8 +37,8 @@ ResultType(SolsLiteral, charptr) createSolsLiteral(SolsLiteralType type, ...) { strcpy(literal.as.stringv, input); } } - return Success(SolsLiteral, charptr, literal); va_end(args); + return Success(SolsLiteral, charptr, literal); } void freeSolsLiteral(SolsLiteral* lit) { diff --git a/src/lexer/SolsToken.c b/src/lexer/SolsToken.c index 2ad14d9..5cf7943 100644 --- a/src/lexer/SolsToken.c +++ b/src/lexer/SolsToken.c @@ -81,6 +81,7 @@ ResultType(voidptr, charptr) addTokenToSolsTokens(SolsTokens* tokens, SolsToken if (tmp == NULL) { return Error(voidptr, charptr, "Failed to allocate memory (in addTokenToSolsTokens() function)"); } + tokens->at = tmp; } tokens->at[tokens->count] = token; tokens->count++; diff --git a/src/lexer/SolsToken.h b/src/lexer/SolsToken.h index b211589..c1aa7f0 100644 --- a/src/lexer/SolsToken.h +++ b/src/lexer/SolsToken.h @@ -7,7 +7,7 @@ #include typedef enum SolsTokenType { - STT_IDENTIFIER, STT_LITERAL, STT_TYPE, STT_OPEN_CURLY, STT_CLOSE_CURLY, STT_OPEN_PAREN, STT_CLOSE_PAREN, STT_OP_ADD, STT_OP_SUB, STT_OP_MUL, STT_OP_DIV, STT_OP_SET, STT_OP_GREATER, STT_OP_LESSER, STT_OP_EQUAL, STT_OP_INEQUAL, STT_OP_EQGREATER, STT_OP_EQLESSER, STT_KW_DEF, STT_KW_STRUCT, STT_KW_PUTS, STT_KW_IF, STT_KW_WHILE, STT_KW_NEW, STT_KW_GROUND + STT_IDENTIFIER, STT_LITERAL, STT_TYPE, STT_DOT, STT_OPEN_CURLY, STT_CLOSE_CURLY, STT_OPEN_PAREN, STT_CLOSE_PAREN, STT_OP_ADD, STT_OP_SUB, STT_OP_MUL, STT_OP_DIV, STT_OP_SET, STT_OP_GREATER, STT_OP_LESSER, STT_OP_EQUAL, STT_OP_INEQUAL, STT_OP_EQGREATER, STT_OP_EQLESSER, STT_KW_DEF, STT_KW_STRUCT, STT_KW_PUTS, STT_KW_IF, STT_KW_WHILE, STT_KW_NEW, STT_KW_GROUND } SolsTokenType; typedef char* charptr; diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 4b5af25..986506b 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -3,6 +3,8 @@ #include "SolsToken.h" #include "../include/error.h" #include "../include/estr.h" +#include "../include/ansii.h" +#include ResultType(SolsLexer, charptr) createLexer(char* input) { @@ -48,10 +50,16 @@ ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) { } ResultType(SolsToken, charptr) identifyToken(const char* token) { - printf("Passed token: '%s'\n", token); + // Process strings if (token[0] == '"') { if (token[strlen(token) - 1] == '"') { - ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, token); + // Cut out the quotes + char* tokencopy = malloc(strlen(token) + 1); + strncpy(tokencopy, token + 1, strlen(token) - 2); + tokencopy[strlen(token) - 2] = '\0'; + + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, tokencopy); + free(tokencopy); if (literal.error) { Estr str = CREATE_ESTR(literal.as.error); APPEND_ESTR(str, " (in identifyToken() function)"); @@ -59,12 +67,110 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) { } SolsToken tok = { .type = STT_LITERAL, + .as.literal = literal.as.success }; - return Success(SolsToken, charptr, {STT_LITERAL}); + return Success(SolsToken, charptr, tok); } return Error(SolsToken, charptr, "Unterminated string (in identifyToken() function)"); } + // Process characters + if (token[0] == '\'') { + if (strlen(token) != 3) { + return Error(SolsToken, charptr, "Characters can only hold one character at a time (try using \"this\" for strings?)"); + } + if (token[2] == '\'') { + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_CHAR, token[1]); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + .as.literal = literal.as.success + }; + return Success(SolsToken, charptr, tok); + } else { + return Error(SolsToken, charptr, "Unterminated character (in identifyToken() function)"); + } + } + + // Process integers and floats + if (isdigit(token[0]) || token[0] == '-') { + size_t len = strlen(token); + bool isInt = true; + bool isDouble = false; + for (size_t i = 1; i < len; i++) { + if (isInt && token[i] == '.') { + isInt = false; + isDouble = true; + continue; + } + if (!isdigit(token[i])) { + isInt = false; + isDouble = false; + } + } + if (isInt) { + int64_t newInt = atoll(token); + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_INT, newInt); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + .as.literal = literal.as.success + }; + return Success(SolsToken, charptr, tok); + } + + if (isDouble) { + double newDouble = atof(token); + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_DOUBLE, newDouble); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + .as.literal = literal.as.success + }; + return Success(SolsToken, charptr, tok); + } + } + + // Handle boolean (true/false) + if (strcmp(token, "true") == 0) { + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_BOOL, true); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + .as.literal = literal.as.success + }; + return Success(SolsToken, charptr, tok); + } + if (strcmp(token, "false") == 0) { + ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_BOOL, false); + if (literal.error) { + Estr str = CREATE_ESTR(literal.as.error); + APPEND_ESTR(str, " (in identifyToken() function)"); + return Error(SolsToken, charptr, str.str); + } + SolsToken tok = { + .type = STT_LITERAL, + .as.literal = literal.as.success + }; + return Success(SolsToken, charptr, tok); + } + // FIXME do tihs better sometime if (strcmp(token, "puts") == 0) { return Success(SolsToken, charptr, {STT_KW_PUTS}); @@ -79,10 +185,10 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) { return Success(SolsToken, charptr, {STT_KW_DEF}); } if (strcmp(token, "struct") == 0) { - return Success(SolsToken, charptr, {STT_KW_DEF}); + return Success(SolsToken, charptr, {STT_KW_STRUCT}); } // Shh, this is our little secret now - if (strcmp(token, "{") == 0 || strcmp(token, "then")) { + if (strcmp(token, "{") == 0 || strcmp(token, "then") == 0) { return Success(SolsToken, charptr, {STT_OPEN_CURLY}); } if (strcmp(token, "}") == 0 || strcmp(token, "end") == 0) { @@ -110,19 +216,30 @@ ResultType(SolsToken, charptr) identifyToken(const char* token) { return Success(SolsToken, charptr, {STT_OP_SET}); } - return Success(SolsToken, charptr, {STT_IDENTIFIER}); + // No appropriate token found, it's an identifier (I hope) + SolsToken id = { + .type = STT_IDENTIFIER, + .as.idName = malloc(strlen(token) + 1) + }; + + if (id.as.idName == NULL) { + return Error(SolsToken, charptr, "Couldn't allocate memory to copy string (in identifyToken() function)"); + } + strcpy(id.as.idName, token); + + return Success(SolsToken, charptr, id); } char* createParsingError(size_t lineNum, char* line, char* why) { - Estr error = CREATE_ESTR("\e[0;34mParsing Error \e[0;36mon line "); + Estr error = CREATE_ESTR(ESC_RESET ESC_BOLD ESC_RED_FG "Lexing Error " ESC_RESET ESC_YELLOW_FG "on line "); char buf[256]; snprintf(buf, sizeof(buf), "%zu", lineNum); APPEND_ESTR(error, buf); - APPEND_ESTR(error, ":\n\n "); + APPEND_ESTR(error, ":\n\n" ESC_RESET ESC_BLUE_FG " "); APPEND_ESTR(error, line); APPEND_ESTR(error, "\n\n"); - APPEND_ESTR(error, "-> "); + APPEND_ESTR(error, ESC_RESET ESC_MAGENTA_FG "-> "); APPEND_ESTR(error, why); APPEND_ESTR(error, "\n"); return error.str; @@ -154,8 +271,8 @@ ResultType(voidptr, charptr) lex(SolsLexer* lexer) { if (lexer->input[lineStart] == '\n') { break; } - char buf[] = {lexer->input[lineStart], '\0'}; - APPEND_ESTR(currentLine, buf); + char tmp[] = {lexer->input[lineStart], '\0'}; + APPEND_ESTR(currentLine, tmp); } for (;;) { @@ -191,6 +308,59 @@ ResultType(voidptr, charptr) lex(SolsLexer* lexer) { APPEND_ESTR(buf, "\""); break; } + + // These characters require themselves added seperately from the previous token. + case '{': + case '}': + case '(': + case ')': + case ',': + case ':': + { + if (strcmp(buf.str, "") != 0) { + ResultType(SolsToken, charptr) result = identifyToken(buf.str); + if (result.error) { + return Error(voidptr, charptr, createParsingError(lineNum, currentLine.str, result.as.error)); + } + addTokenToSolsTokens(&lexer->output, result.as.success); + DESTROY_ESTR(buf); + buf = CREATE_ESTR(""); + } + char tmp[] = {chr.as.success, '\0'}; + ResultType(SolsToken, charptr) result = identifyToken(tmp); + if (result.error) { + return Error(voidptr, charptr, createParsingError(lineNum, currentLine.str, result.as.error)); + } + addTokenToSolsTokens(&lexer->output, result.as.success); + break; + } + + // '.' requires checking whether it's a number or an identifier after + case '.': { + ResultType(char, Nothing) peek = lexerPeek(lexer, 1); + if (peek.error) { + return Error(voidptr, charptr, createParsingError(lineNum, currentLine.str, "Expecting token after '.'")); + } + if (isdigit(peek.as.success)) { + char tmp[] = {peek.as.success, '\0'}; + APPEND_ESTR(buf, tmp); + lexerConsume(lexer); + } else { + if (strcmp(buf.str, "") != 0) { + ResultType(SolsToken, charptr) result = identifyToken(buf.str); + if (result.error) { + return Error(voidptr, charptr, createParsingError(lineNum, currentLine.str, result.as.error)); + } + addTokenToSolsTokens(&lexer->output, result.as.success); + DESTROY_ESTR(buf); + buf = CREATE_ESTR(""); + } + addTokenToSolsTokens(&lexer->output, (SolsToken) {STT_DOT}); + } + break; + } + + // This whitespace splits the program and does not get appended as it's own token. case '\n': case ' ': { if (strcmp(buf.str, "") != 0) { diff --git a/src/main.c b/src/main.c index 66f6221..3040f3a 100644 --- a/src/main.c +++ b/src/main.c @@ -2,7 +2,7 @@ #include int main() { - ResultType(SolsLexer, charptr) lexer = createLexer("puts \"dingus\"\n"); + ResultType(SolsLexer, charptr) lexer = createLexer("fds \""); if (lexer.error) { printf("Error while creating lexer: %s", lexer.as.error); }