#include "lexer.h"
#include "SolsLiteral.h"
#include "SolsToken.h"
#include "../include/error.h"
#include "../include/estr.h"
#include "../include/ansii.h"

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Creates a lexer that owns a private copy of `input`.
//
// Returns the initialised lexer on success, or an error message when `input`
// is NULL, the copy cannot be allocated, or createSolsTokens() fails.
ResultType(SolsLexer, charptr) createLexer(char* input) {
    if (input == NULL) {
        return Error(SolsLexer, charptr, "Input is NULL (in createLexer() function)");
    }

    const size_t inputsize = strlen(input);
    char* inputcopy = malloc(inputsize + 1);
    if (inputcopy == NULL) {
        return Error(SolsLexer, charptr, "Couldn't copy string into lexer (in createLexer() function)");
    }
    memcpy(inputcopy, input, inputsize + 1);

    ResultType(SolsTokens, charptr) tokens = createSolsTokens();
    if (tokens.error) {
        // The copy is not handed to anyone on this path, so release it here.
        free(inputcopy);
        Estr e = CREATE_ESTR(tokens.as.error);
        APPEND_ESTR(e, " (in createLexer() function)");
        return Error(SolsLexer, charptr, e.str);
    }

    SolsLexer lexer = {
        .input = inputcopy,
        .inputsize = inputsize,
        .output = tokens.as.success,
        .current = 0,
    };
    return Success(SolsLexer, charptr, lexer);
}

// Returns the character `ahead` positions past the lexer's current position
// without consuming anything. `ahead == 0` is the character the next call to
// lexerConsume() would return.
//
// Fails when the lexer has no input or the requested position lies at or
// beyond the end of the input.
ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) {
    if (lexer == NULL || lexer->input == NULL) {
        return Error(char, Nothing, {});
    }
    // Written as a subtraction so that `current + ahead` cannot wrap around.
    if (lexer->current >= lexer->inputsize || ahead >= lexer->inputsize - lexer->current) {
        return Error(char, Nothing, {});
    }
    return Success(char, Nothing, lexer->input[lexer->current + ahead]);
}

// Returns the character at the current position and advances past it.
// Fails when the lexer has no input or the whole input has been consumed.
ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) {
    if (lexer == NULL || lexer->input == NULL) {
        return Error(char, Nothing, {});
    }
    if (lexer->current >= lexer->inputsize) {
        return Error(char, Nothing, {});
    }
    return Success(char, Nothing, lexer->input[lexer->current++]);
}

// Turns the result of createSolsLiteral() into a literal token, or forwards
// its error with the identifyToken() context appended.
static ResultType(SolsToken, charptr) literalToken(ResultType(SolsLiteral, charptr) literal) {
    if (literal.error) {
        Estr str = CREATE_ESTR(literal.as.error);
        APPEND_ESTR(str, " (in identifyToken() function)");
        return Error(SolsToken, charptr, str.str);
    }
    SolsToken tok = {
        .type = STT_LITERAL,
        .as.literal = literal.as.success
    };
    return Success(SolsToken, charptr, tok);
}

// Turns the result of createSolsType() into a type token, or forwards its
// error with the identifyToken() context appended.
static ResultType(SolsToken, charptr) typeToken(ResultType(SolsType, charptr) type) {
    if (type.error) {
        Estr e = CREATE_ESTR(type.as.error);
        APPEND_ESTR(e, " (in identifyToken() function)");
        return Error(SolsToken, charptr, e.str);
    }
    SolsToken tok = {
        .type = STT_TYPE,
        .as.type = type.as.success
    };
    return Success(SolsToken, charptr, tok);
}

enum { NUMBER_NONE, NUMBER_INT, NUMBER_DOUBLE };

// Classifies `token` as an integer, a double, or not a number.
//
// A number starts with a digit or '-', contains only digits plus at most one
// '.', and has at least one digit. The last rule keeps a lone "-" from being
// read as the integer 0 instead of the subtraction operator.
static int classifyNumber(const char* token) {
    if (!isdigit((unsigned char) token[0]) && token[0] != '-') {
        return NUMBER_NONE;
    }

    bool sawDigit = false;
    bool sawDot = false;
    for (size_t i = (token[0] == '-') ? 1 : 0; token[i] != '\0'; i++) {
        if (isdigit((unsigned char) token[i])) {
            sawDigit = true;
        } else if (token[i] == '.' && !sawDot) {
            sawDot = true;
        } else {
            return NUMBER_NONE;
        }
    }

    if (!sawDigit) {
        return NUMBER_NONE;
    }
    return sawDot ? NUMBER_DOUBLE : NUMBER_INT;
}

// Converts the text of a single token into a SolsToken.
//
// Recognised, in order: string literals ("..."), character literals ('x'),
// integers and doubles, the booleans true/false, the base type names,
// keywords, brackets and operators (including their word aliases such as
// "then", "end", "plus" and "is"). Anything else becomes an identifier whose
// name is a freshly allocated copy of `token`.
//
// Returns an error message for a NULL token, a malformed string or character
// literal, an allocation failure, or a failure reported by
// createSolsLiteral() / createSolsType().
ResultType(SolsToken, charptr) identifyToken(const char* token) {
    if (token == NULL) {
        return Error(SolsToken, charptr, "Token is NULL (in identifyToken() function)");
    }

    const size_t len = strlen(token);

    // Process strings
    if (token[0] == '"') {
        // A lone quote is both the first and the last character, so the length
        // must be checked before the quotes are cut off.
        if (len < 2 || token[len - 1] != '"') {
            return Error(SolsToken, charptr, "Unterminated string (in identifyToken() function)");
        }

        // Cut out the quotes
        char* contents = malloc(len - 1);
        if (contents == NULL) {
            return Error(SolsToken, charptr, "Couldn't allocate memory to copy string (in identifyToken() function)");
        }
        memcpy(contents, token + 1, len - 2);
        contents[len - 2] = '\0';

        ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, contents);
        free(contents);
        return literalToken(literal);
    }

    // Process characters
    if (token[0] == '\'') {
        if (len != 3) {
            return Error(SolsToken, charptr, "Characters can only hold one character at a time (try using \"this\" for strings?)");
        }
        if (token[2] != '\'') {
            return Error(SolsToken, charptr, "Unterminated character (in identifyToken() function)");
        }
        return literalToken(createSolsLiteral(SLT_CHAR, token[1]));
    }

    // Process integers and floats
    switch (classifyNumber(token)) {
        case NUMBER_INT: {
            // strtoll() clamps out-of-range input where atoll() is undefined.
            int64_t newInt = strtoll(token, NULL, 10);
            return literalToken(createSolsLiteral(SLT_INT, newInt));
        }
        case NUMBER_DOUBLE: {
            double newDouble = strtod(token, NULL);
            return literalToken(createSolsLiteral(SLT_DOUBLE, newDouble));
        }
        default: {
            break;
        }
    }

    // Handle boolean (true/false)
    if (strcmp(token, "true") == 0) {
        return literalToken(createSolsLiteral(SLT_BOOL, true));
    }
    if (strcmp(token, "false") == 0) {
        return literalToken(createSolsLiteral(SLT_BOOL, false));
    }

    // Process base types
    if (strcmp(token, "int") == 0) {
        return typeToken(createSolsType(STT_INT));
    }
    if (strcmp(token, "double") == 0) {
        return typeToken(createSolsType(STT_DOUBLE));
    }
    if (strcmp(token, "string") == 0) {
        return typeToken(createSolsType(STT_STRING));
    }
    if (strcmp(token, "char") == 0) {
        return typeToken(createSolsType(STT_CHAR));
    }
    if (strcmp(token, "bool") == 0) {
        return typeToken(createSolsType(STT_BOOL));
    }

    // Keywords, brackets and operators that carry no payload. Several
    // spellings may map to the same token type.
    static const struct {
        const char* text;
        int type;
    } fixedTokens[] = {
        { "puts", STT_KW_PUTS },
        { "if", STT_KW_IF },
        { "while", STT_KW_WHILE },
        { "def", STT_KW_DEF },
        { "struct", STT_KW_STRUCT },
        // Shh, this is our little secret now
        { "{", STT_OPEN_CURLY },
        { "then", STT_OPEN_CURLY },
        { "}", STT_CLOSE_CURLY },
        { "end", STT_CLOSE_CURLY },
        { "(", STT_OPEN_PAREN },
        { ")", STT_CLOSE_PAREN },
        { "+", STT_OP_ADD },
        { "plus", STT_OP_ADD },
        { "-", STT_OP_SUB },
        { "minus", STT_OP_SUB },
        { "*", STT_OP_MUL },
        { "times", STT_OP_MUL },
        { "/", STT_OP_DIV },
        { "dividedby", STT_OP_DIV },
        { "=", STT_OP_SET },
        { "is", STT_OP_SET },
        { "+=", STT_OP_ADDTO },
        { "-=", STT_OP_SUBTO },
        { "*=", STT_OP_MULTO },
        { "/=", STT_OP_DIVTO },
        { "++", STT_OP_INCREMENT },
        { "--", STT_OP_DECREMENT },
    };
    for (size_t i = 0; i < sizeof fixedTokens / sizeof fixedTokens[0]; i++) {
        if (strcmp(token, fixedTokens[i].text) == 0) {
            SolsToken tok = { .type = fixedTokens[i].type };
            return Success(SolsToken, charptr, tok);
        }
    }

    // No appropriate token found, it's an identifier (I hope)
    SolsToken id = {
        .type = STT_IDENTIFIER,
        .as.idName = malloc(len + 1)
    };
    if (id.as.idName == NULL) {
        return Error(SolsToken, charptr, "Couldn't allocate memory to copy string (in identifyToken() function)");
    }
    memcpy(id.as.idName, token, len + 1);
    return Success(SolsToken, charptr, id);
}

// Builds a coloured, human-readable lexing error.
//
// The message names the line number, echoes the offending source line and
// ends with the reason `why`. The returned string is the buffer of a new Estr.
char* createParsingError(size_t lineNum, char* line, char* why) {
    Estr error = CREATE_ESTR(ESC_RESET ESC_BOLD ESC_RED_FG "Lexing Error " ESC_RESET ESC_YELLOW_FG "on line ");

    char lineNumText[256];
    snprintf(lineNumText, sizeof(lineNumText), "%zu", lineNum);
    APPEND_ESTR(error, lineNumText);

    APPEND_ESTR(error, ":\n\n" ESC_RESET ESC_BLUE_FG "    ");
    APPEND_ESTR(error, line);
    APPEND_ESTR(error, "\n\n");
    APPEND_ESTR(error, ESC_RESET ESC_MAGENTA_FG "-> ");
    APPEND_ESTR(error, why);
    APPEND_ESTR(error, "\n");
    return error.str;
}

// Identifies `text` and appends the resulting token to the lexer's output.
// Returns NULL on success, or the error message from identifyToken().
static char* emitToken(SolsLexer* lexer, const char* text) {
    ResultType(SolsToken, charptr) result = identifyToken(text);
    if (result.error) {
        return result.as.error;
    }
    addTokenToSolsTokens(&lexer->output, result.as.success);
    return NULL;
}

// Helpers local to lex(). Both rely on its locals (`lexer`, `buf`, `lineNum`,
// `currentLine`, `failure`) and on its `fail` label.

// Emits `text` as a token; on failure records a lexing error and bails out.
#define EMIT_OR_FAIL(text) \
    do { \
        char* why_ = emitToken(lexer, (text)); \
        if (why_ != NULL) { \
            failure = createParsingError(lineNum, currentLine.str, why_); \
            goto fail; \
        } \
    } while (0)

// Emits whatever has accumulated in `buf` (if anything) and empties it.
#define FLUSH_PENDING_TOKEN() \
    do { \
        if (buf.str[0] != '\0') { \
            EMIT_OR_FAIL(buf.str); \
            DESTROY_ESTR(buf); \
            buf = CREATE_ESTR(""); \
        } \
    } while (0)

// Splits the lexer's input into tokens and stores them in `lexer->output`.
//
// The output is replaced by a fresh token list and the read position is reset
// before lexing starts. Returns success with a NULL payload, or a formatted
// lexing error (see createParsingError()) naming the line where lexing failed.
//
// NOTE(review): the token list previously held in `lexer->output` is
// overwritten without being released; no release function is visible in this
// file, so confirm whether the caller is expected to free it.
ResultType(voidptr, charptr) lex(SolsLexer* lexer) {
    if (lexer == NULL || lexer->input == NULL) {
        return Error(voidptr, charptr, "Lexer is not initialised");
    }

    ResultType(SolsTokens, charptr) tokens = createSolsTokens();
    if (tokens.error) {
        Estr e = CREATE_ESTR(tokens.as.error);
        APPEND_ESTR(e, " (in lex() function)");
        return Error(voidptr, charptr, e.str);
    }
    lexer->output = tokens.as.success;
    lexer->current = 0;

    Estr buf = CREATE_ESTR("");          // text of the token being accumulated
    Estr currentLine = CREATE_ESTR("");  // source line shown in error messages
    bool inString = false;
    size_t lineNum = 1;
    char* failure = NULL;

    // Load the first line for error reporting.
    for (size_t i = 0; i < lexer->inputsize && lexer->input[i] != '\n'; i++) {
        char one[] = { lexer->input[i], '\0' };
        APPEND_ESTR(currentLine, one);
    }

    for (;;) {
        ResultType(char, Nothing) chr = lexerConsume(lexer);
        if (chr.error) {
            break;
        }
        const char c = chr.as.success;

        if (inString) {
            // Inside a string everything is taken verbatim up to the closing quote.
            char one[] = { c, '\0' };
            APPEND_ESTR(buf, one);
            if (c == '"') {
                inString = false;
            }
        } else {
            switch (c) {
                case '"': {
                    inString = true;
                    APPEND_ESTR(buf, "\"");
                    break;
                }

                // These characters end the previous token and are tokens themselves.
                // NOTE: identifyToken() has no dedicated entry for ',' and ':',
                // so they currently come back as identifiers.
                case '{':
                case '}':
                case '(':
                case ')':
                case ',':
                case ':': {
                    FLUSH_PENDING_TOKEN();
                    char one[] = { c, '\0' };
                    EMIT_OR_FAIL(one);
                    break;
                }

                // These characters may be repeated, or followed by an equals sign.
                case '+':
                case '-': {
                    FLUSH_PENDING_TOKEN();
                    char op[3] = { c, '\0', '\0' };
                    // The consumed character is already behind us, so offset 0
                    // is the character that directly follows it.
                    ResultType(char, Nothing) next = lexerPeek(lexer, 0);
                    if (!next.error && (next.as.success == '=' || next.as.success == c)) {
                        op[1] = next.as.success;
                        lexerConsume(lexer);
                    }
                    EMIT_OR_FAIL(op);
                    break;
                }

                // '.' requires checking whether it's a number or an identifier after
                case '.': {
                    ResultType(char, Nothing) next = lexerPeek(lexer, 0);
                    if (next.error) {
                        failure = createParsingError(lineNum, currentLine.str, "Expecting token after '.'");
                        goto fail;
                    }
                    if (isdigit((unsigned char) next.as.success)) {
                        // Decimal point: keep it in the pending token; the digits
                        // that follow are appended by the default case.
                        APPEND_ESTR(buf, ".");
                    } else {
                        FLUSH_PENDING_TOKEN();
                        addTokenToSolsTokens(&lexer->output, (SolsToken) {.type = STT_DOT});
                    }
                    break;
                }

                // This whitespace splits the program and does not get appended as its own token.
                case '\n':
                case ' ': {
                    FLUSH_PENDING_TOKEN();
                    break;
                }

                default: {
                    char one[] = { c, '\0' };
                    APPEND_ESTR(buf, one);
                    break;
                }
            }

            // Check whether we need to parse types
            if (strcmp(buf.str, "fun") == 0 || strcmp(buf.str, "template") == 0 || strcmp(buf.str, "object") == 0) {
                ResultType(char, Nothing) next = lexerPeek(lexer, 0);
                if (!next.error && next.as.success == '(') {
                    // TODO: type signatures are not parsed yet.
                }
            }
        }

        // Advance the line bookkeeping only after the character was handled, so
        // an error in a token that ends at this newline is reported on its own line.
        if (c == '\n') {
            lineNum++;
            DESTROY_ESTR(currentLine);
            currentLine = CREATE_ESTR("");
            for (size_t i = lexer->current; i < lexer->inputsize && lexer->input[i] != '\n'; i++) {
                char one[] = { lexer->input[i], '\0' };
                APPEND_ESTR(currentLine, one);
            }
        }
    }

    // Checked before the final flush so that an unclosed string is reported
    // here rather than being handed to identifyToken().
    if (inString) {
        failure = createParsingError(lineNum, currentLine.str, "Unterminated string");
        goto fail;
    }

    // The input may end without trailing whitespace; emit the last token too.
    FLUSH_PENDING_TOKEN();

    DESTROY_ESTR(buf);
    DESTROY_ESTR(currentLine);
    return Success(voidptr, charptr, NULL);

fail:
    DESTROY_ESTR(buf);
    DESTROY_ESTR(currentLine);
    return Error(voidptr, charptr, failure);
}

#undef FLUSH_PENDING_TOKEN
#undef EMIT_OR_FAIL

// Placeholder for type-signature parsing; always reports that it is unfinished.
ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer) {
    (void) lexer;
    return Error(Nothing, charptr, "WIP (in processTypeSignature() function)");
}