Friendship ended with C++, C is my new best friend

This commit is contained in:
2026-03-01 16:00:03 +11:00
parent 139be30e2d
commit 38473f0e01
32 changed files with 5996 additions and 2518 deletions

51
src/lexer/SolsLiteral.c Normal file
View File

@@ -0,0 +1,51 @@
#include "SolsLiteral.h"
#include <stdarg.h>
#include <string.h>
// Builds a SolsLiteral of the given type from exactly one variadic argument.
// The argument is read as int64_t (SLT_INT), double (SLT_DOUBLE), int
// (SLT_BOOL and SLT_CHAR, because of default argument promotion) or char*
// (SLT_STRING). Strings are duplicated into freshly malloc'd storage.
// Returns:
// Success: the constructed literal
// Failure: a message when the string argument is NULL or cannot be copied
ResultType(SolsLiteral, charptr) createSolsLiteral(SolsLiteralType type, ...) {
    SolsLiteral result = { .type = type };
    va_list ap;
    va_start(ap, type);

    if (type == SLT_INT) {
        result.as.intv = va_arg(ap, int64_t);
    } else if (type == SLT_DOUBLE) {
        result.as.doublev = va_arg(ap, double);
    } else if (type == SLT_BOOL) {
        result.as.boolv = (bool) va_arg(ap, int);
    } else if (type == SLT_CHAR) {
        result.as.charv = (char) va_arg(ap, int);
    } else if (type == SLT_STRING) {
        char* source = va_arg(ap, char*);
        if (source == NULL) {
            va_end(ap);
            return Error(SolsLiteral, charptr, "Unexpected NULL value (in createSolsLiteral() function)");
        }
        // The literal owns its own copy of the text.
        char* owned = malloc(strlen(source) + 1);
        if (owned == NULL) {
            va_end(ap);
            return Error(SolsLiteral, charptr, "Couldn't allocate memory (in createSolsLiteral() function)");
        }
        strcpy(owned, source);
        result.as.stringv = owned;
    }

    va_end(ap);
    return Success(SolsLiteral, charptr, result);
}
// Releases the heap storage owned by a SolsLiteral.
// Only SLT_STRING literals own memory (.as.stringv); every other literal type
// is left untouched. Passing NULL is a no-op. The string pointer is reset to
// NULL afterwards so that freeing the same literal object twice is harmless.
void freeSolsLiteral(SolsLiteral* lit) {
    if (lit == NULL || lit->type != SLT_STRING) {
        return;
    }
    // free(NULL) is a no-op, so no separate NULL guard is required.
    free(lit->as.stringv);
    lit->as.stringv = NULL;
}

46
src/lexer/SolsLiteral.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef SOLSLITERAL_H
#define SOLSLITERAL_H
#include <inttypes.h>
#include <stdarg.h>
#include "../include/error.h"
#include "../include/nothing.h"
typedef char* charptr;
// Tag identifying which member of SolsLiteral.as holds the literal's value.
typedef enum SolsLiteralType {
SLT_INT, SLT_STRING, SLT_DOUBLE, SLT_BOOL, SLT_CHAR
} SolsLiteralType;
// Stores literal values which will be added to the Ground code.
// Not much explaining needed here.
typedef struct SolsLiteral {
// Selects the active member of .as
SolsLiteralType type;
union {
// Used when type is SLT_INT
int64_t intv;
// Used when type is SLT_STRING; heap-allocated copy made by createSolsLiteral(),
// released by freeSolsLiteral()
char* stringv;
// Used when type is SLT_DOUBLE
double doublev;
// Used when type is SLT_BOOL
bool boolv;
// Used when type is SLT_CHAR
char charv;
} as;
} SolsLiteral;
Result(SolsLiteral, charptr);
// Creates a SolsLiteral, based on the type provided.
// SolsLiteralType -> C type:
// SLT_INT -> int64_t
// SLT_STRING -> char*
// SLT_DOUBLE -> double
// SLT_BOOL -> bool
// SLT_CHAR -> char
// An error will only be returned if there is an issue copying a provided char*.
// There is no way to detect incorrectly provided types, so ensure that the right type
// is provided!!!!
ResultType(SolsLiteral, charptr) createSolsLiteral(SolsLiteralType type, ...);
// Frees a SolsLiteral. Primarily concerned with freeing .as.stringv
void freeSolsLiteral(SolsLiteral* lit);
#endif

93
src/lexer/SolsToken.c Normal file
View File

@@ -0,0 +1,93 @@
#include "SolsToken.h"
#include "SolsLiteral.h"
#include "../include/error.h"
#include <stdarg.h>
#include <string.h>
// Builds a SolsToken of the given type.
// STT_IDENTIFIER and STT_KW_GROUND read one char* argument and store a
// malloc'd copy of it; STT_LITERAL reads a SolsLiteral by value; STT_TYPE
// reads a SolsType by value. Every other token type reads no extra argument.
// Returns:
// Success: the constructed token
// Failure: a message when a string argument is NULL or cannot be copied
ResultType(SolsToken, charptr) createSolsToken(SolsTokenType type, ...) {
    SolsToken result = { .type = type };
    va_list ap;
    va_start(ap, type);

    switch (type) {
        case STT_IDENTIFIER: {
            char* source = va_arg(ap, char*);
            if (source == NULL) {
                va_end(ap);
                return Error(SolsToken, charptr, "String passed is NULL (in createSolsToken() function)");
            }
            result.as.idName = malloc(strlen(source) + 1);
            if (result.as.idName == NULL) {
                va_end(ap);
                return Error(SolsToken, charptr, "Couldn't allocate memory (in createSolsToken() function)");
            }
            strcpy(result.as.idName, source);
            break;
        }
        case STT_KW_GROUND: {
            char* source = va_arg(ap, char*);
            if (source == NULL) {
                va_end(ap);
                return Error(SolsToken, charptr, "String passed is NULL (in createSolsToken() function)");
            }
            result.as.inlineGround = malloc(strlen(source) + 1);
            if (result.as.inlineGround == NULL) {
                va_end(ap);
                return Error(SolsToken, charptr, "Couldn't allocate memory (in createSolsToken() function)");
            }
            strcpy(result.as.inlineGround, source);
            break;
        }
        case STT_LITERAL: {
            result.as.literal = va_arg(ap, SolsLiteral);
            break;
        }
        case STT_TYPE: {
            result.as.type = va_arg(ap, SolsType);
            break;
        }
        default: {
            // Payload-free token: nothing more to read.
            break;
        }
    }

    va_end(ap);
    return Success(SolsToken, charptr, result);
}
// Releases whatever the token's .as payload owns, according to token->type:
// STT_IDENTIFIER -> frees .as.idName
// STT_KW_GROUND -> frees .as.inlineGround
// STT_LITERAL -> freeSolsLiteral() on .as.literal
// STT_TYPE -> freeSolsType() on .as.type
// Other token types own nothing in .as. Freed string pointers are reset to
// NULL so a second call on the same token does not double free.
// Passing NULL is a no-op.
// Note: token->line.content is not released here.
void freeSolsToken(SolsToken* token) {
    if (token == NULL) {
        return;
    }
    switch (token->type) {
        case STT_IDENTIFIER: {
            // free(NULL) is a no-op, so no guard is needed.
            free(token->as.idName);
            token->as.idName = NULL;
            break;
        }
        case STT_KW_GROUND: {
            free(token->as.inlineGround);
            token->as.inlineGround = NULL;
            break;
        }
        case STT_LITERAL: {
            freeSolsLiteral(&token->as.literal);
            break;
        }
        case STT_TYPE: {
            freeSolsType(&token->as.type);
            break;
        }
        default: {
            break;
        }
    }
}
// Creates an empty SolsTokens holder with room for 32 tokens.
// Returns:
// Success: holder with count 0 and capacity 32
// Failure: a message when the initial buffer cannot be allocated
ResultType(SolsTokens, charptr) createSolsTokens() {
    SolsToken* storage = malloc(sizeof(SolsToken) * 32);
    if (storage == NULL) {
        return Error(SolsTokens, charptr, "Failed to allocate memory (in createSolsTokens() function)");
    }
    SolsTokens tokens = {
        .at = storage,
        .capacity = 32,
        .count = 0
    };
    return Success(SolsTokens, charptr, tokens);
}
// Appends a token (copied by value) to the end of a SolsTokens holder,
// growing the backing buffer when it is full.
// The capacity is only updated after realloc() has succeeded, so a failed
// grow leaves the holder exactly as it was. A holder whose capacity is 0 is
// grown to 32 slots rather than staying at 0 forever.
// Returns:
// Success: Nothing
// Failure: a message when the buffer cannot be grown
ResultType(Nothing, charptr) addTokenToSolsTokens(SolsTokens* tokens, SolsToken token) {
    if (tokens->capacity < tokens->count + 1) {
        size_t newCapacity = tokens->capacity == 0 ? 32 : tokens->capacity * 2;
        SolsToken* tmp = realloc(tokens->at, sizeof(SolsToken) * newCapacity);
        if (tmp == NULL) {
            // tokens->at and tokens->capacity still describe the old buffer.
            return Error(Nothing, charptr, "Failed to allocate memory (in addTokenToSolsTokens() function)");
        }
        tokens->at = tmp;
        tokens->capacity = newCapacity;
    }
    tokens->at[tokens->count] = token;
    tokens->count++;
    return Success(Nothing, charptr, {});
}

83
src/lexer/SolsToken.h Normal file
View File

@@ -0,0 +1,83 @@
#ifndef SOLSTOKEN_H
#define SOLSTOKEN_H
#include <stdarg.h>
#include "../include/error.h"
#include "../include/nothing.h"
#include "SolsType.h"
#include "SolsLiteral.h"
// Every kind of token a SolsToken can be.
// STT_LITERAL, STT_TYPE, STT_IDENTIFIER and STT_KW_GROUND carry extra data in
// SolsToken.as; the remaining kinds are fully described by the enum value.
typedef enum SolsTokenType {
STT_IDENTIFIER, STT_LITERAL, STT_TYPE, STT_DOT, STT_OPEN_CURLY, STT_CLOSE_CURLY, STT_OPEN_PAREN, STT_CLOSE_PAREN, STT_OP_ADD, STT_OP_SUB, STT_OP_MUL, STT_OP_DIV, STT_OP_ADDTO, STT_OP_SUBTO, STT_OP_MULTO, STT_OP_DIVTO, STT_OP_INCREMENT, STT_OP_DECREMENT, STT_OP_SET, STT_OP_GREATER, STT_OP_LESSER, STT_OP_EQUAL, STT_OP_INEQUAL, STT_OP_EQGREATER, STT_OP_EQLESSER, STT_KW_DEF, STT_KW_LAMBDA, STT_KW_RETURN, STT_KW_USE, STT_KW_STRUCT, STT_KW_PUTS, STT_KW_IF, STT_KW_WHILE, STT_KW_NEW, STT_KW_GROUND, STT_LINE_END, STT_COMMA
} SolsTokenType;
typedef char* charptr;
// Stores information about the line that the token/node is on, for printing if an error
// occurs.
// .num is the line number, .content is the line's contents.
typedef struct LineInfo {
// Line number
size_t num;
// Text of the line
char* content;
} LineInfo;
// Represents a token lexed by the lex() function.
// Most token types exclusively use the .type field, however some tokens require storing
// more data, inside the .as union.
// Those tokens are:
// STT_LITERAL: A literal value. Uses field .as.literal
// STT_TYPE: A type descriptor. Uses field .as.type
// STT_IDENTIFIER: An identifier. Uses field .as.idName
// STT_KW_GROUND: Ground code embedded inside Solstice. Uses field .as.inlineGround
typedef struct SolsToken {
// Kind of token; decides which member of .as (if any) is meaningful
SolsTokenType type;
union {
// STT_LITERAL payload
SolsLiteral literal;
// STT_TYPE payload
SolsType type;
// STT_IDENTIFIER payload
char* idName;
// STT_KW_GROUND payload
char* inlineGround;
} as;
// Line the token is on, kept for error reporting
LineInfo line;
} SolsToken;
Result(SolsToken, charptr);
// Creates a SolsToken. If the type passed in is STT_LITERAL, STT_TYPE, STT_IDENTIFIER or
// STT_KW_GROUND, the function expects another argument, corresponding to the data type
// the token holds. See the SolsToken struct for more information.
// Returns:
// Success: The created SolsToken
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsToken, charptr) createSolsToken(SolsTokenType type, ...);
// Frees a SolsToken, specifically the .as field elements.
void freeSolsToken(SolsToken* token);
// Represents a Solstice program, separated into tokens.
// .at is a pointer to the tokens
// .count is how many tokens are currently being stored
// .capacity is how many tokens worth of memory is allocated
typedef struct SolsTokens {
// Heap buffer holding the tokens
SolsToken* at;
// Number of tokens currently stored in .at
size_t count;
// Number of tokens .at has room for
size_t capacity;
} SolsTokens;
Result(SolsTokens, charptr);
// Creates a SolsTokens holder.
// Returns:
// Success: Constructed SolsTokens
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsTokens, charptr) createSolsTokens();
// Adds a token to SolsTokens. Used by the lex() function.
// Returns:
// Success: Nothing
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(Nothing, charptr) addTokenToSolsTokens(SolsTokens* tokens, SolsToken token);
#endif

176
src/lexer/SolsType.c Normal file
View File

@@ -0,0 +1,176 @@
#include "SolsType.h"
#include "lexer.h"
#include "../include/error.h"
#include "../include/estr.h"
#include <groundvm.h>
#include <string.h>
// Creates a SolsType of the given kind with no children and room for 32 of
// them. All other fields start out zeroed.
// Returns:
// Success: the constructed SolsType
// Failure: a message when the child buffer cannot be allocated
ResultType(SolsType, charptr) createSolsType(SolsTypeType in) {
    SolsTypeField* fields = malloc(sizeof(SolsTypeField) * 32);
    if (fields == NULL) {
        return Error(SolsType, charptr, "Couldn't allocate memory (in createSolsType() function)");
    }
    SolsType created = { .type = in };
    created.children.at = fields;
    created.children.capacity = 32;
    return Success(SolsType, charptr, created);
}
// Makes a deep copy of a SolsType: the kind, and every child (its type,
// recursively, and its name) are duplicated into fresh allocations.
// identifierType and returnType are NOT copied; they are NULL in the result.
// If anything fails part-way through, everything copied so far is released
// before the error is returned, so no memory is leaked.
// A source with zero capacity yields a copy with capacity 1, because
// malloc(0) may legitimately return NULL and would look like a failure.
// Returns:
// Success: the copy, owned by the caller (release with freeSolsType())
// Failure: char* detailing what went wrong (memory failure)
ResultType(SolsType, charptr) copySolsType(SolsType* type) {
    size_t capacity = type->children.capacity;
    if (capacity == 0) {
        capacity = 1;
    }
    // count starts at 0 and is advanced as children are completed, so that
    // freeSolsType(&ret) only ever visits fully initialised entries.
    SolsType ret = { .type = type->type, .children.count = 0, .children.capacity = capacity };
    // Allocate memory
    SolsTypeField* ptr = malloc(sizeof(SolsTypeField) * capacity);
    if (ptr == NULL) {
        return Error(SolsType, charptr, "Couldn't allocate memory (in copySolsType() function)");
    }
    ret.children.at = ptr;
    // Deep copy values
    for (size_t i = 0; i < type->children.count; i++) {
        // Copy the SolsType value
        ResultType(SolsType, charptr) copied = copySolsType(&type->children.at[i].type);
        if (copied.error) {
            freeSolsType(&ret);
            Estr err = CREATE_ESTR(copied.as.error);
            APPEND_ESTR(err, " (in copySolsType() function)");
            return Error(SolsType, charptr, err.str);
        }
        ret.children.at[i].type = copied.as.success;
        ret.children.at[i].name = NULL;
        // Child i is now owned by ret and will be cleaned up with it.
        ret.children.count = i + 1;
        // Copy the name
        if (type->children.at[i].name != NULL) {
            char* nameCopy = malloc(strlen(type->children.at[i].name) + 1);
            if (nameCopy == NULL) {
                freeSolsType(&ret);
                return Error(SolsType, charptr, "Couldn't allocate memory (in copySolsType() function)");
            }
            strcpy(nameCopy, type->children.at[i].name);
            ret.children.at[i].name = nameCopy;
        }
    }
    return Success(SolsType, charptr, ret);
}
// Appends a child to a SolsType. The child is deep-copied with
// copySolsType() and the name (which may be NULL) is duplicated, so the
// caller keeps ownership of both arguments.
// On any failure `type` still has its original children and count, and
// nothing allocated for the new child is leaked. The capacity is only
// updated after realloc() has succeeded; a capacity of 0 grows to 32.
// Returns:
// Success: Nothing
// Failure: char* detailing what went wrong (memory failure)
ResultType(Nothing, charptr) addChildToSolsType(SolsType* type, SolsType child, const char* name) {
    // Duplicate the name first: it is the cheapest step to undo.
    char* nameCopy = NULL;
    if (name != NULL) {
        nameCopy = malloc(strlen(name) + 1);
        if (nameCopy == NULL) {
            return Error(Nothing, charptr, "Couldn't allocate memory (in addChildToSolsType() function)");
        }
        strcpy(nameCopy, name);
    }
    if (type->children.capacity < type->children.count + 1) {
        size_t newCapacity = type->children.capacity == 0 ? 32 : type->children.capacity * 2;
        SolsTypeField* ptr = realloc(type->children.at, sizeof(SolsTypeField) * newCapacity);
        if (ptr == NULL) {
            free(nameCopy);
            return Error(Nothing, charptr, "Couldn't allocate memory (in addChildToSolsType() function)");
        }
        type->children.at = ptr;
        type->children.capacity = newCapacity;
    }
    ResultType(SolsType, charptr) copied = copySolsType(&child);
    if (copied.error) {
        free(nameCopy);
        Estr err = CREATE_ESTR(copied.as.error);
        APPEND_ESTR(err, " (in addChildToSolsType() function)");
        return Error(Nothing, charptr, err.str);
    }
    type->children.at[type->children.count].type = copied.as.success;
    type->children.at[type->children.count].name = nameCopy;
    type->children.count++;
    return Success(Nothing, charptr, {});
}
// Recursively releases a SolsType's children: each child's name, each
// child's own SolsType, then the child buffer itself. Afterwards the child
// list is reset to empty (at = NULL, count = 0, capacity = 0).
void freeSolsType(SolsType* type) {
    SolsTypeField* fields = type->children.at;
    size_t remaining = type->children.count;
    for (SolsTypeField* field = fields; remaining > 0; field++, remaining--) {
        // free(NULL) does nothing, so unnamed children need no special case
        free(field->name);
        freeSolsType(&field->type);
    }
    free(fields);
    type->children.at = NULL;
    type->children.count = 0;
    type->children.capacity = 0;
}
// Returns true when two child names are equal. Names may be NULL (children
// can be added without a name): two NULL names are equal, and a NULL name
// never equals a non-NULL one.
static bool solsFieldNamesEqual(const char* left, const char* right) {
    if (left == NULL || right == NULL) {
        return left == right;
    }
    return strcmp(left, right) == 0;
}

// Compares two SolsTypes structurally.
// - Different kinds are never equal.
// - STT_OBJECT and STT_TEMPLATE are equal when they have the same number of
//   children and every child matches by name and (recursively) by type.
// - STT_FUN is equal when it has the same number of children and every child
//   matches by type; child names are ignored. returnType is not consulted.
// - Every other kind is equal as soon as the kinds match.
bool compareTypes(SolsType* left, SolsType* right) {
    if (left->type != right->type) {
        return false;
    }
    bool namesMatter;
    switch (left->type) {
        case STT_OBJECT:
        case STT_TEMPLATE: {
            namesMatter = true;
            break;
        }
        case STT_FUN: {
            namesMatter = false;
            break;
        }
        default: return true;
    }
    if (left->children.count != right->children.count) {
        return false;
    }
    for (size_t i = 0; i < left->children.count; i++) {
        if (namesMatter && !solsFieldNamesEqual(left->children.at[i].name, right->children.at[i].name)) {
            return false;
        }
        if (!compareTypes(&left->children.at[i].type, &right->children.at[i].type)) {
            return false;
        }
    }
    return true;
}
// Converts a SolsType into a Ground type reference (TYPEREF) named after the
// type's kind: "int", "double", "string", "bool", "char", "function" or
// "struct" (for STT_TEMPLATE).
// Returns:
// Success: the GroundArg produced by groundCreateReference()
// Failure: "FIXME" for STT_OBJECT (not implemented yet), or
//          "How did we get here?" for a kind outside the enum
ResultType(GroundArg, charptr) createGroundArgFromSolsType(SolsType* type) {
    char* refName = NULL;
    switch (type->type) {
        case STT_INT:      refName = "int";      break;
        case STT_DOUBLE:   refName = "double";   break;
        case STT_STRING:   refName = "string";   break;
        case STT_BOOL:     refName = "bool";     break;
        case STT_CHAR:     refName = "char";     break;
        case STT_FUN:      refName = "function"; break;
        case STT_TEMPLATE: refName = "struct";   break;
        case STT_OBJECT:
            // FIXME Do this later
            return Error(GroundArg, charptr, "FIXME");
        default:
            return Error(GroundArg, charptr, "How did we get here?");
    }
    return Success(GroundArg, charptr, groundCreateReference(TYPEREF, refName));
}

105
src/lexer/SolsType.h Normal file
View File

@@ -0,0 +1,105 @@
#ifndef SOLSTYPE_H
#define SOLSTYPE_H
#include <stdlib.h>
#include <groundvm.h>
#include "../include/error.h"
#include "../include/nothing.h"
// The kind of a SolsType: five base kinds plus fun, template and object,
// which use SolsType.children.
typedef enum SolsTypeType {
STT_INT, STT_STRING, STT_DOUBLE, STT_BOOL, STT_CHAR, STT_FUN, STT_TEMPLATE, STT_OBJECT
} SolsTypeType;
// Definition of charptr for Result() and ResultType() macros
typedef char* charptr;
struct SolsTypeField;
// Holds type information for a struct, object or function.
// Say, for example, your type signature looks like this:
// object(string x, fun(int) y)
// This is stored like this:
// SolsType {
// type: STT_OBJECT
// children: [
// {
// type: {
// type: STT_STRING
// }
// name: "x"
// }
// {
// type: {
// type: STT_FUN
// children: [
// {
// type: {
// type: STT_INT
// }
// }
// ]
// }
// name: "y"
// }
// ]
// }
//
// (Sorry for the long explanation, but it's worth it so you know how the type system works.)
//
typedef struct SolsType {
// Which kind of type this is
SolsTypeType type;
// For use when type is identified with a name
char* identifierType;
// For use in functions
struct SolsType* returnType;
// For use by fun, template, object
struct {
// Heap buffer of child fields
struct SolsTypeField* at;
// Number of children stored in .at
size_t count;
// Number of children .at has room for
size_t capacity;
} children;
} SolsType;
// Assists with holding child types in the SolsType struct.
typedef struct SolsTypeField {
// The child's type
SolsType type;
// The child's name; NULL when the child was added without one
char* name;
} SolsTypeField;
Result(SolsType, charptr);
// Creates a SolsType, with the provided type type.
// Use the "addChildToSolsType()" function to add children, in case this type has children.
// Returns:
// Success: The constructed SolsType
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsType, charptr) createSolsType(SolsTypeType in);
Result(Nothing, charptr);
// Adds a child SolsType to a given SolsType.
// Returns:
// Success: Nothing
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(Nothing, charptr) addChildToSolsType(SolsType* type, SolsType child, const char* name);
// Makes a deep copy of a SolsType.
ResultType(SolsType, charptr) copySolsType(SolsType* type);
Result(GroundArg, charptr);
// Represents a SolsType as a GroundArg (in typeref form)
ResultType(GroundArg, charptr) createGroundArgFromSolsType(SolsType* type);
// Frees a SolsType
void freeSolsType(SolsType* type);
// Compares two SolsTypes
bool compareTypes(SolsType* left, SolsType* right);
#endif

844
src/lexer/lexer.c Normal file
View File

@@ -0,0 +1,844 @@
#include "lexer.h"
#include "SolsLiteral.h"
#include "SolsToken.h"
#include "../include/error.h"
#include "../include/estr.h"
#include "../include/ansii.h"
#include <ctype.h>
// Table pairing the exact source text of each keyword, bracket, operator and
// separator with its SolsTokenType. getTokenType() scans it linearly and
// returns the type of the first entry whose text matches.
// Both "\n" and ";" map to STT_LINE_END.
struct _SolsTokenTypeMap SolsTokenTypeMap[] = {
{"puts", STT_KW_PUTS},
{"if", STT_KW_IF},
{"while", STT_KW_WHILE},
{"def", STT_KW_DEF},
{"lambda", STT_KW_LAMBDA},
{"return", STT_KW_RETURN},
{"use", STT_KW_USE},
{"struct", STT_KW_STRUCT},
{"ground", STT_KW_GROUND},
{"{", STT_OPEN_CURLY},
{"}", STT_CLOSE_CURLY},
{"(", STT_OPEN_PAREN},
{")", STT_CLOSE_PAREN},
{"+", STT_OP_ADD},
{"-", STT_OP_SUB},
{"*", STT_OP_MUL},
{"/", STT_OP_DIV},
{"=", STT_OP_SET},
{"+=", STT_OP_ADDTO},
{"-=", STT_OP_SUBTO},
{"*=", STT_OP_MULTO},
{"/=", STT_OP_DIVTO},
{"++", STT_OP_INCREMENT},
{"--", STT_OP_DECREMENT},
{"==", STT_OP_EQUAL},
{"!=", STT_OP_INEQUAL},
{">", STT_OP_GREATER},
{"<", STT_OP_LESSER},
{">=", STT_OP_EQGREATER},
{"<=", STT_OP_EQLESSER},
{"\n", STT_LINE_END},
{";", STT_LINE_END},
{",", STT_COMMA},
// Shh, this is our little secret
// Your reward for actually reading the source code
// Enable this by adding -DSUPER_SILLY_MODE to your
// compile flags (not recommended for production)
#ifdef SUPER_SILLY_MODE
{"plus", STT_OP_ADD},
{"minus", STT_OP_SUB},
{"times", STT_OP_MUL},
{"dividedby", STT_OP_DIV},
{"then", STT_OPEN_CURLY},
{"do", STT_OPEN_CURLY},
{"end", STT_CLOSE_CURLY},
{"is", STT_OP_SET},
{"equals", STT_OP_EQUAL},
{"greaterthan", STT_OP_GREATER},
{"lesserthan", STT_OP_LESSER},
{"increment", STT_OP_INCREMENT},
{"decrement", STT_OP_DECREMENT},
{"adds", STT_OP_ADDTO},
{"subtracts", STT_OP_SUBTO},
{"multiplies", STT_OP_MULTO},
{"divides", STT_OP_DIVTO},
#endif
};
// Looks up the exact text `input` in SolsTokenTypeMap.
// Returns:
// Success: the SolsTokenType of the first matching entry
// Failure: Nothing, when no entry matches
ResultType(SolsTokenType, Nothing) getTokenType(const char* input) {
    const struct _SolsTokenTypeMap* entry = SolsTokenTypeMap;
    const struct _SolsTokenTypeMap* end = SolsTokenTypeMap + sizeof(SolsTokenTypeMap) / sizeof(struct _SolsTokenTypeMap);
    for (; entry != end; entry++) {
        if (strcmp(input, entry->str) != 0) {
            continue;
        }
        return Success(SolsTokenType, Nothing, entry->type);
    }
    return Error(SolsTokenType, Nothing, {});
}
static ResultType(Nothing, charptr) handleGround(SolsLexer* lexer, SolsToken* token, size_t* lineNum, Estr* currentLine, char currentChr, bool* skipDelimiter) {
bool foundBrace = false;
if (currentChr == '{') {
foundBrace = true;
*skipDelimiter = true;
} else {
while (true) {
ResultType(char, Nothing) peek = lexerPeek(lexer, 1);
if (peek.error) break;
if (isspace(peek.as.success)) {
char c = lexerConsume(lexer).as.success;
if (c == '\n') {
(*lineNum)++;
DESTROY_ESTR((*currentLine));
*currentLine = CREATE_ESTR("");
size_t lineStart = lexer->current;
for (size_t i = lineStart; i < lexer->inputsize; i++) {
if (lexer->input[i] == '\n') break;
char buf_tmp[] = {lexer->input[i], '\0'};
APPEND_ESTR((*currentLine), buf_tmp);
}
}
} else if (peek.as.success == '{') {
lexerConsume(lexer);
foundBrace = true;
break;
} else {
break;
}
}
}
if (!foundBrace) {
return Error(Nothing, charptr, "Expected '{' after 'ground'");
}
Estr groundBuf = CREATE_ESTR("");
int depth = 1;
while (depth > 0) {
ResultType(char, Nothing) next = lexerConsume(lexer);
if (next.error) {
DESTROY_ESTR(groundBuf);
return Error(Nothing, charptr, "Unterminated 'ground' block");
}
if (next.as.success == '{') depth++;
if (next.as.success == '}') {
depth--;
if (depth == 0) break;
}
char tmp[] = {next.as.success, '\0'};
APPEND_ESTR(groundBuf, tmp);
if (next.as.success == '\n') {
(*lineNum)++;
DESTROY_ESTR((*currentLine));
*currentLine = CREATE_ESTR("");
size_t lineStart = lexer->current;
for (size_t i = lineStart; i < lexer->inputsize; i++) {
if (lexer->input[i] == '\n') break;
char buf_tmp[] = {lexer->input[i], '\0'};
APPEND_ESTR((*currentLine), buf_tmp);
}
}
}
token->as.inlineGround = malloc(strlen(groundBuf.str) + 1);
if (token->as.inlineGround == NULL) {
DESTROY_ESTR(groundBuf);
return Error(Nothing, charptr, "Memory allocation failed (in handleGround() function)");
}
strcpy(token->as.inlineGround, groundBuf.str);
DESTROY_ESTR(groundBuf);
return Success(Nothing, charptr, {});
}
// Turns the text accumulated in *buf into a token and appends it to
// lexer->output, then resets *buf to the empty string.
// An empty buffer is a no-op. The token records *lineNum and a malloc'd copy
// of *currentLine. For a `ground` keyword the block body is read with
// handleGround() before the token is stored.
// On every failure path the memory allocated for the token so far is
// released, and a failure to append the token is reported instead of being
// silently dropped. *buf is left untouched on failure.
// Returns:
// Success: Nothing
// Failure: char* detailing what went wrong
static ResultType(Nothing, charptr) identifyAndAdd(SolsLexer* lexer, Estr* buf, size_t* lineNum, Estr* currentLine, char currentChr, bool* skipDelimiter) {
    if (strcmp(buf->str, "") == 0) return Success(Nothing, charptr, {});
    ResultType(SolsToken, charptr) result = identifyToken(buf->str);
    if (result.error) {
        return Error(Nothing, charptr, result.as.error);
    }
    result.as.success.line.num = *lineNum;
    result.as.success.line.content = malloc(strlen(currentLine->str) + 1);
    if (result.as.success.line.content == NULL) {
        // A freshly identified ground token has no inlineGround yet, so it
        // owns nothing that needs releasing.
        if (result.as.success.type != STT_KW_GROUND) {
            freeSolsToken(&result.as.success);
        }
        return Error(Nothing, charptr, "Couldn't allocate memory to store line information in token (in identifyAndAdd() function)");
    }
    strcpy(result.as.success.line.content, currentLine->str);
    if (result.as.success.type == STT_KW_GROUND) {
        ResultType(Nothing, charptr) res = handleGround(lexer, &result.as.success, lineNum, currentLine, currentChr, skipDelimiter);
        if (res.error) {
            // handleGround() stored no body on failure; only the line copy
            // needs releasing.
            free(result.as.success.line.content);
            return res;
        }
    }
    ResultType(Nothing, charptr) added = addTokenToSolsTokens(&lexer->output, result.as.success);
    if (added.error) {
        free(result.as.success.line.content);
        freeSolsToken(&result.as.success);
        return added;
    }
    DESTROY_ESTR((*buf));
    *buf = CREATE_ESTR("");
    return Success(Nothing, charptr, {});
}
// Creates a lexer over a private copy of `input`, with an empty token list
// and the read position at 0.
// Returns:
// Success: the constructed SolsLexer
// Failure: a message when `input` is NULL or memory cannot be allocated; the
//          copy of the input is released if the token list cannot be created
ResultType(SolsLexer, charptr) createLexer(char* input) {
    if (input == NULL) {
        return Error(SolsLexer, charptr, "Input is NULL (in createLexer() function)");
    }
    // Copy input into the new lexer struct
    size_t inputsize = strlen(input);
    char* inputcopy = malloc(inputsize + 1);
    if (inputcopy == NULL) {
        return Error(SolsLexer, charptr, "Couldn't copy string into lexer (in createLexer() function)");
    }
    strcpy(inputcopy, input);
    // Create SolsTokens
    ResultType(SolsTokens, charptr) tokens = createSolsTokens();
    if (tokens.error) {
        free(inputcopy);
        Estr e = CREATE_ESTR(tokens.as.error);
        APPEND_ESTR(e, " (in createLexer() function)");
        return Error(SolsLexer, charptr, e.str);
    }
    // Construct and return lexer
    SolsLexer lexer = {
        .input = inputcopy,
        .inputsize = inputsize,
        .output = tokens.as.success,
        .current = 0,
    };
    return Success(SolsLexer, charptr, lexer);
}
// Looks at a character without consuming it. `ahead` is 1-based: 1 means the
// character lexerConsume() would return next.
// Returns:
// Success: the character at that position
// Failure: Nothing, when the lexer has no input or the position is past the
//          end of the input
ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead) {
    if (lexer->input == NULL) {
        return Error(char, Nothing, {});
    }
    // ahead is 1-based, so the target index is current + (ahead - 1)
    size_t index = lexer->current + (ahead - 1);
    if (index >= lexer->inputsize) {
        return Error(char, Nothing, {});
    }
    return Success(char, Nothing, lexer->input[index]);
}
// Returns the character at the current read position and advances the
// position by one.
// Returns:
// Success: the consumed character
// Failure: Nothing, when the lexer has no input or the input is exhausted
//          (the position is not advanced)
ResultType(char, Nothing) lexerConsume(SolsLexer* lexer) {
    if (lexer->input == NULL || lexer->inputsize < lexer->current + 1) {
        return Error(char, Nothing, {});
    }
    char consumed = lexer->input[lexer->current];
    lexer->current++;
    return Success(char, Nothing, consumed);
}
ResultType(SolsToken, charptr) identifyToken(const char* token) {
// Process strings
if (token[0] == '"') {
if (token[strlen(token) - 1] == '"') {
// Cut out the quotes
char* tokencopy = malloc(strlen(token) + 1);
strncpy(tokencopy, token + 1, strlen(token) - 2);
tokencopy[strlen(token) - 2] = '\0';
// Create a literal
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_STRING, tokencopy);
// Free our copy of the string, createSolsLiteral creates a copy
free(tokencopy);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
// Construct and return the token
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
}
return Error(SolsToken, charptr, "Unterminated string (in identifyToken() function)");
}
// Process characters
if (token[0] == '\'') {
if (strlen(token) != 3) {
return Error(SolsToken, charptr, "Characters can only hold one character at a time (try using \"this\" for strings?)");
}
if (token[2] == '\'') {
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_CHAR, token[1]);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
} else {
return Error(SolsToken, charptr, "Unterminated character (in identifyToken() function)");
}
}
// Process integers and floats
if (isdigit(token[0]) || (token[0] == '-' && strlen(token) > 1 && (isdigit(token[1]) || token[1] == '.'))) {
size_t len = strlen(token);
bool isInt = true;
bool isDouble = false;
for (size_t i = 1; i < len; i++) {
if (isInt && token[i] == '.') {
isInt = false;
isDouble = true;
continue;
}
if (!isdigit(token[i])) {
isInt = false;
isDouble = false;
}
}
if (isInt) {
int64_t newInt = atoll(token);
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_INT, newInt);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
}
if (isDouble) {
double newDouble = atof(token);
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_DOUBLE, newDouble);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
}
}
// Handle boolean (true/false)
if (strcmp(token, "true") == 0) {
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_BOOL, true);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
}
if (strcmp(token, "false") == 0) {
ResultType(SolsLiteral, charptr) literal = createSolsLiteral(SLT_BOOL, false);
if (literal.error) {
Estr str = CREATE_ESTR(literal.as.error);
APPEND_ESTR(str, " (in identifyToken() function)");
return Error(SolsToken, charptr, str.str);
}
SolsToken tok = {
.type = STT_LITERAL,
.as.literal = literal.as.success
};
return Success(SolsToken, charptr, tok);
}
// Process base types
if (strcmp(token, "int") == 0) {
ResultType(SolsType, charptr) type = createSolsType(STT_INT);
if (type.error) {
Estr e = CREATE_ESTR(type.as.error);
APPEND_ESTR(e, " (in identifyToken() function)");
return Error(SolsToken, charptr, e.str);
}
SolsToken tok = {
.type = STT_TYPE,
.as.type = type.as.success
};
return Success(SolsToken, charptr, tok);
}
if (strcmp(token, "double") == 0) {
ResultType(SolsType, charptr) type = createSolsType(STT_DOUBLE);
if (type.error) {
Estr e = CREATE_ESTR(type.as.error);
APPEND_ESTR(e, " (in identifyToken() function)");
return Error(SolsToken, charptr, e.str);
}
SolsToken tok = {
.type = STT_TYPE,
.as.type = type.as.success
};
return Success(SolsToken, charptr, tok);
}
if (strcmp(token, "string") == 0) {
ResultType(SolsType, charptr) type = createSolsType(STT_STRING);
if (type.error) {
Estr e = CREATE_ESTR(type.as.error);
APPEND_ESTR(e, " (in identifyToken() function)");
return Error(SolsToken, charptr, e.str);
}
SolsToken tok = {
.type = STT_TYPE,
.as.type = type.as.success
};
return Success(SolsToken, charptr, tok);
}
if (strcmp(token, "char") == 0) {
ResultType(SolsType, charptr) type = createSolsType(STT_CHAR);
if (type.error) {
Estr e = CREATE_ESTR(type.as.error);
APPEND_ESTR(e, " (in identifyToken() function)");
return Error(SolsToken, charptr, e.str);
}
SolsToken tok = {
.type = STT_TYPE,
.as.type = type.as.success
};
return Success(SolsToken, charptr, tok);
}
if (strcmp(token, "bool") == 0) {
ResultType(SolsType, charptr) type = createSolsType(STT_BOOL);
if (type.error) {
Estr e = CREATE_ESTR(type.as.error);
APPEND_ESTR(e, " (in identifyToken() function)");
return Error(SolsToken, charptr, e.str);
}
SolsToken tok = {
.type = STT_TYPE,
.as.type = type.as.success
};
return Success(SolsToken, charptr, tok);
}
// Find if it's a reserved keyword/operator
ResultType(SolsTokenType, Nothing) result = getTokenType(token);
if (!result.error) {
return Success(SolsToken, charptr, {result.as.success});
}
// No appropriate token found, it's an identifier (I hope)
SolsToken id = {
.type = STT_IDENTIFIER,
.as.idName = malloc(strlen(token) + 1)
};
if (id.as.idName == NULL) {
return Error(SolsToken, charptr, "Couldn't allocate memory to copy string (in identifyToken() function)");
}
strcpy(id.as.idName, token);
return Success(SolsToken, charptr, id);
}
char* createLexingError(size_t lineNum, char* line, char* why) {
Estr error = CREATE_ESTR(ESC_RESET ESC_BOLD ESC_RED_FG "Lexing Error " ESC_RESET ESC_YELLOW_FG "on line ");
char buf[256];
snprintf(buf, sizeof(buf), "%zu", lineNum);
APPEND_ESTR(error, buf);
APPEND_ESTR(error, ":\n\n" ESC_RESET ESC_BLUE_FG " ");
APPEND_ESTR(error, line);
APPEND_ESTR(error, "\n\n");
APPEND_ESTR(error, ESC_RESET ESC_MAGENTA_FG "-> ");
APPEND_ESTR(error, why);
APPEND_ESTR(error, "\n");
return error.str;
}
ResultType(Nothing, charptr) lex(SolsLexer* lexer) {
if (lexer->input == NULL) {
return Error(Nothing, charptr, "Lexer is not initialised");
}
Estr buf = CREATE_ESTR("");
bool inString = false;
size_t lineNum = 1;
size_t lineStart = 0;
Estr currentLine = CREATE_ESTR("");
for (; lineStart < lexer->inputsize; lineStart++) {
if (lexer->input[lineStart] == '\n') {
break;
}
char tmp[] = {lexer->input[lineStart], '\0'};
APPEND_ESTR(currentLine, tmp);
}
bool skipDelimiter = false;
for (;;) {
ResultType(char, Nothing) chr = lexerConsume(lexer);
if (chr.error) {
break;
}
skipDelimiter = false;
if (chr.as.success == '/' && !inString) {
ResultType(char, Nothing) peek = lexerPeek(lexer, 1);
if (!peek.error && peek.as.success == '/') {
// Consume characters until \n or EOF
while (true) {
ResultType(char, Nothing) next = lexerPeek(lexer, 1);
if (next.error || next.as.success == '\n') break;
lexerConsume(lexer);
}
continue;
} else if (!peek.error && peek.as.success == '*') {
// Skip the *
lexerConsume(lexer);
// Consume characters until */ or EOF
while (true) {
ResultType(char, Nothing) next = lexerConsume(lexer);
if (next.error) break;
if (next.as.success == '\n') {
lineNum++;
DESTROY_ESTR(currentLine);
currentLine = CREATE_ESTR("");
lineStart = lexer->current;
for (size_t i = lineStart; i < lexer->inputsize; i++) {
if (lexer->input[i] == '\n') break;
char tmp[] = {lexer->input[i], '\0'};
APPEND_ESTR(currentLine, tmp);
}
}
if (next.as.success == '*') {
ResultType(char, Nothing) peek2 = lexerPeek(lexer, 1);
if (!peek2.error && peek2.as.success == '/') {
lexerConsume(lexer); // skip /
break;
}
}
}
continue;
}
}
if (chr.as.success == '#' && !inString) {
while (true) {
ResultType(char, Nothing) next = lexerPeek(lexer, 1);
if (next.error || next.as.success == '\n') break;
lexerConsume(lexer);
}
continue;
}
if (chr.as.success == '\n') {
lineNum++;
DESTROY_ESTR(currentLine);
currentLine = CREATE_ESTR("");
lineStart = lexer->current;
for (size_t i = lineStart; i < lexer->inputsize; i++) {
if (lexer->input[i] == '\n') {
break;
}
char buf_tmp[] = {lexer->input[i], '\0'};
APPEND_ESTR(currentLine, buf_tmp);
}
}
if (inString) {
char str[2] = { chr.as.success, '\0' };
APPEND_ESTR(buf, str);
if (chr.as.success == '"') {
inString = false;
}
continue;
}
switch (chr.as.success) {
case '"': {
inString = true;
APPEND_ESTR(buf, "\"");
break;
}
// These characters require themselves added seperately from the previous token.
case '{':
case '}':
case '(':
case ')':
case ',':
case ':':
case ';':
case '\n':
{
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, chr.as.success, &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
if (skipDelimiter) break;
char tmp[] = {chr.as.success, '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
break;
}
// These characters may be repeated, or followed by an equals sign.
case '+':
case '-': {
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, chr.as.success, &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
// skipDelimiter is unlikely here but handled just in case
if (skipDelimiter) break;
ResultType(char, Nothing) next = lexerPeek(lexer, 1);
if (next.error || (next.as.success != chr.as.success && next.as.success != '=')) {
char tmp[] = {chr.as.success, '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
}
if (next.as.success == '=') {
char tmp[] = {chr.as.success, '=', '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
lexerConsume(lexer);
}
if (next.as.success == chr.as.success) {
char tmp[] = {chr.as.success, chr.as.success, '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
lexerConsume(lexer);
}
break;
}
// These characters may be followed by an equals sign, or nothing else.
case '=':
case '!':
case '>':
case '<':
case '*':
case '/': {
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, chr.as.success, &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
if (skipDelimiter) break;
ResultType(char, Nothing) next = lexerPeek(lexer, 1);
if (next.error || next.as.success != '=') {
char tmp[] = {chr.as.success, '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
}
if (next.as.success == '=') {
char tmp[] = {chr.as.success, '=', '\0'};
ResultType(SolsToken, charptr) result = identifyToken(tmp);
if (result.error) {
char* err = createLexingError(lineNum, currentLine.str, result.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
result.as.success.line.num = lineNum;
result.as.success.line.content = malloc(strlen(currentLine.str) + 1);
if (result.as.success.line.content == NULL) {
char* err = createLexingError(lineNum, currentLine.str, "Couldn't allocate memory to store line information in token (in lex() function)");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
strcpy(result.as.success.line.content, currentLine.str);
addTokenToSolsTokens(&lexer->output, result.as.success);
lexerConsume(lexer);
}
break;
}
// '.' requires checking whether it's a number or an identifier after
case '.': {
ResultType(char, Nothing) peek = lexerPeek(lexer, 1);
// If the next character is a digit, then this is a literal, not a member access dot.
if (!peek.error && isdigit(peek.as.success)) {
APPEND_ESTR(buf, ".");
} else {
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, chr.as.success, &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
if (!skipDelimiter) {
addTokenToSolsTokens(&lexer->output, (SolsToken) {.type = STT_DOT});
}
}
break;
}
// This whitespace splits the program and does not get appended as it's own token.
case '\t':
case ' ': {
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, chr.as.success, &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
break;
}
default: {
char newchar[] = {chr.as.success, '\0'};
APPEND_ESTR(buf, newchar);
break;
}
}
// Check whether we need to parse types
if (strcmp(buf.str, "fun") == 0) {
if (!lexerPeek(lexer, 1).error && lexerPeek(lexer, 1).as.success == '(') {
// do stuff
}
}
if (strcmp(buf.str, "template") == 0 ) {
if (!lexerPeek(lexer, 1).error && lexerPeek(lexer, 1).as.success == '(') {
}
}
if (strcmp(buf.str, "object") == 0 ) {
if (!lexerPeek(lexer, 1).error && lexerPeek(lexer, 1).as.success == '(') {
}
}
}
ResultType(Nothing, charptr) res = identifyAndAdd(lexer, &buf, &lineNum, &currentLine, '\0', &skipDelimiter);
if (res.error) {
char* err = createLexingError(lineNum, currentLine.str, res.as.error);
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
if (inString) {
char* err = createLexingError(lineNum, currentLine.str, "Unterminated string");
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Error(Nothing, charptr, err);
}
DESTROY_ESTR(buf);
DESTROY_ESTR(currentLine);
return Success(Nothing, charptr, (Nothing){});
}
// Placeholder for lexing type signatures into tokens.
// The lexer argument is not used yet; every call reports the same "WIP" error.
// Returns:
// Failure: char* pointing at a string literal (always, until this is implemented)
ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer) {
    (void) lexer;
    char* wipMessage = "WIP (in processTypeSignature() function)";
    return Error(Nothing, charptr, wipMessage);
}

76
src/lexer/lexer.h Normal file
View File

@@ -0,0 +1,76 @@
#ifndef LEXER_H
#define LEXER_H
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include "../include/error.h"
#include "../include/nothing.h"
#include "SolsType.h"
#include "SolsToken.h"
#include "SolsLiteral.h"
// A table pairing each token string (.str) with its token type (.type).
// This is only a declaration (extern); the array itself is defined elsewhere.
// Use the getTokenType() function to search this.
// NOTE(review): the struct tag _SolsTokenTypeMap starts with an underscore followed by
// an uppercase letter, which C reserves for the implementation - consider renaming.
extern struct _SolsTokenTypeMap {char* str; SolsTokenType type;} SolsTokenTypeMap[];
// Represents the current state of the lexer.
// .input is the Solstice program as written by the user.
// .inputsize is the upper bound used when indexing .input (indices below it are read).
// .output is the lexed Solstice program, which is constructed by the lex() function.
// .current is an index into .input marking the lexer's position; right after a '\n'
// has been read, lex() treats it as the start of the following line.
typedef struct SolsLexer {
char* input;
size_t inputsize;
SolsTokens output;
size_t current;
} SolsLexer;
Result(SolsLexer, charptr);
// Creates a lexer for use by the lex() function.
// input: the program text to lex (presumably what ends up in .input - TODO confirm
// whether it is copied or borrowed).
// Returns:
// Success: Constructed SolsLexer
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsLexer, charptr) createLexer(char* input);
// Uses the provided lexer to scan the code, and create tokens.
// The tokens are added to lexer->output as they are recognised.
// Returns:
// Success: Nothing
// Failure: char* detailing what went wrong (usually user failure or memory failure).
// The message is built by createLexingError() from the line number, the text of
// the offending line and a description of the problem.
ResultType(Nothing, charptr) lex(SolsLexer* lexer);
Result(char, Nothing);
// Peeks at an upcoming character in the lexer's input.
// ahead: offset of the character to look at; lex() passes 1 to inspect the
// character that follows the one it is currently handling.
// NOTE(review): presumably does not advance the lexer (lex() calls lexerConsume()
// separately when it wants to move on) - confirm against the implementation.
// Returns:
// Success: The character with offset ahead
// Failure: Nothing (requested character is out of bounds)
ResultType(char, Nothing) lexerPeek(SolsLexer* lexer, size_t ahead);
// Consumes the next character in the lexer.
// Returns:
// Success: The character that has just been consumed
// Failure: Nothing (requested character is out of bounds)
ResultType(char, Nothing) lexerConsume(SolsLexer* lexer);
// Helper function to classify tokens
// token: the text of a single token (e.g. "+=" or "{") as a NUL-terminated string.
// Line information is not supplied to this function: lex() sets .line.num and
// .line.content on the returned token itself.
// Returns:
// Success: A SolsToken which has all information needed from the token.
// Failure: char* detailing what went wrong (usually memory failure)
ResultType(SolsToken, charptr) identifyToken(const char* token);
Result(SolsTokenType, Nothing);
// Helper function to convert a char* into a SolsTokenType using the SolsTokenTypeMap.
// input: the token text to look up.
// Returns:
// Success: The corresponding SolsTokenType
// Failure: Nothing (meaning the token is likely an identifier)
ResultType(SolsTokenType, Nothing) getTokenType(const char* input);
// Helper function to lex type signatures into tokens
// FIXME this function is a work in progress: the current implementation ignores
// the lexer and always returns a Failure carrying a "WIP" message.
ResultType(Nothing, charptr) processTypeSignature(SolsLexer* lexer);
#endif