diff --git a/CMakeLists.txt b/CMakeLists.txt
index f9b734f..a7c57bb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,5 +9,7 @@ add_executable(groundc
     src/main.c
     src/parser.c
     src/parser.h
+    src/lexer.c
+    src/lexer.h
     src/types.c
     src/types.h)
diff --git a/src/lexer.c b/src/lexer.c
index 8cb9fc1..fa0fb19 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,14 +1,13 @@
 #include "lexer.h"
-#include
-#include
 #include
 #include
+#include
 
 void addTokenToLine(TokenLine* line, Token tok) {
     line->count++;
     Token* newTokens = realloc(line->tokens, line->count * sizeof(Token));
     if (!newTokens) {
-        perror("Failed to allocate token");
+        perror("Failed to allocate memory for token");
         exit(EXIT_FAILURE);
     }
     line->tokens = newTokens;
@@ -19,13 +18,30 @@ void addLineToLexed(LexedFile* lf, TokenLine line) {
     lf->lineCount++;
     TokenLine* newLines = realloc(lf->lines, lf->lineCount * sizeof(TokenLine));
     if (!newLines) {
-        perror("Failed to allocate line");
+        perror("Failed to allocate memory for line");
         exit(EXIT_FAILURE);
     }
     lf->lines = newLines;
     lf->lines[lf->lineCount - 1] = line;
 }
 
+void freeTokenLine(TokenLine* line) {
+    for (size_t i = 0; i < line->count; i++) {
+        free(line->tokens[i].text);
+    }
+    free(line->tokens);
+}
+
+void freeLexedFile(LexedFile* lf) {
+    if (lf == NULL) return;
+    for (size_t i = 0; i < lf->lineCount; i++) {
+        freeTokenLine(&lf->lines[i]);
+    }
+    free(lf->lines);
+    lf->lines = NULL;
+    lf->lineCount = 0;
+}
+
 LexedFile lexFile(const char* fileContents) {
     LexedFile result = {0};
     result.lines = NULL;
@@ -35,7 +51,7 @@ LexedFile lexFile(const char* fileContents) {
     currentLine.tokens = NULL;
     currentLine.count = 0;
 
-    char buf[1024] = {0};
+    char buf[4096] = {0};
     size_t bufLen = 0;
     bool inString = false;
     bool inChar = false;
@@ -44,6 +60,12 @@ LexedFile lexFile(const char* fileContents) {
     for (size_t i = 0; fileContents[i] != '\0'; i++) {
         char c = fileContents[i];
 
+        // Safety check: prevent buffer overflow
+        if (bufLen >= sizeof(buf) - 1) {
+            fprintf(stderr, "Error: Token too long (exceeds %zu characters)\n", sizeof(buf) - 1);
+            exit(EXIT_FAILURE);
+        }
+
         switch (c) {
             case '"':
                 if (!isComment) {
@@ -74,11 +96,15 @@ LexedFile lexFile(const char* fileContents) {
                         buf[bufLen] = '\0';
                         Token tok;
                         tok.text = strdup(buf);
-                        // Add tok to currentLine (need helper function)
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                         addTokenToLine(&currentLine, tok);
                         bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                     }
-                    // Add line to result (need helper function)
+                    // Add line to result
                     addLineToLexed(&result, currentLine);
                     // Reset for next line
                     currentLine.tokens = NULL;
@@ -96,8 +122,13 @@ LexedFile lexFile(const char* fileContents) {
                         buf[bufLen] = '\0';
                         Token tok;
                         tok.text = strdup(buf);
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                         addTokenToLine(&currentLine, tok);
                         bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                     }
                     addLineToLexed(&result, currentLine);
                     currentLine.tokens = NULL;
@@ -108,19 +139,29 @@ LexedFile lexFile(const char* fileContents) {
                 break;
 
             case ' ':
+            case '\t': // Also handle tabs as whitespace
                 if (!inString && !inChar) {
                     if (bufLen > 0 && !isComment) {
                         buf[bufLen] = '\0';
                         Token tok;
                         tok.text = strdup(buf);
+                        if (!tok.text) {
+                            perror("Failed to duplicate token string");
+                            exit(EXIT_FAILURE);
+                        }
                         addTokenToLine(&currentLine, tok);
                         bufLen = 0;
+                        memset(buf, 0, sizeof(buf));
                     }
                 } else {
                     buf[bufLen++] = c;
                 }
                 break;
 
+            case '\r': // Handle Windows line endings
+                // Just skip carriage returns
+                break;
+
             default:
                 if (!isComment) {
                     buf[bufLen++] = c;
@@ -129,13 +170,19 @@ LexedFile lexFile(const char* fileContents) {
         }
     }
 
-    // Handle any remaining content
-    if (bufLen > 0) {
+    // Handle any remaining content at end of file
+    if (bufLen > 0 && !isComment) {
         buf[bufLen] = '\0';
         Token tok;
         tok.text = strdup(buf);
+        if (!tok.text) {
+            perror("Failed to duplicate token string");
+            exit(EXIT_FAILURE);
+        }
         addTokenToLine(&currentLine, tok);
     }
+
+    // Add final line if it has content
     if (currentLine.count > 0) {
         addLineToLexed(&result, currentLine);
     }
diff --git a/src/main.c b/src/main.c
index 51185d7..b4ad038 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,4 +1,3 @@
-#include "types.h"
 #include "parser.h"
 #include
 
diff --git a/src/parser.c b/src/parser.c
index 7a4d9e8..3a79fc7 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,11 +1,14 @@
 #include "parser.h"
-#include "types.h"
 #include "lexer.h"
+#include "types.h"
 #include
+#include
+#include
 
 GroundProgram createGroundProgram() {
     GroundProgram gp;
     gp.size = 0;
+    gp.instructions = NULL;
     return gp;
 }
 
@@ -14,19 +17,211 @@ void addInstructionToProgram(GroundProgram* gp, GroundInstruction instruction) {
     GroundInstruction* ptr = realloc(gp->instructions, gp->size * sizeof(GroundInstruction));
     if (ptr == NULL) {
         perror("Couldn't allocate memory for instruction");
-        exit(1);
+        exit(EXIT_FAILURE);
     }
     gp->instructions = ptr;
     gp->instructions[gp->size - 1] = instruction;
 }
 
 void freeGroundProgram(GroundProgram* gp) {
-    for (int i = 0; i < gp->size; i++) {
+    for (size_t i = 0; i < gp->size; i++) {
         freeGroundInstruction(&gp->instructions[i]);
     }
+    free(gp->instructions);
+    gp->instructions = NULL;
+    gp->size = 0;
 }
 
-GroundProgram parseFile(LexedFile file) {
-    GroundProgram gp;
-    return gp;
+// Helper: true if str is an optional sign followed by one or more decimal digits
+static bool isInteger(const char* str) {
+    if (str == NULL || *str == '\0') return false;
+
+    size_t i = 0;
+    if (str[0] == '-' || str[0] == '+') i++;
+
+    if (str[i] == '\0') return false;
+
+    for (; str[i] != '\0'; i++) {
+        if (!isdigit((unsigned char)str[i])) return false; // <ctype.h> is undefined for negative char
+    }
+    return true;
+}
+
+// Helper: true if str is an optionally signed decimal with exactly one '.' and at least one digit
+static bool isDouble(const char* str) {
+    if (str == NULL || *str == '\0') return false;
+
+    bool hasDecimal = false;
+    bool hasDigit = false;
+    size_t i = 0;
+
+    if (str[0] == '-' || str[0] == '+') i++;
+
+    if (str[i] == '\0') return false;
+
+    for (; str[i] != '\0'; i++) {
+        if (str[i] == '.') {
+            if (hasDecimal) return false;
+            hasDecimal = true;
+        } else if (isdigit((unsigned char)str[i])) {
+            hasDigit = true;
+        } else {
+            return false;
+        }
+    }
+    return hasDecimal && hasDigit; // a bare "." is not a number
+}
+
+// Helper: Parse one token into a GroundArg (literal value, or a $ & % ! - prefixed reference)
+static GroundArg parseArgument(const char* token) {
+    if (token[0] == '"') {
+        // String literal - remove quotes
+        size_t len = strlen(token);
+        char* strVal = strndup(token + 1, len >= 2 ? len - 2 : 0); // no size_t wrap when len < 2
+        if (!strVal) {
+            perror("Failed to duplicate string literal");
+            exit(EXIT_FAILURE);
+        }
+        GroundValue val = createStringGroundValue(strVal);
+        free(strVal);
+        return createValueGroundArg(val);
+    }
+    else if (token[0] == '\'') {
+        // Char literal
+        char charVal = token[1];
+        return createValueGroundArg(createCharGroundValue(charVal));
+    }
+    else if (strcmp(token, "true") == 0) {
+        return createValueGroundArg(createBoolGroundValue(true));
+    }
+    else if (strcmp(token, "false") == 0) {
+        return createValueGroundArg(createBoolGroundValue(false));
+    }
+    else if (token[0] == '$') {
+        // Value reference
+        return createRefGroundArg(VALREF, token + 1);
+    }
+    else if (token[0] == '&') {
+        // Direct reference
+        return createRefGroundArg(DIRREF, token + 1);
+    }
+    else if (token[0] == '%') {
+        // Line reference or label
+        return createRefGroundArg(LINEREF, token + 1);
+    }
+    else if (token[0] == '!') {
+        // Function reference
+        return createRefGroundArg(FNREF, token + 1);
+    }
+    else if (token[0] == '-') {
+        // Could be type reference or negative number
+        if (strlen(token) > 1 && !isdigit((unsigned char)token[1])) {
+            // Type reference (e.g., -int, -string)
+            return createRefGroundArg(LABEL, token + 1); // Using LABEL for type refs
+        }
+        // Fall through to number parsing
+    }
+
+    // Try to parse as number
+    if (isInteger(token)) {
+        int64_t intVal = strtoll(token, NULL, 10); // defined on overflow, unlike atoll
+        return createValueGroundArg(createIntGroundValue(intVal));
+    }
+    else if (isDouble(token)) {
+        double dblVal = strtod(token, NULL); // defined on overflow, unlike atof
+        return createValueGroundArg(createDoubleGroundValue(dblVal));
+    }
+
+    // Unknown token type
+    fprintf(stderr, "Warning: Unknown argument type for token: %s\n", token);
+    return createRefGroundArg(VALREF, token);
+}
+
+// Helper: Convert instruction string to GroundInstType; prints an error and exits on unknown names
+static GroundInstType getInstructionType(const char* inst) {
+    if (strcmp(inst, "if") == 0) return IF;
+    if (strcmp(inst, "jump") == 0) return JUMP;
+    if (strcmp(inst, "end") == 0) return END;
+    if (strcmp(inst, "input") == 0 || strcmp(inst, "stdin") == 0) return INPUT;
+    if (strcmp(inst, "print") == 0 || strcmp(inst, "stdout") == 0) return PRINT;
+    if (strcmp(inst, "println") == 0 || strcmp(inst, "stdlnout") == 0) return PRINTLN;
+    if (strcmp(inst, "set") == 0) return SET;
+    if (strcmp(inst, "gettype") == 0) return GETTYPE;
+    if (strcmp(inst, "exists") == 0) return EXISTS;
+    if (strcmp(inst, "setlist") == 0) return SETLIST;
+    if (strcmp(inst, "setlistat") == 0) return SETLISTAT;
+    if (strcmp(inst, "getlistat") == 0) return GETLISTAT;
+    if (strcmp(inst, "getlistsize") == 0) return GETLISTSIZE;
+    if (strcmp(inst, "listappend") == 0) return LISTAPPEND;
+    if (strcmp(inst, "getstrsize") == 0) return GETSTRSIZE;
+    if (strcmp(inst, "getstrcharat") == 0) return GETSTRCHARAT;
+    if (strcmp(inst, "add") == 0) return ADD;
+    if (strcmp(inst, "subtract") == 0) return SUBTRACT;
+    if (strcmp(inst, "multiply") == 0) return MULTIPLY;
+    if (strcmp(inst, "divide") == 0) return DIVIDE;
+    if (strcmp(inst, "equal") == 0) return EQUAL;
+    if (strcmp(inst, "inequal") == 0) return INEQUAL;
+    if (strcmp(inst, "not") == 0) return NOT;
+    if (strcmp(inst, "greater") == 0) return GREATER;
+    if (strcmp(inst, "lesser") == 0) return LESSER;
+    if (strcmp(inst, "stoi") == 0) return STOI;
+    if (strcmp(inst, "stod") == 0) return STOD;
+    if (strcmp(inst, "tostring") == 0) return TOSTRING;
+    if (strcmp(inst, "fun") == 0) return FUN;
+    if (strcmp(inst, "return") == 0) return RETURN;
+    if (strcmp(inst, "endfun") == 0) return ENDFUN;
+    if (strcmp(inst, "pusharg") == 0) return PUSHARG;
+    if (strcmp(inst, "call") == 0) return CALL;
+    if (strcmp(inst, "struct") == 0) return STRUCT;
+    if (strcmp(inst, "endstruct") == 0) return ENDSTRUCT;
+    if (strcmp(inst, "init") == 0) return INIT;
+    if (strcmp(inst, "use") == 0) return USE;
+    if (strcmp(inst, "extern") == 0) return EXTERN;
+
+    fprintf(stderr, "Error: Unknown instruction: %s\n", inst);
+    exit(EXIT_FAILURE);
+}
+
+GroundProgram parseFile(const char* file) {
+    GroundProgram program = createGroundProgram();
+
+    // First, lex the file
+    LexedFile lexed = lexFile(file);
+
+    // Parse each line
+    for (size_t lineNum = 0; lineNum < lexed.lineCount; lineNum++) {
+        TokenLine line = lexed.lines[lineNum];
+
+        // Skip empty lines
+        if (line.count == 0) {
+            continue;
+        }
+
+        // Check if first token is a label
+        size_t tokenStart = 0;
+        if (line.tokens[0].text[0] == '@') {
+            // TODO: Handle labels - you might want to store them separately
+            // For now, skip to next token
+            tokenStart = 1;
+            if (tokenStart >= line.count) continue;
+        }
+
+        // First non-label token is the instruction
+        const char* instToken = line.tokens[tokenStart].text;
+        GroundInstType instType = getInstructionType(instToken);
+        GroundInstruction inst = createGroundInstruction(instType);
+
+        // Parse remaining tokens as arguments
+        for (size_t i = tokenStart + 1; i < line.count; i++) {
+            GroundArg arg = parseArgument(line.tokens[i].text);
+            addArgToInstruction(&inst, arg);
+        }
+
+        addInstructionToProgram(&program, inst);
+    }
+
+    // Clean up lexed data
+    freeLexedFile(&lexed);
+
+    return program;
 }
diff --git a/src/parser.h b/src/parser.h
index f696062..083386f 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -22,6 +22,6 @@ void addInstructionToProgram(GroundProgram* gp, GroundInstruction instruction);
 void freeGroundProgram(GroundProgram* gp);
 
 // Parses the file into a GroundProgram
-GroundProgram parseFile(LexedFile file);
+GroundProgram parseFile(const char* file);
 
 #endif