diff --git a/README.md b/README.md index 9e8570a..0764826 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ But uhh that's the name of the VM itself, the name of the programming language I made is Sylt, named after the German island. -To compile for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/asm/tokenize.c -o VMBL -O3` +To compile the VM for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/file_utils.c -o vmbl -O3` +To compile SASM for Linux: `gcc src/asm/sasm.c src/asm/tokenize.c src/file_utils.c src/asm/assembler.c -o sasm -O3` ## Syntax ### Example "Hello, World!" Program diff --git a/fib.sasm b/fib.sasm new file mode 100644 index 0000000..e2f3af6 --- /dev/null +++ b/fib.sasm @@ -0,0 +1,6 @@ +push 0 +push 1 +dup 1 +dup 1 +add +jump 2 \ No newline at end of file diff --git a/fib.vmbl b/fib.vmbl index d8dfbeb..ba32b6d 100644 Binary files a/fib.vmbl and b/fib.vmbl differ diff --git a/sasm b/sasm new file mode 100644 index 0000000..8a90e9f Binary files /dev/null and b/sasm differ diff --git a/src/asm/assembler.c b/src/asm/assembler.c new file mode 100644 index 0000000..816b0e1 --- /dev/null +++ b/src/asm/assembler.c @@ -0,0 +1,17 @@ +#include "assembler.h" +#include + +void assemble(Tokenizer *tokenizer) { + Token token = getCurrentToken(tokenizer); + while (token.type != TOKEN_EOF) + { + //printf("%s: %s - line %d\n", tokenTypeAsCStr(token.type), token.value, token.line); + + + //free(token.value); + token = getCurrentToken(tokenizer); + + + } + free(token.value); +} \ No newline at end of file diff --git a/src/asm/assembler.h b/src/asm/assembler.h new file mode 100644 index 0000000..8c0b172 --- /dev/null +++ b/src/asm/assembler.h @@ -0,0 +1,8 @@ +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#include "tokenize.h" + +void assemble(Tokenizer *tokenizer); + +#endif // !ASSEMBLER_H diff --git a/src/asm/instructions.h b/src/asm/instructions.h new file mode 100644 index 0000000..b3f3729 --- /dev/null +++ b/src/asm/instructions.h @@ -0,0 +1,18 @@ +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#define MAX_ARGS 3 + +typedef enum +{ + ARG_TYPE_INT +} ArgType; + +typedef struct +{ + char *mnemonic, + uint8_t argCount, + ArgType args[MAX_ARGS] +} InstructionInfo; + +#endif // !INSTRUCTIONS_H diff --git a/src/asm/sasm.c b/src/asm/sasm.c new file mode 100644 index 0000000..6d72405 --- /dev/null +++ b/src/asm/sasm.c @@ -0,0 +1,27 @@ +#include +#include +#include "tokenize.h" +#include "../file_utils.h" +#include "assembler.h" + +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: sasm \n"); + return 0; + } + + char *buffer = readStringFromFile(argv[1]); + + //printf("%s\n", buffer); + Tokenizer tokenizer = { + .source = buffer, + .column = 1, + .line = 1, + .pos = 0 + }; + + assemble(&tokenizer); + free(buffer); + + return 0; +} \ No newline at end of file diff --git a/src/asm/tokenize.c b/src/asm/tokenize.c index b27750a..27040ce 100644 --- a/src/asm/tokenize.c +++ b/src/asm/tokenize.c @@ -1,24 +1,86 @@ #include "tokenize.h" #include #include +#include +#include +/* +Returns a string buffer containing the name that was parsed. +* IMPORTANT: remember to free the buffer when you're done with it! +*/ char *parseName(Tokenizer *tokenizer) { char *ptr = tokenizer->source; + // loop over the string until we hit something that isn't a name int i = 0; while (isName(*ptr++)) { i++; + tokenizer->column++; + tokenizer->pos++; + } - printf("%d\n", i); - return "hi"; + char *buffer = (char*)malloc(sizeof(char) * (i + 1)); // add 1 byte for null terminator + + if (buffer == NULL) { + fprintf(stderr, "SASM: failed to allocate memory\n"); + exit(1); + } + + buffer = strncpy(buffer, tokenizer->source, i); + + tokenizer->source += i; + + return buffer; } +char *parseNumber(Tokenizer *tokenizer) { + int i = 0; + char *ptr = tokenizer->source; + + while (*ptr) + { + if (*ptr == '\n') { + //tokenizer->column = 1; + tokenizer->pos++; + //tokenizer->line++; + break; + } + + if (isspace(*ptr)) + break; + + i++; + tokenizer->column++; + tokenizer->pos++; + *ptr++; + } + + char *buffer = (char*)malloc(sizeof(char) * (i + 1)); // add 1 byte for null terminator + if (buffer == NULL) { + fprintf(stderr, "SASM: failed to allocate memory\n"); + exit(1); + } + + buffer = strncpy(buffer, tokenizer->source, i); + + tokenizer->source += i; + + return buffer; +} + +void parseWhitespace(Tokenizer *tokenizer) { + tokenizer->pos++; + tokenizer->column++; + *tokenizer->source++; + return; +} + bool isNumber(char character) { - return isalnum(character) || character == '.'; + return isdigit(character) || character == '.'; } bool isName(char character) { @@ -26,17 +88,72 @@ bool isName(char character) { } Token getCurrentToken(Tokenizer *tokenizer) { - Token token = (Token){}; + Token token = (Token){ + .line = tokenizer->line, + }; + + if (!*tokenizer->source) { + token.type = TOKEN_EOF; + return token; + } + + switch (*tokenizer->source) { + + case ' ': + parseWhitespace(tokenizer); + return getCurrentToken(tokenizer); + break; + case '\t': + parseWhitespace(tokenizer); + return getCurrentToken(tokenizer); + break; + case '\n': + tokenizer->column = 1; + tokenizer->line++; + tokenizer->pos++; + *tokenizer->source++; + return getCurrentToken(tokenizer); + break; + case '\r': + tokenizer->pos++; + *tokenizer->source++; + return getCurrentToken(tokenizer); + break; + default: - if (isName(*tokenizer->source)) { + if (isalpha(*tokenizer->source)) { char *tokenValue = parseName(tokenizer); + token.value = tokenValue; + + // check if the token is in the list of instruction names + token.type = TOKEN_NAME; // by default the token is a name until we find an instruction + for (int i = 0; i < sizeof(INSTRUCTION_NAMES)/sizeof(INSTRUCTION_NAMES[0]); i++) { + + // if we found an instruction with the same name as the token + if (strcmp(INSTRUCTION_NAMES[i], tokenValue) == 0) { + token.type = TOKEN_INSTRUCTION; + break; + } + + } + } else if (isdigit(*tokenizer->source)) { + char *tokenValue = parseNumber(tokenizer); + token.value = tokenValue; + + // TODO: floating point numbers + token.type = TOKEN_INT_LITERAL; + } else { + fprintf(stderr, "Invalid token `%c` on line %d, column %d\n", *tokenizer->source, tokenizer->line, tokenizer->column); + exit(1); } break; } + + return token; } char* tokenTypeAsCStr(TokenType type) { @@ -49,6 +166,14 @@ char* tokenTypeAsCStr(TokenType type) { case TOKEN_INT_LITERAL: return "INT_LITERAL"; break; + + case TOKEN_EOF: + return ""; + break; + + case TOKEN_NAME: + return "NAME"; + break; default: break; diff --git a/src/asm/tokenize.h b/src/asm/tokenize.h index b2e17ce..1b88c85 100644 --- a/src/asm/tokenize.h +++ b/src/asm/tokenize.h @@ -3,9 +3,30 @@ #include +static char *INSTRUCTION_NAMES[] = { + "push", + "drop", + "add", + "sub", + "mul", + "div", + "eq", + "neq", + "gt", + "gte", + "lt", + "lte", + "dup", + "jc", + "halt", + "jump" +}; + typedef enum { + TOKEN_EOF, // just used when looping over the tokens list TOKEN_INSTRUCTION, + TOKEN_NAME, TOKEN_INT_LITERAL, } TokenType; @@ -13,6 +34,7 @@ typedef struct { TokenType type; char* value; + unsigned int line; } Token; typedef struct @@ -25,9 +47,12 @@ typedef struct bool isNumber(char character); bool isName(char character); + Token getCurrentToken(Tokenizer *tokenizer); char* tokenTypeAsCStr(TokenType type); char *parseName(Tokenizer *tokenizer); +char *parseNumber(Tokenizer *tokenizer); +void parseWhitespace(Tokenizer *tokenizer); #endif // !TOKENIZE_H diff --git a/src/file_utils.c b/src/file_utils.c new file mode 100644 index 0000000..a97c623 --- /dev/null +++ b/src/file_utils.c @@ -0,0 +1,46 @@ +#include "file_utils.h" + +char *readStringFromFile(char* file_path) { + FILE *inputFile = fopen(file_path, "r"); + if (inputFile == NULL) { + perror("SASM: failed to open file"); + exit(1); + } + if (fseek(inputFile, 0, SEEK_END)) { + perror("SASM: failed to seek to end of file"); + fclose(inputFile); + exit(1); + } + long file_size = ftell(inputFile); + if (file_size == -1) { + perror("SASM: error getting file size"); + fclose(inputFile); + exit(1); + } + + if (fseek(inputFile, 0, SEEK_SET) != 0) { + perror("SASM: failed to seek to start of file"); + fclose(inputFile); + exit(1); + } + + char *buffer = (char*)malloc(file_size + 1); + if (buffer == NULL) { + printf("SASM: failed to allocate memory"); + fclose(inputFile); + exit(1); + } + + size_t bytesRead = fread(buffer, 1, file_size, inputFile); + if (bytesRead != file_size) { + perror("SASM: error reading file"); + free(buffer); + fclose(inputFile); + exit(1); + } + fclose(inputFile); + + buffer[file_size+1] = '\0'; + + return buffer; +} \ No newline at end of file diff --git a/src/file_utils.h b/src/file_utils.h new file mode 100644 index 0000000..3fcc83b --- /dev/null +++ b/src/file_utils.h @@ -0,0 +1,9 @@ +#ifndef FILE_UTILS_H +#define FILE_UTILS_H + +#include +#include + +char *readStringFromFile(char* file_path); + +#endif // !FILE_UTILS_H \ No newline at end of file diff --git a/src/main.c b/src/main.c index ab015f4..8d07278 100644 --- a/src/main.c +++ b/src/main.c @@ -5,24 +5,20 @@ #define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])); VMBL_Instruction program[] = { - MAKE_INST_PUSH(124), - MAKE_INST_PUSH(2), - MAKE_INST_DIV, - MAKE_INST_HALT + MAKE_INST_PUSH(0), + MAKE_INST_PUSH(1), + MAKE_INST_DUP(1), + MAKE_INST_DUP(1), + MAKE_INST_ADD, + MAKE_INST_JMP(2) }; int main() { - //VMBL_State vmblState = {}; - - //VMBL_LoadExecutable(&vmblState, program, sizeof(program)); - //VMBL_StartVM(&vmblState); - - Tokenizer tokenizer = { - "push 1224\npush 2\ndiv\nhalt" - }; - Token token = getCurrentToken(&tokenizer); - - printf("%s\n", tokenTypeAsCStr(token.type)); + VMBL_State vmblState = {}; + VMBL_LoadExecutable(&vmblState, program, sizeof(program)); + //VMBL_SaveExecutable("fib.vmbl", program, sizeof(program)); + VMBL_StartVM(&vmblState); + return 0; } \ No newline at end of file diff --git a/src/vmbl.c b/src/vmbl.c index 529b319..c7fe64b 100644 --- a/src/vmbl.c +++ b/src/vmbl.c @@ -1,4 +1,5 @@ #include "vmbl.h" +#include "file_utils.h" #include #include #include @@ -93,6 +94,9 @@ VMBL_Exception VBML_ExecuteInstruction(VMBL_State *vmblState, VMBL_Instruction i break; + case INSTRUCTION_NOP: + break; + default: return (VMBL_Exception) { EXCEPTION_INVALID_OPCODE }; break; @@ -126,7 +130,7 @@ void VMBL_StartVM(VMBL_State *vmblState) { VMBL_Instruction instruction = vmblState->program[vmblState->ip++]; - printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]); + //printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]); VMBL_Exception exception = VBML_ExecuteInstruction(vmblState, instruction); @@ -156,8 +160,8 @@ void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_ vmblState->programSize = programSize; } -void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) { - FILE *file = fopen(filePath, "rb"); +void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath) { + /*FILE *file = fopen(filePath, "rb"); if (file == NULL) { perror("VMBL: Failed to open file"); @@ -170,11 +174,13 @@ void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) { size_t programSize = size / sizeof(vmblState->program[0]); VMBL_Instruction program[programSize]; - fread(program, sizeof(program[0]), programSize, file); + fread(program, sizeof(program[0]), programSize, file);*/ - VMBL_LoadExecutable(vmblState, program, programSize); + VMBL_Instruction *program = (VMBL_Instruction*)readStringFromFile(filePath); - fclose(file); + VMBL_LoadExecutable(vmblState, program, sizeof(program)); + + //fclose(file); } diff --git a/src/vmbl.h b/src/vmbl.h index a7d489a..445b2a2 100644 --- a/src/vmbl.h +++ b/src/vmbl.h @@ -14,6 +14,8 @@ typedef int64_t Word; typedef enum { + INSTRUCTION_NOP, + // stack operations INSTRUCTION_PUSH, INSTRUCTION_ADD, @@ -34,7 +36,7 @@ typedef enum INSTRUCTION_LESS_THAN, INSTRUCTION_LESS_THAN_EQUAL, INSTRUCTION_GREATER_THAN, - INSTRUCTION_GREATER_THAN_EQUAL, + INSTRUCTION_GREATER_THAN_EQUAL } InstructionType; typedef struct @@ -77,7 +79,7 @@ void VMBL_Dump(VMBL_State vmblState, VMBL_Exception exception); void VMBL_StartVM(VMBL_State *vmblState); void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_t programSize); -void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath); +void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath); void VMBL_SaveExecutable(const char* filePath, VMBL_Instruction *program, size_t programSize); char *instructionTypeToCStr(InstructionType type); diff --git a/vmbl b/vmbl index b4a7aff..2f8d27e 100644 Binary files a/vmbl and b/vmbl differ