starting work on assembler

This commit is contained in:
SpookyDervish
2025-12-21 09:44:16 +11:00
parent b1ff26bcbb
commit e92f097afe
16 changed files with 315 additions and 29 deletions

View File

@@ -8,7 +8,8 @@
But uhh that's the name of the VM itself, the name of the programming language I made is Sylt, named after the German island.
To compile for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/asm/tokenize.c -o VMBL -O3`
To compile the VM for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/file_utils.c -o vmbl -O3`
To compile SASM for Linux: `gcc src/asm/sasm.c src/asm/tokenize.c src/file_utils.c src/asm/assembler.c -o sasm -O3`
## Syntax
### Example "Hello, World!" Program

6
fib.sasm Normal file
View File

@@ -0,0 +1,6 @@
push 0
push 1
dup 1
dup 1
add
jump 2

BIN
fib.vmbl

Binary file not shown.

BIN
sasm Normal file

Binary file not shown.

17
src/asm/assembler.c Normal file
View File

@@ -0,0 +1,17 @@
#include "assembler.h"
#include <stdlib.h>
/*
    Drains every token from `tokenizer` until TOKEN_EOF is reached.

    No code is generated yet; the loop only walks the token stream. Each
    token's `value` buffer is heap-allocated by the tokenizer and owned by the
    caller, so it is released here once the token has been consumed.
*/
void assemble(Tokenizer *tokenizer) {
    Token token = getCurrentToken(tokenizer);

    while (token.type != TOKEN_EOF) {
        // TODO: translate the token into bytecode before discarding it.
        free(token.value);
        token = getCurrentToken(tokenizer);
    }

    // The EOF token normally carries no value; free(NULL) is a no-op.
    free(token.value);
}

8
src/asm/assembler.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef ASSEMBLER_H
#define ASSEMBLER_H
#include "tokenize.h"
// Consumes tokens from `tokenizer` until TOKEN_EOF is returned.
void assemble(Tokenizer *tokenizer);
#endif // !ASSEMBLER_H

18
src/asm/instructions.h Normal file
View File

@@ -0,0 +1,18 @@
#ifndef INSTRUCTIONS_H
#define INSTRUCTIONS_H
#include <stdint.h> // uint8_t

// Capacity of the per-instruction argument table below.
#define MAX_ARGS 3

// Kinds of argument an instruction can declare.
typedef enum
{
    ARG_TYPE_INT
} ArgType;

// Static description of one instruction.
typedef struct
{
    char *mnemonic;         // presumably the instruction's textual name -- confirm with users of this table
    uint8_t argCount;       // presumably how many entries of `args` are meaningful (<= MAX_ARGS)
    ArgType args[MAX_ARGS]; // declared type of each argument slot
} InstructionInfo;
#endif // !INSTRUCTIONS_H

27
src/asm/sasm.c Normal file
View File

@@ -0,0 +1,27 @@
#include <stdio.h>
#include <stdlib.h>
#include "tokenize.h"
#include "../file_utils.h"
#include "assembler.h"
/*
    SASM entry point.

    Usage: sasm <file_path> <output_path>

    Reads the source file named by argv[1] and runs the assembler over it.
    Returns EXIT_FAILURE when too few arguments are given so that scripts can
    detect the misuse; the usage text goes to stderr for the same reason.
*/
int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr, "Usage: sasm <file_path> <output_path>\n");
        return EXIT_FAILURE;
    }

    // TODO: argv[2] (the output path) is required but not used yet.
    char *buffer = readStringFromFile(argv[1]);

    Tokenizer tokenizer = {
        .source = buffer,
        .column = 1,
        .line = 1,
        .pos = 0
    };

    assemble(&tokenizer);

    // The tokenizer advances its own `source` pointer, so release the
    // original allocation rather than tokenizer.source.
    free(buffer);
    return EXIT_SUCCESS;
}

View File

@@ -1,24 +1,86 @@
#include "tokenize.h"
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
/*
    Returns a newly allocated, NUL-terminated copy of the name that starts at
    the tokenizer's current position, and advances the tokenizer (source,
    column and pos) past it.

    Exits the process with status 1 if memory cannot be allocated.

    * IMPORTANT: remember to free the buffer when you're done with it!
*/
char *parseName(Tokenizer *tokenizer) {
    const char *start = tokenizer->source;

    // measure the run of characters that are valid in a name
    int length = 0;
    while (isName(start[length])) {
        length++;
    }

    char *buffer = malloc((size_t)length + 1); // add 1 byte for null terminator
    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        exit(1);
    }

    // strncpy would not terminate the copy here because the source continues
    // past `length`, so copy the bytes and terminate explicitly.
    memcpy(buffer, start, (size_t)length);
    buffer[length] = '\0';

    tokenizer->source += length;
    tokenizer->column += length;
    tokenizer->pos += length;

    return buffer;
}
/*
    Returns a newly allocated, NUL-terminated copy of the run of
    non-whitespace characters that starts at the tokenizer's current position,
    and advances the tokenizer (source, column and pos) past it.

    The terminating whitespace (including a newline) is left unconsumed so the
    caller's whitespace handling accounts for it exactly once.

    Exits the process with status 1 if memory cannot be allocated.

    * IMPORTANT: remember to free the buffer when you're done with it!
*/
char *parseNumber(Tokenizer *tokenizer) {
    const char *start = tokenizer->source;

    // <ctype.h> functions require a value representable as unsigned char
    int length = 0;
    while (start[length] != '\0' && !isspace((unsigned char)start[length])) {
        length++;
    }

    char *buffer = malloc((size_t)length + 1); // add 1 byte for null terminator
    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        exit(1);
    }

    // strncpy would not terminate the copy here because the source continues
    // past `length`, so copy the bytes and terminate explicitly.
    memcpy(buffer, start, (size_t)length);
    buffer[length] = '\0';

    tokenizer->source += length;
    tokenizer->column += length;
    tokenizer->pos += length;

    return buffer;
}
// Steps the tokenizer over exactly one character on the current line:
// the source pointer, column and absolute position each move forward by one.
void parseWhitespace(Tokenizer *tokenizer) {
    tokenizer->source += 1;
    tokenizer->column += 1;
    tokenizer->pos += 1;
}
// Returns true when `character` may appear in a numeric literal:
// a decimal digit or the '.' separator.
bool isNumber(char character) {
    // <ctype.h> functions require a value representable as unsigned char
    return isdigit((unsigned char)character) != 0 || character == '.';
}
bool isName(char character) {
@@ -26,17 +88,72 @@ bool isName(char character) {
}
/*
    Scans and returns the next token from `tokenizer`, advancing it.

    Spaces, tabs, '\r' and '\n' before the token are skipped; each '\n' bumps
    the line counter and resets the column to 1. At the end of the source a
    TOKEN_EOF token with no value is returned.

    A token that starts with a letter is read with parseName() and classified
    as TOKEN_INSTRUCTION when it matches an entry of INSTRUCTION_NAMES,
    otherwise as TOKEN_NAME. A token that starts with a digit is read with
    parseNumber() and classified as TOKEN_INT_LITERAL. Any other character is
    reported on stderr and terminates the process with status 1.

    The returned token's `value` (when set) is heap-allocated and must be
    freed by the caller.
*/
Token getCurrentToken(Tokenizer *tokenizer) {
    // Skip leading whitespace in a loop rather than one recursive call per
    // character, so long blank runs cannot grow the call stack.
    for (;;) {
        char c = *tokenizer->source;

        if (c == ' ' || c == '\t') {
            parseWhitespace(tokenizer);
        } else if (c == '\n') {
            tokenizer->column = 1;
            tokenizer->line++;
            tokenizer->pos++;
            tokenizer->source++;
        } else if (c == '\r') {
            // carriage returns do not occupy a column
            tokenizer->pos++;
            tokenizer->source++;
        } else {
            break;
        }
    }

    // Remaining fields are zero-initialised, so `value` starts out NULL.
    Token token = (Token){
        .line = tokenizer->line,
    };

    // <ctype.h> functions require a value representable as unsigned char
    unsigned char current = (unsigned char)*tokenizer->source;

    if (current == '\0') {
        token.type = TOKEN_EOF;
        return token;
    }

    if (isalpha(current)) {
        char *tokenValue = parseName(tokenizer);
        token.value = tokenValue;

        // by default the token is a name until we find an instruction
        token.type = TOKEN_NAME;

        size_t instructionCount = sizeof(INSTRUCTION_NAMES) / sizeof(INSTRUCTION_NAMES[0]);
        for (size_t i = 0; i < instructionCount; i++) {
            if (strcmp(INSTRUCTION_NAMES[i], tokenValue) == 0) {
                token.type = TOKEN_INSTRUCTION;
                break;
            }
        }
    } else if (isdigit(current)) {
        token.value = parseNumber(tokenizer);

        // TODO: floating point numbers
        token.type = TOKEN_INT_LITERAL;
    } else {
        fprintf(stderr, "Invalid token `%c` on line %d, column %d\n", *tokenizer->source, tokenizer->line, tokenizer->column);
        exit(1);
    }

    return token;
}
char* tokenTypeAsCStr(TokenType type) {
@@ -49,6 +166,14 @@ char* tokenTypeAsCStr(TokenType type) {
case TOKEN_INT_LITERAL:
return "INT_LITERAL";
break;
case TOKEN_EOF:
return "<EOF>";
break;
case TOKEN_NAME:
return "NAME";
break;
default:
break;

View File

@@ -3,9 +3,30 @@
#include <stdbool.h>
// Instruction mnemonics recognised by the tokenizer. A name token whose text
// equals one of these entries (compared with strcmp) is classified as
// TOKEN_INSTRUCTION. The lookup is a linear scan, so entry order does not
// affect matching.
// NOTE: being `static` in a header, every translation unit that includes
// this file gets its own copy of the table.
static char *INSTRUCTION_NAMES[] = {
"push",
"drop",
"add",
"sub",
"mul",
"div",
"eq",
"neq",
"gt",
"gte",
"lt",
"lte",
"dup",
"jc",
"halt",
"jump"
};
// Classification assigned to each token by getCurrentToken().
typedef enum
{
TOKEN_EOF, // just used when looping over the tokens list
// TOKEN_EOF is the first enumerator (value 0), so a zero-initialised Token has this type.
TOKEN_INSTRUCTION, // a name that matches an entry of INSTRUCTION_NAMES
TOKEN_NAME, // any other name
TOKEN_INT_LITERAL, // a token that starts with a decimal digit
} TokenType;
@@ -13,6 +34,7 @@ typedef struct
{
TokenType type;
char* value;
unsigned int line;
} Token;
typedef struct
@@ -25,9 +47,12 @@ typedef struct
// Character classification helpers for numeric literals and names.
bool isNumber(char character);
bool isName(char character);
// Scans the next token from the tokenizer and advances it; returns a
// TOKEN_EOF token at the end of the source. When the token has a value it is
// a heap-allocated buffer obtained from parseName()/parseNumber().
Token getCurrentToken(Tokenizer *tokenizer);
// Returns a string naming the given token type (e.g. "INT_LITERAL", "NAME").
char* tokenTypeAsCStr(TokenType type);
// Copies the name at the tokenizer's current position into a malloc'd buffer
// and advances the tokenizer past it. The caller frees the buffer.
char *parseName(Tokenizer *tokenizer);
// Copies the run of non-whitespace characters at the tokenizer's current
// position into a malloc'd buffer and advances the tokenizer past it.
// The caller frees the buffer.
char *parseNumber(Tokenizer *tokenizer);
// Advances the tokenizer by one character on the current line.
void parseWhitespace(Tokenizer *tokenizer);
#endif // !TOKENIZE_H

46
src/file_utils.c Normal file
View File

@@ -0,0 +1,46 @@
#include "file_utils.h"
/*
    Reads the whole file at `file_path` into a newly allocated buffer and
    appends a '\0' terminator, so the result can be used as a C string when
    the file contains text.

    The file is opened in binary mode so that the size reported by ftell()
    matches the number of bytes fread() delivers on every platform.

    On any failure an error is printed to stderr and the process exits with
    status 1. The caller owns the returned buffer and must free() it.
*/
char *readStringFromFile(char* file_path) {
    FILE *inputFile = fopen(file_path, "rb");
    if (inputFile == NULL) {
        perror("SASM: failed to open file");
        exit(1);
    }

    if (fseek(inputFile, 0, SEEK_END) != 0) {
        perror("SASM: failed to seek to end of file");
        fclose(inputFile);
        exit(1);
    }

    long file_size = ftell(inputFile);
    if (file_size < 0) {
        perror("SASM: error getting file size");
        fclose(inputFile);
        exit(1);
    }

    if (fseek(inputFile, 0, SEEK_SET) != 0) {
        perror("SASM: failed to seek to start of file");
        fclose(inputFile);
        exit(1);
    }

    size_t length = (size_t)file_size;

    char *buffer = malloc(length + 1); // add 1 byte for null terminator
    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        fclose(inputFile);
        exit(1);
    }

    size_t bytesRead = fread(buffer, 1, length, inputFile);
    if (bytesRead != length) {
        perror("SASM: error reading file");
        free(buffer);
        fclose(inputFile);
        exit(1);
    }

    fclose(inputFile);

    // The terminator belongs at index `length`, the last byte of the
    // allocation; index length + 1 would be out of bounds.
    buffer[length] = '\0';
    return buffer;
}

9
src/file_utils.h Normal file
View File

@@ -0,0 +1,9 @@
#ifndef FILE_UTILS_H
#define FILE_UTILS_H
#include <stdlib.h>
#include <stdio.h>
// Reads the file at `file_path` into a malloc'd buffer and returns it.
// Prints an error and exits the process with status 1 on failure.
// The caller is responsible for freeing the returned buffer.
char *readStringFromFile(char* file_path);
#endif // !FILE_UTILS_H

View File

@@ -5,24 +5,20 @@
// Number of elements in `array`. Only valid for true arrays, not pointers.
// No trailing semicolon, so the macro can be used inside larger expressions.
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
VMBL_Instruction program[] = {
MAKE_INST_PUSH(124),
MAKE_INST_PUSH(2),
MAKE_INST_DIV,
MAKE_INST_HALT
MAKE_INST_PUSH(0),
MAKE_INST_PUSH(1),
MAKE_INST_DUP(1),
MAKE_INST_DUP(1),
MAKE_INST_ADD,
MAKE_INST_JMP(2)
};
/*
    VM entry point: loads the built-in `program` into a fresh VM state and
    runs it.
*/
int main(void) {
    // {0} is the portable zero-initialiser; the empty form {} is only
    // standard from C23 onwards.
    VMBL_State vmblState = {0};

    // NOTE(review): sizeof(program) is a byte count; verify whether
    // VMBL_LoadExecutable expects bytes or a number of instructions.
    VMBL_LoadExecutable(&vmblState, program, sizeof(program));

    //VMBL_SaveExecutable("fib.vmbl", program, sizeof(program));

    VMBL_StartVM(&vmblState);

    return 0;
}

View File

@@ -1,4 +1,5 @@
#include "vmbl.h"
#include "file_utils.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -93,6 +94,9 @@ VMBL_Exception VBML_ExecuteInstruction(VMBL_State *vmblState, VMBL_Instruction i
break;
case INSTRUCTION_NOP:
break;
default:
return (VMBL_Exception) { EXCEPTION_INVALID_OPCODE };
break;
@@ -126,7 +130,7 @@ void VMBL_StartVM(VMBL_State *vmblState) {
VMBL_Instruction instruction = vmblState->program[vmblState->ip++];
printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]);
//printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]);
VMBL_Exception exception = VBML_ExecuteInstruction(vmblState, instruction);
@@ -156,8 +160,8 @@ void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_
vmblState->programSize = programSize;
}
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) {
FILE *file = fopen(filePath, "rb");
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath) {
/*FILE *file = fopen(filePath, "rb");
if (file == NULL) {
perror("VMBL: Failed to open file");
@@ -170,11 +174,13 @@ void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) {
size_t programSize = size / sizeof(vmblState->program[0]);
VMBL_Instruction program[programSize];
fread(program, sizeof(program[0]), programSize, file);
fread(program, sizeof(program[0]), programSize, file);*/
VMBL_LoadExecutable(vmblState, program, programSize);
VMBL_Instruction *program = (VMBL_Instruction*)readStringFromFile(filePath);
fclose(file);
VMBL_LoadExecutable(vmblState, program, sizeof(program));
//fclose(file);
}

View File

@@ -14,6 +14,8 @@ typedef int64_t Word;
typedef enum
{
INSTRUCTION_NOP,
// stack operations
INSTRUCTION_PUSH,
INSTRUCTION_ADD,
@@ -34,7 +36,7 @@ typedef enum
INSTRUCTION_LESS_THAN,
INSTRUCTION_LESS_THAN_EQUAL,
INSTRUCTION_GREATER_THAN,
INSTRUCTION_GREATER_THAN_EQUAL,
INSTRUCTION_GREATER_THAN_EQUAL
} InstructionType;
typedef struct
@@ -77,7 +79,7 @@ void VMBL_Dump(VMBL_State vmblState, VMBL_Exception exception);
void VMBL_StartVM(VMBL_State *vmblState);
void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_t programSize);
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath);
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath);
void VMBL_SaveExecutable(const char* filePath, VMBL_Instruction *program, size_t programSize);
char *instructionTypeToCStr(InstructionType type);

BIN
vmbl

Binary file not shown.