starting work on assembler
This commit is contained in:
17
src/asm/assembler.c
Normal file
17
src/asm/assembler.c
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "assembler.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Drains the tokenizer: fetches tokens one after another until a token of
 * type TOKEN_EOF is returned. Nothing is emitted for the tokens yet.
 *
 * Only the value of the final token is passed to free(). The values of the
 * tokens seen before it are not released here.
 * NOTE(review): decide who owns token.value once the tokenizer's allocation
 * contract is settled.
 */
void assemble(Tokenizer *tokenizer) {
    Token current;

    for (current = getCurrentToken(tokenizer);
         current.type != TOKEN_EOF;
         current = getCurrentToken(tokenizer)) {
        /* tokens are only consumed for now */
    }

    free(current.value);
}
|
||||
8
src/asm/assembler.h
Normal file
8
src/asm/assembler.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef ASSEMBLER_H
|
||||
#define ASSEMBLER_H
|
||||
|
||||
#include "tokenize.h"
|
||||
|
||||
void assemble(Tokenizer *tokenizer);
|
||||
|
||||
#endif // !ASSEMBLER_H
|
||||
18
src/asm/instructions.h
Normal file
18
src/asm/instructions.h
Normal file
@@ -0,0 +1,18 @@
|
||||
#ifndef INSTRUCTIONS_H
#define INSTRUCTIONS_H

#include <stdint.h> // uint8_t

// Capacity of the per-instruction argument type list.
#define MAX_ARGS 3

// Kinds of argument an instruction can declare.
typedef enum
{
    ARG_TYPE_INT
} ArgType;

// Description of a single instruction.
// Members are terminated with ';' (a ',' between members is not valid C).
typedef struct
{
    char *mnemonic;         // instruction name as text
    uint8_t argCount;       // presumably how many entries of `args` are used -- confirm with the first user
    ArgType args[MAX_ARGS]; // argument types, at most MAX_ARGS of them
} InstructionInfo;

#endif // !INSTRUCTIONS_H
|
||||
27
src/asm/sasm.c
Normal file
27
src/asm/sasm.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "tokenize.h"
|
||||
#include "../file_utils.h"
|
||||
#include "assembler.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 3) {
|
||||
printf("Usage: sasm <file_path> <output_path>\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *buffer = readStringFromFile(argv[1]);
|
||||
|
||||
//printf("%s\n", buffer);
|
||||
Tokenizer tokenizer = {
|
||||
.source = buffer,
|
||||
.column = 1,
|
||||
.line = 1,
|
||||
.pos = 0
|
||||
};
|
||||
|
||||
assemble(&tokenizer);
|
||||
free(buffer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,24 +1,86 @@
|
||||
#include "tokenize.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/*
|
||||
Returns a string buffer containing the name that was parsed.
|
||||
* IMPORTANT: remember to free the buffer when you're done with it!
|
||||
*/
|
||||
char *parseName(Tokenizer *tokenizer) {
|
||||
char *ptr = tokenizer->source;
|
||||
|
||||
// loop over the string until we hit something that isn't a name
|
||||
int i = 0;
|
||||
while (isName(*ptr++))
|
||||
{
|
||||
i++;
|
||||
tokenizer->column++;
|
||||
tokenizer->pos++;
|
||||
|
||||
}
|
||||
|
||||
printf("%d\n", i);
|
||||
return "hi";
|
||||
char *buffer = (char*)malloc(sizeof(char) * (i + 1)); // add 1 byte for null terminator
|
||||
|
||||
if (buffer == NULL) {
|
||||
fprintf(stderr, "SASM: failed to allocate memory\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
buffer = strncpy(buffer, tokenizer->source, i);
|
||||
|
||||
tokenizer->source += i;
|
||||
|
||||
return buffer;
|
||||
|
||||
}
|
||||
|
||||
/*
    Consumes every character from the tokenizer's current position up to (but
    not including) the next whitespace character or the end of the source, and
    returns the consumed text as a newly allocated, NUL-terminated string.
    No digit validation happens here; the caller decides the token type.

    The tokenizer's source pointer, column and pos are advanced by the number
    of characters consumed. The terminating whitespace (including '\n') is left
    for the caller, so it is counted exactly once.

    Exits the process with status 1 if memory cannot be allocated.
    The caller owns the returned buffer and must free it.
*/
char *parseNumber(Tokenizer *tokenizer) {
    size_t length = 0;

    // <ctype.h> functions need a value representable as unsigned char
    while (tokenizer->source[length] != '\0'
           && !isspace((unsigned char)tokenizer->source[length])) {
        length++;
    }

    char *buffer = malloc(length + 1); // add 1 byte for null terminator
    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        exit(1);
    }

    // memcpy + explicit terminator: strncpy would leave the buffer unterminated
    memcpy(buffer, tokenizer->source, length);
    buffer[length] = '\0';

    tokenizer->source += length;
    tokenizer->column += length;
    tokenizer->pos += length;

    return buffer;
}
|
||||
|
||||
/*
    Steps the tokenizer over exactly one character: the source pointer, the
    column and the position each move forward by one. The character itself is
    not inspected.
*/
void parseWhitespace(Tokenizer *tokenizer) {
    tokenizer->pos += 1;
    tokenizer->column += 1;
    tokenizer->source += 1;
}
|
||||
|
||||
/*
    Returns true when `character` is a decimal digit or '.'.
*/
bool isNumber(char character) {
    // <ctype.h> functions need a value representable as unsigned char
    return isdigit((unsigned char)character) || character == '.';
}
|
||||
|
||||
bool isName(char character) {
|
||||
@@ -26,17 +88,72 @@ bool isName(char character) {
|
||||
}
|
||||
|
||||
/*
    Skips whitespace and returns the next token of the source.

    - ' ' and '\t' are skipped through parseWhitespace.
    - '\n' resets the column to 1 and increments the line.
    - '\r' is skipped without touching column or line.
    - At the end of the source a TOKEN_EOF token is returned (its value is NULL).
    - A token starting with a letter is read with parseName; it is a
      TOKEN_INSTRUCTION if its text matches an entry of INSTRUCTION_NAMES,
      otherwise a TOKEN_NAME.
    - A token starting with a digit is read with parseNumber and typed
      TOKEN_INT_LITERAL.
    - Any other character prints an error and exits with status 1.

    token.line is the tokenizer's line at the point where the token starts.
    token.value comes from parseName / parseNumber.
*/
Token getCurrentToken(Tokenizer *tokenizer) {
    // Skip whitespace in a loop rather than one recursive call per character,
    // so stack use does not grow with the amount of whitespace.
    for (;;) {
        char c = *tokenizer->source;

        if (c == ' ' || c == '\t') {
            parseWhitespace(tokenizer);
        } else if (c == '\n') {
            tokenizer->column = 1;
            tokenizer->line++;
            tokenizer->pos++;
            tokenizer->source++;
        } else if (c == '\r') {
            tokenizer->pos++;
            tokenizer->source++;
        } else {
            break;
        }
    }

    // Built after the skipping so that .line is the line the token starts on.
    Token token = (Token){
        .line = tokenizer->line,
    };

    if (*tokenizer->source == '\0') {
        token.type = TOKEN_EOF;
        return token;
    }

    // <ctype.h> functions need a value representable as unsigned char
    unsigned char first = (unsigned char)*tokenizer->source;

    if (isalpha(first)) {
        token.value = parseName(tokenizer);

        // by default the token is a name until we find an instruction
        token.type = TOKEN_NAME;

        size_t nameCount = sizeof(INSTRUCTION_NAMES) / sizeof(INSTRUCTION_NAMES[0]);
        for (size_t i = 0; i < nameCount; i++) {
            if (strcmp(INSTRUCTION_NAMES[i], token.value) == 0) {
                token.type = TOKEN_INSTRUCTION;
                break;
            }
        }
    } else if (isdigit(first)) {
        token.value = parseNumber(tokenizer);

        // TODO: floating point numbers
        token.type = TOKEN_INT_LITERAL;
    } else {
        fprintf(stderr, "Invalid token `%c` on line %d, column %d\n", *tokenizer->source, tokenizer->line, tokenizer->column);
        exit(1);
    }

    return token;
}
|
||||
|
||||
char* tokenTypeAsCStr(TokenType type) {
|
||||
@@ -49,6 +166,14 @@ char* tokenTypeAsCStr(TokenType type) {
|
||||
case TOKEN_INT_LITERAL:
|
||||
return "INT_LITERAL";
|
||||
break;
|
||||
|
||||
case TOKEN_EOF:
|
||||
return "<EOF>";
|
||||
break;
|
||||
|
||||
case TOKEN_NAME:
|
||||
return "NAME";
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -3,9 +3,30 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
// Mnemonics recognised as instructions. getCurrentToken compares each name
// token against these entries with strcmp and marks matches as
// TOKEN_INSTRUCTION.
// Both the pointers and the characters are const: the entries are string
// literals, which must never be written to.
static const char *const INSTRUCTION_NAMES[] = {
    "push",
    "drop",
    "add",
    "sub",
    "mul",
    "div",
    "eq",
    "neq",
    "gt",
    "gte",
    "lt",
    "lte",
    "dup",
    "jc",
    "halt",
    "jump"
};
|
||||
|
||||
// Categories of token handed out by the tokenizer.
typedef enum
{
    TOKEN_EOF = 0,         // just used when looping over the tokens list
    TOKEN_INSTRUCTION = 1, // a name that matches an entry of INSTRUCTION_NAMES
    TOKEN_NAME = 2,        // any other name
    TOKEN_INT_LITERAL = 3, // token that starts with a digit
} TokenType;
|
||||
|
||||
@@ -13,6 +34,7 @@ typedef struct
|
||||
{
|
||||
TokenType type;
|
||||
char* value;
|
||||
unsigned int line;
|
||||
} Token;
|
||||
|
||||
typedef struct
|
||||
@@ -25,9 +47,12 @@ typedef struct
|
||||
|
||||
bool isNumber(char character);
|
||||
bool isName(char character);
|
||||
|
||||
Token getCurrentToken(Tokenizer *tokenizer);
|
||||
char* tokenTypeAsCStr(TokenType type);
|
||||
|
||||
char *parseName(Tokenizer *tokenizer);
|
||||
char *parseNumber(Tokenizer *tokenizer);
|
||||
void parseWhitespace(Tokenizer *tokenizer);
|
||||
|
||||
#endif // !TOKENIZE_H
|
||||
|
||||
46
src/file_utils.c
Normal file
46
src/file_utils.c
Normal file
@@ -0,0 +1,46 @@
|
||||
#include "file_utils.h"
|
||||
|
||||
/*
 * Reads the whole file at `file_path` into a newly allocated buffer and
 * appends a '\0' after the last byte read.
 *
 * The file is opened in binary mode so that the size reported by ftell()
 * matches the number of bytes fread() delivers on every platform.
 *
 * Any failure (open, seek, tell, allocation, short read) prints a message to
 * stderr and exits the process with status 1, so the function only returns
 * on success.
 *
 * The caller owns the returned buffer and must free() it.
 */
char *readStringFromFile(char* file_path) {
    FILE *inputFile = fopen(file_path, "rb");
    if (inputFile == NULL) {
        perror("SASM: failed to open file");
        exit(1);
    }

    if (fseek(inputFile, 0, SEEK_END) != 0) {
        perror("SASM: failed to seek to end of file");
        fclose(inputFile);
        exit(1);
    }

    long file_size = ftell(inputFile);
    if (file_size < 0) {
        perror("SASM: error getting file size");
        fclose(inputFile);
        exit(1);
    }

    if (fseek(inputFile, 0, SEEK_SET) != 0) {
        perror("SASM: failed to seek to start of file");
        fclose(inputFile);
        exit(1);
    }

    size_t size = (size_t)file_size;

    char *buffer = malloc(size + 1); // add 1 byte for null terminator
    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        fclose(inputFile);
        exit(1);
    }

    size_t bytesRead = fread(buffer, 1, size, inputFile);
    if (bytesRead != size) {
        perror("SASM: error reading file");
        free(buffer);
        fclose(inputFile);
        exit(1);
    }
    fclose(inputFile);

    // The terminator goes at index `size`, the last byte of the allocation;
    // index size + 1 would be out of bounds.
    buffer[size] = '\0';

    return buffer;
}
|
||||
9
src/file_utils.h
Normal file
9
src/file_utils.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef FILE_UTILS_H
|
||||
#define FILE_UTILS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
char *readStringFromFile(char* file_path);
|
||||
|
||||
#endif // !FILE_UTILS_H
|
||||
26
src/main.c
26
src/main.c
@@ -5,24 +5,20 @@
|
||||
// Number of elements of a real array (not a pointer or an array parameter).
// No trailing ';' so the macro can be used inside any expression, and the
// argument is parenthesised so it expands safely.
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
|
||||
|
||||
VMBL_Instruction program[] = {
|
||||
MAKE_INST_PUSH(124),
|
||||
MAKE_INST_PUSH(2),
|
||||
MAKE_INST_DIV,
|
||||
MAKE_INST_HALT
|
||||
MAKE_INST_PUSH(0),
|
||||
MAKE_INST_PUSH(1),
|
||||
MAKE_INST_DUP(1),
|
||||
MAKE_INST_DUP(1),
|
||||
MAKE_INST_ADD,
|
||||
MAKE_INST_JMP(2)
|
||||
};
|
||||
|
||||
int main() {
|
||||
//VMBL_State vmblState = {};
|
||||
|
||||
//VMBL_LoadExecutable(&vmblState, program, sizeof(program));
|
||||
//VMBL_StartVM(&vmblState);
|
||||
|
||||
Tokenizer tokenizer = {
|
||||
"push 1224\npush 2\ndiv\nhalt"
|
||||
};
|
||||
Token token = getCurrentToken(&tokenizer);
|
||||
|
||||
printf("%s\n", tokenTypeAsCStr(token.type));
|
||||
VMBL_State vmblState = {};
|
||||
|
||||
VMBL_LoadExecutable(&vmblState, program, sizeof(program));
|
||||
//VMBL_SaveExecutable("fib.vmbl", program, sizeof(program));
|
||||
VMBL_StartVM(&vmblState);
|
||||
|
||||
return 0;
|
||||
}
|
||||
18
src/vmbl.c
18
src/vmbl.c
@@ -1,4 +1,5 @@
|
||||
#include "vmbl.h"
|
||||
#include "file_utils.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -93,6 +94,9 @@ VMBL_Exception VBML_ExecuteInstruction(VMBL_State *vmblState, VMBL_Instruction i
|
||||
|
||||
break;
|
||||
|
||||
case INSTRUCTION_NOP:
|
||||
break;
|
||||
|
||||
default:
|
||||
return (VMBL_Exception) { EXCEPTION_INVALID_OPCODE };
|
||||
break;
|
||||
@@ -126,7 +130,7 @@ void VMBL_StartVM(VMBL_State *vmblState) {
|
||||
|
||||
|
||||
VMBL_Instruction instruction = vmblState->program[vmblState->ip++];
|
||||
printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]);
|
||||
//printf("%s 0x%lx, 0x%lx, 0x%lx\n", instructionTypeToCStr(instruction.type), instruction.opperands[0], instruction.opperands[1], instruction.opperands[2]);
|
||||
|
||||
VMBL_Exception exception = VBML_ExecuteInstruction(vmblState, instruction);
|
||||
|
||||
@@ -156,8 +160,8 @@ void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_
|
||||
vmblState->programSize = programSize;
|
||||
}
|
||||
|
||||
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) {
|
||||
FILE *file = fopen(filePath, "rb");
|
||||
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath) {
|
||||
/*FILE *file = fopen(filePath, "rb");
|
||||
|
||||
if (file == NULL) {
|
||||
perror("VMBL: Failed to open file");
|
||||
@@ -170,11 +174,13 @@ void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath) {
|
||||
|
||||
size_t programSize = size / sizeof(vmblState->program[0]);
|
||||
VMBL_Instruction program[programSize];
|
||||
fread(program, sizeof(program[0]), programSize, file);
|
||||
fread(program, sizeof(program[0]), programSize, file);*/
|
||||
|
||||
VMBL_LoadExecutable(vmblState, program, programSize);
|
||||
VMBL_Instruction *program = (VMBL_Instruction*)readStringFromFile(filePath);
|
||||
|
||||
fclose(file);
|
||||
VMBL_LoadExecutable(vmblState, program, sizeof(program));
|
||||
|
||||
//fclose(file);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ typedef int64_t Word;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INSTRUCTION_NOP,
|
||||
|
||||
// stack operations
|
||||
INSTRUCTION_PUSH,
|
||||
INSTRUCTION_ADD,
|
||||
@@ -34,7 +36,7 @@ typedef enum
|
||||
INSTRUCTION_LESS_THAN,
|
||||
INSTRUCTION_LESS_THAN_EQUAL,
|
||||
INSTRUCTION_GREATER_THAN,
|
||||
INSTRUCTION_GREATER_THAN_EQUAL,
|
||||
INSTRUCTION_GREATER_THAN_EQUAL
|
||||
} InstructionType;
|
||||
|
||||
typedef struct
|
||||
@@ -77,7 +79,7 @@ void VMBL_Dump(VMBL_State vmblState, VMBL_Exception exception);
|
||||
void VMBL_StartVM(VMBL_State *vmblState);
|
||||
|
||||
void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_t programSize);
|
||||
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, const char* filePath);
|
||||
void VMBL_LoadExecutableFromFile(VMBL_State *vmblState, char* filePath);
|
||||
void VMBL_SaveExecutable(const char* filePath, VMBL_Instruction *program, size_t programSize);
|
||||
|
||||
char *instructionTypeToCStr(InstructionType type);
|
||||
|
||||
Reference in New Issue
Block a user