deleted SASM
This commit is contained in:
@@ -9,7 +9,8 @@
|
||||
That is the name of the VM itself, though; the programming language I made for it is called Sylt, named after the German island.
|
||||
|
||||
To compile the VM for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/file_utils.c -o vmbl -O3`
|
||||
To compile SASM for Linux: `gcc src/asm/sasm.c src/asm/instructions.c src/file_utils.c src/asm/assembler.c -o sasm -O3`
|
||||
|
||||
SASM and Sylt are written in Python for now, as I am not up to writing them in C right now.
|
||||
|
||||
## Syntax
|
||||
### Example "Hello, World!" Program
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
#include "assembler.h"
|
||||
#include "instructions.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Assembles a single line of SASM source into a VM instruction.
 *
 * Only the mnemonic (the first space-separated word) is located so far; it is
 * echoed to stdout as a debugging aid. Operand parsing and opcode lookup are
 * not implemented yet, so the returned instruction is zero-initialised.
 *
 * line: NUL-terminated source line. It is not modified.
 *
 * Returns a zero-initialised instruction. A NULL or blank line prints nothing.
 */
VMBL_Instruction assembleLine(char *line) {
    VMBL_Instruction instruction = {0};

    if (line == NULL) {
        return instruction;
    }

    /*
     * Find the mnemonic without strtok(): strtok() keeps hidden global state,
     * so calling it here would clobber a strtok() scan the caller may be in
     * the middle of.
     */
    const char *mnemonic = line + strspn(line, " ");
    size_t mnemonicLength = strcspn(mnemonic, " ");

    if (mnemonicLength == 0) {
        /* Blank line: there is nothing to print or assemble. */
        return instruction;
    }

    printf("%.*s\n", (int)mnemonicLength, mnemonic);

    return instruction;
}
|
||||
|
||||
/*
 * Splits the source text into lines and assembles each non-empty line into
 * the next free slot of the program array.
 *
 * sourceCode:      NUL-terminated source text. It is modified in place: every
 *                  '\n' is overwritten with '\0'.
 * program:         output array receiving one instruction per line.
 * programCapacity: number of elements available in program.
 *
 * Exits with status 1 if the source has more lines than program can hold.
 */
void assemble(char *sourceCode, VMBL_Instruction program[], size_t programCapacity) {
    size_t instructionCount = 0;
    char *cursor = sourceCode;

    /*
     * Lines are split by hand rather than with strtok(): assembleLine() may
     * tokenise the line itself, and strtok() cannot track two scans at once.
     */
    while (cursor != NULL && *cursor != '\0') {
        char *lineEnd = strchr(cursor, '\n');
        char *nextLine = NULL;

        if (lineEnd != NULL) {
            *lineEnd = '\0';
            nextLine = lineEnd + 1;
        }

        /* Empty lines are skipped, matching what strtok(.., "\n") did. */
        if (*cursor != '\0') {
            if (instructionCount >= programCapacity) {
                fprintf(stderr, "SASM: program exceeds the maximum of %zu instructions\n", programCapacity);
                exit(1);
            }

            program[instructionCount++] = assembleLine(cursor);
        }

        cursor = nextLine;
    }
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef ASSEMBLER_H
|
||||
#define ASSEMBLER_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include "../vmbl.h"
|
||||
|
||||
|
||||
VMBL_Instruction assembleLine(char *line);
|
||||
void assemble(char *sourceCode, VMBL_Instruction program[], size_t programCapacity);
|
||||
|
||||
|
||||
#endif // !ASSEMBLER_H
|
||||
@@ -1,12 +0,0 @@
|
||||
#include "instructions.h"
|
||||
#include <string.h>
|
||||
|
||||
InstructionType instructionNameToType(char* instName) {
|
||||
for (size_t i = 0; i < sizeof(instruction_table)/sizeof(instruction_table[0]); i++) {
|
||||
|
||||
if (strcmp(instName, instruction_table[i].mnemonic) == 0) {
|
||||
return (InstructionType)i;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
#ifndef INSTRUCTIONS_H
|
||||
#define INSTRUCTIONS_H
|
||||
|
||||
#include "../vmbl.h"
|
||||
|
||||
#define MAX_ARGS 3
|
||||
|
||||
/* Kind of operand an instruction argument slot accepts. */
typedef enum
{
    ARG_TYPE_NONE, /* slot unused; also the value of zero-initialised slots */
    ARG_TYPE_INT   /* integer operand */
} ArgType;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *mnemonic;
|
||||
uint8_t argCount;
|
||||
ArgType args[MAX_ARGS];
|
||||
} InstructionInfo;
|
||||
|
||||
static const InstructionInfo instruction_table[] = {
|
||||
[INSTRUCTION_NOP] = { "nop", 0, { ARG_TYPE_NONE } },
|
||||
[INSTRUCTION_PUSH] = { "push", 1, { ARG_TYPE_INT } },
|
||||
[INSTRUCTION_DROP] = { "drop", 1, { ARG_TYPE_INT } },
|
||||
[INSTRUCTION_ADD] = { "add", 0, { ARG_TYPE_NONE } },
|
||||
[INSTRUCTION_SUB] = { "sub", 0, { ARG_TYPE_NONE } },
|
||||
[INSTRUCTION_MUL] = { "mul", 0, { ARG_TYPE_NONE } },
|
||||
[INSTRUCTION_DIV] = { "div", 0, { ARG_TYPE_NONE } },
|
||||
[INSTRUCTION_DUPLICATE] = { "dup", 1, { ARG_TYPE_INT } },
|
||||
[INSTRUCTION_HALT] = { "halt", 0, { ARG_TYPE_NONE } },
|
||||
};
|
||||
|
||||
InstructionType instructionNameToType(char* instName);
|
||||
|
||||
#endif // !INSTRUCTIONS_H
|
||||
@@ -1,29 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../file_utils.h"
|
||||
#include "assembler.h"
|
||||
#include "../vmbl.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 3) {
|
||||
printf("Usage: sasm <file_path> <output_path>\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *buffer = readStringFromFile(argv[1]);
|
||||
|
||||
//printf("%s\n", buffer);
|
||||
/*Tokenizer tokenizer = {
|
||||
.source = buffer,
|
||||
.column = 1,
|
||||
.line = 1,
|
||||
.pos = 0
|
||||
};*/
|
||||
|
||||
VMBL_Instruction program[VMBL_PROGRAM_SIZE];
|
||||
|
||||
assemble(buffer, program, VMBL_PROGRAM_SIZE);
|
||||
free(buffer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,184 +0,0 @@
|
||||
#include "tokenize.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/*
|
||||
Returns a string buffer containing the name that was parsed.
|
||||
* IMPORTANT: remember to free the buffer when you're done with it!
|
||||
*/
|
||||
char *parseName(Tokenizer *tokenizer) {
|
||||
char *ptr = tokenizer->source;
|
||||
|
||||
// loop over the string until we hit something that isn't a name
|
||||
int i = 0;
|
||||
while (isName(*ptr++))
|
||||
{
|
||||
i++;
|
||||
tokenizer->column++;
|
||||
tokenizer->pos++;
|
||||
|
||||
}
|
||||
|
||||
char *buffer = (char*)malloc(sizeof(char) * (i + 1)); // add 1 byte for null terminator
|
||||
|
||||
if (buffer == NULL) {
|
||||
fprintf(stderr, "SASM: failed to allocate memory\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
buffer = strncpy(buffer, tokenizer->source, i);
|
||||
|
||||
tokenizer->source += i;
|
||||
|
||||
return buffer;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Parses the number literal that starts at tokenizer->source and advances
 * source, pos and column past it.
 *
 * The literal is taken to be everything up to the next whitespace character
 * or the end of the input; the characters are not validated as digits.
 * The terminating whitespace is not consumed, so it is not counted in pos.
 *
 * Returns a newly allocated, NUL-terminated copy of the literal; the caller
 * must free it. Exits with status 1 if the allocation fails.
 */
char *parseNumber(Tokenizer *tokenizer) {
    const char *start = tokenizer->source;
    size_t length = 0;

    // cast to unsigned char: passing a negative char to isspace is undefined
    while (start[length] != '\0' && !isspace((unsigned char)start[length])) {
        length++;
    }

    char *buffer = malloc(length + 1); // add 1 byte for null terminator

    if (buffer == NULL) {
        fprintf(stderr, "SASM: failed to allocate memory\n");
        exit(1);
    }

    // strncpy would leave the copy unterminated here, so terminate explicitly
    memcpy(buffer, start, length);
    buffer[length] = '\0';

    tokenizer->source += length;
    tokenizer->column += (unsigned int)length;
    tokenizer->pos += (unsigned int)length;

    return buffer;
}
|
||||
|
||||
/*
 * Consumes exactly one whitespace character: advances source, pos and column
 * by one. The caller is responsible for checking that the current character
 * really is whitespace.
 */
void parseWhitespace(Tokenizer *tokenizer) {
    tokenizer->source++;
    tokenizer->pos++;
    tokenizer->column++;
}
|
||||
|
||||
/*
 * Returns true if character can appear in a number literal: a decimal digit
 * or '.'.
 */
bool isNumber(char character) {
    /* <ctype.h> functions require an unsigned char value; a negative char is undefined behaviour. */
    return isdigit((unsigned char)character) || character == '.';
}
|
||||
|
||||
/*
 * Returns true if character can appear in a name: an alphanumeric character
 * or '_'.
 */
bool isName(char character) {
    /* <ctype.h> functions require an unsigned char value; a negative char is undefined behaviour. */
    return isalnum((unsigned char)character) || character == '_';
}
|
||||
|
||||
/*
 * Skips any leading whitespace, then reads and consumes the token that starts
 * at tokenizer->source.
 *
 * Returns:
 *   - TOKEN_EOF (value NULL) at the end of the input;
 *   - TOKEN_INSTRUCTION or TOKEN_NAME for a word starting with a letter,
 *     depending on whether it appears in INSTRUCTION_NAMES;
 *   - TOKEN_INT_LITERAL for a word starting with a digit.
 * For non-EOF tokens, value is heap-allocated and must be freed by the caller.
 *
 * Exits with status 1 on any other character.
 */
Token getCurrentToken(Tokenizer *tokenizer) {
    /* Skip whitespace in a loop so long runs of blanks cannot exhaust the stack. */
    for (;;) {
        char current = *tokenizer->source;

        if (current == ' ' || current == '\t') {
            parseWhitespace(tokenizer);
        } else if (current == '\n') {
            tokenizer->column = 1;
            tokenizer->line++;
            tokenizer->pos++;
            tokenizer->source++;
        } else if (current == '\r') {
            /* Carriage returns are consumed without affecting the column. */
            tokenizer->pos++;
            tokenizer->source++;
        } else {
            break;
        }
    }

    /* Unlisted members (type, value, columnStart) are zero-initialised. */
    Token token = {
        .line = tokenizer->line,
    };

    if (!*tokenizer->source) {
        token.type = TOKEN_EOF;
        return token;
    }

    /* <ctype.h> functions require an unsigned char value. */
    unsigned char first = (unsigned char)*tokenizer->source;

    if (isalpha(first)) {
        token.columnStart = tokenizer->column;
        token.value = parseName(tokenizer);

        // by default the token is a name until we find an instruction
        token.type = TOKEN_NAME;

        size_t instructionCount = sizeof(INSTRUCTION_NAMES) / sizeof(INSTRUCTION_NAMES[0]);
        for (size_t i = 0; i < instructionCount; i++) {
            if (strcmp(INSTRUCTION_NAMES[i], token.value) == 0) {
                token.type = TOKEN_INSTRUCTION;
                break;
            }
        }
    } else if (isdigit(first)) {
        /* Record the column before parseNumber advances it. */
        token.columnStart = tokenizer->column;
        token.value = parseNumber(tokenizer);

        // TODO: floating point numbers
        token.type = TOKEN_INT_LITERAL;
    } else {
        fprintf(stderr, "Invalid token `%c` on line %u, column %u\n", *tokenizer->source, tokenizer->line, tokenizer->column);
        exit(1);
    }

    return token;
}
|
||||
|
||||
/*
 * Returns a human-readable name for a token type.
 *
 * The result points to a string literal: it must not be modified or freed.
 * Values outside the TokenType enumerators yield "<UNKNOWN>".
 */
char* tokenTypeAsCStr(TokenType type) {
    switch (type)
    {
        case TOKEN_INSTRUCTION:
            return "INSTRUCTION";

        case TOKEN_INT_LITERAL:
            return "INT_LITERAL";

        case TOKEN_EOF:
            return "<EOF>";

        case TOKEN_NAME:
            return "NAME";

        default:
            break;
    }

    /* Always return a valid string so callers can print the result safely. */
    return "<UNKNOWN>";
}
|
||||
@@ -1,59 +0,0 @@
|
||||
#ifndef TOKENIZE_H
|
||||
#define TOKENIZE_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Mnemonics the tokenizer classifies as TOKEN_INSTRUCTION.
 *
 * Both the array and the strings are const: the entries are string literals
 * and the table is never meant to change at run time.
 */
static const char *const INSTRUCTION_NAMES[] = {
    "push",
    "drop",
    "add",
    "sub",
    "mul",
    "div",
    "eq",
    "neq",
    "gt",
    "gte",
    "lt",
    "lte",
    "dup",
    "jc",
    "halt",
    "jump"
};
|
||||
|
||||
/* Categories of token produced by the tokenizer. */
typedef enum
{
    TOKEN_EOF, // just used when looping over the tokens list
    TOKEN_INSTRUCTION, // a word listed in INSTRUCTION_NAMES
    TOKEN_NAME, // any other word
    TOKEN_INT_LITERAL, // a word starting with a digit
} TokenType;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TokenType type;
|
||||
char* value;
|
||||
unsigned int line;
|
||||
unsigned int columnStart;
|
||||
} Token;
|
||||
|
||||
/* Cursor over the source text being tokenized. */
typedef struct
{
    char *source;        // next unread character; advanced as input is consumed
    unsigned int pos;    // number of characters consumed so far
    unsigned int line;   // current line; incremented on every '\n'
    unsigned int column; // current column; reset to 1 after every '\n'
} Tokenizer;
|
||||
|
||||
bool isNumber(char character);
|
||||
bool isName(char character);
|
||||
|
||||
Token getCurrentToken(Tokenizer *tokenizer);
|
||||
char* tokenTypeAsCStr(TokenType type);
|
||||
|
||||
char *parseName(Tokenizer *tokenizer);
|
||||
char *parseNumber(Tokenizer *tokenizer);
|
||||
void parseWhitespace(Tokenizer *tokenizer);
|
||||
|
||||
#endif // !TOKENIZE_H
|
||||
Reference in New Issue
Block a user