From 9ccf6c558f2e7e5f9fdb5a513f729b1071b56b69 Mon Sep 17 00:00:00 2001 From: DiamondNether90 Date: Mon, 22 Dec 2025 16:55:09 +1100 Subject: [PATCH] Another failed attempt --- README.md | 24 +++++ docs/syntax.md | 59 +++++++++++++ src/compiler.py | 201 +++++++++++++++++------------------------- src/lexer.py | 1 + src/main.py | 4 +- tests/helloworld.basm | 14 +++ tests/pointer.basm | 2 + 7 files changed, 182 insertions(+), 123 deletions(-) create mode 100644 README.md create mode 100644 docs/syntax.md mode change 100644 => 100755 src/main.py create mode 100644 tests/helloworld.basm create mode 100644 tests/pointer.basm diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd47963 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Brain Assembly + +Brain Assembly is a ground-like language that compiles to Brainfuck. It provides variable creation, C-like pointers, and other tools that make it feel more familiar to use. + +# Setup + +```bash +git clone https://chookspace.com/DiamondNether90/BrainAssemblyV2 BrainAssembly +cd BrainAssembly +sudo ln -s (pwd)/src/main.py /usr/local/bin/basm +``` + +To compile a file, run: +```bash +basm (filename) (writefile) (flags) +``` +Example usage: +```bash +# Compile tests/test.basm and write code to test.bf: +basm tests/test.basm test.bf +``` + +# Syntax +Syntax can be found in `docs/syntax.md` \ No newline at end of file diff --git a/docs/syntax.md b/docs/syntax.md new file mode 100644 index 0000000..62d4bda --- /dev/null +++ b/docs/syntax.md @@ -0,0 +1,59 @@ +# Syntax Overview + +Each expression of code is separated by a semicolon. Multiple expressions may be on one line. + +The syntax for each command is `<command> <arguments>`. Each argument is separated by a space. + +Use the `#` character for comments. + +# Commands + +### raw + +Syntax: `raw <code>` + +Directly appends Brainfuck to the compiled code. 
+ +Example usage: `raw >>[>+<-]<<` + +### li(num) + +Syntax: `li(num) <type> <name> <malloc>...` + +The `li` command allows you to allocate memory to a specific variable. The number after `li` dictates the dimensions of a list. For a non-list, use `li0`. + +The number of malloc arguments should be equal to the dimensions. The first number dictates the bytes allocated for the entire list, the second number dictates the bytes allocated for the sublists, and so on. + +For example, to allocate memory for `myList = [[int, int, int], [int, int, int], [int, int, int]]`, you would use + +```basm +li2 int myList 36 12; # 36 for entire list (9 4-byte ints), 12 for sublists (3 4-byte ints) +``` + +Valid types are char, bool and int. + +**Note: You cannot change the number of bytes allocated later. There are no safeguards to prevent reading outside of allocated memory, such as into the next variable or other miscellaneous garbage data.** + +### set + +Syntax: `set <variable> <indices...> <value>` + +The set command allows you to set a variable to a value. Requires the same number of list indices as list dimensions. + +Enclose strings in single quotes. + +Currently only literals are supported. This will be changed in the future. + +Example usage: +```basm +li1 int nums 100; +set nums 0 927; +``` + +### ptr + +The ptr command allows you to manipulate the main pointer. + +Syntax: `ptr <action> <arguments>` + +`ptr set <position>`: Set the position of the pointer. \ No newline at end of file diff --git a/src/compiler.py b/src/compiler.py index ec91e44..f6c7cd9 100644 --- a/src/compiler.py +++ b/src/compiler.py @@ -3,13 +3,16 @@ from enum import Enum import sys # Note: Layout of cells is as follows -# [0] [input...] [0] [data start] +# [0] [input...] [0] [ptr: int] [1] [0] [data start] # ptr_address is relative to the first cell of data +# Each data byte is actually two bytes, stored as [0] [actualData] +# The pointer is by default on the [1] after the ptr pos. 
def compile(code: list[list[str]]): - returnval = '>,[>,]>' + offset: int = 5 + + returnval = '[ This code was generated using the Brain Assembly compiler\nhttps://chookspace.com/DiamondNether90/BrainAssemblyV2 ]\n\n+>>>,[>>,]>' + '>>' * offset + '>>>>+' used_bytes = 0 - ptr_address = 0 class Types(Enum): Bool = 'Boolean' @@ -22,7 +25,7 @@ def compile(code: list[list[str]]): 'int': Types.Int } - bytes = { + bytes: dict[Types, int] = { Types.Bool: 1, Types.Char: 1, Types.Int: 4, @@ -42,13 +45,6 @@ def compile(code: list[list[str]]): def __repr__(self): return f'({self.name}, {self.dimensions}, {self.type.value}, {hex(self.address)}, {self.malloc})' - class Constant: - def __init__(self, name: str, type: Types): - self.name = name - self.type = type - def __repr__(self): - return f'({self.name}, {self.type.value})' - var: dict[str, Variable] = { } @@ -59,7 +55,7 @@ def compile(code: list[list[str]]): keyword = line[0] args = len(line) if keyword[0] + keyword[1] == 'li': - if len(line) < 3: + if args < 3: sys.exit(err.error('li command syntax is "li(dim) "')) # Check dimensions try: @@ -82,125 +78,86 @@ def compile(code: list[list[str]]): # Create new variable var[line[2]] = Variable(line[2], dim, types[line[1]], used_bytes, malloc) - elif keyword == 'ptr': - match line[1]: - case 'mv': - if len(line) < 3: - sys.exit(err.error('ptr mv requires variable name or int')) - target: str | int = line[2] - try: - target = int(target) - except ValueError: - try: - vartarget = var[target] - target = vartarget.address - i = 0 - try: - for i in range(vartarget.dimensions): - target += vartarget.malloc[i+1] * int(line[3+i]) - except IndexError: - sys.exit(err.error(f'ptr mv requires the same number of list indexes as dimensions (got {i})')) - except ValueError: - sys.exit(err.error(f'ptr mv: Expected list index, got {list[3+i]}')) - except KeyError: - sys.exit(err.error(f'ptr mv requires variable name or int (got {target})')) - else: - pass - # Currently, target points to the 
start of the variable - # For lists, we may want the start of a specific item - - # Move pointer - if ptr_address < target: - returnval += '>' * (target - ptr_address) - elif ptr_address > target: - returnval += '<' * (ptr_address - target) - ptr_address = target - case 'add': - if len(line) < 3: - sys.exit(err.error('ptr add requires int')) - - try: - target = int(line[2]) - except ValueError: - sys.exit(err.error(f'ptr add: Expected int, got {line[2]}')) - - if target > 0: - ptr_address += target - returnval += '>' * target - elif target < 0: - ptr_address -= target - returnval += '<' * target - case 'subtract': - if len(line) < 3: - sys.exit(err.error('ptr subtract requires int')) - - try: - target = int(line[2]) - except ValueError: - sys.exit(err.error(f'ptr subtract: Expected int, got {line[2]}')) - - if target > 0: - ptr_address -= target - returnval += '<' * target - elif target < 0: - ptr_address += target - returnval += '>' * target - case 'back': - if len(line) != 2: - sys.exit(err.error(f'ptr back does not take additional arguments (got {line[2:]})')) - case _: - sys.exit(err.error(f'Expected pointer action, got {line[1]}')) + elif keyword == 'raw': + if args != 2: + sys.exit(err.error('raw syntax: raw ')) + returnval += line[1] elif keyword == 'set': - # Get variable + if args < 3: + sys.exit(err.error('set syntax: set ')) + # Get variable address try: - vartarget = var[line[1]] + argvar = var[line[1]] except KeyError: - sys.exit(err.error(f'set: Expected var, got {line[1]}')) - - if args != 3 + vartarget.dimensions: - sys.exit(err.error('set syntax: set ')) - - # Target cell for ptr is var.address + line[2] * var.malloc[0] + line[3] * var.malloc[1] + ... 
- target = vartarget.address - for i in range(vartarget.dimensions): + sys.exit(err.error(f'set: Could not find variable {line[1]}')) + if args != 3 + argvar.dimensions: + sys.exit(err.error(f'Not enough indices for variable {line[1]}')) + address = argvar.address + for i in range(argvar.dimensions): try: - target += int(line[i+2]) * vartarget.malloc[i+1] + address += argvar.malloc[i+1] * int(line[2+i]) except ValueError: - sys.exit(f'set: Expected list index, got {line[i+2]}') - - # Move pointer - if ptr_address < target: - returnval += '>' * (target - ptr_address) - elif ptr_address > target: - returnval += '<' * (ptr_address - target) - ptr_address = target + sys.exit(err.error(f'set {line[1]}: Expected list index, got {line[2+i]}')) + returnval += '>>' * (address + 1) - # Do stuff based on type - match vartarget.type: - case Types.Char: - returnval += '[-]' + '+' * ord(line[-1]) - case _: - sys.exit(err.error(f'set: {vartarget.type.value} type not implemented yet')) - elif keyword == 'setbyte': - if len(line) < 2: - sys.exit(err.error('setbyte command requires two arguments')) + # Get data try: - returnval += '[-]' + '+' * int(line[1]) + temp = int(line[-1]) except ValueError: - try: - returnval += '[-]' + '+' * ord(line[1]) - except TypeError: - sys.exit(err.error(f'setbyte: Expected int or char, got {line[1]}')) - elif keyword == 'print': - try: - type = types[line[1]] - except KeyError: - sys.exit(err.error(f'print: Invalid type: {line[1]}')) - - if type == Types.Char: - returnval += '.' 
+ if line[-1][0] == line[-1][-1] == '\'': + try: + value = [ord(line[-1][1:-1])] + except TypeError: + sys.exit(err.error(f'set: Expected literal, got {line[-1]}')) + else: + sys.exit(err.error(f'set: Expected literal, got {line[-1]}')) else: - sys.exit(err.error(f'print: {line[1]} not implemented')) + value = [ + (temp >> 24) & 0xFF, + (temp >> 16) & 0xFF, + (temp >> 8) & 0xFF, + temp & 0xFF, + ] + + i = 0 + for i in range(len(value)): + returnval += '>[-]' + '+' * value[i] + '>' + returnval += '<<' * (i + address + 2) + elif keyword == 'inc': + if len(line) != 2: + sys.exit(err.error('inc syntax: inc ')) + returnval += '>>' + elif keyword == 'ptr': + if args < 2: + sys.exit(err.error('ptr syntax: ptr ')) + action = line[1] + match action: + case 'set': + if args != 3: + sys.exit(err.error('ptr set syntax: ptr set ')) + try: + pos = int(line[2]) + except ValueError: + sys.exit(err.error('ptr set var not implemented yet')) + else: + byte_array = [ + (pos >> 24) & 0xFF, + (pos >> 16) & 0xFF, + (pos >> 8) & 0xFF, + pos & 0xFF, + ] + for i in range(4): + returnval += '<[-]' + '+' * byte_array[-1-i] + returnval += '>>>>' + + case 'mv*': + # We want to actually move the pointer to the set value. 
+ # First copy the pointer position + returnval += '<<<<[<<<<+>>>+>-]<[>+<-]>>' + returnval += '[<<<<+>>+>>-]<<[>>+<<-]>>>' + returnval += '[<<<<+>+>>>-]<<<[>>>+<<<-]>>>>' + returnval += '[<<<<+>>>>->>+<<]>>[<<+>>-]<' + else: sys.exit(err.error(f'Invalid command: {keyword}')) diff --git a/src/lexer.py b/src/lexer.py index 5bda05a..8573eb6 100644 --- a/src/lexer.py +++ b/src/lexer.py @@ -18,6 +18,7 @@ def tokenise(code: str) -> list[list[str]]: token = '' case '\'': isString = not isString + token += '\'' case '#': isComment = True case ';': diff --git a/src/main.py b/src/main.py old mode 100644 new mode 100755 index 375e66d..a07182a --- a/src/main.py +++ b/src/main.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import sys import lexer import compiler @@ -5,7 +7,7 @@ import error as err def main(argc: int, argv: list[str]): if argc < 3: - return f'Usage: python {argv[0]} (filename) (writefile) (flags)' + return f'Usage: {argv[0]} (filename) (writefile) (flags)' try: with open(argv[1], 'r') as file: diff --git a/tests/helloworld.basm b/tests/helloworld.basm new file mode 100644 index 0000000..eea5936 --- /dev/null +++ b/tests/helloworld.basm @@ -0,0 +1,14 @@ +li1 char hello_world 13; +set hello_world 1 'H'; +set hello_world 2 'e'; +set hello_world 3 'l'; +set hello_world 4 'l'; +set hello_world 5 'o'; +set hello_world 6 ','; +set hello_world 7 ' '; +set hello_world 8 'w'; +set hello_world 9 'o'; +set hello_world 10 'r'; +set hello_world 11 'l'; +set hello_world 12 'd'; +set hello_world 13 '!'; \ No newline at end of file diff --git a/tests/pointer.basm b/tests/pointer.basm new file mode 100644 index 0000000..26c49a7 --- /dev/null +++ b/tests/pointer.basm @@ -0,0 +1,2 @@ +ptr set 29; +ptr mv*; \ No newline at end of file