commit a604b8d5c4bcf0a61a2f65ed25c09f67092bce7a
Author: Maxwell Jeffress
Date:   Mon Sep 1 20:41:41 2025 +1000

    Initial commit

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..af7762d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,20 @@
+"""Command-line driver: tokenize a source file and print each line's tokens."""
+import sys
+import preprocessor
+import token
+
+if len(sys.argv) < 2:
+    print("Usage: hgc (file)")
+    # sys.exit() is always available; the bare exit() helper is only
+    # installed by the site module.
+    sys.exit(1)
+
+# The with-block closes the source file once its lines have been read.
+with open(sys.argv[1]) as source:
+    file = source.readlines()
+
+# One token list per source line, in file order.
+lines: list[token.tokenlist] = [preprocessor.process_line(line) for line in file]
+
+for line in lines:
+    print(line)
\ No newline at end of file
diff --git a/preprocessor.py b/preprocessor.py
new file mode 100644
index 0000000..1c9ddc4
--- /dev/null
+++ b/preprocessor.py
@@ -0,0 +1,64 @@
+"""Line tokenizer: splits one line of source text into Token objects."""
+import token
+
+delimiters = ["=", ">", "<", "+", "-", "*", "/", " "]
+
+def doNothing():
+    """Placeholder that performs no action."""
+    return
+
+def process_line(process: str) -> list[token.Token]:
+    """Split one line of source text into tokens.
+
+    Whitespace separates tokens and is discarded. Every operator character in
+    ``delimiters`` becomes a token of its own (so ``-5`` yields ``-`` then
+    ``5``), except that two consecutive ``=`` characters are combined into one
+    ``==`` token. Any other run of characters becomes a single token.
+
+    Args:
+        process: The text of one source line; a trailing newline is allowed.
+
+    Returns:
+        The tokens in the order they appear in ``process``.
+    """
+    buf = ""
+    tokens: list[token.Token] = []
+    # True while one "=" has been read but not yet emitted, because the next
+    # character decides whether it stands alone or starts "==".
+    prevEquals = False
+
+    def flush() -> None:
+        # Emit the word collected so far, if any, and start a new one.
+        nonlocal buf
+        if buf:
+            tokens.append(token.Token(buf))
+            buf = ""
+
+    for c in process:
+        if c == "=":
+            flush()
+            if prevEquals:
+                tokens.append(token.Token("=="))
+                prevEquals = False
+            else:
+                prevEquals = True
+            continue
+        if prevEquals:
+            # The previous "=" was not followed by a second one.
+            tokens.append(token.Token("="))
+            prevEquals = False
+        if c.isspace():
+            flush()
+        elif c in delimiters:
+            # Operators become their own token instead of merging into the
+            # word that follows them.
+            flush()
+            tokens.append(token.Token(c))
+        else:
+            buf += c
+
+    if prevEquals:
+        # A "=" that ends the input has no following character to release it.
+        tokens.append(token.Token("="))
+    flush()
+    return tokens
\ No newline at end of file
diff --git a/test.high b/test.high
new file mode 100644
index 0000000..0785a87
--- /dev/null
+++ b/test.high
@@ -0,0 +1,3 @@
+let dingus = 10
+
+print(dingus)
\ No newline at end of file
diff --git a/token.py b/token.py
new file mode 100644
index 0000000..02e512e
--- /dev/null
+++ b/token.py
@@ -0,0 +1,117 @@
+"""Token model: the TokenType enum, the Token class, and get_type().
+
+NOTE(review): the standard library also ships a module named ``token``;
+sharing that name is fragile, so consider renaming this file.
+"""
+from enum import Enum
+
+class TokenType(Enum):
+    """Token categories stored in Token.type."""
+
+    INTEGER = 1
+    DOUBLE = 2
+    STRING = 3
+    CHAR = 4
+    BOOLEAN = 5
+
+    FUNCTION = 6
+    VARIABLE = 7
+    IF = 8
+    ELSE = 9
+    WHILE = 10
+    LET = 11
+
+    ADD = 12
+    SUBTRACT = 13
+    MULTIPLY = 14
+    DIVIDE = 15
+
+    SET = 16
+
+    EQUAL = 17
+    GREATER = 18
+    LESSER = 19
+
+    UNKNOWN = 0
+
+class Token:
+    """A piece of source text together with its TokenType."""
+
+    def __init__(self, tok: str):
+        """Store ``tok`` as ``value`` and classify it with get_type()."""
+        self.value = tok
+        self.type = get_type(tok)
+
+    def __repr__(self) -> str:
+        return f"Token(type={self.type.name}, value='{self.value}')"
+
+# The tokens produced for one line of source text.
+type tokenlist = list[Token]
+
+def get_type(process: str) -> TokenType:
+    """Classify one piece of source text.
+
+    The checks run in this order: exact keyword and operator spellings,
+    quoted string and character literals, numeric literals, identifiers.
+
+    Args:
+        process: The text to classify.
+
+    Returns:
+        The first matching TokenType, or TokenType.UNKNOWN if nothing matches.
+    """
+    # Keywords and operators
+    match process:
+        case "let":
+            return TokenType.LET
+        case "if":
+            return TokenType.IF
+        case "else":
+            return TokenType.ELSE
+        case "while":
+            return TokenType.WHILE
+        case "true" | "false":
+            return TokenType.BOOLEAN
+        case "+":
+            return TokenType.ADD
+        case "-":
+            return TokenType.SUBTRACT
+        case "*":
+            return TokenType.MULTIPLY
+        case "/":
+            return TokenType.DIVIDE
+        case "=":
+            return TokenType.SET
+        case "==":
+            return TokenType.EQUAL
+        case ">":
+            return TokenType.GREATER
+        case "<":
+            return TokenType.LESSER
+
+    # String/Char Literals
+    if len(process) >= 2:
+        if process.startswith('"') and process.endswith('"'):
+            return TokenType.STRING
+        if process.startswith("'") and process.endswith("'") and len(process) == 3:
+            return TokenType.CHAR
+
+    # Numeric Literals
+    if '.' in process:
+        try:
+            float(process)
+            return TokenType.DOUBLE
+        except ValueError:
+            pass
+    else:
+        try:
+            int(process)
+            return TokenType.INTEGER
+        except ValueError:
+            pass
+
+    # Identifiers (Variables/Functions)
+    if process and (process[0].isalpha() or process[0] == '_') and all(c.isalnum() or c == '_' for c in process):
+        return TokenType.VARIABLE
+
+    return TokenType.UNKNOWN
\ No newline at end of file