started work on lexer

This commit is contained in:
SpookyDervish
2025-10-13 06:55:35 +11:00
parent 6da4f73ffb
commit 07ca0fcc32
7 changed files with 193 additions and 6 deletions

106
lexer.py Normal file
View File

@@ -0,0 +1,106 @@
from lexer_token import Token, TokenType
from typing import Any
class Lexer:
    """Convert source text into a stream of ``Token`` objects.

    The lexer keeps a one-character cursor (``current_char``) over
    ``source``. Each call to ``next_token`` skips leading whitespace,
    consumes exactly one token and leaves the cursor on the first character
    after it. Once the input is exhausted, every further call returns an
    ``EOF`` token.

    Attributes:
        source: The complete text being tokenized.
        position: Index in ``source`` of ``current_char``; equal to
            ``len(source)`` once the input is exhausted.
        read_position: Index of the character that will be read next.
        line_no: 1-based line number of ``current_char``.
        current_char: Character under the cursor, or ``None`` at end of input.
    """

    # Characters that form a complete token on their own.
    _SINGLE_CHAR_TOKENS = {
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.ASTERISK,
        "/": TokenType.SLASH,
        "^": TokenType.POW,
        "%": TokenType.MODULUS,
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
        "[": TokenType.LBRACKET,
        "]": TokenType.RBRACKET,
        "{": TokenType.LCURLY,
        "}": TokenType.RCURLY,
        ":": TokenType.COLON,
    }

    # Characters skipped between tokens.
    _WHITESPACE = (" ", "\t", "\n", "\r")

    def __init__(self, source: str) -> None:
        """Create a lexer over ``source`` and load its first character."""
        self.source = source
        self.position: int = -1
        self.read_position: int = 0
        self.line_no: int = 1
        self.current_char: str | None = None

        # Prime the cursor so ``current_char`` is valid before the first
        # call to ``next_token``.
        self.__read_char()

    def __read_char(self) -> None:
        """Advance the cursor by one character.

        ``current_char`` becomes ``None`` when there is nothing left to read.
        The position counters advance either way, so after the last real
        character ``position`` equals ``len(source)``.
        """
        if self.read_position >= len(self.source):
            self.current_char = None
        else:
            self.current_char = self.source[self.read_position]

        self.position = self.read_position
        self.read_position += 1

    def __skip_whitespace(self) -> None:
        """Move the cursor past spaces, tabs and line breaks.

        Every ``"\\n"`` that is skipped increments ``line_no``.
        """
        while self.current_char in self._WHITESPACE:
            if self.current_char == "\n":
                self.line_no += 1
            self.__read_char()

    def __new_token(self, tt: TokenType, literal: Any, position: int | None = None) -> Token:
        """Build a token stamped with the current line.

        Args:
            tt: Type of the token.
            literal: Value carried by the token.
            position: Index in ``source`` to record on the token. Defaults to
                the current cursor position, which is correct for
                single-character tokens created before the cursor advances.

        Returns:
            The newly created token.
        """
        if position is None:
            position = self.position
        return Token(tt, literal, self.line_no, position)

    def __is_digit(self, char: str | None) -> bool:
        """Return whether ``char`` is an ASCII digit; ``None`` is not."""
        return char is not None and "0" <= char <= "9"

    def __read_number(self) -> Token:
        """Consume a numeric literal starting at the cursor.

        A number is a run of ASCII digits containing at most one ``"."``.

        Returns:
            An ``INT`` token (literal is an ``int``) when no dot was seen, or
            a ``FLOAT`` token (literal is a ``float``) otherwise. If a second
            dot is encountered, an ``ILLEGAL`` token holding the text scanned
            so far is returned and the offending dot is left under the cursor
            for the next call to ``next_token``. In every case the token's
            position is the index of the number's first character.
        """
        start_pos: int = self.position
        seen_dot = False

        while self.__is_digit(self.current_char) or self.current_char == ".":
            if self.current_char == ".":
                if seen_dot:
                    # todo: error message
                    return self.__new_token(
                        TokenType.ILLEGAL,
                        self.source[start_pos:self.position],
                        start_pos,
                    )
                seen_dot = True
            self.__read_char()

        # The cursor now sits one past the literal (or at end of input), so
        # the slice covers exactly the characters consumed above.
        text = self.source[start_pos:self.position]
        if seen_dot:
            return self.__new_token(TokenType.FLOAT, float(text), start_pos)
        return self.__new_token(TokenType.INT, int(text), start_pos)

    def next_token(self) -> Token:
        """Return the next token from the source.

        Leading whitespace is skipped first. Characters that do not start any
        known token are returned one at a time as ``ILLEGAL`` tokens.

        Returns:
            The next token. At end of input an ``EOF`` token with an empty
            literal is returned, and the cursor is not advanced any further,
            so repeated calls keep reporting the same position.
        """
        self.__skip_whitespace()
        char = self.current_char

        if char is None:
            return self.__new_token(TokenType.EOF, "")

        if self.__is_digit(char):
            # __read_number already leaves the cursor after the literal.
            return self.__read_number()

        token_type = self._SINGLE_CHAR_TOKENS.get(char, TokenType.ILLEGAL)
        tok = self.__new_token(token_type, char)
        self.__read_char()
        return tok

42
lexer_token.py Normal file
View File

@@ -0,0 +1,42 @@
from enum import Enum
from typing import Any
class TokenType(Enum):
    """Every category of token the lexer can emit.

    Each member's value is the member's own name as a string.
    """

    # --- Control markers: end of input / unrecognized text ---
    EOF = "EOF"
    ILLEGAL = "ILLEGAL"

    # --- Numeric literals ---
    INT = "INT"
    FLOAT = "FLOAT"

    # --- Arithmetic operators ---
    PLUS = "PLUS"
    MINUS = "MINUS"
    ASTERISK = "ASTERISK"
    SLASH = "SLASH"
    POW = "POW"
    MODULUS = "MODULUS"

    # --- Brackets and punctuation ---
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    LBRACKET = "LBRACKET"
    RBRACKET = "RBRACKET"
    LCURLY = "LCURLY"
    RCURLY = "RCURLY"
    COLON = "COLON"
class Token:
    """One lexical unit together with the location recorded for it.

    Attributes:
        type: Category of the token.
        literal: Value carried by the token; its Python type is up to
            whoever creates the token.
        line_no: Line number supplied by the creator.
        position: Position supplied by the creator.
    """

    def __init__(self, type: TokenType, literal: Any, line_no: int, position: int) -> None:
        """Store the four fields unchanged on the instance."""
        # ``type`` shadows the builtin, but it is part of the constructor's
        # keyword interface and therefore kept as is.
        self.type, self.literal = type, literal
        self.line_no, self.position = line_no, position

    def __str__(self) -> str:
        """Return a one-line, human-readable dump of all four fields."""
        template = "token[{} : {} : Line {} : Position {}]"
        return template.format(self.type, self.literal, self.line_no, self.position)

    def __repr__(self) -> str:
        """Return the same text as ``str()`` so containers print readably."""
        return str(self)

13
main.py Normal file
View File

@@ -0,0 +1,13 @@
from lexer import Lexer
# When true, running this file dumps every token of the test source.
LEXER_DEBUG: bool = True

if __name__ == "__main__":
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open("tests/lexer.pla", encoding="utf-8") as f:
        code: str = f.read()

    if LEXER_DEBUG:
        # Imported here because only the debug dump needs the token types.
        from lexer_token import TokenType

        debug_lex: Lexer = Lexer(source=code)

        # Drive the loop off the lexer's own EOF token instead of peeking at
        # its cursor: the dump then always ends with EOF, whether or not the
        # file has trailing whitespace.
        while True:
            tok = debug_lex.next_token()
            print(tok)
            if tok.type is TokenType.EOF:
                break

View File

@@ -1,2 +1,2 @@
depend "io.pla"
write("Hello, World!")
print("Hello, World!")

6
tests/lexer.pla Normal file
View File

@@ -0,0 +1,6 @@
123
0.456
[]
{}
(1 + 3 * 2 ^ 4) % 2
2 - 1

18
tests/types.pla Normal file
View File

@@ -0,0 +1,18 @@
depend "io.pla"
enum Gender {
male,
female
}
struct Person {
name: String,
age: Int = 0,
speak: Func
}
speak = Func(sentence: String): Nil {
print(sentence)
}
max: Person = {"Max", 17, }

View File

@@ -6,8 +6,10 @@ myBoolean: Bool = true
myString: String = "Hello!\n"
myList: List = [1, "hi", true, [1, 2, 3], 0.789]
write(String(myInt))
write(String(myDecimal))
write(String(myBoolean))
write(myString)
write(String(myList))
MY_CONSTANT: Const(String) = "foo bar"
print(String(myInt))
print(String(myDecimal))
print(String(myBoolean))
print(myString)
print(String(myList))