started work on lexer
This commit is contained in:
106
lexer.py
Normal file
106
lexer.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from lexer_token import Token, TokenType
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Lexer:
|
||||
def __init__(self, source: str) -> None:
|
||||
self.source = source
|
||||
|
||||
self.position: int = -1
|
||||
self.read_position: int = 0
|
||||
self.line_no: int = 1
|
||||
|
||||
self.current_char: str | None = None
|
||||
|
||||
self.__read_char()
|
||||
|
||||
def __read_char(self) -> None:
|
||||
if self.read_position >= len(self.source):
|
||||
self.current_char = None
|
||||
else:
|
||||
self.current_char = self.source[self.read_position]
|
||||
|
||||
self.position = self.read_position
|
||||
self.read_position += 1
|
||||
|
||||
def __skip_whitespace(self) -> None:
|
||||
while self.current_char in [' ', '\t', '\n', '\r']:
|
||||
if self.current_char == "\n":
|
||||
self.line_no += 1
|
||||
|
||||
self.__read_char()
|
||||
|
||||
def __new_token(self, tt: TokenType, literal: Any) -> Token:
|
||||
return Token(tt, literal, self.line_no, self.position)
|
||||
|
||||
def __is_digit(self, char: str) -> bool:
|
||||
return "0" <= char and char <= "9"
|
||||
|
||||
def __read_number(self) -> Token:
|
||||
start_pos: int = self.position
|
||||
dot_count: int = 0
|
||||
|
||||
output: str = ""
|
||||
|
||||
while self.__is_digit(self.current_char) or self.current_char == ".":
|
||||
if self.current_char == ".":
|
||||
dot_count += 1
|
||||
|
||||
if dot_count > 1:
|
||||
# todo: error message
|
||||
return self.__new_token(TokenType.ILLEGAL, self.source[start_pos:self.position])
|
||||
|
||||
output += self.source[self.position]
|
||||
self.__read_char()
|
||||
|
||||
if self.current_char is None:
|
||||
break
|
||||
|
||||
if dot_count == 0:
|
||||
return self.__new_token(TokenType.INT, int(output))
|
||||
else:
|
||||
return self.__new_token(TokenType.FLOAT, float(output))
|
||||
|
||||
def next_token(self) -> Token:
|
||||
tok: Token = None
|
||||
|
||||
self.__skip_whitespace()
|
||||
|
||||
match self.current_char:
|
||||
case "+":
|
||||
tok = self.__new_token(TokenType.PLUS, self.current_char)
|
||||
case "-":
|
||||
tok = self.__new_token(TokenType.MINUS, self.current_char)
|
||||
case "*":
|
||||
tok = self.__new_token(TokenType.ASTERISK, self.current_char)
|
||||
case "/":
|
||||
tok = self.__new_token(TokenType.SLASH, self.current_char)
|
||||
case "^":
|
||||
tok = self.__new_token(TokenType.POW, self.current_char)
|
||||
case "%":
|
||||
tok = self.__new_token(TokenType.MODULUS, self.current_char)
|
||||
case "(":
|
||||
tok = self.__new_token(TokenType.LPAREN, self.current_char)
|
||||
case ")":
|
||||
tok = self.__new_token(TokenType.RPAREN, self.current_char)
|
||||
case "[":
|
||||
tok = self.__new_token(TokenType.LBRACKET, self.current_char)
|
||||
case "]":
|
||||
tok = self.__new_token(TokenType.RBRACKET, self.current_char)
|
||||
case "{":
|
||||
tok = self.__new_token(TokenType.LCURLY, self.current_char)
|
||||
case "}":
|
||||
tok = self.__new_token(TokenType.RCURLY, self.current_char)
|
||||
case ":":
|
||||
tok = self.__new_token(TokenType.COLON, self.current_char)
|
||||
case None:
|
||||
tok = self.__new_token(TokenType.EOF, "")
|
||||
case _:
|
||||
if self.__is_digit(self.current_char):
|
||||
tok = self.__read_number()
|
||||
return tok
|
||||
else:
|
||||
tok = self.__new_token(TokenType.ILLEGAL, self.current_char)
|
||||
|
||||
self.__read_char()
|
||||
return tok
|
||||
42
lexer_token.py
Normal file
42
lexer_token.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Kinds of token the lexer can produce.

    Every member's value is its own name as a string.
    """

    # --- Special tokens -------------------------------------------------
    EOF = "EOF"
    ILLEGAL = "ILLEGAL"

    # --- Data types -----------------------------------------------------
    INT = "INT"
    FLOAT = "FLOAT"

    # --- Arithmetic symbols ---------------------------------------------
    PLUS = "PLUS"
    MINUS = "MINUS"
    ASTERISK = "ASTERISK"
    SLASH = "SLASH"
    POW = "POW"
    MODULUS = "MODULUS"

    # --- Symbols ----------------------------------------------------------
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    LBRACKET = "LBRACKET"
    RBRACKET = "RBRACKET"
    LCURLY = "LCURLY"
    RCURLY = "RCURLY"
    COLON = "COLON"
|
||||
|
||||
class Token:
    """A single lexical token together with where it was found."""

    def __init__(self, type: TokenType, literal: Any, line_no: int, position: int) -> None:
        """Record the token's kind, its literal value and its location.

        Args:
            type: The kind of token.
            literal: The value carried by the token.
            line_no: Line number associated with the token.
            position: Character index associated with the token.
        """
        self.type = type
        self.literal = literal
        self.line_no = line_no
        self.position = position

    def __str__(self) -> str:
        """Render the token as ``token[<type> : <literal> : Line N : Position M]``."""
        kind, text = self.type, self.literal
        location = f"Line {self.line_no} : Position {self.position}"
        return f"token[{kind} : {text} : {location}]"

    def __repr__(self) -> str:
        """Use the same text as ``str()`` so tokens print readably in containers."""
        return self.__str__()
|
||||
13
main.py
Normal file
13
main.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from lexer import Lexer
|
||||
|
||||
# When True, the script prints every token the lexer produces for the sample file.
LEXER_DEBUG: bool = True


if __name__ == "__main__":
    # Explicit encoding so the sample program is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("tests/lexer.pla", encoding="utf-8") as f:
        code: str = f.read()

    if LEXER_DEBUG:
        debug_lex: Lexer = Lexer(source=code)
        # Keep pulling tokens until the lexer's cursor has run off the end of
        # the input (current_char is None).
        while debug_lex.current_char is not None:
            print(debug_lex.next_token())
|
||||
@@ -1,2 +1,2 @@
|
||||
depend "io.pla"
|
||||
write("Hello, World!")
|
||||
print("Hello, World!")
|
||||
6
tests/lexer.pla
Normal file
6
tests/lexer.pla
Normal file
@@ -0,0 +1,6 @@
|
||||
123
|
||||
0.456
|
||||
[]
|
||||
{}
|
||||
(1 + 3 * 2 ^ 4) % 2
|
||||
2 - 1
|
||||
18
tests/types.pla
Normal file
18
tests/types.pla
Normal file
@@ -0,0 +1,18 @@
|
||||
depend "io.pla"
|
||||
|
||||
enum Gender {
|
||||
male,
|
||||
female
|
||||
}
|
||||
|
||||
struct Person {
|
||||
name: String,
|
||||
age: Int = 0,
|
||||
speak: Func
|
||||
}
|
||||
|
||||
speak = Func(sentence: String): Nil {
|
||||
print(sentence)
|
||||
}
|
||||
|
||||
max: Person = {"Max", 17, }
|
||||
@@ -6,8 +6,10 @@ myBoolean: Bool = true
|
||||
myString: String = "Hello!\n"
|
||||
myList: List = [1, "hi", true, [1, 2, 3], 0.789]
|
||||
|
||||
write(String(myInt))
|
||||
write(String(myDecimal))
|
||||
write(String(myBoolean))
|
||||
write(myString)
|
||||
write(String(myList))
|
||||
MY_CONSTANT: Const(String) = "foo bar"
|
||||
|
||||
print(String(myInt))
|
||||
print(String(myDecimal))
|
||||
print(String(myBoolean))
|
||||
print(myString)
|
||||
print(String(myList))
|
||||
Reference in New Issue
Block a user