205 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			205 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from lexer_token import Token, TokenType, lookup_ident
 | |
| from typing import Any
 | |
| 
 | |
| 
 | |
| class Lexer:
 | |
| 	def __init__(self, source: str) -> None:
 | |
| 		self.source = source
 | |
| 
 | |
| 		self.position: int = -1
 | |
| 		self.read_position: int = 0
 | |
| 		self.line_no: int = 1
 | |
| 
 | |
| 		self.current_char: str | None = None
 | |
| 
 | |
| 		self.__read_char()
 | |
| 
 | |
| 	def __read_char(self) -> None:
 | |
| 		if self.read_position >= len(self.source):
 | |
| 			self.current_char = None
 | |
| 		else:
 | |
| 			self.current_char = self.source[self.read_position]
 | |
| 
 | |
| 		self.position = self.read_position
 | |
| 		self.read_position += 1
 | |
| 
 | |
| 	def __peek_char(self) -> str | None:
 | |
| 		if self.read_position >= len(self.source):
 | |
| 			return None
 | |
| 		
 | |
| 		return self.source[self.read_position]
 | |
| 
 | |
| 	def __skip_whitespace(self) -> None:
 | |
| 		while self.current_char in [' ', '\t', '\n', '\r']:
 | |
| 			if self.current_char == "\n":
 | |
| 				self.line_no += 1
 | |
| 			
 | |
| 			self.__read_char()
 | |
| 
 | |
| 	def __new_token(self, tt: TokenType, literal: Any) -> Token:
 | |
| 		return Token(tt, literal, self.line_no, self.position)
 | |
| 
 | |
| 	def __is_digit(self, char: str) -> bool:
 | |
| 		return "0" <= char and char <= "9"
 | |
| 	
 | |
| 	def __is_letter(self, char: str) -> bool:
 | |
| 		return "a" <= char and char <= "z" or "A" <= char and char <= "Z" or char == "_"
 | |
| 	
 | |
| 	def __read_number(self) -> Token:
 | |
| 		start_pos: int = self.position
 | |
| 		dot_count: int = 0
 | |
| 
 | |
| 		output: str = ""
 | |
| 
 | |
| 		while self.__is_digit(self.current_char) or self.current_char == ".":
 | |
| 			if self.current_char == ".":
 | |
| 				dot_count += 1
 | |
| 			
 | |
| 			if dot_count > 1:
 | |
| 				# todo: error message
 | |
| 				return self.__new_token(TokenType.ILLEGAL, self.source[start_pos:self.position])
 | |
| 			
 | |
| 			output += self.source[self.position]
 | |
| 			self.__read_char()
 | |
| 
 | |
| 			if self.current_char is None:
 | |
| 				break
 | |
| 
 | |
| 		if dot_count == 0:
 | |
| 			return self.__new_token(TokenType.INT, int(output))
 | |
| 		else:
 | |
| 			return self.__new_token(TokenType.FLOAT, float(output))
 | |
| 
 | |
| 	def __read_identifier(self) -> str:
 | |
| 		position = self.position
 | |
| 		while self.current_char is not None and (self.__is_letter(self.current_char) or self.current_char.isalnum()):
 | |
| 			self.__read_char()
 | |
| 
 | |
| 		return self.source[position:self.position]
 | |
| 
 | |
| 	def next_token(self) -> Token:
 | |
| 		tok: Token = None
 | |
| 
 | |
| 		self.__skip_whitespace()
 | |
| 
 | |
| 		match self.current_char:
 | |
| 			case "+":
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.PLUS_EQ, ch + self.current_char)
 | |
| 				elif self.__peek_char() == "+":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.PLUS_PLUS, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.PLUS, self.current_char)
 | |
| 			case "-":
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.MINUS_EQ, ch + self.current_char)
 | |
| 				elif self.__peek_char() == "-":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.MINUS_MINUS, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.MINUS, self.current_char)
 | |
| 			case "*":
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.MUL_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.ASTERISK, self.current_char)
 | |
| 			case "/":
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.DIV_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.SLASH, self.current_char)
 | |
| 			case "^":
 | |
| 				tok = self.__new_token(TokenType.POW, self.current_char)
 | |
| 			case "%":
 | |
| 				tok = self.__new_token(TokenType.MODULUS, self.current_char)
 | |
| 			case "<":
 | |
| 				# Handle <=
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.LT_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.LT, self.current_char)
 | |
| 			case ">":
 | |
| 				# Handle >=
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.GT_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.GT, self.current_char)
 | |
| 			case "=":
 | |
| 				# Handle ==
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.EQ_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.EQ, self.current_char)
 | |
| 			case "!":
 | |
| 				# Handle !=
 | |
| 				if self.__peek_char() == "=":
 | |
| 					ch = self.current_char
 | |
| 					self.__read_char()
 | |
| 					tok = self.__new_token(TokenType.NOT_EQ, ch + self.current_char)
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.BANG, self.current_char)
 | |
| 			case "(":
 | |
| 				tok = self.__new_token(TokenType.LPAREN, self.current_char)
 | |
| 			case ")":
 | |
| 				tok = self.__new_token(TokenType.RPAREN, self.current_char)
 | |
| 			case "[":
 | |
| 				tok = self.__new_token(TokenType.LBRACKET, self.current_char)
 | |
| 			case "]":
 | |
| 				tok = self.__new_token(TokenType.RBRACKET, self.current_char)
 | |
| 			case "{":
 | |
| 				tok = self.__new_token(TokenType.LBRACE, self.current_char)
 | |
| 			case "}":
 | |
| 				tok = self.__new_token(TokenType.RBRACE, self.current_char)
 | |
| 			case ";":
 | |
| 				tok = self.__new_token(TokenType.SEMICOLON, self.current_char)
 | |
| 			case ":":
 | |
| 				tok = self.__new_token(TokenType.COLON, self.current_char)
 | |
| 			case ",":
 | |
| 				tok = self.__new_token(TokenType.COMMA, self.current_char)
 | |
| 			case "$":
 | |
| 				tok = self.__new_token(TokenType.DOLLARSIGN, self.current_char)
 | |
| 			case '"':
 | |
| 				tok = self.__new_token(TokenType.STRING, self.__read_string())
 | |
| 			case None:
 | |
| 				tok = self.__new_token(TokenType.EOF, "")
 | |
| 			case _:
 | |
| 				if self.__is_letter(self.current_char):
 | |
| 					literal: str = self.__read_identifier()
 | |
| 					tt: TokenType = lookup_ident(literal)
 | |
| 					tok = self.__new_token(tt, literal)
 | |
| 					return tok
 | |
| 				
 | |
| 				if self.__is_digit(self.current_char):
 | |
| 					tok = self.__read_number()
 | |
| 					return tok
 | |
| 				else:
 | |
| 					tok = self.__new_token(TokenType.ILLEGAL, self.current_char)
 | |
| 
 | |
| 		self.__read_char()
 | |
| 		return tok
 | |
| 	
 | |
| 	def __read_string(self):
 | |
| 		position: int = self.position + 1
 | |
| 		while True:
 | |
| 			self.__read_char()
 | |
| 			if self.current_char == '"' or self.current_char is None:
 | |
| 				break
 | |
| 		return self.source[position:self.position]
 | |
| 			 | 
