import re import random regex = re.compile("[^a-zA-Z ]") class Token: def __init__(self): self.completions: list[str] = [] def __repr__(self): return self.completions.__repr__() def add_completion(self, word: str): self.completions.append(word) class Model: def __init__(self): self.tokens: dict[str, Token] = {} def predict(self, previous: str) -> str | None: if previous in self.tokens: completion = self.tokens[previous].completions return random.choice(completion) else: return None def tokenise(instr: str): return re.split(r" |\n", regex.sub("", instr).lower()) def train(data: str): model = Model() spldata = tokenise(data) for i in range(0, len(spldata) - 1): word = spldata[i] next_word = spldata[i + 1] if not word in model.tokens: model.tokens[word] = Token() model.tokens[word].add_completion(next_word) return model def prompt(prompt: str, model: Model) -> str: words = tokenise(prompt) output: str = "" currentWord = words[-1] for _ in range(1, 200): currentWord = model.predict(currentWord) if currentWord == None: break output += currentWord + " " return output