Files
barry/barry.py

50 lines
1.3 KiB
Python
Raw Normal View History

2025-12-27 17:21:40 +11:00
import re
import random
# Matches any single character that is not an ASCII letter or a space.
# tokenise() substitutes every match with "" to strip digits, punctuation
# and similar characters from its input.
regex = re.compile("[^a-zA-Z ]")
class Token:
    """Holds the list of completion words recorded via add_completion()."""

    def __init__(self):
        # A list rather than a set: every add_completion() call appends,
        # so repeated words are kept.
        self.completions: list[str] = []

    def __repr__(self):
        """Return the repr of the underlying completions list."""
        return repr(self.completions)

    def add_completion(self, word: str):
        """Append *word* to the completions; duplicates are retained."""
        self.completions.append(word)
class Model:
def __init__(self):
self.tokens: dict[str, Token] = {}
def predict(self, previous: str) -> str | None:
if previous in self.tokens:
completion = self.tokens[previous].completions
return random.choice(completion)
else:
return None
def tokenise(instr: str) -> list[str]:
    """Split *instr* into lowercase words made only of ASCII letters.

    Any run of whitespace (spaces, newlines, tabs, ...) separates words.
    Every other character matched by ``regex`` is removed without
    splitting, so ``"don't"`` becomes ``"dont"``.

    Args:
        instr: The raw text to tokenise.

    Returns:
        The words in order, with no empty strings between them. When the
        input contains no letters at all the result is ``[""]`` rather
        than an empty list, so callers that index the last token keep
        working.
    """
    # Turn every whitespace character into a plain space *before* stripping:
    # `regex` deletes anything that is not a letter or a space, so newlines
    # and tabs would otherwise vanish and glue neighbouring words together.
    spaced = re.sub(r"\s", " ", instr)
    # split() with no argument splits on runs of spaces and drops the empty
    # strings that leading, trailing or repeated separators would produce.
    words = regex.sub("", spaced).lower().split()
    return words or [""]
def train(data: str):
    """Build a Model from the word sequence of *data*.

    The text is split with tokenise(); for every token that has a
    successor, the successor is added as a completion of that token.
    The final token therefore only appears as a completion, never as a
    key of its own (unless it also occurs earlier in the text).

    Args:
        data: The training text.

    Returns:
        A new Model populated from *data*.
    """
    words = tokenise(data)
    trained = Model()
    # Pair each token with the one right after it; zip stops at the shorter
    # sequence, so the last token is never used as a left-hand side.
    for current, following in zip(words, words[1:]):
        if current not in trained.tokens:
            trained.tokens[current] = Token()
        trained.tokens[current].add_completion(following)
    return trained
def prompt(prompt: str, model: Model) -> str:
    """Generate a continuation for *prompt* using *model*.

    Generation is seeded with the last token of tokenise(prompt). Each
    step asks the model to predict a word from the previously produced
    one and stops early when the model returns ``None``.

    Args:
        prompt: The text whose final token seeds the generation.
        model: The Model queried via ``predict``.

    Returns:
        Up to 199 generated words, each followed by a single space (so a
        non-empty result ends with a space); ``""`` if nothing could be
        predicted.
    """
    max_words = 199
    pieces: list[str] = []
    word = tokenise(prompt)[-1]
    for _ in range(max_words):
        word = model.predict(word)
        if word is None:
            break
        pieces.append(word + " ")
    return "".join(pieces)