license and cleanup

This commit is contained in:
Brett Kuprel
2022-06-27 14:34:10 -04:00
parent 32b7aa196b
commit 18e6a9852f
7 changed files with 25 additions and 42 deletions
+1
View File
@@ -1,6 +1,7 @@
from math import inf
from typing import List, Tuple
class TextTokenizer:
def __init__(self, vocab: dict, merges: List[str]):
self.token_from_subword = vocab