Added is_verbose flag (TextTokenizer now prints subwords only when is_verbose is set; defaults to True)

This commit is contained in:
Brett Kuprel
2022-07-01 20:17:20 -04:00
parent 35e97768a5
commit cf9656baa2
3 changed files with 23 additions and 19 deletions
+3 -2
View File
@@ -3,7 +3,8 @@ from typing import List, Tuple
class TextTokenizer:
def __init__(self, vocab: dict, merges: List[str]):
def __init__(self, vocab: dict, merges: List[str], is_verbose: bool = True):
self.is_verbose = is_verbose
self.token_from_subword = vocab
pairs = [tuple(pair.split()) for pair in merges]
self.rank_from_pair = dict(zip(pairs, range(len(pairs))))
@@ -36,5 +37,5 @@ class TextTokenizer:
(subwords[i + 2:] if i + 2 < len(subwords) else [])
)
print(subwords)
if self.is_verbose: print(subwords)
return subwords