SYMBOL INDEX (31 symbols across 1 files) FILE: bytepiece/bytepiece.py function normalize (line 16) | def normalize(text, maxlen=0, isolate_digits=False): class Trainer (line 31) | class Trainer: method __init__ (line 35) | def __init__( method count_ngrams (line 54) | def count_ngrams(self, texts): method prune_ngrams (line 63) | def prune_ngrams(self, ngrams): method trans (line 82) | def trans(self): method _tokenize (line 90) | def _tokenize(self, text): method count_pieces (line 113) | def count_pieces(self, texts): method split_pieces (line 120) | def split_pieces(self, keep, drop): method prune_pieces (line 127) | def prune_pieces(self, pieces, workers=1, batch_size=1000): method norm (line 180) | def norm(self, texts): method train (line 185) | def train(self, texts, workers=1, batch_size=1000): method dump (line 201) | def dump(self, pieces): method save (line 208) | def save(self, path): method pcount (line 221) | def pcount(self, inputs, count, merge, init, desc, workers, batch_size): method pcount_ngrams (line 251) | def pcount_ngrams(self, texts, workers=1, batch_size=1000): method psplit_pieces (line 263) | def psplit_pieces(self, keep, drop, workers=1, batch_size=1000): method pcount_pieces (line 274) | def pcount_pieces(self, texts, workers=1, batch_size=1000): class Tokenizer (line 285) | class Tokenizer: method __init__ (line 288) | def __init__(self, pieces, seed=None): method set_seed (line 306) | def set_seed(self, seed): method _tokenize (line 310) | def _tokenize(self, text, alpha=-1): method tokenize (line 313) | def tokenize(self, text, alpha=-1, iter=False): method piece_to_id (line 319) | def piece_to_id(self, p): method id_to_piece (line 322) | def id_to_piece(self, i): method pieces_to_ids (line 325) | def pieces_to_ids(self, pieces): method ids_to_pieces (line 328) | def ids_to_pieces(self, ids): method encode (line 331) | def encode(self, text, add_bos=False, add_eos=False, alpha=-1, iter=Fa... method decode (line 344) | def decode(self, ids): method convert_to_sentencepiece (line 348) | def convert_to_sentencepiece(self, path): function convert_to_bytepiece (line 391) | def convert_to_bytepiece(pieces, path):