SYMBOL INDEX (84 symbols across 9 files) FILE: cache/make_vocab.py function main (line 9) | def main(): FILE: eval.py function build_files (line 13) | def build_files(data_path, tokenized_data_path, num_pieces, full_tokeniz... function main (line 39) | def main(): FILE: generate.py function is_word (line 9) | def is_word(word): function _is_chinese_char (line 16) | def _is_chinese_char(char): function top_k_top_p_filtering (line 40) | def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-floa... function sample_sequence (line 71) | def sample_sequence(model, context, length, n_ctx, tokenizer, temperatur... function fast_sample_sequence (line 92) | def fast_sample_sequence(model, context, length, temperature=1.0, top_k=... function generate (line 114) | def generate(n_ctx, model, context, length, tokenizer, temperature=1, to... function main (line 124) | def main(): FILE: generate_texts.py function is_word (line 11) | def is_word(word): function _is_chinese_char (line 18) | def _is_chinese_char(char): function top_k_top_p_filtering (line 42) | def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-floa... function sample_sequence (line 73) | def sample_sequence(model, context, length, n_ctx, tokenizer, temperatur... function main (line 94) | def main(): FILE: tokenizations/bpe_tokenizer.py function get_pairs (line 22) | def get_pairs(word): class Encoder (line 31) | class Encoder: method __init__ (line 32) | def __init__(self, encoder, bpe_merges): method bpe (line 39) | def bpe(self, token): method encode (line 79) | def encode(self, text): method decode (line 82) | def decode(self, tokens): method tokenize (line 86) | def tokenize(self, text): method convert_tokens_to_ids (line 91) | def convert_tokens_to_ids(self, tokens): class Encoder_SP (line 94) | class Encoder_SP: method __init__ (line 95) | def __init__(self, model_path): method encode (line 100) | def encode(self, text): method decode (line 107) | def decode(self, tokens): method tokenize (line 115) | def tokenize(self, text): method convert_tokens_to_ids (line 118) | def convert_tokens_to_ids(self, tokens): function get_encoder (line 121) | def get_encoder(encoder_file, bpe_file): FILE: tokenizations/tokenization_bert.py function load_vocab (line 66) | def load_vocab(vocab_file): function whitespace_tokenize (line 77) | def whitespace_tokenize(text): class BertTokenizer (line 86) | class BertTokenizer(PreTrainedTokenizer): method __init__ (line 105) | def __init__(self, vocab_file, do_lower_case=True, do_basic_tokenize=T... method vocab_size (line 143) | def vocab_size(self): method _tokenize (line 146) | def _tokenize(self, text): method _convert_token_to_id (line 156) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 160) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 164) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 169) | def save_vocabulary(self, vocab_path): method from_pretrained (line 185) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa... class BasicTokenizer (line 203) | class BasicTokenizer(object): method __init__ (line 206) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin... method tokenize (line 226) | def tokenize(self, text, never_split=None): method _run_strip_accents (line 257) | def _run_strip_accents(self, text): method _run_split_on_punc (line 268) | def _run_split_on_punc(self, text, never_split=None): method _tokenize_chinese_chars (line 290) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 303) | def _is_chinese_char(self, cp): method _clean_text (line 325) | def _clean_text(self, text): class WordpieceTokenizer (line 339) | class WordpieceTokenizer(object): method __init__ (line 342) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100): method tokenize (line 347) | def tokenize(self, text): function _is_whitespace (line 399) | def _is_whitespace(char): function _is_control (line 411) | def _is_control(char): function _is_punctuation (line 423) | def _is_punctuation(char): FILE: tokenizations/tokenization_bert_word_level.py function load_vocab (line 69) | def load_vocab(vocab_file): function whitespace_tokenize (line 80) | def whitespace_tokenize(text): class BertTokenizer (line 89) | class BertTokenizer(PreTrainedTokenizer): method __init__ (line 108) | def __init__(self, vocab_file, do_lower_case=True, do_basic_tokenize=T... method vocab_size (line 146) | def vocab_size(self): method _tokenize (line 149) | def _tokenize(self, text): method _convert_token_to_id (line 159) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 163) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 167) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 172) | def save_vocabulary(self, vocab_path): method from_pretrained (line 188) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa... class BasicTokenizer (line 206) | class BasicTokenizer(object): method __init__ (line 209) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin... method tokenize (line 229) | def tokenize(self, text, never_split=None): method _run_strip_accents (line 260) | def _run_strip_accents(self, text): method _run_split_on_punc (line 271) | def _run_split_on_punc(self, text, never_split=None): method _tokenize_chinese_chars (line 305) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 320) | def _is_chinese_char(self, cp): method _clean_text (line 342) | def _clean_text(self, text): class WordpieceTokenizer (line 356) | class WordpieceTokenizer(object): method __init__ (line 359) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100): method tokenize (line 364) | def tokenize(self, text): function _is_whitespace (line 416) | def _is_whitespace(char): function _is_control (line 428) | def _is_control(char): function _is_punctuation (line 440) | def _is_punctuation(char): FILE: train.py function build_files (line 15) | def build_files(data_path, tokenized_data_path, num_pieces, full_tokeniz... function main (line 41) | def main(): FILE: train_single.py function build_files (line 17) | def build_files(raw_data_path, tokenized_data_path, full_tokenizer, num_... function main (line 38) | def main():