SYMBOL INDEX (84 symbols across 9 files)

FILE: cache/make_vocab.py
  function main (line 9) | def main():

FILE: eval.py
  function build_files (line 13) | def build_files(data_path, tokenized_data_path, num_pieces, full_tokeniz...
  function main (line 39) | def main():

FILE: generate.py
  function is_word (line 9) | def is_word(word):
  function _is_chinese_char (line 16) | def _is_chinese_char(char):
  function top_k_top_p_filtering (line 40) | def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-floa...
  function sample_sequence (line 71) | def sample_sequence(model, context, length, n_ctx, tokenizer, temperatur...
  function fast_sample_sequence (line 92) | def fast_sample_sequence(model, context, length, temperature=1.0, top_k=...
  function generate (line 114) | def generate(n_ctx, model, context, length, tokenizer, temperature=1, to...
  function main (line 124) | def main():

FILE: generate_texts.py
  function is_word (line 11) | def is_word(word):
  function _is_chinese_char (line 18) | def _is_chinese_char(char):
  function top_k_top_p_filtering (line 42) | def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-floa...
  function sample_sequence (line 73) | def sample_sequence(model, context, length, n_ctx, tokenizer, temperatur...
  function main (line 94) | def main():

FILE: tokenizations/bpe_tokenizer.py
  function get_pairs (line 22) | def get_pairs(word):
  class Encoder (line 31) | class Encoder:
    method __init__ (line 32) | def __init__(self, encoder, bpe_merges):
    method bpe (line 39) | def bpe(self, token):
    method encode (line 79) | def encode(self, text):
    method decode (line 82) | def decode(self, tokens):
    method tokenize (line 86) | def tokenize(self, text):
    method convert_tokens_to_ids (line 91) | def convert_tokens_to_ids(self, tokens):
  class Encoder_SP (line 94) | class Encoder_SP:
    method __init__ (line 95) | def __init__(self, model_path):
    method encode (line 100) | def encode(self, text):
    method decode (line 107) | def decode(self, tokens):
    method tokenize (line 115) | def tokenize(self, text):
    method convert_tokens_to_ids (line 118) | def convert_tokens_to_ids(self, tokens):
  function get_encoder (line 121) | def get_encoder(encoder_file, bpe_file):

FILE: tokenizations/tokenization_bert.py
  function load_vocab (line 66) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 77) | def whitespace_tokenize(text):
  class BertTokenizer (line 86) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 105) | def __init__(self, vocab_file, do_lower_case=True, do_basic_tokenize=T...
    method vocab_size (line 143) | def vocab_size(self):
    method _tokenize (line 146) | def _tokenize(self, text):
    method _convert_token_to_id (line 156) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 160) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 164) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 169) | def save_vocabulary(self, vocab_path):
    method from_pretrained (line 185) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...
  class BasicTokenizer (line 203) | class BasicTokenizer(object):
    method __init__ (line 206) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 226) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 257) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 268) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 290) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 303) | def _is_chinese_char(self, cp):
    method _clean_text (line 325) | def _clean_text(self, text):
  class WordpieceTokenizer (line 339) | class WordpieceTokenizer(object):
    method __init__ (line 342) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 347) | def tokenize(self, text):
  function _is_whitespace (line 399) | def _is_whitespace(char):
  function _is_control (line 411) | def _is_control(char):
  function _is_punctuation (line 423) | def _is_punctuation(char):

FILE: tokenizations/tokenization_bert_word_level.py
  function load_vocab (line 69) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 80) | def whitespace_tokenize(text):
  class BertTokenizer (line 89) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 108) | def __init__(self, vocab_file, do_lower_case=True, do_basic_tokenize=T...
    method vocab_size (line 146) | def vocab_size(self):
    method _tokenize (line 149) | def _tokenize(self, text):
    method _convert_token_to_id (line 159) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 163) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 167) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 172) | def save_vocabulary(self, vocab_path):
    method from_pretrained (line 188) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...
  class BasicTokenizer (line 206) | class BasicTokenizer(object):
    method __init__ (line 209) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 229) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 260) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 271) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 305) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 320) | def _is_chinese_char(self, cp):
    method _clean_text (line 342) | def _clean_text(self, text):
  class WordpieceTokenizer (line 356) | class WordpieceTokenizer(object):
    method __init__ (line 359) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 364) | def tokenize(self, text):
  function _is_whitespace (line 416) | def _is_whitespace(char):
  function _is_control (line 428) | def _is_control(char):
  function _is_punctuation (line 440) | def _is_punctuation(char):

FILE: train.py
  function build_files (line 15) | def build_files(data_path, tokenized_data_path, num_pieces, full_tokeniz...
  function main (line 41) | def main():

FILE: train_single.py
  function build_files (line 17) | def build_files(raw_data_path, tokenized_data_path, full_tokenizer, num_...
  function main (line 38) | def main():