SYMBOL INDEX (36 symbols across 9 files) FILE: convert_tf2_ckpt_for_all_frameworks.py function load_tf2_weights_in_bert (line 30) | def load_tf2_weights_in_bert(model, tf_checkpoint_path, config): function convert_tf2_checkpoint_to_pytorch (line 240) | def convert_tf2_checkpoint_to_pytorch(tf_checkpoint_path, tf_config_path... FILE: create_pretraining_data.py class TrainingInstance (line 89) | class TrainingInstance(object): method __init__ (line 92) | def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm... method __str__ (line 100) | def __str__(self): method __repr__ (line 112) | def __repr__(self): function write_instance_to_example_files (line 116) | def write_instance_to_example_files(instances, tokenizer, max_seq_length, function create_int_feature (line 195) | def create_int_feature(values): function create_float_feature (line 200) | def create_float_feature(values): function create_training_instances (line 205) | def create_training_instances(input_files, function create_instances_from_document (line 256) | def create_instances_from_document( function create_masked_lm_predictions (line 377) | def create_masked_lm_predictions(tokens, masked_lm_prob, function truncate_seq_pair (line 454) | def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): function main (line 472) | def main(_): FILE: japanese_tokenizers/implementations.py class JapaneseWordPieceTokenizer (line 24) | class JapaneseWordPieceTokenizer(BertWordPieceTokenizer): method __init__ (line 25) | def __init__( FILE: japanese_tokenizers/pre_tokenizers.py class MeCabPreTokenizer (line 22) | class MeCabPreTokenizer: method __init__ (line 23) | def __init__(self, mecab_dic: Optional[str] = None, mecab_option: Opti... method mecab_split (line 45) | def mecab_split(self, i: int, normalized_string: NormalizedString) -> ... method pre_tokenize (line 56) | def pre_tokenize(self, pretok: PreTokenizedString): FILE: make_alphabet_from_unidic.py function main (line 20) | def main(args): FILE: make_corpus_wiki.py class MeCabSentenceSplitter (line 25) | class MeCabSentenceSplitter: method __init__ (line 26) | def __init__(self, mecab_option=None): method __call__ (line 36) | def __call__(self, text): function filter_text (line 55) | def filter_text(text): function preprocess_text (line 63) | def preprocess_text(text, title=None): function main (line 87) | def main(args): FILE: merge_split_corpora.py function _open_file (line 31) | def _open_file(filename): function main (line 40) | def main(args): FILE: tokenization.py class BertJapaneseTokenizer (line 24) | class BertJapaneseTokenizer(BertJapaneseTokenizerBase): method __init__ (line 25) | def __init__( method _convert_token_to_id (line 71) | def _convert_token_to_id(self, token): class CharacterTokenizer (line 78) | class CharacterTokenizer(CharacterTokenizerBase): method __init__ (line 79) | def __init__(self, vocab, unk_token, normalize_text=True): method tokenize (line 82) | def tokenize(self, text): FILE: train_tokenizer.py function main (line 27) | def main(args):