SYMBOL INDEX (283 symbols across 19 files) FILE: code/assemble.py function refine_entity (line 11) | def refine_entity(w,s,e): function convert (line 25) | def convert(entity, refine=False): function get_entities (line 37) | def get_entities(text,tags): function check_special (line 66) | def check_special(text): function merge_by_4_tuple (line 74) | def merge_by_4_tuple(raw_texts,data,weights,threshold=3.0, refine=False): function assemble_fake (line 129) | def assemble_fake(): function assemble_final (line 173) | def assemble_final(): FILE: code/conlleval.py class FormatError (line 20) | class FormatError(Exception): class EvalCounts (line 26) | class EvalCounts(object): method __init__ (line 27) | def __init__(self): function parse_args (line 40) | def parse_args(argv): function parse_tag (line 57) | def parse_tag(t): function evaluate (line 62) | def evaluate(iterable, options=None): function uniq (line 144) | def uniq(iterable): function calculate_metrics (line 149) | def calculate_metrics(correct, guessed, total): function metrics (line 157) | def metrics(counts): function report (line 170) | def report(counts, out=None): function report_notprint (line 196) | def report_notprint(counts, out=None): function end_of_chunk (line 230) | def end_of_chunk(prev_tag, tag, prev_type, type_): function start_of_chunk (line 255) | def start_of_chunk(prev_tag, tag, prev_type, type_): function return_report (line 280) | def return_report(input_file): function main (line 286) | def main(argv): FILE: code/create_raw_text.py function read_conll (line 15) | def read_conll(fname): function read_track3 (line 28) | def read_track3(fname): function create_preatrain_data (line 37) | def create_preatrain_data(): function convert_distance (line 63) | def convert_distance(item,tags): function convert_village (line 76) | def convert_village(item,tags): function convert_intersection (line 89) | def convert_intersection(item,tags,pattern): function get_intersection_pattern (line 108) | def get_intersection_pattern(): function check_devzone (line 126) | def check_devzone(name): function convert_data_format_v2 (line 132) | def convert_data_format_v2(sentence): function _get_refine_entity (line 169) | def _get_refine_entity(raw_files): function _fix_data (line 194) | def _fix_data(ent_tp_cnt, update_files, iob=False): function fix_data (line 225) | def fix_data(): function create_extra_train_data (line 248) | def create_extra_train_data(): FILE: code/electra-pretrain/build_pretraining_dataset.py function create_int_feature (line 29) | def create_int_feature(values): class ExampleBuilder (line 34) | class ExampleBuilder(object): method __init__ (line 37) | def __init__(self, tokenizer, max_length): method add_line (line 42) | def add_line(self, line): method _create_example (line 50) | def _create_example(self): method _make_tf_example (line 63) | def _make_tf_example(self, first_segment, second_segment): class ExampleWriter (line 83) | class ExampleWriter(object): method __init__ (line 86) | def __init__(self, job_id, vocab_file, output_dir, max_seq_length, method write_examples (line 101) | def write_examples(self, input_file): method finish (line 115) | def finish(self): function write_examples (line 120) | def write_examples(job_id, args): function main (line 155) | def main(): FILE: code/electra-pretrain/configure_pretraining.py class PretrainingConfig (line 25) | class PretrainingConfig(object): method __init__ (line 28) | def __init__(self, model_name, data_dir, **kwargs): method update (line 128) | def update(self, kwargs): FILE: code/electra-pretrain/model/modeling.py class BertConfig (line 36) | class BertConfig(object): method __init__ (line 39) | def __init__(self, method from_dict (line 88) | def from_dict(cls, json_object): method from_json_file (line 96) | def from_json_file(cls, json_file): method to_dict (line 102) | def to_dict(self): method to_json_string (line 107) | def to_json_string(self): class BertModel (line 112) | class BertModel(object): method __init__ (line 137) | def __init__(self, method get_pooled_output (line 258) | def get_pooled_output(self): method get_sequence_output (line 261) | def get_sequence_output(self): method get_all_encoder_layers (line 270) | def get_all_encoder_layers(self): method get_embedding_output (line 273) | def get_embedding_output(self): method get_embedding_table (line 284) | def get_embedding_table(self): function gelu (line 288) | def gelu(input_tensor): function get_activation (line 304) | def get_activation(activation_string): function get_assignment_map_from_checkpoint (line 341) | def get_assignment_map_from_checkpoint(tvars, init_checkpoint, prefix=""... function dropout (line 367) | def dropout(input_tensor, dropout_prob): function layer_norm (line 385) | def layer_norm(input_tensor, name=None): function layer_norm_and_dropout (line 391) | def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): function create_initializer (line 398) | def create_initializer(initializer_range=0.02): function load_pretrained_embedding (line 402) | def load_pretrained_embedding(embedding_file, vocab_size, embedding_size): function embedding_lookup (line 415) | def embedding_lookup(input_ids, function embedding_postprocessor (line 484) | def embedding_postprocessor(input_tensor, function create_attention_mask_from_input_mask (line 580) | def create_attention_mask_from_input_mask(from_tensor, to_mask): function attention_layer (line 614) | def attention_layer(from_tensor, function transformer_model (line 810) | def transformer_model(input_tensor, function get_shape_list (line 947) | def get_shape_list(tensor, expected_rank=None, name=None): function reshape_to_matrix (line 992) | def reshape_to_matrix(input_tensor): function reshape_from_matrix (line 1006) | def reshape_from_matrix(output_tensor, orig_shape_list): function assert_rank (line 1019) | def assert_rank(tensor, expected_rank, name=None): FILE: code/electra-pretrain/model/optimization.py function create_optimizer (line 30) | def create_optimizer( class AdamWeightDecayOptimizer (line 151) | class AdamWeightDecayOptimizer(tf.train.Optimizer): method __init__ (line 154) | def __init__(self, method _apply_gradients (line 172) | def _apply_gradients(self, grads_and_vars, learning_rate): method apply_gradients (line 223) | def apply_gradients(self, grads_and_vars, global_step=None, name=None): method _do_use_weight_decay (line 244) | def _do_use_weight_decay(self, param_name): method _get_variable_name (line 254) | def _get_variable_name(self, param_name): function _get_layer_lrs (line 262) | def _get_layer_lrs(learning_rate, layer_decay, n_layers): FILE: code/electra-pretrain/model/tokenization.py function convert_to_unicode (line 29) | def convert_to_unicode(text): function printable_text (line 49) | def printable_text(text): function load_vocab (line 72) | def load_vocab(vocab_file): function convert_by_vocab (line 87) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 95) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 99) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 103) | def whitespace_tokenize(text): class SimpleTokenizer (line 111) | class SimpleTokenizer(object): method __init__ (line 112) | def __init__(self, vocab_file): method tokenize (line 116) | def tokenize(self, text): method convert_tokens_to_ids (line 120) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 123) | def convert_ids_to_tokens(self, ids): class FullTokenizer (line 127) | class FullTokenizer(object): method __init__ (line 130) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 136) | def tokenize(self, text): method convert_tokens_to_ids (line 144) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 147) | def convert_ids_to_tokens(self, ids): class BasicTokenizer (line 151) | class BasicTokenizer(object): method __init__ (line 154) | def __init__(self, do_lower_case=True): method tokenize (line 162) | def tokenize(self, text): method _run_strip_accents (line 186) | def _run_strip_accents(self, text): method _run_split_on_punc (line 197) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 217) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 230) | def _is_chinese_char(self, cp): method _clean_text (line 252) | def _clean_text(self, text): class WordpieceTokenizer (line 266) | class WordpieceTokenizer(object): method __init__ (line 269) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 274) | def tokenize(self, text): function _is_whitespace (line 328) | def _is_whitespace(char): function _is_control (line 340) | def _is_control(char): function _is_punctuation (line 352) | def _is_punctuation(char): FILE: code/electra-pretrain/pretrain/pretrain_data.py function get_input_fn (line 32) | def get_input_fn(config: configure_pretraining.PretrainingConfig, is_tra... function _decode_record (line 83) | def _decode_record(record, name_to_features): function features_to_inputs (line 105) | def features_to_inputs(features): function get_updated_inputs (line 119) | def get_updated_inputs(inputs, **kwargs): function print_tokens (line 134) | def print_tokens(inputs: Inputs, inv_vocab, updates_mask=None): FILE: code/electra-pretrain/pretrain/pretrain_helpers.py function gather_positions (line 33) | def gather_positions(sequence, positions): function scatter_update (line 62) | def scatter_update(sequence, updates, positions): function _get_candidates_mask (line 118) | def _get_candidates_mask(inputs: pretrain_data.Inputs, vocab, function mask (line 131) | def mask(config: configure_pretraining.PretrainingConfig, function unmask (line 203) | def unmask(inputs: pretrain_data.Inputs): function sample_from_softmax (line 209) | def sample_from_softmax(logits, disallow=None): FILE: code/electra-pretrain/run_pretraining.py class PretrainingModel (line 37) | class PretrainingModel(object): method __init__ (line 40) | def __init__(self, config: configure_pretraining.PretrainingConfig, method _get_masked_lm_output (line 145) | def _get_masked_lm_output(self, inputs: pretrain_data.Inputs, model): method _get_discriminator_output (line 193) | def _get_discriminator_output(self, inputs, discriminator, labels): method _get_fake_data (line 220) | def _get_fake_data(self, inputs, mlm_logits): method _build_transformer (line 240) | def _build_transformer(self, inputs: pretrain_data.Inputs, is_training, function get_generator_config (line 258) | def get_generator_config(config: configure_pretraining.PretrainingConfig, function model_fn_builder (line 271) | def model_fn_builder(config: configure_pretraining.PretrainingConfig): function train_or_eval (line 332) | def train_or_eval(config: configure_pretraining.PretrainingConfig): function train_one_step (line 395) | def train_one_step(config: configure_pretraining.PretrainingConfig): function main (line 406) | def main(): FILE: code/electra-pretrain/util/training_utils.py class ETAHook (line 31) | class ETAHook(tf.estimator.SessionRunHook): method __init__ (line 34) | def __init__(self, to_log, n_steps, iterations_per_loop, on_tpu, method begin (line 49) | def begin(self): method before_run (line 52) | def before_run(self, run_context): method after_run (line 57) | def after_run(self, run_context, run_values): method end (line 65) | def end(self, session): method log (line 70) | def log(self, run_values=None): function secs_to_str (line 91) | def secs_to_str(secs): function get_bert_config (line 100) | def get_bert_config(config): FILE: code/electra-pretrain/util/utils.py function load_json (line 29) | def load_json(path): function write_json (line 34) | def write_json(o, path): function load_pickle (line 41) | def load_pickle(path): function write_pickle (line 46) | def write_pickle(o, path): function mkdir (line 53) | def mkdir(path): function rmrf (line 58) | def rmrf(path): function rmkdir (line 63) | def rmkdir(path): function log (line 68) | def log(*args): function log_config (line 74) | def log_config(config): function heading (line 80) | def heading(*args): function nest_dict (line 86) | def nest_dict(d, prefixes, delim="_"): function flatten_dict (line 100) | def flatten_dict(d, delim="_"): FILE: code/modeling.py function layer_norm (line 30) | def layer_norm(input_tensor, name=None): function scale_l2 (line 36) | def scale_l2(x, norm_length=1.0): class BertConfig (line 48) | class BertConfig(object): method __init__ (line 51) | def __init__(self, method from_dict (line 100) | def from_dict(cls, json_object): method from_json_file (line 108) | def from_json_file(cls, json_file): method to_dict (line 114) | def to_dict(self): method to_json_string (line 119) | def to_json_string(self): class BertModel (line 124) | class BertModel(object): method __init__ (line 148) | def __init__(self, method get_pooled_output (line 275) | def get_pooled_output(self): method get_sequence_output (line 278) | def get_sequence_output(self): method get_all_encoder_layers (line 287) | def get_all_encoder_layers(self): method get_position_embedding_output (line 290) | def get_position_embedding_output(self): method get_embedding_output (line 293) | def get_embedding_output(self): method get_embedding_table (line 304) | def get_embedding_table(self): function gelu (line 308) | def gelu(input_tensor): function get_activation (line 324) | def get_activation(activation_string): function get_assignment_map_from_checkpoint (line 361) | def get_assignment_map_from_checkpoint(tvars, init_checkpoint, ignore_na... function dropout (line 393) | def dropout(input_tensor, dropout_prob): function layer_norm_and_dropout (line 411) | def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): function create_initializer (line 417) | def create_initializer(initializer_range=0.02): function load_pretrained_embedding (line 421) | def load_pretrained_embedding(embedding_file, vocab_size, embedding_size): function embedding_lookup (line 434) | def embedding_lookup(input_ids, function embedding_postprocessor (line 501) | def embedding_postprocessor(input_tensor, function create_attention_mask_from_input_mask (line 597) | def create_attention_mask_from_input_mask(from_tensor, to_mask): function attention_layer (line 631) | def attention_layer(from_tensor, function transformer_model (line 827) | def transformer_model(input_tensor, function get_shape_list (line 968) | def get_shape_list(tensor, expected_rank=None, name=None): function reshape_to_matrix (line 1005) | def reshape_to_matrix(input_tensor): function reshape_from_matrix (line 1019) | def reshape_from_matrix(output_tensor, orig_shape_list): function assert_rank (line 1032) | def assert_rank(tensor, expected_rank, name=None): FILE: code/optimization.py function create_optimizer (line 24) | def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, h... class AdamWeightDecayOptimizer (line 169) | class AdamWeightDecayOptimizer(tf.train.Optimizer): method __init__ (line 172) | def __init__(self, method _apply_gradients (line 192) | def _apply_gradients(self, grads_and_vars, learning_rate): method apply_gradients (line 243) | def apply_gradients(self, grads_and_vars, global_step=None, name=None): method _do_use_weight_decay (line 262) | def _do_use_weight_decay(self, param_name): method _get_variable_name (line 272) | def _get_variable_name(self, param_name): FILE: code/pipeline.py class Timer (line 21) | class Timer(object): method __init__ (line 22) | def __init__(self): method get_current_time (line 25) | def get_current_time(self): function train_model (line 28) | def train_model(args,cmd): FILE: code/run_biaffine_ner.py class InputExample (line 175) | class InputExample(object): method __init__ (line 178) | def __init__(self, guid, text, label=None, raw_text=None): class InputFeatures (line 194) | class InputFeatures(object): method __init__ (line 197) | def __init__(self, input_ids, input_mask, segment_ids, span_mask, gold... function data_enhance (line 205) | def data_enhance(sentences, num=10): class DataProcessor (line 246) | class DataProcessor(object): method get_train_examples (line 249) | def get_train_examples(self, data_dir): method get_dev_examples (line 253) | def get_dev_examples(self, data_dir): method get_labels (line 257) | def get_labels(self): class NERProcessor (line 262) | class NERProcessor(DataProcessor): method __init__ (line 263) | def __init__(self, fold_id=0, fold_num=0): method get_train_examples (line 267) | def get_train_examples(self, data_dir, file_name='train.conll'): method get_dev_examples (line 307) | def get_dev_examples(self, data_dir, file_name="dev.conll"): method get_test_examples (line 325) | def get_test_examples(self, data_dir, file_name="final_test.txt"): method get_labels (line 339) | def get_labels(self): method check (line 344) | def check(self, text, label): function convert_single_example (line 358) | def convert_single_example(ex_index, example, label_list, max_seq_length... function filed_based_convert_examples_to_features (line 425) | def filed_based_convert_examples_to_features( function file_based_input_fn_builder (line 452) | def file_based_input_fn_builder(input_file, batch_size, seq_length, is_t... function biaffine_mapping (line 506) | def biaffine_mapping(vector_set_1, function create_model (line 590) | def create_model(bert_config, is_training, input_ids, input_mask, function focal_loss (line 672) | def focal_loss(logits, labels, gamma=2.0): function model_fn_builder (line 680) | def model_fn_builder(bert_config, num_labels, init_checkpoint=None, lear... function main (line 841) | def main(_): FILE: code/tokenization.py function convert_to_unicode (line 29) | def convert_to_unicode(text): function printable_text (line 49) | def printable_text(text): function load_vocab (line 72) | def load_vocab(vocab_file): function convert_by_vocab (line 87) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 95) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 99) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 103) | def whitespace_tokenize(text): class SimpleTokenizer (line 111) | class SimpleTokenizer(object): method __init__ (line 112) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 117) | def tokenize(self, text): method convert_tokens_to_ids (line 122) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 125) | def convert_ids_to_tokens(self, ids): class FullTokenizer (line 129) | class FullTokenizer(object): method __init__ (line 132) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 138) | def tokenize(self, text): method convert_tokens_to_ids (line 146) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 149) | def convert_ids_to_tokens(self, ids): class BasicTokenizer (line 153) | class BasicTokenizer(object): method __init__ (line 156) | def __init__(self, do_lower_case=True): method tokenize (line 164) | def tokenize(self, text): method _run_strip_accents (line 188) | def _run_strip_accents(self, text): method _run_split_on_punc (line 199) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 219) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 232) | def _is_chinese_char(self, cp): method _clean_text (line 254) | def _clean_text(self, text): class WordpieceTokenizer (line 268) | class WordpieceTokenizer(object): method __init__ (line 271) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 276) | def tokenize(self, text): function _is_whitespace (line 330) | def _is_whitespace(char): function _is_control (line 342) | def _is_control(char): function _is_punctuation (line 354) | def _is_punctuation(char): FILE: code/utils.py function normalize (line 16) | def normalize(text): function convert_data_format (line 21) | def convert_data_format(sentence): function convert_back_to_bio (line 53) | def convert_back_to_bio(entities,text): function iobes_iob (line 62) | def iobes_iob(tags): function iob_iobes (line 82) | def iob_iobes(tags): function read_data (line 106) | def read_data(fnames, zeros=False, lower=False): function iob2 (line 151) | def iob2(tags): function update_tag_scheme (line 172) | def update_tag_scheme(sentences, tag_scheme='iobes', convert_to_iob=False): function eval_ner (line 198) | def eval_ner(results, path, name): function convert_to_bio (line 217) | def convert_to_bio(tags): function get_biaffine_pred_prob (line 234) | def get_biaffine_pred_prob(text, span_scores, label_list): function get_biaffine_pred_ner (line 260) | def get_biaffine_pred_ner(text, span_scores, is_flat_ner=True): function get_biaffine_pred_ner_with_dp (line 297) | def get_biaffine_pred_ner_with_dp(text, span_scores, with_logits=True, t... class SWAHook (line 348) | class SWAHook(tf.train.SessionRunHook): method __init__ (line 349) | def __init__(self, swa_steps, start_swa_step, checkpoint_path): method begin (line 355) | def begin(self): method after_run (line 375) | def after_run(self, run_context, run_values): method end (line 386) | def end(self, session): class BestF1Exporter (line 390) | class BestF1Exporter(tf.estimator.Exporter): method __init__ (line 391) | def __init__(self, input_fn, examples, label_list, max_seq_length, dp=... method name (line 401) | def name(self): method get_biaffine_result (line 404) | def get_biaffine_result(self,estimator): method export (line 440) | def export(self, estimator, export_path, checkpoint_path, eval_result,...