SYMBOL INDEX (552 symbols across 71 files) FILE: entity_candidate/biencoder/biencoder_.py function load_biencoder (line 26) | def load_biencoder(params): class BiEncoderModule (line 32) | class BiEncoderModule(torch.nn.Module): method __init__ (line 33) | def __init__(self, params): method forward (line 51) | def forward( class BiEncoderRanker (line 73) | class BiEncoderRanker(torch.nn.Module): method __init__ (line 74) | def __init__(self, params, shared=None): method load_model (line 100) | def load_model(self, fname, cpu=False): method build_model (line 108) | def build_model(self): method get_optimizer (line 120) | def get_optimizer(self, optim_states=None, saved_optim_type=None): method encode_context (line 128) | def encode_context(self, cands): method encode_candidate (line 137) | def encode_candidate(self, cands): method score_candidate (line 147) | def score_candidate( method forward (line 296) | def forward(self, context_input, cand_input_A, cand_input_B=None, labe... function to_bert_input (line 335) | def to_bert_input(token_idx, null_idx): FILE: entity_candidate/biencoder/data_process_.py function select_field (line 21) | def select_field(data, key1, key2=None): function get_context_representation (line 28) | def get_context_representation( function get_candidate_representation (line 87) | def get_candidate_representation( function process_mention_data (line 121) | def process_mention_data( function process_mention_data_ (line 224) | def process_mention_data_( function process_mention_data__ (line 347) | def process_mention_data__( FILE: entity_candidate/biencoder/eval_biencoder_.py function load_entity_dict (line 28) | def load_entity_dict(logger, params, is_zeshel): function get_candidate_pool_tensor_zeshel (line 65) | def get_candidate_pool_tensor_zeshel( function get_candidate_pool_tensor_helper (line 86) | def get_candidate_pool_tensor_helper( function get_candidate_pool_tensor (line 109) | def get_candidate_pool_tensor( function encode_candidate (line 139) | def encode_candidate( function load_or_generate_candidate_pool (line 188) | def load_or_generate_candidate_pool( function make_ids (line 223) | def make_ids(data_path): function main (line 292) | def main(params): FILE: entity_candidate/biencoder/nn_prediction_.py function get_topk_predictions (line 18) | def get_topk_predictions( FILE: entity_candidate/biencoder/train_biencoder_.py function evaluate (line 45) | def evaluate(reranker, eval_dataloader, params, device, logger,): function evaluate_ (line 89) | def evaluate_(reranker, eval_dataloader, params, device, logger,): function get_optimizer (line 133) | def get_optimizer(model, params): function get_optimizer (line 141) | def get_optimizer(model, params): function get_scheduler (line 150) | def get_scheduler(params, optimizer, len_train_data, logger): function main (line 166) | def main(params): FILE: entity_candidate/biencoder/zeshel_utils.py function load_entity_dict_zeshel (line 36) | def load_entity_dict_zeshel(logger, params): class Stats (line 70) | class Stats(): method __init__ (line 71) | def __init__(self, top_k=1000): method add (line 80) | def add(self, idx): method extend (line 88) | def extend(self, stats): method output (line 93) | def output(self): FILE: entity_candidate/build_faiss_index.py function main (line 17) | def main(params): FILE: entity_candidate/candidate_data_fetcher.py function get_model (line 11) | def get_model(parameters): class Wikimedia_Data_Fetcher (line 15) | class Wikimedia_Data_Fetcher: method __init__ (line 16) | def __init__(self, path_to_data): method get_data_for_entity (line 19) | def get_data_for_entity(self, entity_data): FILE: entity_candidate/candidate_generation.py function get_model (line 15) | def get_model(params): class Candidate_Generator (line 19) | class Candidate_Generator: method __init__ (line 20) | def __init__(self, parameters=None): method get_candidates (line 23) | def get_candidates(self, mention_data): class BM45_Candidate_Generator (line 28) | class BM45_Candidate_Generator(Candidate_Generator): method __init__ (line 31) | def __init__(self, params): method _filter_result (line 48) | def _filter_result(self, cand, detailed=True): method get_candidates (line 68) | def get_candidates(self, mention_data): method process_mentions_for_candidate_generator (line 118) | def process_mentions_for_candidate_generator(sentences, mentions): method solr_escape (line 124) | def solr_escape(string): FILE: entity_candidate/candidate_ranking/bert_reranking.py class BertForReranking (line 26) | class BertForReranking(BertPreTrainedModel): method __init__ (line 95) | def __init__(self, config): method forward (line 103) | def forward( class BertReranker (line 160) | class BertReranker: method __init__ (line 161) | def __init__(self, parameters): method rerank (line 179) | def rerank(self, mentions, sentences): method get_scheduler_and_optimizer (line 231) | def get_scheduler_and_optimizer(self, parameters, train_tensor_data, l... method get_model (line 282) | def get_model(parameters): method get_tokenizer (line 296) | def get_tokenizer(parameters): method _get_candidate_representation (line 303) | def _get_candidate_representation( method _get_mention_context_end2end (line 337) | def _get_mention_context_end2end(mention, sentences): method _select_field (line 356) | def _select_field(samples, field): method _get_context_token_representation (line 363) | def _get_context_token_representation( method _process_mentions_for_model (line 393) | def _process_mentions_for_model( FILE: entity_candidate/candidate_ranking/evaluate.py function evaluate_model_on_dataset (line 20) | def evaluate_model_on_dataset( function evaluate (line 104) | def evaluate(parameters, logger=None): FILE: entity_candidate/candidate_ranking/train.py function main (line 41) | def main(parameters): FILE: entity_candidate/candidate_ranking/utils.py function new_read_dataset (line 24) | def new_read_dataset(dataset_name, preprocessed_json_data_parent_folder,... function filter_samples (line 101) | def filter_samples(samples, top_k, gold_key="gold_pos"): function _truncate_seq_pair (line 113) | def _truncate_seq_pair(tokens_a, tokens_b, max_length): function eval_precision_bm45_dataloader (line 125) | def eval_precision_bm45_dataloader(dataloader, ks=[1, 5, 10], number_of_... function accuracy (line 148) | def accuracy(out, labels): function remove_module_from_state_dict (line 153) | def remove_module_from_state_dict(state_dict): function save_model (line 161) | def save_model(model, tokenizer, output_dir): function get_logger (line 174) | def get_logger(output_dir=None): function write_to_file (line 201) | def write_to_file(path, string, mode="w"): function get_reranker (line 206) | def get_reranker(parameters): function get_biencoder (line 210) | def get_biencoder(parameters): FILE: entity_candidate/candidate_retrieval/candidate_generators.py function mention_data_summary (line 12) | def mention_data_summary(mention): class Simple_Candidate_Generator (line 16) | class Simple_Candidate_Generator: method __init__ (line 17) | def __init__(self, params): method _filter_result (line 37) | def _filter_result(self, cand): method get_candidates (line 59) | def get_candidates( class Pregenerated_Candidates_Data_Fetcher (line 119) | class Pregenerated_Candidates_Data_Fetcher: method __init__ (line 120) | def __init__(self, parameters): method get_candidates_data (line 132) | def get_candidates_data(self, candidates_wikidata_ids): method filter_result (line 145) | def filter_result(cand, detailed=True): method get_candidate_data_for_wikidata_id (line 165) | def get_candidate_data_for_wikidata_id(self, wikidata_id): FILE: entity_candidate/candidate_retrieval/data_ingestion.py function remove_all_docs (line 51) | def remove_all_docs(): function load_data (line 55) | def load_data(): function get_data_for_key (line 59) | def get_data_for_key(data, title): FILE: entity_candidate/candidate_retrieval/dataset.py function read_csv_file (line 18) | def read_csv_file(path, added_params): function read_conll_file (line 73) | def read_conll_file(data, path): function load_person_names (line 145) | def load_person_names(path): function find_coref (line 153) | def find_coref(ment, mentlist, person_names): function with_coref (line 174) | def with_coref(dataset, person_names): function eval (line 193) | def eval(testset, system_pred, nel=False): function get_candidate_generator (line 225) | def get_candidate_generator(added_params): class CoNLLDataset (line 235) | class CoNLLDataset: method __init__ (line 240) | def __init__(self, path, person_path, conll_path, added_params): class FetchCandidateEntities (line 300) | class FetchCandidateEntities(object): method __init__ (line 305) | def __init__(self, p_e_m_data_path="data/basic_data/p_e_m_data/"): method process (line 322) | def process(self, span): FILE: entity_candidate/candidate_retrieval/evaluator.py class Evaluator (line 12) | class Evaluator: method __init__ (line 13) | def __init__(self, data): method candidate_generation (line 16) | def candidate_generation( method candidate_generation_recall_at (line 92) | def candidate_generation_recall_at(self, ax=None, max_rank=None): FILE: entity_candidate/candidate_retrieval/perform_and_evaluate_candidate_retrieval_multithreaded.py function run_thread (line 26) | def run_thread(arguments): function split (line 61) | def split(a, n): function main (line 66) | def main(args): function get_parameters (line 157) | def get_parameters(args): FILE: entity_candidate/candidate_retrieval/utils.py function solr_escape (line 18) | def solr_escape(string): function get_wikidata_id_from_link_name (line 36) | def get_wikidata_id_from_link_name(link): function get_datasets (line 52) | def get_datasets(get_test_dataset=False, get_pregenereted_candidates_wik... function get_sent_context (line 119) | def get_sent_context(mention, key, solr_escaped=True): function get_list_of_mentions (line 156) | def get_list_of_mentions(dev_datasets): function write_candidate_generation_results_for_a_run_to_file (line 272) | def write_candidate_generation_results_for_a_run_to_file(run, results_du... function write_candidate_generation_execution_time_to_file (line 285) | def write_candidate_generation_execution_time_to_file( function write_candidate_generation_results_to_file (line 294) | def write_candidate_generation_results_to_file( FILE: entity_candidate/common/optimizer.py function get_bert_optimizer (line 37) | def get_bert_optimizer(models, type_optimization, learning_rate, fp16=Fa... function ellipse (line 82) | def ellipse(lst, max_display=5, sep='|'): FILE: entity_candidate/common/params.py class BlinkParser (line 21) | class BlinkParser(argparse.ArgumentParser): method __init__ (line 35) | def __init__( method add_blink_args (line 60) | def add_blink_args(self, args=None): method add_model_args (line 93) | def add_model_args(self, args=None): method add_training_args (line 168) | def add_training_args(self, args=None): method add_eval_args (line 237) | def add_eval_args(self, args=None): FILE: entity_candidate/common/ranker_base.py function get_model_obj (line 10) | def get_model_obj(model): class BertEncoder (line 14) | class BertEncoder(nn.Module): method __init__ (line 15) | def __init__( method forward (line 28) | def forward(self, token_ids, segment_ids, attention_mask): FILE: entity_candidate/crossencoder/crossencoder.py function load_crossencoder (line 36) | def load_crossencoder(params): class CrossEncoderModule (line 42) | class CrossEncoderModule(torch.nn.Module): method __init__ (line 43) | def __init__(self, params, tokenizer): method forward (line 59) | def forward( class CrossEncoderRanker (line 66) | class CrossEncoderRanker(torch.nn.Module): method __init__ (line 67) | def __init__(self, params, shared=None): method load_model (line 104) | def load_model(self, fname, cpu=False): method save (line 111) | def save(self, output_dir): method build_model (line 115) | def build_model(self): method save_model (line 118) | def save_model(self, output_dir): method get_optimizer (line 127) | def get_optimizer(self, optim_states=None, saved_optim_type=None): method score_candidate (line 135) | def score_candidate(self, text_vecs, context_len): method forward (line 146) | def forward(self, input_idx, label_input, context_len): function to_bert_input (line 152) | def to_bert_input(token_idx, null_idx, segment_pos): FILE: entity_candidate/crossencoder/data_process.py function prepare_crossencoder_mentions (line 17) | def prepare_crossencoder_mentions( function prepare_crossencoder_candidates (line 46) | def prepare_crossencoder_candidates( function filter_crossencoder_tensor_input (line 89) | def filter_crossencoder_tensor_input( function prepare_crossencoder_data (line 115) | def prepare_crossencoder_data( FILE: entity_candidate/crossencoder/train_cross.py function modify (line 43) | def modify(context_input, candidate_input, max_seq_length): function evaluate (line 63) | def evaluate(reranker, eval_dataloader, device, logger, context_length, ... function get_optimizer (line 134) | def get_optimizer(model, params): function get_scheduler (line 143) | def get_scheduler(params, optimizer, len_train_data, logger): function main (line 159) | def main(params): FILE: entity_candidate/indexer/faiss_indexer.py class DenseIndexer (line 22) | class DenseIndexer(object): method __init__ (line 23) | def __init__(self, buffer_size: int = 50000): method index_data (line 28) | def index_data(self, data: np.array): method search_knn (line 31) | def search_knn(self, query_vectors: np.array, top_docs: int): method serialize (line 34) | def serialize(self, index_file: str): method deserialize_from (line 38) | def deserialize_from(self, index_file: str): class DenseFlatIndexer (line 47) | class DenseFlatIndexer(DenseIndexer): method __init__ (line 48) | def __init__(self, vector_sz: int = 1, buffer_size: int = 50000): method index_data (line 52) | def index_data(self, data: np.array): method search_knn (line 65) | def search_knn(self, query_vectors, top_k): class DenseHNSWFlatIndexer (line 71) | class DenseHNSWFlatIndexer(DenseIndexer): method __init__ (line 76) | def __init__( method index_data (line 94) | def index_data(self, data: np.array): method search_knn (line 131) | def search_knn(self, query_vectors, top_k): method deserialize_from (line 138) | def deserialize_from(self, file: str): FILE: entity_candidate/main_dense.py function _print_colorful_text (line 42) | def _print_colorful_text(input_sentence, samples): function _print_colorful_prediction (line 65) | def _print_colorful_prediction( function _annotate (line 75) | def _annotate(ner_model, input_sentences): function _load_candidates (line 99) | def _load_candidates( function __map_test_entities (line 154) | def __map_test_entities(test_entities_path, title2id, logger): function __load_test (line 173) | def __load_test(test_filename, kb2id, wikipedia_id2local_id, logger): function _get_test_samples (line 210) | def _get_test_samples( function _process_biencoder_dataloader (line 220) | def _process_biencoder_dataloader(samples, tokenizer, biencoder_params): function _run_biencoder (line 237) | def _run_biencoder(biencoder, dataloader, candidate_encoding, top_k=100,... function _process_crossencoder_dataloader (line 263) | def _process_crossencoder_dataloader(context_input, label_input, crossen... function _run_crossencoder (line 272) | def _run_crossencoder(crossencoder, dataloader, logger, context_len, dev... function load_models (line 289) | def load_models(args, logger=None): function run (line 341) | def run( FILE: entity_candidate/main_solr.py function main (line 19) | def main(parameters): FILE: entity_candidate/ner.py function get_model (line 11) | def get_model(parameters=None): class NER_model (line 15) | class NER_model: method __init__ (line 16) | def __init__(self, parameters=None): method predict (line 19) | def predict(self, sents): class Flair (line 29) | class Flair(NER_model): method __init__ (line 30) | def __init__(self, parameters=None): method predict (line 33) | def predict(self, sentences): FILE: entity_candidate/reranker.py function get_model (line 10) | def get_model(params): FILE: entity_candidate/utils.py function read_sentences_from_file (line 15) | def read_sentences_from_file(path_to_file, one_sentence_per_line=True): function get_candidate_summary (line 34) | def get_candidate_summary(candidate): function present_sentence_mentions (line 42) | def present_sentence_mentions(sentence, mentions, output_file): function sentence_mentions_pairs (line 84) | def sentence_mentions_pairs(sentences, mentions): function present_annotated_sentences (line 103) | def present_annotated_sentences(sentences, mentions, output_file=None): function write_dicts_as_json_per_line (line 110) | def write_dicts_as_json_per_line(list_of_dicts, txt_file_path): function get_mentions_txt_file_path (line 120) | def get_mentions_txt_file_path(output_folder_path): function get_sentences_txt_file_path (line 128) | def get_sentences_txt_file_path(output_folder_path): function get_end2end_pickle_output_file_path (line 136) | def get_end2end_pickle_output_file_path(output_folder_path): function write_end2end_pickle_output (line 144) | def write_end2end_pickle_output(sentences, mentions, output_file_id): function get_end2end_pretty_output_file_path (line 150) | def get_end2end_pretty_output_file_path(output_folder_path): FILE: entity_candidate/utils/tokenization.py function convert_to_unicode (line 27) | def convert_to_unicode(text): function printable_text (line 47) | def printable_text(text): function load_vocab (line 70) | def load_vocab(vocab_file): function convert_by_vocab (line 85) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 95) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 99) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 103) | def whitespace_tokenize(text): class FullTokenizer (line 112) | class FullTokenizer(object): method __init__ (line 115) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 121) | def tokenize(self, text): method convert_tokens_to_ids (line 129) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 132) | def convert_ids_to_tokens(self, ids): class BasicTokenizer (line 136) | class BasicTokenizer(object): method __init__ (line 139) | def __init__(self, do_lower_case=True): method tokenize (line 147) | def tokenize(self, text): method _run_strip_accents (line 171) | def _run_strip_accents(self, text): method _run_split_on_punc (line 182) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 202) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 215) | def _is_chinese_char(self, cp): method _clean_text (line 237) | def _clean_text(self, text): class WordpieceTokenizer (line 251) | class WordpieceTokenizer(object): method __init__ (line 254) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 259) | def tokenize(self, text): function _is_whitespace (line 313) | def _is_whitespace(char): function _is_control (line 325) | def _is_control(char): function _is_punctuation (line 337) | def _is_punctuation(char): FILE: entity_candidate/utils/utils.py function timer (line 9) | def timer(func): function set_seed (line 27) | def set_seed(seed=123): function set_logger (line 39) | def set_logger(log_path): FILE: entity_sort/blink/biencoder/biencoder.py function load_biencoder (line 26) | def load_biencoder(params): class BiEncoderModule (line 32) | class BiEncoderModule(torch.nn.Module): method __init__ (line 33) | def __init__(self, params): method forward (line 51) | def forward( class BiEncoderRanker (line 73) | class BiEncoderRanker(torch.nn.Module): method __init__ (line 74) | def __init__(self, params, shared=None): method load_model (line 99) | def load_model(self, fname, cpu=False): method build_model (line 106) | def build_model(self): method get_optimizer (line 118) | def get_optimizer(self, optim_states=None, saved_optim_type=None): method encode_context (line 126) | def encode_context(self, cands): method encode_candidate (line 135) | def encode_candidate(self, cands): method score_candidate (line 148) | def score_candidate( method forward (line 188) | def forward(self, context_input, cand_input, label_input=None): function to_bert_input (line 203) | def to_bert_input(token_idx, null_idx): FILE: entity_sort/blink/biencoder/data_process.py function select_field (line 18) | def select_field(data, key1, key2=None): function get_context_representation (line 25) | def get_context_representation( function get_candidate_representation (line 84) | def get_candidate_representation( function process_mention_data (line 119) | def process_mention_data( FILE: entity_sort/blink/build_faiss_index.py function main (line 17) | def main(params): FILE: entity_sort/blink/candidate_ranking/bert_reranking.py class BertForReranking (line 26) | class BertForReranking(BertPreTrainedModel): method __init__ (line 95) | def __init__(self, config): method forward (line 103) | def forward( class BertReranker (line 160) | class BertReranker: method __init__ (line 161) | def __init__(self, parameters): method rerank (line 179) | def rerank(self, mentions, sentences): method get_scheduler_and_optimizer (line 231) | def get_scheduler_and_optimizer(self, parameters, train_tensor_data, l... method get_model (line 282) | def get_model(parameters): method get_tokenizer (line 296) | def get_tokenizer(parameters): method _get_candidate_representation (line 303) | def _get_candidate_representation( method _get_mention_context_end2end (line 337) | def _get_mention_context_end2end(mention, sentences): method _select_field (line 356) | def _select_field(samples, field): method _get_context_token_representation (line 363) | def _get_context_token_representation( method _process_mentions_for_model (line 393) | def _process_mentions_for_model( FILE: entity_sort/blink/candidate_ranking/evaluate.py function evaluate_model_on_dataset (line 20) | def evaluate_model_on_dataset( function evaluate (line 104) | def evaluate(parameters, logger=None): FILE: entity_sort/blink/candidate_ranking/train.py function main (line 41) | def main(parameters): FILE: entity_sort/blink/candidate_ranking/utils.py function new_read_dataset (line 24) | def new_read_dataset(dataset_name, preprocessed_json_data_parent_folder,... function filter_samples (line 101) | def filter_samples(samples, top_k, gold_key="gold_pos"): function _truncate_seq_pair (line 113) | def _truncate_seq_pair(tokens_a, tokens_b, max_length): function eval_precision_bm45_dataloader (line 125) | def eval_precision_bm45_dataloader(dataloader, ks=[1, 5, 10], number_of_... function accuracy (line 148) | def accuracy(out, labels): function remove_module_from_state_dict (line 153) | def remove_module_from_state_dict(state_dict): function save_model (line 161) | def save_model(model, tokenizer, output_dir): function get_logger (line 174) | def get_logger(output_dir=None): function write_to_file (line 201) | def write_to_file(path, string, mode="w"): function get_reranker (line 206) | def get_reranker(parameters): function get_biencoder (line 210) | def get_biencoder(parameters): FILE: entity_sort/blink/common/optimizer.py function get_bert_optimizer (line 37) | def get_bert_optimizer(models, type_optimization, learning_rate, fp16=Fa... function ellipse (line 82) | def ellipse(lst, max_display=5, sep='|'): FILE: entity_sort/blink/common/params.py class BlinkParser (line 21) | class BlinkParser(argparse.ArgumentParser): method __init__ (line 35) | def __init__( method add_blink_args (line 60) | def add_blink_args(self, args=None): method add_model_args (line 93) | def add_model_args(self, args=None): method add_training_args (line 168) | def add_training_args(self, args=None): method add_eval_args (line 237) | def add_eval_args(self, args=None): FILE: entity_sort/blink/common/ranker_base.py function get_model_obj (line 10) | def get_model_obj(model): class BertEncoder (line 14) | class BertEncoder(nn.Module): method __init__ (line 15) | def __init__( method forward (line 28) | def forward(self, token_ids, segment_ids, attention_mask): FILE: entity_sort/blink/indexer/faiss_indexer.py class DenseIndexer (line 22) | class DenseIndexer(object): method __init__ (line 23) | def __init__(self, buffer_size: int = 50000): method index_data (line 28) | def index_data(self, data: np.array): method search_knn (line 31) | def search_knn(self, query_vectors: np.array, top_docs: int): method serialize (line 34) | def serialize(self, index_file: str): method deserialize_from (line 38) | def deserialize_from(self, index_file: str): class DenseFlatIndexer (line 47) | class DenseFlatIndexer(DenseIndexer): method __init__ (line 48) | def __init__(self, vector_sz: int = 1, buffer_size: int = 50000): method index_data (line 52) | def index_data(self, data: np.array): method search_knn (line 65) | def search_knn(self, query_vectors, top_k): class DenseHNSWFlatIndexer (line 71) | class DenseHNSWFlatIndexer(DenseIndexer): method __init__ (line 76) | def __init__( method index_data (line 94) | def index_data(self, data: np.array): method search_knn (line 131) | def search_knn(self, query_vectors, top_k): method deserialize_from (line 138) | def deserialize_from(self, file: str): FILE: entity_sort/blink/utils/tokenization.py function convert_to_unicode (line 27) | def convert_to_unicode(text): function printable_text (line 47) | def printable_text(text): function load_vocab (line 70) | def load_vocab(vocab_file): function convert_by_vocab (line 85) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 95) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 99) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 103) | def whitespace_tokenize(text): class FullTokenizer (line 112) | class FullTokenizer(object): method __init__ (line 115) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 121) | def tokenize(self, text): method convert_tokens_to_ids (line 129) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 132) | def convert_ids_to_tokens(self, ids): class BasicTokenizer (line 136) | class BasicTokenizer(object): method __init__ (line 139) | def __init__(self, do_lower_case=True): method tokenize (line 147) | def tokenize(self, text): method _run_strip_accents (line 171) | def _run_strip_accents(self, text): method _run_split_on_punc (line 182) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 202) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 215) | def _is_chinese_char(self, cp): method _clean_text (line 237) | def _clean_text(self, text): class WordpieceTokenizer (line 251) | class WordpieceTokenizer(object): method __init__ (line 254) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 259) | def tokenize(self, text): function _is_whitespace (line 313) | def _is_whitespace(char): function _is_control (line 325) | def _is_control(char): function _is_punctuation (line 337) | def _is_punctuation(char): FILE: entity_sort/blink/utils/utils.py function timer (line 9) | def timer(func): function set_seed (line 27) | def set_seed(seed=123): function set_logger (line 39) | def set_logger(log_path): FILE: entity_sort/el_config.py class Args (line 4) | class Args: method parse (line 6) | def parse(): method initialize (line 11) | def initialize(parser): method get_parser (line 71) | def get_parser(self): FILE: entity_sort/el_dataset.py class ELDataset (line 10) | class ELDataset(Dataset): method __init__ (line 11) | def __init__(self, features): # 特征向量 method __len__ (line 15) | def __len__(self): method __getitem__ (line 18) | def __getitem__(self, index): FILE: entity_sort/el_main.py function _load_candidates (line 48) | def _load_candidates(faiss_index=None, index_path=None): function load_models (line 64) | def load_models(args): function _process_biencoder_dataloader (line 83) | def _process_biencoder_dataloader(samples, tokenizer, biencoder_params): function _run_biencoder (line 96) | def _run_biencoder(biencoder, dataloader, top_k=10, indexer=None): function _run_biencoder_test (line 113) | def _run_biencoder_test(biencoder, dataloader, entity_to_ids, subject_id... class Trainer (line 183) | class Trainer: method __init__ (line 184) | def __init__(self, args, train_loader, dev_loader, test_loader): method configure_optimizers (line 201) | def configure_optimizers(self): method load_ckp (line 226) | def load_ckp(self, model, checkpoint_path): method save_ckp (line 233) | def save_ckp(self, state, checkpoint_path): method train (line 245) | def train(self): method dev (line 344) | def dev(self): method test (line 368) | def test(self, checkpoint_path): method convert_example_to_feature (line 397) | def convert_example_to_feature(self, method predict (line 505) | def predict(self, method get_metrics (line 665) | def get_metrics(self, outputs, targets): method get_classification_report (line 672) | def get_classification_report(self, outputs, targets): FILE: entity_sort/el_models.py class BertForEntityLinking (line 12) | class BertForEntityLinking(nn.Module): method _forward_unimplemented (line 13) | def _forward_unimplemented(self, *input: Any) -> None: method __init__ (line 16) | def __init__(self, args): method find_entity_span (line 45) | def find_entity_span(self, entity_mask, flag=1): method forward (line 52) | def forward(self, token_ids, attention_masks, token_type_ids, seq_labe... class Args (line 98) | class Args: FILE: entity_sort/el_preprocess.py class InputExample (line 20) | class InputExample: method __init__ (line 21) | def __init__(self, set_type, text, seq_label, entity_label): class BaseFeature (line 28) | class BaseFeature: method __init__ (line 29) | def __init__(self, token_ids, attention_masks, token_type_ids): class BertFeature (line 35) | class BertFeature(BaseFeature): method __init__ (line 36) | def __init__(self, token_ids, attention_masks, token_type_ids, seq_lab... class ELProcessor (line 46) | class ELProcessor: method __init__ (line 47) | def __init__(self): method read_json (line 53) | def read_json(self, path): method get_result (line 58) | def get_result(self, lines, set_type): method get_text_pair (line 90) | def get_text_pair(self, word, kb_id, text): method get_info (line 124) | def get_info(self, subject_id, maxlength): function convert_bert_example (line 166) | def convert_bert_example(ex_idx, example: InputExample, tokenizer: BertT... function convert_examples_to_features (line 269) | def convert_examples_to_features(examples, max_seq_len, bert_dir): function split_train_test (line 302) | def split_train_test(examples, train_rate): function get_out (line 317) | def get_out(processor, txt_path, args, mode): FILE: entity_sort/el_processor.py class ELProcessor (line 10) | class ELProcessor: method __init__ (line 11) | def __init__(self): method read_json (line 17) | def read_json(self, path): # 读取train.json method get_result (line 22) | def get_result(self, lines): method get_text_pair (line 45) | def get_text_pair(self, word, kb_id, text): method get_info (line 72) | def get_info(self, subject_id): FILE: entity_sort/el_service.py function dict_to_obj (line 15) | def dict_to_obj(dictObj): class EntityLinking (line 25) | class EntityLinking: method __init__ (line 26) | def __init__(self): method predict (line 45) | def predict(self, text): method parse_result (line 58) | def parse_result(self, result): function get_result (line 75) | def get_result(): FILE: entity_sort/my_jieba/__init__.py function setLogLevel (line 54) | def setLogLevel(log_level): class Tokenizer (line 58) | class Tokenizer(object): method __init__ (line 60) | def __init__(self, dictionary=DEFAULT_DICT): method __repr__ (line 73) | def __repr__(self): method gen_pfdict (line 77) | def gen_pfdict(f): method initialize (line 99) | def initialize(self, dictionary=None): method check_initialized (line 174) | def check_initialized(self): method calc (line 178) | def calc(self, sentence, DAG, route): method get_DAG (line 186) | def get_DAG(self, sentence): method __cut_all (line 204) | def __cut_all(self, sentence): method __cut_DAG_NO_HMM (line 232) | def __cut_DAG_NO_HMM(self, sentence): method __cut_DAG (line 255) | def __cut_DAG(self, sentence): method cut (line 295) | def cut(self, sentence, cut_all=False, HMM=True, use_paddle=False): method cut_for_search (line 344) | def cut_for_search(self, sentence, HMM=True): method lcut (line 362) | def lcut(self, *args, **kwargs): method lcut_for_search (line 365) | def lcut_for_search(self, *args, **kwargs): method _lcut_no_hmm (line 371) | def _lcut_no_hmm(self, sentence): method _lcut_all (line 374) | def _lcut_all(self, sentence): method _lcut_for_search_no_hmm (line 377) | def _lcut_for_search_no_hmm(self, sentence): method get_dict_file (line 380) | def get_dict_file(self): method load_userdict (line 386) | def load_userdict(self, f): method add_word (line 426) | def add_word(self, word, freq=None, tag=None): method del_word (line 449) | def del_word(self, word): method suggest_freq (line 455) | def suggest_freq(self, segment, tune=False): method tokenize (line 486) | def tokenize(self, unicode_sentence, mode="default", HMM=True): method set_dictionary (line 519) | def set_dictionary(self, dictionary_path): function _lcut_all (line 552) | def _lcut_all(s): function _lcut (line 556) | def _lcut(s): function _lcut_no_hmm (line 560) | def _lcut_no_hmm(s): function _lcut_all (line 564) | def _lcut_all(s): function _lcut_for_search (line 568) | def _lcut_for_search(s): function _lcut_for_search_no_hmm (line 572) | def _lcut_for_search_no_hmm(s): function _pcut (line 576) | def _pcut(sentence, cut_all=False, HMM=True): function _pcut_for_search (line 589) | def _pcut_for_search(sentence, HMM=True): function enable_parallel (line 600) | def enable_parallel(processnum=None): function disable_parallel (line 623) | def disable_parallel(): FILE: entity_sort/my_jieba/__main__.py function cutfunc (line 34) | def cutfunc(sentence, _, HMM=True): FILE: entity_sort/my_jieba/_compat.py function setLogLevel (line 11) | def setLogLevel(log_level): function enable_paddle (line 27) | def enable_paddle(): function strdecode (line 76) | def strdecode(sentence): function resolve_filename (line 85) | def resolve_filename(f): FILE: entity_sort/my_jieba/analyse/__init__.py function set_stop_words (line 16) | def set_stop_words(stop_words_path): FILE: entity_sort/my_jieba/analyse/analyzer.py class ChineseTokenizer (line 19) | class ChineseTokenizer(Tokenizer): method __call__ (line 21) | def __call__(self, text, **kargs): function ChineseAnalyzer (line 34) | def ChineseAnalyzer(stoplist=STOP_WORDS, minsize=1, stemfn=stem, cachesi... FILE: entity_sort/my_jieba/analyse/textrank.py class UndirectWeightedGraph (line 13) | class UndirectWeightedGraph: method __init__ (line 16) | def __init__(self): method addEdge (line 19) | def addEdge(self, start, end, weight): method rank (line 24) | def rank(self): class TextRank (line 57) | class TextRank(KeywordExtractor): method __init__ (line 59) | def __init__(self): method pairfilter (line 65) | def pairfilter(self, wp): method textrank (line 69) | def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns'... FILE: entity_sort/my_jieba/analyse/tfidf.py class KeywordExtractor (line 15) | class KeywordExtractor(object): method set_stop_words (line 23) | def set_stop_words(self, stop_words_path): method extract_tags (line 31) | def extract_tags(self, *args, **kwargs): class IDFLoader (line 35) | class IDFLoader(object): method __init__ (line 37) | def __init__(self, idf_path=None): method set_new_path (line 44) | def set_new_path(self, new_idf_path): method get_idf (line 55) | def get_idf(self): class TFIDF (line 59) | class TFIDF(KeywordExtractor): method __init__ (line 61) | def __init__(self, idf_path=None): method set_idf_path (line 68) | def set_idf_path(self, idf_path): method extract_tags (line 75) | def extract_tags(self, sentence, topK=20, withWeight=False, allowPOS=(... FILE: entity_sort/my_jieba/finalseg/__init__.py function load_model (line 23) | def load_model(): function viterbi (line 37) | def viterbi(obs, states, start_p, trans_p, emit_p): function __cut (line 59) | def __cut(sentence): function add_force_split (line 81) | def add_force_split(word): function cut (line 85) | def cut(sentence): FILE: entity_sort/my_jieba/lac_small/creator.py function create_model (line 28) | def create_model(vocab_size, num_labels, mode='train'): FILE: entity_sort/my_jieba/lac_small/nets.py function lex_net (line 25) | def lex_net(word, vocab_size, num_labels, for_infer=True, target=None): FILE: entity_sort/my_jieba/lac_small/predict.py function get_sent (line 51) | def get_sent(str1): function get_result (line 68) | def get_result(str1): FILE: entity_sort/my_jieba/lac_small/reader_small.py function load_kv_dict (line 24) | def load_kv_dict(dict_path, class Dataset (line 50) | class Dataset(object): method __init__ (line 52) | def __init__(self): method vocab_size (line 66) | def vocab_size(self): method num_labels (line 71) | def num_labels(self): method word_to_ids (line 75) | def word_to_ids(self, words): method label_to_ids (line 85) | def label_to_ids(self, labels): method get_vars (line 95) | def get_vars(self,str1): FILE: entity_sort/my_jieba/lac_small/utils.py function str2bool (line 25) | def str2bool(v): function parse_result (line 33) | def parse_result(words, crf_decode, dataset): function parse_padding_result (line 76) | def parse_padding_result(words, crf_decode, seq_lens, dataset): function init_checkpoint (line 122) | def init_checkpoint(exe, init_checkpoint_path, main_program): FILE: entity_sort/my_jieba/posseg/__init__.py function load_model (line 27) | def load_model(): class pair (line 45) | class pair(object): method __init__ (line 47) | def __init__(self, word, flag): method __unicode__ (line 51) | def __unicode__(self): method __repr__ (line 54) | def __repr__(self): method __str__ (line 57) | def __str__(self): method __iter__ (line 63) | def __iter__(self): method __lt__ (line 66) | def __lt__(self, other): method __eq__ (line 69) | def __eq__(self, other): method __hash__ (line 72) | def __hash__(self): method encode (line 75) | def encode(self, arg): class POSTokenizer (line 79) | class POSTokenizer(object): method __init__ (line 81) | def __init__(self, tokenizer=None): method __repr__ (line 85) | def __repr__(self): method __getattr__ (line 88) | def __getattr__(self, name): method initialize (line 94) | def initialize(self, dictionary=None): method load_word_tag (line 98) | def load_word_tag(self, f): method makesure_userdict_loaded (line 114) | def makesure_userdict_loaded(self): method __cut (line 119) | def __cut(self, sentence): method __cut_detail (line 137) | def __cut_detail(self, sentence): method __cut_DAG_NO_HMM (line 154) | def __cut_DAG_NO_HMM(self, sentence): method __cut_DAG (line 177) | def __cut_DAG(self, sentence): method __cut_internal (line 217) | def __cut_internal(self, sentence, HMM=True): method _lcut_internal (line 244) | def _lcut_internal(self, sentence): method _lcut_internal_no_hmm (line 247) | def _lcut_internal_no_hmm(self, sentence): method cut (line 250) | def cut(self, sentence, HMM=True): method lcut (line 254) | def lcut(self, *args, **kwargs): function _lcut_internal (line 267) | def _lcut_internal(s): function _lcut_internal_no_hmm (line 271) | def _lcut_internal_no_hmm(s): function cut (line 275) | def cut(sentence, HMM=True, use_paddle=False): function lcut (line 309) | def lcut(sentence, HMM=True, use_paddle=False): FILE: entity_sort/my_jieba/posseg/viterbi.py function get_top_states (line 10) | def get_top_states(t_state_v, K=4): function viterbi (line 14) | def viterbi(obs, states, start_p, trans_p, emit_p): FILE: entity_sort/service_main.py class Trainer (line 7) | class Trainer: method __init__ (line 8) | def __init__(self, args): method load_ckp (line 16) | def load_ckp(self, model, checkpoint_path): method save_ckp (line 23) | def save_ckp(self, state, checkpoint_path): method convert_example_to_feature (line 27) | def convert_example_to_feature(self0, method predict (line 93) | def predict(self, FILE: entity_sort/utils/tokenization.py function convert_to_unicode (line 27) | def convert_to_unicode(text): function printable_text (line 47) | def printable_text(text): function load_vocab (line 70) | def load_vocab(vocab_file): function convert_by_vocab (line 85) | def convert_by_vocab(vocab, items): function convert_tokens_to_ids (line 95) | def convert_tokens_to_ids(vocab, tokens): function convert_ids_to_tokens (line 99) | def convert_ids_to_tokens(inv_vocab, ids): function whitespace_tokenize (line 103) | def whitespace_tokenize(text): class FullTokenizer (line 112) | class FullTokenizer(object): method __init__ (line 115) | def __init__(self, vocab_file, do_lower_case=True): method tokenize (line 121) | def tokenize(self, text): method convert_tokens_to_ids (line 129) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 132) | def convert_ids_to_tokens(self, ids): class BasicTokenizer (line 136) | class BasicTokenizer(object): method __init__ (line 139) | def __init__(self, do_lower_case=True): method tokenize (line 147) | def tokenize(self, text): method _run_strip_accents (line 171) | def _run_strip_accents(self, text): method _run_split_on_punc (line 182) | def _run_split_on_punc(self, text): method _tokenize_chinese_chars (line 202) | def _tokenize_chinese_chars(self, text): method _is_chinese_char (line 215) | def _is_chinese_char(self, cp): method _clean_text (line 237) | def _clean_text(self, text): class WordpieceTokenizer (line 251) | class WordpieceTokenizer(object): method __init__ (line 254) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=... method tokenize (line 259) | def tokenize(self, text): function _is_whitespace (line 313) | def _is_whitespace(char): function _is_control (line 325) | def _is_control(char): function _is_punctuation (line 337) | def _is_punctuation(char): FILE: entity_sort/utils/utils.py function timer (line 9) | def timer(func): function set_seed (line 27) | def set_seed(seed=123): function set_logger (line 39) | def set_logger(log_path):