SYMBOL INDEX (280 symbols across 36 files) FILE: code/Attentive_BiLSTM/HAN.py class Embeeding_Attn (line 16) | class Embeeding_Attn(nn.Module): method __init__ (line 17) | def __init__(self): method forward (line 42) | def forward(self,x): class Word_Attn (line 59) | class Word_Attn(nn.Module): method __init__ (line 60) | def __init__(self): method forward (line 85) | def forward(self,x): FILE: code/Attentive_BiLSTM/Word_Freqency_Mapper.py class Word_Freqency_Mapper (line 5) | class Word_Freqency_Mapper: method __init__ (line 7) | def __init__(self,bins=100, w=5.0): method Find_Freq_Vector_for_words (line 15) | def Find_Freq_Vector_for_words(self): method Read_File (line 47) | def Read_File(self, ip_file): method Read_Test_Data (line 106) | def Read_Test_Data(self, input_test_file): method Read_Dev_Data (line 114) | def Read_Dev_Data(self, input_dev_file): method Find_Train_Data_Freq (line 123) | def Find_Train_Data_Freq(self, input_train_file): method Find_Gaussian_Bining_For_Training_Data_Freq (line 136) | def Find_Gaussian_Bining_For_Training_Data_Freq(self): method Write_Freq_To_File (line 146) | def Write_Freq_To_File(self,output_file): FILE: code/Attentive_BiLSTM/conlleval_py.py function endOfChunk (line 88) | def endOfChunk(prevTag, tag, prevType, type_): function startOfChunk (line 113) | def startOfChunk(prevTag, tag, prevType, type_): function calcMetrics (line 137) | def calcMetrics(TP, P, T, percent=True): function splitTag (line 150) | def splitTag(chunkTag, oTag = "O", raw = False): function countChunks (line 167) | def countChunks(args,inputFile): function evaluate (line 246) | def evaluate(correctChunk, foundGuessed, foundCorrect, correctTags, toke... function evaluate_conll_file (line 336) | def evaluate_conll_file(inputFile="conll_output.txt", to_tsv=False, tsv_... FILE: code/Attentive_BiLSTM/gaussian_binner.py function gaussian (line 6) | def gaussian(diff, sig): class GaussianBinner (line 10) | class GaussianBinner: method __init__ (line 12) | def __init__(self, bins=10, w=0.2): method fit (line 18) | def fit(self, x, features_to_be_binned): method transform (line 34) | def transform(self, x, features_to_be_binned): FILE: code/Attentive_BiLSTM/loader_so.py function unicodeToAscii (line 24) | def unicodeToAscii(s): function load_sentences_so (line 32) | def load_sentences_so(path, lower, zeros, merge_tag,set_of_selected_tags): function load_sentences_so_w_pred (line 122) | def load_sentences_so_w_pred(path_main_file, path_segmenter_pred_file, ... function load_sentences_conll (line 265) | def load_sentences_conll(path, lower, zeros): function update_tag_scheme (line 289) | def update_tag_scheme(sentences, tag_scheme): function word_mapping (line 319) | def word_mapping(sentences, lower): function char_mapping (line 339) | def char_mapping(sentences): function tag_mapping (line 352) | def tag_mapping(sentences): function cap_feature (line 365) | def cap_feature(s): function hand_features_to_idx (line 383) | def hand_features_to_idx(sentences): function prepare_sentence (line 394) | def prepare_sentence(str_words, word_to_id, char_to_id, lower=False): function seg_pred_to_idx (line 411) | def seg_pred_to_idx(sentence): function seg_pred_to_idx_prev (line 424) | def seg_pred_to_idx_prev(sentence): function ctc_pred_to_idx (line 443) | def ctc_pred_to_idx(sentence, ctc_pred_dict): function ner_pred_to_idx (line 456) | def ner_pred_to_idx(sentence, tag_to_id): function prepare_dataset (line 469) | def prepare_dataset(sentences, word_to_id, char_to_id, tag_to_id, ctc_pr... function augment_with_pretrained (line 509) | def augment_with_pretrained(dictionary, ext_emb_path, words): function pad_seq (line 560) | def pad_seq(seq, max_length, PAD_token=0): function get_batch (line 565) | def get_batch(start, batch_size, datas, singletons=[]): function random_batch (line 614) | def random_batch(batch_size, train_data, singletons=[]): FILE: code/Attentive_BiLSTM/make_segment_pred.py function read_file (line 4) | def read_file(ip_file): FILE: code/Attentive_BiLSTM/make_vocab.py function read_file (line 5) | def read_file(ip_file): FILE: code/Attentive_BiLSTM/model.py function to_scalar (line 25) | def to_scalar(var): function argmax (line 29) | def argmax(vec): function prepare_sequence (line 34) | def prepare_sequence(seq, to_ix): function log_sum_exp (line 40) | def log_sum_exp(vec): function _align_word (line 47) | def _align_word(input_matrix, word_pos_list=[1]): class BiLSTM_CRF (line 87) | class BiLSTM_CRF(nn.Module): method __init__ (line 89) | def __init__(self, vocab_size, tag_to_ix, embedding_dim, freq_embed_di... method _score_sentence (line 254) | def _score_sentence(self, feats, tags): method get_char_embedding (line 270) | def get_char_embedding(self, sentence, chars2, caps, chars2_length, d): method _get_lstm_features_w_elmo_and_char (line 318) | def _get_lstm_features_w_elmo_and_char(self, sentence_words, sentence,... method apply_attention (line 372) | def apply_attention(self, elmo_embeds, seg_embeds, ctc_embeds): method _get_lstm_features_w_elmo (line 400) | def _get_lstm_features_w_elmo(self, sentence_words, sentence, seg_pred... method _get_lstm_features_w_elmo_prev (line 462) | def _get_lstm_features_w_elmo_prev(self, sentence_words, sentence, mar... method _get_lstm_features (line 535) | def _get_lstm_features(self, sentence, markdown, chars2, caps, chars2_... method _forward_alg (line 603) | def _forward_alg(self, feats): method viterbi_decode (line 623) | def viterbi_decode(self, feats): method neg_log_likelihood (line 657) | def neg_log_likelihood(self, sentence_tokens, sentence, sentence_seg_p... method forward (line 680) | def forward(self,sentence_tokens, sentence, sentence_seg_preds, senten... FILE: code/Attentive_BiLSTM/print_result.py function print_result (line 9) | def print_result(eval_result,epoch_count, sorted_entity_list_file, entit... FILE: code/Attentive_BiLSTM/test_char_embeddings.py function prepare_train_set_dev_data (line 69) | def prepare_train_set_dev_data(): function evaluating (line 212) | def evaluating(model, datas, best_F, epoch_count, phase_name): function save_char_embed (line 306) | def save_char_embed(sentence_words, char_embed_dict, char_embed_vectors): function train_model (line 320) | def train_model(model, step_lr_scheduler, optimizer, train_data, dev_dat... function get_char_embedding_dict (line 453) | def get_char_embedding_dict(datas, model, parameters): function test_dev_oov_char_embedding (line 500) | def test_dev_oov_char_embedding(train_data, dev_data, model, parameters,... function find_similar_word (line 519) | def find_similar_word(train_char_embedding, word_char_embed, limit = 11): FILE: code/Attentive_BiLSTM/tolatex.py function tolatex (line 1) | def tolatex(table_dict,caption=""): FILE: code/Attentive_BiLSTM/train_so.py function create_frequecny_vector (line 63) | def create_frequecny_vector(): function save_char_embed (line 75) | def save_char_embed(sentence_words, char_embed_dict, char_embed_vectors): function read_ctc_pred_file (line 90) | def read_ctc_pred_file(): function prepare_train_set_dev_data (line 102) | def prepare_train_set_dev_data(): function evaluating (line 300) | def evaluating(model, datas, best_F, epoch_count, phase_name): function train_model (line 409) | def train_model(model, step_lr_scheduler, optimizer, train_data, dev_dat... FILE: code/Attentive_BiLSTM/utils_so.py function get_name (line 23) | def get_name(parameters): function set_values (line 37) | def set_values(name, param, pretrained): function create_dico (line 53) | def create_dico(item_list): function create_mapping (line 68) | def create_mapping(dico): function zero_digits (line 79) | def zero_digits(s): function iob2 (line 86) | def iob2(tags): function iob_iobes (line 108) | def iob_iobes(tags): function iobes_iob (line 133) | def iobes_iob(tags): function insert_singletons (line 154) | def insert_singletons(words, singletons, p=0.5): function pad_word_chars (line 167) | def pad_word_chars(words): function create_input (line 189) | def create_input(data, parameters, add_label, singletons=None): function init_embedding (line 216) | def init_embedding(input_embedding): function init_linear (line 224) | def init_linear(input_linear): function adjust_learning_rate (line 234) | def adjust_learning_rate(optimizer, lr): function init_lstm (line 242) | def init_lstm(input_lstm): function init_gru (line 287) | def init_gru(input_gru): function Merge_Label (line 335) | def Merge_Label(inputFile): class Sort_Entity_by_Count (line 409) | class Sort_Entity_by_Count: method __init__ (line 411) | def __init__(self, train_file,output_file): method get_label_counter (line 429) | def get_label_counter(self, label_counter): method Read_File (line 460) | def Read_File(self, ip_file): FILE: code/BERT_NER/E2E_SoftNER.py function read_file (line 18) | def read_file(input_file, output_folder): function merge_all_conll_files (line 35) | def merge_all_conll_files(conlll_folder, output_file): function create_segmenter_input (line 65) | def create_segmenter_input(conll_format_file, segmenter_input_file, ctc_... function create_ner_input (line 97) | def create_ner_input(segmenter_output_file, ner_input_file, ctc_classifi... function parse_args (line 130) | def parse_args(): function Extract_NER (line 149) | def Extract_NER(input_file): FILE: code/BERT_NER/softner_ner_predict_from_file.py function set_seed (line 62) | def set_seed(args): function train (line 70) | def train(args, train_dataset, model, tokenizer, labels, pad_token_label... function evaluate (line 240) | def evaluate(args, model, tokenizer, labels, pad_token_label_id, mode, p... function load_and_cache_examples (line 312) | def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id,... function parse_args (line 360) | def parse_args(): function predict_entities (line 534) | def predict_entities(input_file,output_prediction_file): FILE: code/BERT_NER/softner_segmenter_preditct_from_file.py function set_seed (line 65) | def set_seed(args): function train (line 73) | def train(args, train_dataset, model, tokenizer, labels, pad_token_label... function evaluate (line 243) | def evaluate(args, model, tokenizer, labels, pad_token_label_id, mode, p... function load_and_cache_examples (line 322) | def load_and_cache_examples(args, tokenizer, labels, pad_token_label_id,... function parse_args (line 379) | def parse_args(): function predict_segments (line 555) | def predict_segments(input_file,output_prediction_file): FILE: code/BERT_NER/utils_ctc/binning.py function gaussian (line 6) | def gaussian(diff, sig): class GaussianBinner (line 10) | class GaussianBinner: method __init__ (line 12) | def __init__(self, bins=10, w=0.2): method fit (line 18) | def fit(self, x, features_to_be_binned): method transform (line 35) | def transform(self, x, features_to_be_binned): FILE: code/BERT_NER/utils_ctc/features.py class Features (line 23) | class Features: method __init__ (line 24) | def __init__(self, resources, n=5): method get_feature_vector (line 36) | def get_feature_vector(self, word): method get_features_from_token (line 51) | def get_features_from_token(self, token, train): method get_features (line 71) | def get_features(self, file_name, train): method transform_features (line 98) | def transform_features(self, features, train_flag): FILE: code/BERT_NER/utils_ctc/model.py class NeuralClassifier (line 15) | class NeuralClassifier(torch.nn.Module): method __init__ (line 16) | def __init__(self, input_feat_dim, target_label_dim, vocab_size, pre_w... method forward (line 43) | def forward(self, features, word_ids): method get_scores (line 58) | def get_scores(self, features, word_ids): method CrossEntropy (line 90) | def CrossEntropy(self, features, word_ids, gold_labels): method predict (line 98) | def predict(self, features, word_ids): FILE: code/BERT_NER/utils_ctc/prediction_ctc.py function eval (line 45) | def eval(predictions, gold_labels, phase): function get_word_dict_pre_embeds (line 58) | def get_word_dict_pre_embeds(train_file, test_file): function popluate_word_id_from_file (line 98) | def popluate_word_id_from_file(file_name, word_to_id): function popluate_word_id_from_token (line 113) | def popluate_word_id_from_token(token, word_to_id): function get_train_test_word_id (line 132) | def get_train_test_word_id(train_file, test_file, word_to_id): function prediction_on_token_input (line 139) | def prediction_on_token_input(ctc_ip_token, ctc_classifier, vocab_size, ... function prediction_on_file_input (line 177) | def prediction_on_file_input(ctc_input_file, ctc_classifier, vocab_size,... function train_ctc_model (line 217) | def train_ctc_model(train_file, test_file): FILE: code/BERT_NER/utils_ctc/rules.py function regex_or (line 4) | def regex_or(*items): function IS_URL (line 48) | def IS_URL(token): function IS_NUMBER (line 55) | def IS_NUMBER(token): function IS_FILE_NAME (line 62) | def IS_FILE_NAME(token): FILE: code/BERT_NER/utils_ner.py class InputExample (line 26) | class InputExample(object): method __init__ (line 29) | def __init__(self, guid, words, labels): class InputFeatures (line 43) | class InputFeatures(object): method __init__ (line 46) | def __init__(self, input_ids, input_mask, freq_ids, segment_ids, label... function read_examples_from_file (line 56) | def read_examples_from_file(data_dir, mode, path): function convert_examples_to_features (line 87) | def convert_examples_to_features( function get_labels (line 267) | def get_labels(path): FILE: code/BERT_NER/utils_preprocess/anntoconll.py function argparser (line 45) | def argparser(): function read_sentence (line 65) | def read_sentence(f): function strip_labels (line 84) | def strip_labels(lines): function attach_labels (line 108) | def attach_labels(labels, lines): function text_to_conll (line 132) | def text_to_conll(f): function relabel (line 205) | def relabel(lines, annotations, file_name): function process_files (line 256) | def process_files(files, output_directory, phase_name=""): function parse_textbounds (line 286) | def parse_textbounds(f): function eliminate_overlaps (line 306) | def eliminate_overlaps(textbounds): function get_annotations (line 329) | def get_annotations(fn): function Read_Main_Input_Folder (line 342) | def Read_Main_Input_Folder(input_folder): function process_folder (line 354) | def process_folder(source_folder, output_dir_ann, min_folder_number = 1,... function convert_standoff_to_conll (line 364) | def convert_standoff_to_conll(source_directory_ann, output_directory_con... FILE: code/BERT_NER/utils_preprocess/fix_char_encoding.py class Fix_Char_Code (line 5) | class Fix_Char_Code: method __init__ (line 7) | def __init__(self): method Get_List_of_Labels (line 10) | def Get_List_of_Labels(self, tokenized_word_list_len, main_label): method Fix_Word_Label (line 24) | def Fix_Word_Label(self, word, gold_label, raw_label): method Read_File (line 52) | def Read_File(self, ip_file): FILE: code/BERT_NER/utils_preprocess/format_markdown.py function find_string_indices (line 32) | def find_string_indices(input_string,string_to_search): class Stackoverflow_Info_Extract (line 43) | class Stackoverflow_Info_Extract: method __init__ (line 44) | def __init__(self, annotattion_folder): method Extract_Text_From_XML (line 52) | def Extract_Text_From_XML(self,input_text): method tokenize_and_annotae_post_body (line 152) | def tokenize_and_annotae_post_body(self, xml_filtered_string, post_id): FILE: code/BERT_NER/utils_preprocess/map_text_to_char.py function map_text_to_char (line 4) | def map_text_to_char(main_sent, tokens, offset): FILE: code/BERT_NER/utils_preprocess/sentencesplit.py function _text_by_offsets_gen (line 17) | def _text_by_offsets_gen(text, offsets): function _normspace (line 22) | def _normspace(s): function sentencebreaks_to_newlines (line 27) | def sentencebreaks_to_newlines(text): function main (line 75) | def main(argv): FILE: code/BERT_NER/utils_preprocess/ssplit.py function _refine_split (line 54) | def _refine_split(offsets, original_text): function _sentence_boundary_gen (line 117) | def _sentence_boundary_gen(text, regex): function regex_sentence_boundary_gen (line 122) | def regex_sentence_boundary_gen(text): function newline_sentence_boundary_gen (line 128) | def newline_sentence_boundary_gen(text): function _text_by_offsets_gen (line 138) | def _text_by_offsets_gen(text, offsets): FILE: code/BERT_NER/utils_preprocess/stokenizer.py function regex_or (line 50) | def regex_or(*items): function num2roman (line 245) | def num2roman(num): function generate_number_list (line 260) | def generate_number_list(limit): function splitEdgePunct_software (line 354) | def splitEdgePunct_software(input): function Split_End_of_Sentence_Punc (line 493) | def Split_End_of_Sentence_Punc(token_list): function simpleTokenize_software (line 524) | def simpleTokenize_software(text): function addAllnonempty (line 594) | def addAllnonempty(master, smaller): function squeezeWhitespace (line 602) | def squeezeWhitespace(input): function splitToken (line 606) | def splitToken(token): function tokenize_text (line 613) | def tokenize_text(text): function normalizeTextForTagger (line 619) | def normalizeTextForTagger(text): function Split_On_Multiple_Dot (line 628) | def Split_On_Multiple_Dot(input_word): function Split_On_Non_function_end_parenthesis (line 657) | def Split_On_Non_function_end_parenthesis(input_word): function Split_On_last_letter_Colon_Mark (line 711) | def Split_On_last_letter_Colon_Mark(input_word): function Split_On_last_letter_Quote_Mark (line 739) | def Split_On_last_letter_Quote_Mark(input_word): function Split_Words_Inside_Parenthesis (line 777) | def Split_Words_Inside_Parenthesis(input_word): function Split_Parenthesis_At_End_of_URL (line 797) | def Split_Parenthesis_At_End_of_URL(input_word): function Split_Punc_At_End_of_Word (line 820) | def Split_Punc_At_End_of_Word(input_word): function SO_Tokenizer_wrapper (line 833) | def SO_Tokenizer_wrapper(tokens): function match_paren (line 894) | def match_paren(current,previous): function find_word_w_balanced_paren (line 903) | def find_word_w_balanced_paren(line): function Mask_Nested_Paren_HTML_Word (line 941) | def Mask_Nested_Paren_HTML_Word(text): function Resotre_Masked_Words (line 985) | def Resotre_Masked_Words(tokens,nested_parenthesis_words_dict, base): function tokenize (line 1040) | def tokenize(text): function White_Space_Remove_From_Word (line 1069) | def White_Space_Remove_From_Word(word,filler_string=""): FILE: code/BERT_NER/utils_preprocess/stokenizer_base_rules.py function regex_or (line 42) | def regex_or(*items): function splitEdgePunct (line 209) | def splitEdgePunct(input): function simpleTokenize (line 215) | def simpleTokenize(text): function addAllnonempty (line 273) | def addAllnonempty(master, smaller): function squeezeWhitespace (line 281) | def squeezeWhitespace(input): function splitToken (line 285) | def splitToken(token): function tokenize (line 292) | def tokenize(text): function normalizeTextForTagger (line 298) | def normalizeTextForTagger(text): function tokenizeRawText (line 309) | def tokenizeRawText(text): FILE: code/BERT_NER/utils_preprocess/tokenize_base_rules.py function regex_or (line 40) | def regex_or(*items): function splitEdgePunct (line 207) | def splitEdgePunct(input): function simpleTokenize (line 213) | def simpleTokenize(text): function addAllnonempty (line 271) | def addAllnonempty(master, smaller): function squeezeWhitespace (line 279) | def squeezeWhitespace(input): function splitToken (line 283) | def splitToken(token): function tokenize (line 290) | def tokenize(text): function normalizeTextForTagger (line 296) | def normalizeTextForTagger(text): function tokenizeRawTweetText (line 307) | def tokenizeRawTweetText(text): FILE: code/BERT_NER/utils_seg.py class InputExample (line 26) | class InputExample(object): method __init__ (line 29) | def __init__(self, guid, words, labels): class InputFeatures (line 43) | class InputFeatures(object): method __init__ (line 46) | def __init__(self, input_ids, input_mask, freq_ids, md_ids, label_ids,... function read_examples_from_file (line 56) | def read_examples_from_file(data_dir, mode, path=None): function convert_examples_to_features (line 85) | def convert_examples_to_features( function get_labels (line 265) | def get_labels(path): FILE: code/DataReader/loader_so.py function Merge_Label (line 5) | def Merge_Label(inputFile): function loader_so_text (line 77) | def loader_so_text(path, merge_tag=True, replace_low_freq_tags=True): FILE: code/DataReader/read_so_post_info.py function find_string_indices (line 32) | def find_string_indices(input_string,string_to_search): class Stackoverflow_Info_Extract (line 44) | class Stackoverflow_Info_Extract: method __init__ (line 45) | def __init__(self,annotattion_folder): method Extract_Text_From_XML (line 55) | def Extract_Text_From_XML(self,input_text): method tokenize_and_annotae_post_body (line 153) | def tokenize_and_annotae_post_body(self, xml_filtered_string,post_id): method read_file (line 230) | def read_file(self, input_file): FILE: code/SOTokenizer/ark_twokenize.py function regex_or (line 40) | def regex_or(*items): function splitEdgePunct (line 207) | def splitEdgePunct(input): function simpleTokenize (line 213) | def simpleTokenize(text): function addAllnonempty (line 271) | def addAllnonempty(master, smaller): function squeezeWhitespace (line 279) | def squeezeWhitespace(input): function splitToken (line 283) | def splitToken(token): function tokenize (line 290) | def tokenize(text): function normalizeTextForTagger (line 296) | def normalizeTextForTagger(text): function tokenizeRawTweetText (line 307) | def tokenizeRawTweetText(text): FILE: code/SOTokenizer/stokenizer.py function regex_or (line 42) | def regex_or(*items): function num2roman (line 233) | def num2roman(num): function generate_number_list (line 248) | def generate_number_list(limit): function splitEdgePunct_software (line 334) | def splitEdgePunct_software(input): function Split_End_of_Sentence_Punc (line 445) | def Split_End_of_Sentence_Punc(token_list): function simpleTokenize_software (line 474) | def simpleTokenize_software(text): function addAllnonempty (line 544) | def addAllnonempty(master, smaller): function squeezeWhitespace (line 552) | def squeezeWhitespace(input): function splitToken (line 556) | def splitToken(token): function tokenize_text (line 563) | def tokenize_text(text): function normalizeTextForTagger (line 569) | def normalizeTextForTagger(text): function Split_On_Multiple_Dot (line 578) | def Split_On_Multiple_Dot(input_word): function Split_On_Non_function_end_parenthesis (line 603) | def Split_On_Non_function_end_parenthesis(input_word): function Split_On_last_letter_Colon_Mark (line 657) | def Split_On_last_letter_Colon_Mark(input_word): function Split_On_last_letter_Quote_Mark (line 683) | def Split_On_last_letter_Quote_Mark(input_word): function Split_Words_Inside_Parenthesis (line 721) | def Split_Words_Inside_Parenthesis(input_word): function Split_Parenthesis_At_End_of_URL (line 738) | def Split_Parenthesis_At_End_of_URL(input_word): function SO_Tokenizer_wrapper (line 759) | def SO_Tokenizer_wrapper(tokens): function match_paren (line 815) | def match_paren(current,previous): function find_word_w_balanced_paren (line 824) | def find_word_w_balanced_paren(line): function Mask_Nested_Paren_HTML_Word (line 861) | def Mask_Nested_Paren_HTML_Word(text): function Resotre_Masked_Words (line 902) | def Resotre_Masked_Words(tokens,nested_parenthesis_words_dict, base): function tokenize (line 933) | def tokenize(text):