SYMBOL INDEX (383 symbols across 55 files) FILE: src/book/ch02.py function lexical_diversity (line 8) | def lexical_diversity(text): function contents (line 11) | def contents(corpus): function describe (line 14) | def describe(corpus): function brown_word_usage_by_category (line 24) | def brown_word_usage_by_category(brown, words): function brown_word_usage_by_category_cfg (line 33) | def brown_word_usage_by_category_cfg(brown, words): function inaugural_word_usage_by_year (line 41) | def inaugural_word_usage_by_year(inaugural, words): function udhr_language_length (line 50) | def udhr_language_length(udhr, languages): function load_local (line 57) | def load_local(corpus_root): function generate_model (line 60) | def generate_model(cfdist, start_word, num=15): function unusual_words (line 65) | def unusual_words(text): function non_stopword_content_pct (line 71) | def non_stopword_content_pct(text): function gen_words_puzzle (line 76) | def gen_words_puzzle(puzzle_letters, obligatory_letter, min_word_size): function gender_ambig_names (line 83) | def gender_ambig_names(): function gender_names_by_firstchar (line 89) | def gender_names_by_firstchar(): function semantic_similarity (line 97) | def semantic_similarity(left, right): FILE: src/book/ch02_ex.py function ex1 (line 7) | def ex1(): function ex2 (line 15) | def ex2(): function ex4 (line 23) | def ex4(): function ex5 (line 40) | def ex5(): function ex7 (line 52) | def ex7(): function ex8 (line 59) | def ex8(): function ex10 (line 74) | def ex10(): function ex11 (line 91) | def ex11(): function ex12 (line 100) | def ex12(): function ex13 (line 114) | def ex13(): function ex14_supergloss (line 124) | def ex14_supergloss(s): function ex15 (line 134) | def ex15(): function ex16 (line 139) | def ex16(): function ex17 (line 149) | def ex17(): function ex18 (line 157) | def ex18(): function ex25_findlanguage (line 170) | def ex25_findlanguage(): function ex26_branchingfactor (line 187) | def ex26_branchingfactor(): function branchingfactor_r (line 196) | def branchingfactor_r(synset, num_synsets, num_hyponyms): function ex27_polysemy (line 203) | def ex27_polysemy(): function main (line 214) | def main(): FILE: src/book/ch03.py function download (line 12) | def download(url, file): function web_file_plain (line 20) | def web_file_plain(): function web_file_html (line 36) | def web_file_html(): function web_file_rss (line 46) | def web_file_rss(): function unicode_read (line 58) | def unicode_read(): function basic_regexps (line 64) | def basic_regexps(): function compress (line 72) | def compress(regex, word): function compress_vowels (line 76) | def compress_vowels(): function consonant_vowel_sequences_rotokas (line 83) | def consonant_vowel_sequences_rotokas(): function discover_hypernyms (line 95) | def discover_hypernyms(): function find_in_stemmed_index (line 101) | def find_in_stemmed_index(word): function regex_word_tokenize (line 114) | def regex_word_tokenize(): function sentence_tokenization (line 132) | def sentence_tokenization(): function main (line 138) | def main(): FILE: src/book/ch03_ex.py function ch03_10 (line 7) | def ch03_10(): function ch03_18_wh_words (line 11) | def ch03_18_wh_words(): function ch03_29_reading_difficulty (line 15) | def ch03_29_reading_difficulty(): function ch03_30_porter_vs_lancaster (line 27) | def ch03_30_porter_vs_lancaster(): function ch03_42_wordnet_semantic_index (line 35) | def ch03_42_wordnet_semantic_index(): function bigram_freqdist (line 68) | def bigram_freqdist(words): function ch03_43_translate (line 73) | def ch03_43_translate(): function main (line 87) | def main(): FILE: src/book/ch04_ex.py function bylen (line 8) | def bylen(x, y): function ch04_10_sort_words_by_length (line 11) | def ch04_10_sort_words_by_length(words): function gematrix_score (line 14) | def gematrix_score(word): function ch04_16_gematria_for_word (line 23) | def ch04_16_gematria_for_word(): function ch04_17_shorten (line 31) | def ch04_17_shorten(words, n): function ch04_19_sort_by_path_sim (line 38) | def ch04_19_sort_by_path_sim(synsets, ref_synset): function insert_trie (line 49) | def insert_trie(keys): function insert_trie_r (line 54) | def insert_trie_r(trie, key): function lookup_trie (line 63) | def lookup_trie(trie, key): function lookup_trie_r (line 67) | def lookup_trie_r(trie, key, buf): function ch04_23_lookup_trie (line 81) | def ch04_23_lookup_trie(): function catalan1 (line 90) | def catalan1(n): function catalan2 (line 96) | def catalan2(cache, n): function ch04_26_catalan_numbers (line 105) | def ch04_26_catalan_numbers(): function main (line 135) | def main(): FILE: src/book/ch05.py function basic_tagger_usage (line 7) | def basic_tagger_usage(): function similar_words (line 20) | def similar_words(): function tagged_token_representation (line 26) | def tagged_token_representation(): function common_verbs_in_news (line 39) | def common_verbs_in_news(): function findtags (line 59) | def findtags(tag_prefix, tagged_text): function how_is_often_used_in_text (line 68) | def how_is_often_used_in_text(): function find_verb_to_verb_patterns (line 80) | def find_verb_to_verb_patterns(): function find_highly_ambiguous_words (line 87) | def find_highly_ambiguous_words(): function tag_most_frequent_words (line 99) | def tag_most_frequent_words(): function word_count (line 109) | def word_count(): function anagrams (line 117) | def anagrams(): function analysis_using_word_and_prev_pos (line 128) | def analysis_using_word_and_prev_pos(): function invert_dictionary (line 136) | def invert_dictionary(): function tagging_tests (line 142) | def tagging_tests(): function _evaluate_tagger (line 165) | def _evaluate_tagger(cfd, wordlist, sents): function effect_of_model_size_on_tagger (line 170) | def effect_of_model_size_on_tagger(): function train_and_test_tagger (line 185) | def train_and_test_tagger(): function show_sparse_data_problem_with_bigram_tagger (line 194) | def show_sparse_data_problem_with_bigram_tagger(): function nested_backoff_tagger (line 204) | def nested_backoff_tagger(): function ambiguous_tags (line 229) | def ambiguous_tags(): function main (line 248) | def main(): FILE: src/book/ch05_ex.py function ch05_1_3_tag_sentences (line 7) | def ch05_1_3_tag_sentences(): function ch05_10_train_test_unigram_tagger (line 17) | def ch05_10_train_test_unigram_tagger(): function ch05_11_train_test_affix_tagger (line 29) | def ch05_11_train_test_affix_tagger(): function ch05_14_brown_corpus_tags_list (line 37) | def ch05_14_brown_corpus_tags_list(): function ch05_15_brown_corpus_trivia (line 41) | def ch05_15_brown_corpus_trivia(): function ch05_17_lookup_tagger_performance_upper_limit (line 69) | def ch05_17_lookup_tagger_performance_upper_limit(): function ch05_18_brown_corpus_statistics (line 82) | def ch05_18_brown_corpus_statistics(): function ch05_20_brown_corpus_words_phrases_by_tag (line 107) | def ch05_20_brown_corpus_words_phrases_by_tag(): function ch05_21_qualifiers_before_adore_love_like_prefer (line 133) | def ch05_21_qualifiers_before_adore_love_like_prefer(): function ch05_22_regular_expression_tagger (line 141) | def ch05_22_regular_expression_tagger(): function ch05_27_collapse_tags_based_on_conf_matrix (line 155) | def ch05_27_collapse_tags_based_on_conf_matrix(): function ch05_30_bigram_tagger_low_freq_words_as_unk (line 159) | def ch05_30_bigram_tagger_low_freq_words_as_unk(): function ch05_32_brill_tagger (line 178) | def ch05_32_brill_tagger(): function ch05_33_list_pos_of_word_given_word_and_pos (line 183) | def ch05_33_list_pos_of_word_given_word_and_pos(): function ch05_34_num_words_with_1to10_distinct_tags (line 192) | def ch05_34_num_words_with_1to10_distinct_tags(): function ch05_35_must_contexts (line 221) | def ch05_35_must_contexts(): function ch05_37_prev_token_tagger (line 232) | def ch05_37_prev_token_tagger(): function ch05_39_statistical_tagger (line 236) | def ch05_39_statistical_tagger(): function main (line 241) | def main(): FILE: src/book/ch06.py function _gender_features (line 10) | def _gender_features(word): function naive_bayes_gender_classifier (line 25) | def naive_bayes_gender_classifier(): function error_analysis (line 42) | def error_analysis(): function _document_features (line 62) | def _document_features(document, word_features): function document_classification_movie_reviews (line 69) | def document_classification_movie_reviews(): function _pos_features (line 85) | def _pos_features(word, common_suffixes): function pos_tagging_classification (line 91) | def pos_tagging_classification(): function _pos_features2 (line 110) | def _pos_features2(sentence, i): function pos_tagging_classification_with_sentence_context (line 121) | def pos_tagging_classification_with_sentence_context(): function _pos_features3 (line 134) | def _pos_features3(sentence, i, history): class ConsecutivePosTagger (line 147) | class ConsecutivePosTagger(nltk.TaggerI): method __init__ (line 148) | def __init__(self, train_sents): method tag (line 159) | def tag(self, sentence): function sequence_classification_using_prev_pos (line 167) | def sequence_classification_using_prev_pos(): function _punct_features (line 175) | def _punct_features(tokens, i): function sentence_segmentation_as_classification_for_punctuation (line 182) | def sentence_segmentation_as_classification_for_punctuation(): function _dialog_act_features (line 201) | def _dialog_act_features(post): function identify_dialog_act_types (line 207) | def identify_dialog_act_types(): function _rte_features (line 216) | def _rte_features(rtepair): function recognize_text_entailment (line 226) | def recognize_text_entailment(): function entropy (line 237) | def entropy(labels): function calc_entropy (line 242) | def calc_entropy(): function main (line 247) | def main(): FILE: src/book/ch07.py function _ie_preprocess (line 8) | def _ie_preprocess(document): function simple_regex_based_np_chunker (line 13) | def simple_regex_based_np_chunker(): function _find_chunks (line 33) | def _find_chunks(pattern): function extract_pos_pattern_with_chunk_parser (line 45) | def extract_pos_pattern_with_chunk_parser(): function iob_to_tree (line 49) | def iob_to_tree(): function read_chunked_corpus (line 71) | def read_chunked_corpus(): function evaluate_chunker (line 76) | def evaluate_chunker(): class UnigramChunker (line 85) | class UnigramChunker(nltk.ChunkParserI): method __init__ (line 86) | def __init__(self, train_sents): method parse (line 92) | def parse(self, sentence): function chunk_with_unigram_tagger (line 100) | def chunk_with_unigram_tagger(): function _npchunk_features (line 111) | def _npchunk_features(sentence, i, history): class ConsecutiveNPChunkTagger (line 135) | class ConsecutiveNPChunkTagger(nltk.TaggerI): method __init__ (line 136) | def __init__(self, train_sents): method tag (line 148) | def tag(self, sentence): class ConsecutiveNPChunker (line 156) | class ConsecutiveNPChunker(nltk.ChunkParserI): method __init__ (line 157) | def __init__(self, train_sents): method parse (line 162) | def parse(self, sentence): function train_classifier_based_chunker (line 167) | def train_classifier_based_chunker(): function recursive_chunk_parser (line 174) | def recursive_chunk_parser(): function _traverse (line 190) | def _traverse(t): function nltk_tree_handling (line 201) | def nltk_tree_handling(): function named_entity_recognition (line 218) | def named_entity_recognition(): function relation_extraction (line 224) | def relation_extraction(): function relation_extraction2 (line 230) | def relation_extraction2(): function main (line 249) | def main(): FILE: src/book/ch07_ex.py function ch07_02_match_np_containing_plural_nouns (line 7) | def ch07_02_match_np_containing_plural_nouns(): function ch07_03_develop_grammar_with_chunkparser (line 16) | def ch07_03_develop_grammar_with_chunkparser(): function ch07_05_tag_pattern_np_containing_gerund (line 30) | def ch07_05_tag_pattern_np_containing_gerund(): function ch07_06_coordinated_noun_phrases (line 42) | def ch07_06_coordinated_noun_phrases(): function ch07_07_chunker_eval (line 56) | def ch07_07_chunker_eval(): function ch07_13a_tag_seqs_for_np (line 71) | def ch07_13a_tag_seqs_for_np(): function ch07_13c_better_chunker (line 89) | def ch07_13c_better_chunker(): function _chunk2brackets (line 102) | def _chunk2brackets(sent): function _chunk2iob (line 109) | def _chunk2iob(sent): function ch07_16a_penn_treebank (line 116) | def ch07_16a_penn_treebank(): function main (line 123) | def main(): FILE: src/book/ch08.py function sentence_parse_example (line 8) | def sentence_parse_example(): function simple_cfg (line 25) | def simple_cfg(): function parsing_types (line 79) | def parsing_types(): function _chart_init_wfst (line 100) | def _chart_init_wfst(tokens, grammar): function _chart_complete_wfst (line 108) | def _chart_complete_wfst(wfst, tokens, grammar, trace=False): function _chart_display (line 123) | def _chart_display(wfst, tokens): function chart_parsing (line 131) | def chart_parsing(): function dependency_grammar (line 148) | def dependency_grammar(): function _grammar_filter (line 163) | def _grammar_filter(tree): function grammar_development_with_treebank (line 168) | def grammar_development_with_treebank(): function word_valency (line 176) | def word_valency(): function _give_give (line 187) | def _give_give(t): function _give_sent (line 192) | def _give_sent(t): function _give_print_node (line 195) | def _give_print_node(t, width): function give_gave_usage (line 202) | def give_gave_usage(): function pcfg_parser (line 210) | def pcfg_parser(): function main (line 236) | def main(): FILE: src/book/ch09.py function _grammatical_lex2fs (line 7) | def _grammatical_lex2fs(word): function grammatical_features (line 15) | def grammatical_features(): function the_dog_runs (line 24) | def the_dog_runs(): function sample_grammar (line 62) | def sample_grammar(): function feature_structures (line 69) | def feature_structures(): function feature_structure_unification (line 83) | def feature_structure_unification(): function sentence_parsing (line 105) | def sentence_parsing(): function main (line 115) | def main(): FILE: src/book/ch10.py function english_to_sql (line 8) | def english_to_sql(): function logic_parser (line 22) | def logic_parser(): function first_order_logic (line 38) | def first_order_logic(): function truth_model (line 45) | def truth_model(): function main (line 65) | def main(): FILE: src/brown_dict/dict_build.py function normalize_brown_postags (line 16) | def normalize_brown_postags(): function retag_brown_words (line 36) | def retag_brown_words(tag_map): function compose_record (line 45) | def compose_record(word, wordpos_fd): FILE: src/brown_dict/phrase_seqs.py function normalize_ptb_tags (line 18) | def normalize_ptb_tags(): function get_chunks (line 38) | def get_chunks(tree, phrase_type, tags): function index_of (line 50) | def index_of(tag): function update_trans_freqs (line 58) | def update_trans_freqs(trans_freqs, tag_seq): FILE: src/brown_dict/predict.py function load_word_dict (line 7) | def load_word_dict(dict_file): function load_phrase_tags (line 21) | def load_phrase_tags(phrase_tag_file): function assert_true (line 33) | def assert_true(fn, message): function tag_to_index (line 37) | def tag_to_index(tag): function index_to_tag (line 45) | def index_to_tag(index): function predict_likely_pos (line 53) | def predict_likely_pos(prev_tag, trans_probs): function predict_pos (line 58) | def predict_pos(word, word_dict): function predict_if_noun (line 65) | def predict_if_noun(word, word_dict): function predict_if_noun_phrase (line 68) | def predict_if_noun_phrase(phrase, trans_probs, phrase_tags): FILE: src/cener/bootstrap.py function iotag (line 13) | def iotag(token): function modify_tags (line 27) | def modify_tags(pairs): function partition_pairs (line 44) | def partition_pairs(pairs): function main (line 56) | def main(): FILE: src/cener/cener.py function train_ner (line 8) | def train_ner(pickle_file): function get_trained_ner (line 36) | def get_trained_ner(pickle_file): function test_ner (line 42) | def test_ner(input_file, classifier): function main (line 66) | def main(): FILE: src/cener/cener_lib.py function train_pos_tagger (line 6) | def train_pos_tagger(): function ce_phrases (line 18) | def ce_phrases(): function ce_phrase_words (line 41) | def ce_phrase_words(ce_phrases): function slice_matches (line 53) | def slice_matches(a1, a2): function slots_available (line 66) | def slots_available(matched_slots, start, end): function promote_coreferences (line 73) | def promote_coreferences(tuple, ce_words): function tag (line 81) | def tag(sentence, pos_tagger, ce_phrases, ce_words): function shape (line 122) | def shape(word): function word_features (line 132) | def word_features(tagged_sent, wordpos): FILE: src/docsim/blogdoctest.py function main (line 8) | def main(): FILE: src/docsim/docsim.py function preprocess (line 14) | def preprocess(fnin, fnout): function train (line 36) | def train(fnin): function test (line 51) | def test(tdMatrix, cats, fsim): function main (line 75) | def main(): FILE: src/docsim/scam_dist.py function _s_pos_or_zero (line 5) | def _s_pos_or_zero(x): function _s_zero_mask (line 8) | def _s_zero_mask(x, y): function _s_safe_divide (line 11) | def _s_safe_divide(x, y): function _assymetric_subset_measure (line 18) | def _assymetric_subset_measure(doc1, doc2): function scam_distance (line 26) | def scam_distance(doc1, doc2): FILE: src/drug_ner/apply_model.py function vectorize_ngrams (line 8) | def vectorize_ngrams(ngrams, vocab): FILE: src/drug_ner/co_train.py function conservative_min (line 13) | def conservative_min(xs): FILE: src/drug_ner/drug_ner_utils.py function is_punct (line 15) | def is_punct(c): function is_number (line 18) | def is_number(c): function str_to_ngrams (line 21) | def str_to_ngrams(instring, gram_size): function ngram_distrib (line 32) | def ngram_distrib(names, gram_size): function plot_ngram_distrib (line 38) | def plot_ngram_distrib(fd, nbest, title, gram_size): function truncate_fd (line 50) | def truncate_fd(fd, nbest): function vectorize (line 54) | def vectorize(ufile, pfile, max_feats): FILE: src/drug_ner/ngram_convert.py function build_ngram_text (line 5) | def build_ngram_text(infile, outfile): FILE: src/drug_ner/parse_drugbank.py class DrugXmlContentHandler (line 7) | class DrugXmlContentHandler(xml.sax.ContentHandler): method __init__ (line 9) | def __init__(self): method startElement (line 15) | def startElement(self, name, attrs): method endElement (line 18) | def endElement(self, name): method characters (line 21) | def characters(self, content): function write_list_to_file (line 28) | def write_list_to_file(lst, filename): FILE: src/entity-graph/05-find-corefs.py function get_coref_clusters (line 15) | def get_coref_clusters(ptext, nlp, offset=0): function partition_mentions_by_sentence (line 34) | def partition_mentions_by_sentence(mentions, ptext, para_id, nlp): FILE: src/entity-graph/06-find-matches.py function build_automaton (line 17) | def build_automaton(): function find_matches (line 55) | def find_matches(A, sent_text): function find_corefs (line 77) | def find_corefs(coref_file, sid): function replace_corefs (line 94) | def replace_corefs(sent_text, corefs): FILE: src/genetagger/file_reformatter.py function reformat (line 2) | def reformat(file_in, file_out, is_tagged): function main (line 23) | def main(): FILE: src/genetagger/hmm_gene_ner.py function findRareWords (line 13) | def findRareWords(train_file): function normalizeRareWord (line 24) | def normalizeRareWord(word, rareWords, replaceRare): function pad (line 44) | def pad(sent, tags=True): function calculateMetrics (line 60) | def calculateMetrics(actual, predicted): function writeResult (line 87) | def writeResult(fout, hmm, words): function bigramToUnigram (line 96) | def bigramToUnigram(bigrams): function calculateBackoffTransCPD (line 104) | def calculateBackoffTransCPD(tagsFD, transCFD, trans2CFD): class Accumulator (line 131) | class Accumulator: method __init__ (line 136) | def __init__(self, rareWords, replaceRare, useTrigrams): method addSentence (line 149) | def addSentence(self, sent, norm_func): function train (line 184) | def train(train_file, function validate (line 217) | def validate(hmm, validation_file, rareWords, replaceRare, useTrigrams): function test (line 250) | def test(hmm, test_file, result_file, rareWords, replaceRare, useTrigrams): function main (line 274) | def main(): FILE: src/hangman/game.py function ascii_fold (line 21) | def ascii_fold(s): function preprocess (line 26) | def preprocess(dictfile): function select_secret_word (line 41) | def select_secret_word(words): function find_all_match_positions (line 45) | def find_all_match_positions(secret_word, guess_char): function update_guessed_word (line 56) | def update_guessed_word(guessed_word, matched_positions, guessed_char): function is_solved (line 60) | def is_solved(guessed_word): function most_frequent_char (line 70) | def most_frequent_char(words, previously_guessed): function best_guess (line 82) | def best_guess(words, word_len, bad_guesses, good_guesses, guessed_word): function init_guess (line 93) | def init_guess(wordlen): function match_words_against_template (line 99) | def match_words_against_template(words, guessed_word): function replace_guessed_word (line 114) | def replace_guessed_word(guessed_word, matched_word): function single_round (line 121) | def single_round(words, debug=False): function multiple_rounds (line 151) | def multiple_rounds(words, num_games, report_file): FILE: src/langmodel/med_lang_model.py class LangModel (line 11) | class LangModel: method __init__ (line 12) | def __init__(self, order, alpha, sentences): method logprob (line 33) | def logprob(self, ngram): method prob (line 36) | def prob(self, ngram): function train (line 48) | def train(): function test (line 65) | def test(): function main (line 79) | def main(): FILE: src/langmodel/old_med_lang_model.py function train (line 10) | def train(): function test (line 37) | def test(langModel): function main (line 52) | def main(): FILE: src/medorleg/db_loader.py function is_empty (line 3) | def is_empty(conn): function create_tables (line 9) | def create_tables(conn): function gram_to_list (line 23) | def gram_to_list(gram): function populate_tables (line 26) | def populate_tables(conn, infn, t3n, t2n, t1n): function build_indexes (line 48) | def build_indexes(conn): function main (line 61) | def main(): FILE: src/medorleg/eval_model.py function normalize_numeric (line 11) | def normalize_numeric(x): function normalize_stopword (line 15) | def normalize_stopword(x, stopwords): function get_trigrams (line 18) | def get_trigrams(sentence, stopwords, porter): function get_base_counts (line 26) | def get_base_counts(conn, morl): function load_model_coeffs (line 32) | def load_model_coeffs(model): function calc_prob (line 39) | def calc_prob(trigrams, conn, coeffs, morl, n, v): function eval_model (line 62) | def eval_model(medmodelfn, legmodelfn, testfn, stopwords, porter, conn): function calc_acc (line 85) | def calc_acc(stats): function main (line 88) | def main(): FILE: src/medorleg/model_params.py function train (line 8) | def train(prefix): function main (line 17) | def main(): FILE: src/medorleg/ngram_counting_job.py class NGramCountingJob (line 6) | class NGramCountingJob(MRJob): method mapper_init (line 8) | def mapper_init(self): method mapper (line 31) | def mapper(self, key, value): method reducer (line 55) | def reducer(self, key, values): FILE: src/medorleg/preprocess.py function medical_plaintext (line 9) | def medical_plaintext(fn): function legal_plaintext (line 43) | def legal_plaintext(fn): function convert_to_plaintext (line 92) | def convert_to_plaintext(dir, ofn, category, func): function main (line 102) | def main(): FILE: src/medorleg/regression_data.py function get_base_counts (line 6) | def get_base_counts(conn, morl): function gram_to_list (line 12) | def gram_to_list(gram): function build_regdata (line 15) | def build_regdata(conn, morl, infn, outX, outY): function main (line 48) | def main(): FILE: src/medorleg/testset_splitter.py function main (line 3) | def main(): FILE: src/medorleg2/arffwriter.py function qq (line 5) | def qq(s): function save_arff (line 8) | def save_arff(X, y, vocab, fname): FILE: src/medorleg2/arffwriter_test.py function load_xy (line 12) | def load_xy(xfile, yfile): function print_timestamp (line 30) | def print_timestamp(message): function main (line 33) | def main(): FILE: src/medorleg2/classify.py function calc_ngrams (line 23) | def calc_ngrams(line): function generate_xy (line 33) | def generate_xy(texts, labels): function crossvalidate_model (line 49) | def crossvalidate_model(X, y, nfolds): function train_model (line 62) | def train_model(X, y, binmodel): function test_model (line 73) | def test_model(X, y, binmodel): function print_timestamp (line 83) | def print_timestamp(message): function usage (line 86) | def usage(): function main (line 90) | def main(): FILE: src/medorleg2/preprocess.py function medical_plaintext (line 12) | def medical_plaintext(fn): function legal_plaintext (line 46) | def legal_plaintext(fn): function parse_to_plaintext (line 96) | def parse_to_plaintext(dirs, labels, funcs, sent_file, label_file): function main (line 112) | def main(): FILE: src/phrases/interesting_phrases.py function isValid (line 8) | def isValid(word): function llr (line 15) | def llr(c1, c2, c12, n): function isLikelyNGram (line 31) | def isLikelyNGram(ngram, phrases): function main (line 37) | def main(): FILE: src/phrases/preprocess.py function main (line 6) | def main(): FILE: src/sameword/same_word_finder.py function similarity (line 5) | def similarity(w1, w2, sim=wn.path_similarity): function main (line 17) | def main(): FILE: src/semantic/short_sentence_similarity.py function get_best_synset_pair (line 28) | def get_best_synset_pair(word_1, word_2): function length_dist (line 49) | def length_dist(synset_1, synset_2): function hierarchy_dist (line 75) | def hierarchy_dist(synset_1, synset_2): function word_similarity (line 109) | def word_similarity(word_1, word_2): function most_similar_word (line 116) | def most_similar_word(word, word_set): function info_content (line 132) | def info_content(lookup_word): function semantic_vector (line 152) | def semantic_vector(words, joint_words, info_content_norm): function semantic_similarity (line 180) | def semantic_similarity(sentence_1, sentence_2, info_content_norm): function word_order_vector (line 194) | def word_order_vector(words, joint_words, windex): function word_order_similarity (line 223) | def word_order_similarity(sentence_1, sentence_2): function similarity (line 238) | def similarity(sentence_1, sentence_2, info_content_norm): FILE: src/similar-tweets-nmslib/03-query-times.py function lookup_tweet_by_id (line 19) | def lookup_tweet_by_id(tweet_id): FILE: src/stlclust/extract_stl.py function find_first (line 14) | def find_first(line, cs): FILE: src/stlclust/fuzz_similarity.py function compute_similarity (line 9) | def compute_similarity(s1, s2): FILE: src/topicmodel/bok_model.py function iter_docs (line 6) | def iter_docs(topdir): class MyBokCorpus (line 13) | class MyBokCorpus(object): method __init__ (line 15) | def __init__(self, topdir): method __iter__ (line 19) | def __iter__(self): FILE: src/topicmodel/bow_model.py function iter_docs (line 6) | def iter_docs(topdir, stoplist): class MyCorpus (line 15) | class MyCorpus(object): method __init__ (line 17) | def __init__(self, topdir, stoplist): method __iter__ (line 22) | def __iter__(self): FILE: src/topicmodel/keywords_merge.py function main (line 10) | def main():