SYMBOL INDEX (542 symbols across 34 files) FILE: doc/source/conf.py function skip (line 80) | def skip(app, what, name, obj, would_skip, options): function setup (line 85) | def setup(app): FILE: examples/_benchmarktools.py function add_timing (line 8) | def add_timing(label): function print_timings (line 13) | def print_timings(): FILE: examples/bundestag18_tfidf.py function del_special_chars (line 88) | def del_special_chars(t): function correct_contractions (line 97) | def correct_contractions(t): function correct_hyphenation (line 104) | def correct_hyphenation(t): FILE: tests/_testtools.py function strategy_2d_array (line 7) | def strategy_2d_array(dtype, minval=0, maxval=None, **kwargs): function strategy_dtm (line 30) | def strategy_dtm(): function strategy_dtm_small (line 34) | def strategy_dtm_small(): function strategy_2d_prob_distribution (line 38) | def strategy_2d_prob_distribution(): function strategy_tokens (line 42) | def strategy_tokens(*args, **kwargs): function strategy_lists_of_tokens (line 46) | def strategy_lists_of_tokens(*args, **kwargs): function strategy_texts (line 50) | def strategy_texts(*args, **kwargs): function strategy_texts_printable (line 54) | def strategy_texts_printable(): function strategy_str_str_dict (line 58) | def strategy_str_str_dict(keys_args, keys_kwargs, values_args, values_kw... function strategy_str_str_dict_printable (line 62) | def strategy_str_str_dict_printable(): FILE: tests/test_bow.py function test_doc_lengths (line 24) | def test_doc_lengths(dtm, matrix_type): function test_doc_frequencies (line 45) | def test_doc_frequencies(dtm, matrix_type): function test_doc_frequencies2 (line 77) | def test_doc_frequencies2(): function test_codoc_frequencies (line 94) | def test_codoc_frequencies(dtm, matrix_type, proportions): function test_codoc_frequencies2 (line 135) | def test_codoc_frequencies2(): function test_term_frequencies (line 153) | def test_term_frequencies(dtm, matrix_type): function test_tf_binary (line 192) | def test_tf_binary(dtm, matrix_type): function test_tf_proportions (line 232) | def test_tf_proportions(dtm, matrix_type): function test_tf_log (line 261) | def test_tf_log(dtm, matrix_type): function test_tf_double_norm (line 294) | def test_tf_double_norm(dtm, matrix_type, K): function test_idf (line 321) | def test_idf(dtm, matrix_type): function test_idf_probabilistic (line 341) | def test_idf_probabilistic(dtm, matrix_type): function test_tfidf (line 367) | def test_tfidf(dtm, matrix_type, tf_func, K, idf_func, smooth, smooth_lo... function test_tfidf_example (line 414) | def test_tfidf_example(): function test_sorted_terms (line 443) | def test_sorted_terms(dtm, matrix_type, lo_thresh, hi_thresh, top_n, asc... function test_sorted_terms_example (line 492) | def test_sorted_terms_example(): function test_sorted_terms_table (line 526) | def test_sorted_terms_table(dtm, matrix_type, lo_thresh, hi_thresh, top_... function test_dtm_to_dataframe (line 557) | def test_dtm_to_dataframe(dtm, matrix_type): function test_dtm_to_gensim_corpus_and_gensim_corpus_to_dtm (line 589) | def test_dtm_to_gensim_corpus_and_gensim_corpus_to_dtm(dtm, matrix_type): function test_dtm_and_vocab_to_gensim_corpus_and_dict (line 610) | def test_dtm_and_vocab_to_gensim_corpus_and_dict(dtm, matrix_type, as_ge... FILE: tests/test_corpus.py function spacy_instance_en_sm (line 57) | def spacy_instance_en_sm(): function corpus_en (line 62) | def corpus_en(): function corpus_en_module (line 67) | def corpus_en_module(): function corpora_en_serial_and_parallel (line 72) | def corpora_en_serial_and_parallel(): function corpora_en_serial_and_parallel_module (line 79) | def corpora_en_serial_and_parallel_module(): function corpora_en_serial_and_parallel_also_w_vectors_module (line 86) | def corpora_en_serial_and_parallel_also_w_vectors_module(): function corpus_de (line 94) | def corpus_de(): function corpus_de_module (line 99) | def corpus_de_module(): function test_datadirs (line 107) | def test_datadirs(): function test_fixtures_n_docs_and_doc_labels (line 113) | def test_fixtures_n_docs_and_doc_labels(corpus_en, corpus_de): function test_corpus_no_lang_given (line 124) | def test_corpus_no_lang_given(): function test_empty_corpus (line 129) | def test_empty_corpus(): function test_corpus_init (line 147) | def test_corpus_init(): function test_corpus_init_md_model_required (line 253) | def test_corpus_init_md_model_required(): function test_corpus_init_and_properties_hypothesis (line 272) | def test_corpus_init_and_properties_hypothesis(spacy_instance_en_sm, doc... function test_corpus_init_otherlang_by_langcode (line 346) | def test_corpus_init_otherlang_by_langcode(): function test_corpus_setitem_delitem (line 369) | def test_corpus_setitem_delitem(corpora_en_serial_and_parallel): function test_corpus_iter_contains (line 404) | def test_corpus_iter_contains(corpora_en_serial_and_parallel): function test_corpus_update (line 414) | def test_corpus_update(corpora_en_serial_and_parallel): function test_doc_tokens_hypothesis (line 453) | def test_doc_tokens_hypothesis(corpora_en_serial_and_parallel_module, **... function test_doc_lengths (line 577) | def test_doc_lengths(corpora_en_serial_and_parallel_module, select, as_t... function test_doc_token_lengths (line 614) | def test_doc_token_lengths(corpora_en_serial_and_parallel_module, select): function test_doc_num_sents (line 644) | def test_doc_num_sents(corpora_en_serial_and_parallel_module, select, as... function test_doc_sent_lengths (line 685) | def test_doc_sent_lengths(corpora_en_serial_and_parallel_module, apply_f... function test_doc_labels (line 716) | def test_doc_labels(corpora_en_serial_and_parallel_module, sort): function test_doc_labels_sample (line 729) | def test_doc_labels_sample(corpora_en_serial_and_parallel_module, n): function test_doc_texts (line 745) | def test_doc_texts(corpora_en_serial_and_parallel_module, collapse, sele... function test_doc_frequencies (line 793) | def test_doc_frequencies(corpora_en_serial_and_parallel_module, proporti... function test_doc_vectors (line 833) | def test_doc_vectors(corpora_en_serial_and_parallel_also_w_vectors_modul... function test_token_vectors (line 866) | def test_token_vectors(corpora_en_serial_and_parallel_also_w_vectors_mod... function test_spacydocs (line 905) | def test_spacydocs(corpora_en_serial_and_parallel_also_w_vectors_module,... function test_vocabulary_hypothesis (line 937) | def test_vocabulary_hypothesis(corpora_en_serial_and_parallel_module, se... function test_vocabulary_counts (line 986) | def test_vocabulary_counts(corpora_en_serial_and_parallel_module, select... function test_vocabulary_size (line 1039) | def test_vocabulary_size(corpora_en_serial_and_parallel_module, select, ... function test_tokens_table_hypothesis (line 1065) | def test_tokens_table_hypothesis(corpora_en_serial_and_parallel_module, ... function test_corpus_tokens_flattened (line 1118) | def test_corpus_tokens_flattened(corpora_en_serial_and_parallel_module, ... function test_corpus_num_tokens (line 1173) | def test_corpus_num_tokens(corpora_en_serial_and_parallel_module, select): function test_corpus_num_chars (line 1187) | def test_corpus_num_chars(corpora_en_serial_and_parallel_module, select): function test_corpus_unique_chars (line 1202) | def test_corpus_unique_chars(corpora_en_serial_and_parallel_module, sele... function test_corpus_collocations_hypothesis (line 1232) | def test_corpus_collocations_hypothesis(corpora_en_serial_and_parallel_m... function test_corpus_summary (line 1278) | def test_corpus_summary(corpora_en_serial_and_parallel_module, max_docum... function test_print_summary (line 1300) | def test_print_summary(capsys, corpora_en_serial_and_parallel_module): function test_dtm (line 1312) | def test_dtm(corpora_en_serial_and_parallel_module, select, as_table, dt... function test_ngrams_hypothesis (line 1386) | def test_ngrams_hypothesis(corpora_en_serial_and_parallel_module, n, joi... function test_kwic_hypothesis (line 1435) | def test_kwic_hypothesis(corpora_en_serial_and_parallel_module, **args): function test_kwic_example (line 1582) | def test_kwic_example(corpora_en_serial_and_parallel_module): function test_kwic_table_hypothesis (line 1625) | def test_kwic_table_hypothesis(corpora_en_serial_and_parallel_module, **... function test_save_load_corpus (line 1710) | def test_save_load_corpus(corpora_en_serial_and_parallel_module): function test_load_corpus_from_tokens_hypothesis (line 1732) | def test_load_corpus_from_tokens_hypothesis(corpora_en_serial_and_parall... function test_load_corpus_from_tokens_table (line 1811) | def test_load_corpus_from_tokens_table(corpora_en_serial_and_parallel, w... function test_serialize_deserialize_corpus (line 1852) | def test_serialize_deserialize_corpus(corpora_en_serial_and_parallel_mod... function test_corpus_add_files_and_from_files (line 1875) | def test_corpus_add_files_and_from_files(corpora_en_serial_and_parallel,... function test_corpus_add_folder_and_from_folder (line 1943) | def test_corpus_add_folder_and_from_folder(corpora_en_serial_and_paralle... function test_corpus_add_tabular_and_from_tabular (line 2024) | def test_corpus_add_tabular_and_from_tabular(corpora_en_serial_and_paral... function test_corpus_add_zip_and_from_zip (line 2114) | def test_corpus_add_zip_and_from_zip(corpora_en_serial_and_parallel, inp... function test_corpus_from_builtin_corpus (line 2150) | def test_corpus_from_builtin_corpus(max_workers, sample): function test_set_remove_document_attr (line 2185) | def test_set_remove_document_attr(corpora_en_serial_and_parallel, attrna... function test_set_remove_token_attr (line 2235) | def test_set_remove_token_attr(corpora_en_serial_and_parallel, attrname,... function test_corpus_retokenize (line 2297) | def test_corpus_retokenize(corpora_en_serial_and_parallel, testcase, inp... function test_transform_tokens_upper_lower (line 2326) | def test_transform_tokens_upper_lower(corpora_en_serial_and_parallel, te... function test_remove_chars_or_punctuation (line 2369) | def test_remove_chars_or_punctuation(corpora_en_serial_and_parallel, tes... function test_normalize_unicode (line 2397) | def test_normalize_unicode(corpora_en_serial_and_parallel, inplace): function test_simplify_unicode (line 2423) | def test_simplify_unicode(corpora_en_serial_and_parallel, method, inplace): function test_numbers_to_magnitudes (line 2452) | def test_numbers_to_magnitudes(corpora_en_serial_and_parallel, inplace): function test_lemmatize (line 2471) | def test_lemmatize(corpora_en_serial_and_parallel, inplace): function test_join_collocations_by_patterns (line 2494) | def test_join_collocations_by_patterns(corpora_en_serial_and_parallel, t... function test_join_collocations_by_statistic_hypothesis (line 2572) | def test_join_collocations_by_statistic_hypothesis(corpora_en_serial_and... function test_filter_tokens_by_mask (line 2624) | def test_filter_tokens_by_mask(corpora_en_serial_and_parallel, inverse, ... function test_filter_tokens (line 2670) | def test_filter_tokens(corpora_en_serial_and_parallel, testtype, search_... function test_filter_tokens_custom_attr_bug (line 2716) | def test_filter_tokens_custom_attr_bug(corpora_en_serial_and_parallel): function test_filter_for_pos (line 2738) | def test_filter_for_pos(corpora_en_serial_and_parallel, testtype, search... function test_filter_tokens_by_doc_frequency (line 2775) | def test_filter_tokens_by_doc_frequency(corpora_en_serial_and_parallel, ... function test_filter_documents (line 2846) | def test_filter_documents(corpora_en_serial_and_parallel, testtype, sear... function test_filter_documents_by_docattr (line 2924) | def test_filter_documents_by_docattr(corpora_en_serial_and_parallel, tes... function test_filter_documents_by_length (line 3004) | def test_filter_documents_by_length(corpora_en_serial_and_parallel, test... function test_filter_clean_tokens (line 3051) | def test_filter_clean_tokens(corpora_en_serial_and_parallel, remove_punc... function test_filter_tokens_with_kwic (line 3105) | def test_filter_tokens_with_kwic(corpora_en_serial_and_parallel, testtyp... function test_corpus_ngramify (line 3164) | def test_corpus_ngramify(corpora_en_serial_and_parallel, n, join_str, in... function test_corpus_sample (line 3203) | def test_corpus_sample(corpora_en_serial_and_parallel, n, inplace): function test_corpus_split_by_paragraph (line 3224) | def test_corpus_split_by_paragraph(corpora_en_serial_and_parallel, inpla... function test_corpus_join_documents (line 3258) | def test_corpus_join_documents(corpora_en_serial_and_parallel, join, glu... function test_builtin_corpora_info (line 3301) | def test_builtin_corpora_info(with_paths): function test_corpus_workflow_example1 (line 3330) | def test_corpus_workflow_example1(corpora_en_serial_and_parallel): function _check_corpus_inplace_modif (line 3400) | def _check_corpus_inplace_modif(corp_a, corp_b, inplace, check_attrs=Non... function _check_corpus_docs (line 3413) | def _check_corpus_docs(corp: c.Corpus, has_sents: bool): function _check_copies (line 3434) | def _check_copies(corp_a, corp_b, same_nlp_instance): function _check_copies_attrs (line 3446) | def _check_copies_attrs(corp_a, corp_b, check_attrs=None, dont_check_att... function _dataframes_equal (line 3478) | def _dataframes_equal(df1, df2, require_same_index=True): FILE: tests/test_corpusimport.py function test_import_corpus (line 12) | def test_import_corpus(): FILE: tests/test_tokenseq.py function test_token_lengths (line 29) | def test_token_lengths(tokens, expected): function test_token_lengths_hypothesis (line 35) | def test_token_lengths_hypothesis(tokens, as_array): function test_unique_chars_hypothesis (line 47) | def test_unique_chars_hypothesis(tokens): function test_collapse_tokens (line 62) | def test_collapse_tokens(tokens, tokens_as_array, collapse, collapse_as_... function test_simplify_unicode_chars (line 96) | def test_simplify_unicode_chars(token, method, ascii_encoding_errors): function test_strip_tags (line 129) | def test_strip_tags(value, expected): function test_pmi_hypothesis (line 138) | def test_pmi_hypothesis(xy, as_prob, n_total_factor, k, normalize): function test_simple_collocation_counts_hypothesis (line 172) | def test_simple_collocation_counts_hypothesis(xy): function test_token_collocations (line 231) | def test_token_collocations(args, expected): function test_token_collocations_hypothesis (line 252) | def test_token_collocations_hypothesis(sentences, threshold, min_count, ... function test_token_match (line 325) | def test_token_match(pattern, tokens, match_type, ignore_case, glob_meth... function test_token_match_multi_pattern (line 342) | def test_token_match_multi_pattern(pattern, tokens, match_type, ignore_c... function test_token_match_subsequent (line 347) | def test_token_match_subsequent(): function test_token_match_subsequent_hypothesis (line 372) | def test_token_match_subsequent_hypothesis(tokens, n_patterns): function test_token_glue_subsequent (line 399) | def test_token_glue_subsequent(): function test_token_glue_subsequent_hypothesis (line 416) | def test_token_glue_subsequent_hypothesis(tokens, n_patterns): function test_token_ngrams_hypothesis (line 449) | def test_token_ngrams_hypothesis(tokens, n, join, join_str, ngram_contai... function test_numbertoken_to_magnitude (line 527) | def test_numbertoken_to_magnitude(numbertoken, char, firstchar, below_on... FILE: tests/test_topicmod__eval_tools.py function test_split_dtm_for_cross_validation (line 15) | def test_split_dtm_for_cross_validation(dtm, matrix_type, n_folds): FILE: tests/test_topicmod_evaluate.py function test_metric_held_out_documents_wallach09 (line 18) | def test_metric_held_out_documents_wallach09(): function test_compute_models_parallel_lda_multi_vs_singleproc (line 100) | def test_compute_models_parallel_lda_multi_vs_singleproc(): function test_compute_models_parallel_lda_multiple_docs (line 130) | def test_compute_models_parallel_lda_multiple_docs(): function test_evaluation_all_engines_unavail_metric (line 204) | def test_evaluation_all_engines_unavail_metric(): function test_evaluation_lda_all_metrics_multi_vs_singleproc (line 213) | def test_evaluation_lda_all_metrics_multi_vs_singleproc(): function test_evaluation_gensim_all_metrics (line 279) | def test_evaluation_gensim_all_metrics(): function test_compute_models_parallel_gensim (line 313) | def test_compute_models_parallel_gensim(): function test_compute_models_parallel_gensim_multiple_docs (line 328) | def test_compute_models_parallel_gensim_multiple_docs(): function test_evaluation_sklearn_all_metrics (line 390) | def test_evaluation_sklearn_all_metrics(): function test_compute_models_parallel_sklearn (line 430) | def test_compute_models_parallel_sklearn(): function test_compute_models_parallel_sklearn_multiple_docs (line 445) | def test_compute_models_parallel_sklearn_multiple_docs(): function test_results_by_parameter_single_validation (line 509) | def test_results_by_parameter_single_validation(n_param_sets, n_params, ... FILE: tests/test_topicmod_model_io.py function test_save_load_ldamodel_pickle (line 16) | def test_save_load_ldamodel_pickle(): function test_ldamodel_top_topic_words (line 45) | def test_ldamodel_top_topic_words(topic_word, top_n): function test_ldamodel_top_word_topics (line 67) | def test_ldamodel_top_word_topics(topic_word, top_n): function test_ldamodel_top_doc_topics (line 88) | def test_ldamodel_top_doc_topics(doc_topic, top_n): function test_ldamodel_top_topic_docs (line 109) | def test_ldamodel_top_topic_docs(doc_topic, top_n): function test_ldamodel_full_topic_words (line 128) | def test_ldamodel_full_topic_words(topic_word): function test_ldamodel_full_doc_topics (line 143) | def test_ldamodel_full_doc_topics(doc_topic): function test_save_ldamodel_summary_to_excel (line 163) | def test_save_ldamodel_summary_to_excel(n_docs, n_topics, size_vocab, to... FILE: tests/test_topicmod_model_stats.py function test_top_n_from_distribution (line 17) | def test_top_n_from_distribution(n, distrib): function test_top_words_for_topics (line 40) | def test_top_words_for_topics(topic_word_distrib, vocab, top_n): function test_top_words_for_topics2 (line 66) | def test_top_words_for_topics2(): function test_get_marginal_topic_distrib (line 103) | def test_get_marginal_topic_distrib(dtm, n_topics): function test_get_marginal_word_distrib (line 127) | def test_get_marginal_word_distrib(dtm, n_topics): function test_get_word_distinctiveness (line 152) | def test_get_word_distinctiveness(dtm, n_topics): function test_get_word_saliency (line 177) | def test_get_word_saliency(dtm, n_topics): function test_get_most_or_least_salient_words (line 201) | def test_get_most_or_least_salient_words(dtm, n_topics, n_salient_words): function test_get_most_or_least_distinct_words (line 237) | def test_get_most_or_least_distinct_words(dtm, n_topics, n_distinct_words): function test_get_topic_word_relevance (line 273) | def test_get_topic_word_relevance(dtm, n_topics, lambda_): function test_get_most_or_least_relevant_words_for_topic (line 299) | def test_get_most_or_least_relevant_words_for_topic(dtm, n_topics, lambd... function test_generate_topic_labels_from_top_words (line 336) | def test_generate_topic_labels_from_top_words(dtm, n_topics, lambda_): function test_filter_topics (line 377) | def test_filter_topics(): function test_exclude_topics (line 461) | def test_exclude_topics(exclude, pass_topic_word, renormalize, return_ne... FILE: tests/test_topicmod_visualize.py function test_generate_wordclouds_for_topic_words (line 15) | def test_generate_wordclouds_for_topic_words(): function test_generate_wordclouds_for_document_topics (line 44) | def test_generate_wordclouds_for_document_topics(): function test_write_wordclouds_to_folder (line 75) | def test_write_wordclouds_to_folder(tmpdir): function test_plot_doc_topic_heatmap (line 104) | def test_plot_doc_topic_heatmap(doc_topic, make_topic_labels): function test_plot_topic_word_heatmap (line 125) | def test_plot_topic_word_heatmap(topic_word): FILE: tests/test_utils.py function test_enable_disable_logging (line 31) | def test_enable_disable_logging(caplog, level, fmt): function test_pickle_unpickle (line 80) | def test_pickle_unpickle(): function test_path_split (line 91) | def test_path_split(): function test_read_text_file (line 106) | def test_read_text_file(): function test_linebreaks_win2unix (line 119) | def test_linebreaks_win2unix(text): function test_empty_chararray (line 126) | def test_empty_chararray(): function test_as_chararray (line 136) | def test_as_chararray(x, as_numpy_array): function test_dict2df (line 154) | def test_dict2df(data, key_name, value_name, sort, asc): function test_applychain (line 197) | def test_applychain(expected, funcs, initial_arg): function test_flatten_list (line 212) | def test_flatten_list(l): function test_mat2d_window_from_indices (line 225) | def test_mat2d_window_from_indices(mat, n_row_indices, n_col_indices, co... function test_merge_dicts (line 272) | def test_merge_dicts(dicts, sort_keys, safe): function test_merge_sets (line 300) | def test_merge_sets(sets, safe): function test_sample_dict (line 317) | def test_sample_dict(d, n): function test_greedy_partitioning (line 333) | def test_greedy_partitioning(elems_dict, k): function test_combine_sparse_matrices_columnwise (line 355) | def test_combine_sparse_matrices_columnwise(): function test_split_func_args (line 467) | def test_split_func_args(testfn, testargs, expargs1, expargs2): FILE: tmtoolkit/__main__.py function _setup (line 44) | def _setup(args): function _help (line 114) | def _help(args): FILE: tmtoolkit/bow/bow_stats.py function doc_lengths (line 13) | def doc_lengths(dtm): function doc_frequencies (line 31) | def doc_frequencies(dtm, min_val=1, proportions=0): function word_cooccurrence (line 58) | def word_cooccurrence(dtm, min_val=1, proportions=0): function codoc_frequencies (line 66) | def codoc_frequencies(dtm, min_val=1, proportions=0): function term_frequencies (line 102) | def term_frequencies(dtm, proportions=0): function tf_binary (line 133) | def tf_binary(dtm): function tf_proportions (line 147) | def tf_proportions(dtm): function tf_log (line 173) | def tf_log(dtm, log_fn=np.log1p): function tf_double_norm (line 196) | def tf_double_norm(dtm, K=0.5): function idf (line 219) | def idf(dtm, smooth_log=1, smooth_df=1): function idf_probabilistic (line 248) | def idf_probabilistic(dtm, smooth=1): function tfidf (line 274) | def tfidf(dtm, tf_func=tf_proportions, idf_func=idf, **kwargs): function sorted_terms (line 314) | def sorted_terms(mat, vocab, lo_thresh=0, hi_tresh=None, top_n=None, asc... function sorted_terms_table (line 419) | def sorted_terms_table(mat, vocab, doc_labels, lo_thresh=0, hi_tresh=Non... FILE: tmtoolkit/bow/dtm.py function create_sparse_dtm (line 15) | def create_sparse_dtm(vocab, docs, n_unique_tokens, vocab_is_sorted=Fals... function dtm_to_dataframe (line 84) | def dtm_to_dataframe(dtm, doc_labels, vocab): function dtm_to_gensim_corpus (line 112) | def dtm_to_gensim_corpus(dtm): function gensim_corpus_to_dtm (line 141) | def gensim_corpus_to_dtm(corpus): function dtm_and_vocab_to_gensim_corpus_and_dict (line 157) | def dtm_and_vocab_to_gensim_corpus_and_dict(dtm, vocab, as_gensim_dictio... FILE: tmtoolkit/corpus/_common.py function simplified_pos (line 81) | def simplified_pos(pos: str, tagset: str = 'ud', default: str = '') -> str: FILE: tmtoolkit/corpus/_corpus.py class Corpus (line 33) | class Corpus: method __init__ (line 87) | def __init__(self, docs: Optional[Union[Dict[str, str], Sequence[Docum... method __str__ (line 300) | def __str__(self) -> str: method __repr__ (line 304) | def __repr__(self) -> str: method __len__ (line 314) | def __len__(self) -> int: method __getitem__ (line 322) | def __getitem__(self, k: Union[str, int, slice]) -> Union[Document, Li... method __setitem__ (line 341) | def __setitem__(self, doc_label: str, doc: Union[str, Doc, Document]): method __delitem__ (line 375) | def __delitem__(self, doc_label): method __iter__ (line 393) | def __iter__(self) -> Iterator[str]: method __contains__ (line 397) | def __contains__(self, doc_label) -> bool: method __copy__ (line 406) | def __copy__(self) -> Corpus: method __deepcopy__ (line 414) | def __deepcopy__(self, memodict=None) -> Corpus: method items (line 422) | def items(self) -> ItemsView[str, Document]: method keys (line 430) | def keys(self) -> KeysView[str]: method values (line 438) | def values(self) -> ValuesView[Document]: method get (line 446) | def get(self, *args) -> Document: method update (line 454) | def update(self, new_docs: Union[Dict[str, Union[str, Doc, Document]],... method uses_unigrams (line 491) | def uses_unigrams(self) -> bool: method spacy_token_attrs (line 496) | def spacy_token_attrs(self) -> Tuple[str, ...]: method token_attrs (line 503) | def token_attrs(self) -> Tuple[str, ...]: method custom_token_attrs_defaults (line 510) | def custom_token_attrs_defaults(self) -> Dict[str, Any]: method doc_attrs (line 515) | def doc_attrs(self) -> Tuple[str, ...]: method doc_attrs_defaults (line 520) | def doc_attrs_defaults(self) -> Dict[str, Any]: method ngrams (line 525) | def ngrams(self) -> int: method ngrams_join_str (line 530) | def ngrams_join_str(self) -> str: method language (line 535) | def language(self) -> str: method language_model (line 543) | def language_model(self) -> str: method has_sents (line 551) | def has_sents(self) -> bool: method doc_labels (line 556) | def doc_labels(self) -> List[str]: method n_docs (line 561) | def n_docs(self) -> int: method workers_docs (line 566) | def workers_docs(self) -> List[List[str]]: method max_workers (line 575) | def max_workers(self): method max_workers (line 580) | def max_workers(self, max_workers): method from_files (line 622) | def from_files(cls, files: Union[str, Collection[str], Dict[str, str]]... method from_folder (line 635) | def from_folder(cls, folder: str, **kwargs) -> Corpus: method from_tabular (line 648) | def from_tabular(cls, files: Union[str, Collection[str]], **kwargs) ->... method from_zip (line 663) | def from_zip(cls, zipfile: str, **kwargs) -> Corpus: method from_builtin_corpus (line 677) | def from_builtin_corpus(cls, corpus_label, **kwargs) -> Corpus: method _nlppipe (line 701) | def _nlppipe(self, docs: ValuesView[str]) -> Union[Iterator[Doc], Gene... method _init_bimaps (line 712) | def _init_bimaps(self): method _init_docs (line 720) | def _init_docs(self, docs: Dict[str, str]): method _init_document (line 750) | def _init_document(self, spacydoc: Doc, label: str): method _update_bimaps (line 779) | def _update_bimaps(self, which_docs: Union[str, Optional[Collection[st... method _update_workers_docs (line 844) | def _update_workers_docs(self, based_on_docs=None): method _serialize (line 859) | def _serialize(self, deepcopy_attrs: bool, store_nlp_instance_pointer:... method _deserialize (line 921) | def _deserialize(cls, data: Dict[str, Any]) -> Corpus: method _construct_from_func (line 960) | def _construct_from_func(cls, add_fn: Callable, *args, **kwargs) -> Co... FILE: tmtoolkit/corpus/_corpusfuncs.py class ParallelTask (line 61) | class ParallelTask: function _paralleltask (line 72) | def _paralleltask(corpus: Corpus, tokens: Dict[str, Any], force_serialpr... function parallelexec (line 81) | def parallelexec(collect_fn: Callable) -> Callable[[CorpusFunc], Callable]: function corpus_func_inplace_opt (line 144) | def corpus_func_inplace_opt(fn: Callable) -> Callable: function tabular_result_option (line 190) | def tabular_result_option(key: str, value: str) -> Callable: function corpus_func_update_bimaps (line 232) | def corpus_func_update_bimaps(which_attrs: Union[str, Optional[Collectio... function doc_tokens (line 269) | def doc_tokens(docs: Corpus, function doc_lengths (line 438) | def doc_lengths(docs: Corpus, select: Optional[Union[str, Collection[str... function doc_token_lengths (line 458) | def doc_token_lengths(docs: Corpus, select: Optional[Union[str, Collecti... function doc_num_sents (line 486) | def doc_num_sents(docs: Corpus, select: Optional[Union[str, Collection[s... function doc_sent_lengths (line 517) | def doc_sent_lengths(docs: Corpus, select: Optional[Union[str, Collectio... function doc_labels (line 544) | def doc_labels(docs: Corpus, sort: bool = True) -> List[str]: function doc_labels_sample (line 558) | def doc_labels_sample(docs: Corpus, n: int) -> Set[str]: function doc_texts (line 572) | def doc_texts(docs: Corpus, select: Optional[Union[str, Collection[str]]... function doc_frequencies (line 611) | def doc_frequencies(docs: Corpus, select: Optional[Union[str, Collection... function doc_vectors (line 671) | def doc_vectors(docs: Union[Corpus, Dict[str, Doc]], select: Optional[Un... function token_vectors (line 693) | def token_vectors(docs: Union[Corpus, Dict[str, Doc]], select: Optional[... function spacydocs (line 719) | def spacydocs(docs: Corpus, select: Optional[Union[str, Collection[str]]... function vocabulary (line 758) | def vocabulary(docs: Corpus, select: Optional[Union[str, Collection[str]... function vocabulary_counts (line 799) | def vocabulary_counts(docs: Corpus, select: Optional[Union[str, Collecti... function vocabulary_size (line 849) | def vocabulary_size(docs: Union[Corpus, Dict[str, List[str]]], select: O... function tokens_table (line 863) | def tokens_table(docs: Corpus, function corpus_tokens_flattened (line 982) | def corpus_tokens_flattened(docs: Corpus, select: Optional[Union[str, Co... function corpus_num_tokens (line 1022) | def corpus_num_tokens(docs: Corpus, select: Optional[Union[str, Collecti... function corpus_num_chars (line 1033) | def corpus_num_chars(docs: Corpus, select: Optional[Union[str, Collectio... function corpus_unique_chars (line 1044) | def corpus_unique_chars(docs: Corpus, select: Optional[Union[str, Collec... function corpus_collocations (line 1055) | def corpus_collocations(docs: Corpus, function corpus_summary (line 1139) | def corpus_summary(docs: Corpus, function print_summary (line 1196) | def print_summary(docs: Corpus, function dtm (line 1214) | def dtm(docs: Corpus, select: Optional[Union[str, Collection[str]]] = No... function ngrams (line 1288) | def ngrams(docs: Corpus, n: int, select: Optional[Union[str, Collection[... function kwic (line 1316) | def kwic(docs: Corpus, search_tokens: Any, context_size: Union[int, Tupl... function kwic_table (line 1409) | def kwic_table(docs: Corpus, search_tokens: Any, context_size: Union[int... function corpus_add_files (line 1489) | def corpus_add_files(docs: Corpus, files: Union[str, Collection[str], Di... function corpus_add_folder (line 1535) | def corpus_add_folder(docs: Corpus, folder: str, valid_extensions: Colle... function corpus_add_tabular (line 1614) | def corpus_add_tabular(docs: Corpus, files: Union[str, Collection[str]], function corpus_add_zip (line 1659) | def corpus_add_zip(docs: Corpus, zipfile: str, valid_extensions: Collect... function save_corpus_to_picklefile (line 1760) | def save_corpus_to_picklefile(docs: Corpus, picklefile: str) -> None: function load_corpus_from_picklefile (line 1774) | def load_corpus_from_picklefile(picklefile: str) -> Corpus: function load_corpus_from_tokens (line 1790) | def load_corpus_from_tokens(tokens: Dict[str, Any], function load_corpus_from_tokens_table (line 1831) | def load_corpus_from_tokens_table(tokens: pd.DataFrame, function serialize_corpus (line 1883) | def serialize_corpus(docs: Corpus, deepcopy_attrs: bool = True) -> Dict[... function deserialize_corpus (line 1896) | def deserialize_corpus(serialized_corpus_data: dict) -> Corpus: function set_document_attr (line 1912) | def set_document_attr(docs: Corpus, /, attrname: str, data: Dict[str, An... function remove_document_attr (line 1940) | def remove_document_attr(docs: Corpus, /, attrname: str, inplace: bool =... function set_token_attr (line 1964) | def set_token_attr(docs: Corpus, /, attrname: str, data: Dict[str, Any],... function remove_token_attr (line 2027) | def remove_token_attr(docs: Corpus, /, attrname: str, inplace: bool = Tr... function corpus_retokenize (line 2055) | def corpus_retokenize(docs: Corpus, collapse: Optional[str] = ' ', inpla... function transform_tokens (line 2098) | def transform_tokens(docs: Corpus, /, func: Callable, select: Optional[U... function to_lowercase (line 2148) | def to_lowercase(docs: Corpus, /, select: Optional[Union[str, Collection... function to_uppercase (line 2161) | def to_uppercase(docs: Corpus, /, select: Optional[Union[str, Collection... function remove_chars (line 2174) | def remove_chars(docs: Corpus, /, chars: Iterable[str], select: Optional... function remove_punctuation (line 2189) | def remove_punctuation(docs: Corpus, /, select: Optional[Union[str, Coll... function normalize_unicode (line 2204) | def normalize_unicode(docs: Corpus, /, select: Optional[Union[str, Colle... function simplify_unicode (line 2222) | def simplify_unicode(docs: Corpus, /, select: Optional[Union[str, Collec... function numbers_to_magnitudes (line 2248) | def numbers_to_magnitudes(docs: Corpus, /, select: Optional[Union[str, C... function lemmatize (line 2294) | def lemmatize(docs: Corpus, /, select: Optional[Union[str, Collection[st... function join_collocations_by_patterns (line 2324) | def join_collocations_by_patterns(docs: Corpus, /, patterns: Sequence[str], function join_collocations_by_statistic (line 2414) | def join_collocations_by_statistic(docs: Corpus, /, threshold: float, function filter_tokens_by_mask (line 2522) | def filter_tokens_by_mask(docs: Corpus, /, mask: Dict[str, Union[List[bo... function remove_tokens_by_mask (line 2570) | def remove_tokens_by_mask(docs: Corpus, /, mask: Dict[str, Union[List[bo... function filter_tokens (line 2587) | def filter_tokens(docs: Corpus, /, search_tokens: Any, by_attr: Optional... function remove_tokens (line 2634) | def remove_tokens(docs: Corpus, /, search_tokens: Any, by_attr: Optional... function filter_for_pos (line 2664) | def filter_for_pos(docs: Corpus, /, search_pos: Union[str, Collection[st... function filter_tokens_by_doc_frequency (line 2704) | def filter_tokens_by_doc_frequency(docs: Corpus, /, which: str, df_thres... function remove_common_tokens (line 2752) | def remove_common_tokens(docs: Corpus, /, df_threshold: Union[int, float... function remove_uncommon_tokens (line 2769) | def remove_uncommon_tokens(docs: Corpus, /, df_threshold: Union[int, flo... function filter_documents_by_mask (line 2788) | def filter_documents_by_mask(docs: Corpus, /, mask: Dict[str, bool], inv... function remove_documents_by_mask (line 2819) | def remove_documents_by_mask(docs: Corpus, /, mask: Dict[str, bool], inp... function find_documents (line 2835) | def find_documents(docs: Corpus, /, search_tokens: Any, by_attr: Optiona... function filter_documents (line 2886) | def filter_documents(docs: Corpus, /, search_tokens: Any, by_attr: Optio... function remove_documents (line 2935) | def remove_documents(docs: Corpus, /, search_tokens: Any, by_attr: Optio... function filter_documents_by_docattr (line 2967) | def filter_documents_by_docattr(docs: Corpus, /, search_tokens: Any, by_... function remove_documents_by_docattr (line 3005) | def remove_documents_by_docattr(docs: Corpus, /, search_tokens: Any, by_... function filter_documents_by_label (line 3033) | def filter_documents_by_label(docs: Corpus, /, search_tokens: Any, match... function remove_documents_by_label (line 3063) | def remove_documents_by_label(docs: Corpus, /, search_tokens: Any, match... function filter_documents_by_length (line 3091) | def filter_documents_by_length(docs: Corpus, /, relation: str, threshold... function remove_documents_by_length (line 3116) | def remove_documents_by_length(docs: Corpus, /, relation: str, threshold... function filter_clean_tokens (line 3132) | def filter_clean_tokens(docs: Corpus, /, function filter_tokens_with_kwic (line 3273) | def filter_tokens_with_kwic(docs: Corpus, /, search_tokens: Any, function corpus_ngramify (line 3330) | def corpus_ngramify(docs: Corpus, /, n: int, join_str: str = ' ', inplac... function corpus_sample (line 3347) | def corpus_sample(docs: Corpus, /, n: int, inplace: bool = True) -> Opti... function corpus_split_by_paragraph (line 3370) | def corpus_split_by_paragraph(docs: Corpus, /, paragraph_linebreaks: int... function corpus_split_by_token (line 3400) | def corpus_split_by_token(docs: Corpus, /, split: str, new_doc_label_fmt... function corpus_join_documents (line 3477) | def corpus_join_documents(docs: Corpus, /, join: Dict[str, Union[str, Li... function builtin_corpora_info (line 3610) | def builtin_corpora_info(with_paths: bool = False) -> Union[List[str], D... function _filter_documents (line 3637) | def _filter_documents(chunk, search_tokens, match_type, ignore_case, glo... function _build_kwic_parallel (line 3664) | def _build_kwic_parallel(docs, search_tokens, context_size, by_attr, mat... function _finalize_kwic_results (line 3745) | def _finalize_kwic_results(kwic_results, only_non_empty, glue, as_tables... function _create_embed_tokens_for_collocations (line 3807) | def _create_embed_tokens_for_collocations(docs: Corpus, embed_tokens_min... function _apply_collocations (line 3839) | def _apply_collocations(tokenmat: np.ndarray, function _comparison_operator_from_str (line 3884) | def _comparison_operator_from_str(which: str, common_alias=False, equal=... function _match_against (line 3910) | def _match_against(docs: Union[Corpus, Dict[str, Document]], by_attr: st... function _check_filter_args (line 3918) | def _check_filter_args(**kwargs): function _token_pattern_matches (line 3927) | def _token_pattern_matches(tokens: Dict[str, List[Any]], search_tokens: ... function _load_text_from_files (line 3951) | def _load_text_from_files(files: Collection[str], function _load_text_from_tabular_files (line 4006) | def _load_text_from_tabular_files(files: Union[str, Collection[str]], function _spacydocs_for_vectors (line 4088) | def _spacydocs_for_vectors(docs, select, collapse): function _single_str_to_set (line 4107) | def _single_str_to_set(select: Optional[Union[str, Collection[str]]], ch... FILE: tmtoolkit/corpus/_document.py class Document (line 23) | class Document: method __init__ (line 36) | def __init__(self, bimaps: Optional[Dict[str, bidict]], label: str, ha... method __len__ (line 85) | def __len__(self) -> int: method __repr__ (line 93) | def __repr__(self) -> str: method __str__ (line 102) | def __str__(self) -> str: method __getitem__ (line 110) | def __getitem__(self, attr: str) -> list: method __setitem__ (line 122) | def __setitem__(self, attr: str, values: Union[Sequence, np.ndarray]): method __delitem__ (line 158) | def __delitem__(self, attr: str): method __copy__ (line 172) | def __copy__(self) -> Document: method label (line 181) | def label(self) -> str: method has_sents (line 186) | def has_sents(self) -> bool: method token_attrs (line 195) | def token_attrs(self) -> List[str]: method _serialize (line 203) | def _serialize(self, store_bimaps_pointer: bool) -> Dict[str, Any]: method _deserialize (line 220) | def _deserialize(cls, data: Dict[str, Any], **kwargs) -> Document: function document_token_attr (line 246) | def document_token_attr(d: Document, function document_from_attrs (line 394) | def document_from_attrs(bimaps: Dict[str, bidict], function _chop_along_sentences (line 533) | def _chop_along_sentences(tok: Union[list, np.ndarray], FILE: tmtoolkit/corpus/_nltk_extras.py function stem (line 14) | def stem(docs: Corpus, /, language: Optional[str] = None, FILE: tmtoolkit/corpus/visualize.py function plot_doc_lengths_hist (line 27) | def plot_doc_lengths_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_vocab_counts_hist (line 66) | def plot_vocab_counts_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_doc_frequencies_hist (line 106) | def plot_doc_frequencies_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_num_sents_hist (line 151) | def plot_num_sents_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_sent_lengths_hist (line 189) | def plot_sent_lengths_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_token_lengths_hist (line 227) | def plot_token_lengths_hist(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function plot_num_sents_vs_sent_length (line 265) | def plot_num_sents_vs_sent_length(fig: plt.Figure, ax: plt.Axes, docs: C... function plot_ranked_vocab_counts (line 339) | def plot_ranked_vocab_counts(fig: plt.Figure, ax: plt.Axes, docs: Corpus, function _add_axis_scale_info (line 443) | def _add_axis_scale_info(axislbl: str, log: bool): function _plot_hist (line 450) | def _plot_hist(fig: plt.Figure, ax: plt.Axes, x: np.ndarray, FILE: tmtoolkit/tokenseq.py function numbertoken_to_magnitude (line 35) | def numbertoken_to_magnitude(numbertoken: str, char: str = '0', firstcha... function simplify_unicode_chars (line 90) | def simplify_unicode_chars(token: str, method: str = 'icu', ascii_encodi... function strip_tags (line 126) | def strip_tags(value: str) -> str: function unique_chars (line 151) | def unique_chars(tokens: Iterable[str]) -> Set[str]: function token_lengths (line 164) | def token_lengths(tokens: Union[Iterable[str], np.ndarray]) -> List[int]: function collapse_tokens (line 174) | def collapse_tokens(tokens: Union[Iterable[str], np.ndarray], collapse: ... function pmi (line 186) | def pmi(x: np.ndarray, y: np.ndarray, xy: np.ndarray, n_total: Optional[... function simple_collocation_counts (line 234) | def simple_collocation_counts(x: Optional[np.ndarray], y: Optional[np.nd... function token_collocations (line 248) | def token_collocations(sentences: List[List[StrOrInt]], threshold: Optio... function token_match (line 357) | def token_match(pattern: Any, tokens: Union[List[str], np.ndarray], function token_match_multi_pattern (line 422) | def token_match_multi_pattern(search_tokens: Any, tokens: Union[List[str... function token_match_subsequent (line 452) | def token_match_subsequent(patterns: Sequence, tokens: Union[list, np.nd... function token_join_subsequent (line 524) | def token_join_subsequent(tokens: Union[List[str], np.ndarray], matches:... function token_ngrams (line 622) | def token_ngrams(tokens: Sequence, n: int, join: bool = True, join_str: ... function index_windows_around_matches (line 693) | def index_windows_around_matches(matches: np.ndarray, left: int, right: ... class _MLStripper (line 757) | class _MLStripper(HTMLParser): method __init__ (line 761) | def __init__(self): method handle_data (line 766) | def handle_data(self, d): method get_data (line 769) | def get_data(self): function _strip_once (line 773) | def _strip_once(value): FILE: tmtoolkit/topicmod/_eval_tools.py function split_dtm_for_cross_validation (line 11) | def split_dtm_for_cross_validation(dtm, n_folds, shuffle_docs=True): class FakedGensimDict (line 59) | class FakedGensimDict: method __init__ (line 63) | def __init__(self, data): method from_vocab (line 71) | def from_vocab(vocab): method __iter__ (line 74) | def __iter__(self): method keys (line 78) | def keys(self): FILE: tmtoolkit/topicmod/evaluate.py function metric_held_out_documents_wallach09 (line 25) | def metric_held_out_documents_wallach09(dtm_test, theta_test, phi_train,... function metric_cao_juan_2009 (line 140) | def metric_cao_juan_2009(topic_word_distrib): function metric_arun_2010 (line 158) | def metric_arun_2010(topic_word_distrib, doc_topic_distrib, doc_lengths): function metric_griffiths_2004 (line 196) | def metric_griffiths_2004(logliks): function metric_coherence_mimno_2011 (line 226) | def metric_coherence_mimno_2011(topic_word_distrib, dtm, top_n=20, eps=1... function metric_coherence_gensim (line 302) | def metric_coherence_gensim(measure, topic_word_distrib=None, gensim_mod... function results_by_parameter (line 427) | def results_by_parameter(res, param, sort_by=None, sort_desc=False): FILE: tmtoolkit/topicmod/model_io.py function ldamodel_top_topic_words (line 20) | def ldamodel_top_topic_words(topic_word_distrib, vocab, top_n=10, val_fm... function ldamodel_top_word_topics (line 55) | def ldamodel_top_word_topics(topic_word_distrib, vocab, top_n=10, val_fm... function ldamodel_top_doc_topics (line 91) | def ldamodel_top_doc_topics(doc_topic_distrib, doc_labels, top_n=3, val_... function ldamodel_top_topic_docs (line 127) | def ldamodel_top_topic_docs(doc_topic_distrib, doc_labels, top_n=3, val_... function ldamodel_full_topic_words (line 164) | def ldamodel_full_topic_words(topic_word_distrib, vocab, colname_rowinde... function ldamodel_full_doc_topics (line 191) | def ldamodel_full_doc_topics(doc_topic_distrib, doc_labels, colname_rowi... function print_ldamodel_distribution (line 219) | def print_ldamodel_distribution(distrib, row_labels, val_labels, top_n=10): function print_ldamodel_topic_words (line 245) | def print_ldamodel_topic_words(topic_word_distrib, vocab, top_n=10, row_... function print_ldamodel_doc_topics (line 262) | def print_ldamodel_doc_topics(doc_topic_distrib, doc_labels, top_n=3, va... function save_ldamodel_summary_to_excel (line 280) | def save_ldamodel_summary_to_excel(excel_file, topic_word_distrib, doc_t... function save_ldamodel_to_pickle (line 382) | def save_ldamodel_to_pickle(picklefile, model, vocab, doc_labels, dtm=No... function load_ldamodel_from_pickle (line 398) | def load_ldamodel_from_pickle(picklefile, **kwargs): FILE: tmtoolkit/topicmod/model_stats.py function marginal_topic_distrib (line 24) | def marginal_topic_distrib(doc_topic_distrib, doc_lengths): function marginal_word_distrib (line 39) | def marginal_word_distrib(topic_word_distrib, p_t): function most_probable_words (line 52) | def most_probable_words(vocab, topic_word_distrib, doc_topic_distrib, do... function least_probable_words (line 71) | def least_probable_words(vocab, topic_word_distrib, doc_topic_distrib, d... function _words_by_marginal_word_prob (line 90) | def _words_by_marginal_word_prob(vocab, topic_word_distrib, doc_topic_di... function _words_by_score (line 100) | def _words_by_score(words, score, least_to_most, n=None): function word_saliency (line 126) | def word_saliency(topic_word_distrib, doc_topic_distrib, doc_lengths): function _words_by_salience_score (line 142) | def _words_by_salience_score(vocab, topic_word_distrib, doc_topic_distri... function most_salient_words (line 148) | def most_salient_words(vocab, topic_word_distrib, doc_topic_distrib, doc... function least_salient_words (line 166) | def least_salient_words(vocab, topic_word_distrib, doc_topic_distrib, do... function word_distinctiveness (line 187) | def word_distinctiveness(topic_word_distrib, p_t): function _words_by_distinctiveness_score (line 205) | def _words_by_distinctiveness_score(vocab, topic_word_distrib, doc_topic... function most_distinct_words (line 214) | def most_distinct_words(vocab, topic_word_distrib, doc_topic_distrib, do... function least_distinct_words (line 232) | def least_distinct_words(vocab, topic_word_distrib, doc_topic_distrib, d... function topic_word_relevance (line 254) | def topic_word_relevance(topic_word_distrib, doc_topic_distrib, doc_leng... function _check_relevant_words_for_topic_args (line 279) | def _check_relevant_words_for_topic_args(vocab, rel_mat, topic): function most_relevant_words_for_topic (line 290) | def most_relevant_words_for_topic(vocab, rel_mat, topic, n=None): function least_relevant_words_for_topic (line 307) | def least_relevant_words_for_topic(vocab, rel_mat, topic, n=None): function generate_topic_labels_from_top_words (line 327) | def generate_topic_labels_from_top_words(topic_word_distrib, doc_topic_d... function top_n_from_distribution (line 384) | def top_n_from_distribution(distrib, top_n=10, row_labels=None, col_labe... function top_words_for_topics (line 454) | def top_words_for_topics(topic_word_distrib, top_n=None, vocab=None, ret... function _join_value_and_label_dfs (line 514) | def _join_value_and_label_dfs(vals, labels, top_n, val_fmt=None, row_lab... function filter_topics (line 555) | def filter_topics(search_pattern, vocab, topic_word_distrib, top_n=None,... function exclude_topics (line 647) | def exclude_topics(excl_topic_indices, doc_topic_distrib, topic_word_dis... FILE: tmtoolkit/topicmod/parallel.py class MultiprocModelsRunner (line 27) | class MultiprocModelsRunner: method __init__ (line 32) | def __init__(self, worker_class, data, varying_parameters=None, consta... method __del__ (line 78) | def __del__(self): method shutdown_workers (line 82) | def shutdown_workers(self): method run (line 99) | def run(self): method _setup_workers (line 173) | def _setup_workers(self, worker_class): method _new_worker (line 187) | def _new_worker(self, worker_class, i, task_queue, results_queue, data): method _prepare_data (line 192) | def _prepare_data(data): class MultiprocModelsWorkerABC (line 226) | class MultiprocModelsWorkerABC(mp.Process): method __init__ (line 233) | def __init__(self, worker_id, tasks_queue, results_queue, data, method run (line 270) | def run(self): method fit_model (line 290) | def fit_model(self, data, params): method send_results (line 300) | def send_results(self, doc, params, results): class MultiprocEvaluationRunner (line 314) | class MultiprocEvaluationRunner(MultiprocModelsRunner): method __init__ (line 319) | def __init__(self, worker_class, available_metrics, data, varying_para... method _new_worker (line 369) | def _new_worker(self, worker_class, i, task_queue, results_queue, data): class MultiprocEvaluationWorkerABC (line 375) | class MultiprocEvaluationWorkerABC(MultiprocModelsWorkerABC): method __init__ (line 380) | def __init__(self, worker_id, function _merge_params (line 414) | def _merge_params(varying_parameters, constant_parameters): FILE: tmtoolkit/topicmod/tm_gensim.py class MultiprocModelsWorkerGensim (line 44) | class MultiprocModelsWorkerGensim(MultiprocModelsWorkerABC): method fit_model (line 51) | def fit_model(self, data, params, return_data=False): class MultiprocEvaluationWorkerGensim (line 77) | class MultiprocEvaluationWorkerGensim(MultiprocEvaluationWorkerABC, Mult... method fit_model (line 82) | def fit_model(self, data, params, return_data=False): function compute_models_parallel (line 154) | def compute_models_parallel(data, varying_parameters=None, constant_para... function evaluate_topic_models (line 182) | def evaluate_topic_models(data, varying_parameters, constant_parameters=... function _get_model_perplexity (line 225) | def _get_model_perplexity(model, eval_corpus): FILE: tmtoolkit/topicmod/tm_lda.py class MultiprocModelsWorkerLDA (line 57) | class MultiprocModelsWorkerLDA(MultiprocModelsWorkerABC): method fit_model (line 64) | def fit_model(self, data, params): class MultiprocEvaluationWorkerLDA (line 72) | class MultiprocEvaluationWorkerLDA(MultiprocEvaluationWorkerABC, Multipr... method fit_model (line 77) | def fit_model(self, data, params): function compute_models_parallel (line 179) | def compute_models_parallel(data, varying_parameters=None, constant_para... function evaluate_topic_models (line 207) | def evaluate_topic_models(data, varying_parameters, constant_parameters=... FILE: tmtoolkit/topicmod/tm_sklearn.py class MultiprocModelsWorkerSklearn (line 63) | class MultiprocModelsWorkerSklearn(MultiprocModelsWorkerABC): method fit_model (line 70) | def fit_model(self, data, params, return_data=False): class MultiprocEvaluationWorkerSklearn (line 88) | class MultiprocEvaluationWorkerSklearn(MultiprocEvaluationWorkerABC, Mul... method fit_model (line 93) | def fit_model(self, data, params, return_data=False): function compute_models_parallel (line 182) | def compute_models_parallel(data, varying_parameters=None, constant_para... function evaluate_topic_models (line 211) | def evaluate_topic_models(data, varying_parameters, constant_parameters=... function _get_normalized_topic_word_distrib (line 254) | def _get_normalized_topic_word_distrib(lda_instance): FILE: tmtoolkit/topicmod/visualize.py function _wordcloud_color_func_black (line 26) | def _wordcloud_color_func_black(word, font_size, position, orientation, ... function write_wordclouds_to_folder (line 40) | def write_wordclouds_to_folder(wordclouds, folder, file_name_fmt='{label... function generate_wordclouds_for_topic_words (line 61) | def generate_wordclouds_for_topic_words(topic_word_distrib, vocab, top_n... function generate_wordclouds_for_document_topics (line 85) | def generate_wordclouds_for_document_topics(doc_topic_distrib, doc_label... function generate_wordclouds_from_distribution (line 110) | def generate_wordclouds_from_distribution(distrib, row_labels, val_label... function generate_wordcloud_from_probabilities_and_words (line 152) | def generate_wordcloud_from_probabilities_and_words(prob, words, return_... function generate_wordcloud_from_weights (line 180) | def generate_wordcloud_from_weights(weights, return_image=True, wordclou... function plot_topic_word_ranked_prob (line 215) | def plot_topic_word_ranked_prob(fig, ax, topic_word_distrib, n, function plot_doc_topic_ranked_prob (line 244) | def plot_doc_topic_ranked_prob(fig, ax, doc_topic_distrib, n, function plot_prob_distrib_ranked_prob (line 273) | def plot_prob_distrib_ranked_prob(fig, ax, data, x_limit, log_scale=True... function plot_doc_topic_heatmap (line 372) | def plot_doc_topic_heatmap(fig, ax, doc_topic_distrib, doc_labels, topic... function plot_topic_word_heatmap (line 453) | def plot_topic_word_heatmap(fig, ax, topic_word_distrib, vocab, topic_la... function plot_heatmap (line 533) | def plot_heatmap(fig, ax, data, function plot_eval_results (line 627) | def plot_eval_results(eval_results, metric=None, param=None, function parameters_for_ldavis (line 844) | def parameters_for_ldavis(topic_word_distrib, doc_topic_distrib, dtm, vo... FILE: tmtoolkit/utils.py function enable_logging (line 30) | def enable_logging(level: int = logging.INFO, fmt: str = '%(asctime)s:%(... function set_logging_level (line 67) | def set_logging_level(level: int) -> None: function disable_logging (line 81) | def disable_logging() -> None: function pickle_data (line 95) | def pickle_data(data: Any, picklefile: str, **kwargs) -> None: function unpickle_file (line 111) | def unpickle_file(picklefile: str, **kwargs) -> Any: function path_split (line 129) | def path_split(path: str, base: Optional[List[str]] = None) -> List[str]: function read_text_file (line 157) | def read_text_file(fpath: str, encoding: str, read_size: int = -1, force... function linebreaks_win2unix (line 176) | def linebreaks_win2unix(text: str) -> str: function empty_chararray (line 192) | def empty_chararray() -> np.ndarray: function as_chararray (line 201) | def as_chararray(x: Union[np.ndarray, Sequence]) -> np.ndarray: function mat2d_window_from_indices (line 222) | def mat2d_window_from_indices(mat: np.ndarray, function combine_sparse_matrices_columnwise (line 261) | def combine_sparse_matrices_columnwise(matrices: Sequence, function dict2df (line 385) | def dict2df(data: dict, key_name: str = 'key', value_name: str = 'value'... function applychain (line 417) | def applychain(funcs: Iterable[Callable], initial_arg: Any) -> Any: function flatten_list (line 435) | def flatten_list(l: Iterable[Iterable]) -> list: function _merge_updatable (line 452) | def _merge_updatable(containers: Sequence, init_fn: Callable, safe: bool... function merge_dicts (line 463) | def merge_dicts(dicts: Sequence[dict], sort_keys: bool = False, safe: bo... function merge_sets (line 480) | def merge_sets(sets: Sequence[set], safe: bool = False) -> set: function sample_dict (line 491) | def sample_dict(d: dict, n: int) -> dict: function greedy_partitioning (line 502) | def greedy_partitioning(elems_dict: Dict[str, Union[int, float]], k: int... function argsort (line 548) | def argsort(seq: Sequence) -> List[int]: function split_func_args (line 558) | def split_func_args(fn: Callable, args: Dict[str, Any]) -> Tuple[Dict[st...