gitextract_1gc608id/

├── .gitignore
├── README.md
├── data/
│   ├── brown_dict/
│   │   ├── brown_dict.csv
│   │   ├── brown_dict_updated.csv
│   │   ├── brown_tags.csv
│   │   ├── np_tags.csv
│   │   ├── pos_trans.csv
│   │   └── ptb_tags.csv
│   └── entity-graph/
│       ├── db-acronyms.txt
│       ├── db-article.txt
│       ├── gpe_syns-updated.csv
│       ├── org_syns-updated.csv
│       └── person_syns-updated.csv
├── nbproject/
│   ├── private/
│   │   └── private.xml
│   ├── project.properties
│   └── project.xml
└── src/
    ├── book/
    │   ├── ch02.py
    │   ├── ch02_ex.py
    │   ├── ch03.py
    │   ├── ch03_ex.py
    │   ├── ch04_ex.py
    │   ├── ch05.py
    │   ├── ch05_ex.py
    │   ├── ch06.py
    │   ├── ch07.py
    │   ├── ch07_ex.py
    │   ├── ch08.py
    │   ├── ch09.py
    │   └── ch10.py
    ├── brown_dict/
    │   ├── dict_build.py
    │   ├── phrase_seqs.py
    │   └── predict.py
    ├── cener/
    │   ├── bootstrap.py
    │   ├── ce_phrases.txt
    │   ├── cener.py
    │   ├── cener_lib.py
    │   ├── cnet_reviews.txt
    │   ├── cnet_reviews_sents.txt
    │   └── test.txt
    ├── docsim/
    │   ├── blogdoctest.py
    │   ├── docsim.py
    │   ├── scam_dist.py
    │   └── sugar-coffee-cocoa-docs.txt
    ├── drug_ner/
    │   ├── apply_model.py
    │   ├── apply_regex_model.py
    │   ├── co_train.py
    │   ├── drug_ner_utils.py
    │   ├── ngram_convert.py
    │   └── parse_drugbank.py
    ├── entity-graph/
    │   ├── 01-preprocess-data.py
    │   ├── 02-find-entities.py
    │   ├── 03-cluster-entity-mentions.ipynb
    │   ├── 04-generate-entity-sets.py
    │   ├── 05-find-corefs.py
    │   ├── 06-find-matches.py
    │   ├── 07-create-graphs.py
    │   └── 08-explore-graph.ipynb
    ├── genetagger/
    │   ├── file_reformatter.py
    │   ├── gene.test
    │   ├── gene.train
    │   ├── gene.validate
    │   └── hmm_gene_ner.py
    ├── hangman/
    │   ├── game.py
    │   └── gamestats.py
    ├── langmodel/
    │   ├── med_lang_model.py
    │   ├── old_med_lang_model.py
    │   └── sentences.test
    ├── medorleg/
    │   ├── README.md
    │   ├── db_loader.py
    │   ├── eval_model.py
    │   ├── model_params.py
    │   ├── ngram_counting_job.py
    │   ├── preprocess.py
    │   ├── regression_data.py
    │   └── testset_splitter.py
    ├── medorleg2/
    │   ├── arffwriter.py
    │   ├── arffwriter_test.py
    │   ├── classify.py
    │   └── preprocess.py
    ├── phrases/
    │   ├── interesting_phrases.py
    │   └── preprocess.py
    ├── sameword/
    │   └── same_word_finder.py
    ├── semantic/
    │   └── short_sentence_similarity.py
    ├── similar-tweets-nmslib/
    │   ├── 01-load-sqlite3.py
    │   ├── 02-generate-vectors.py
    │   ├── 03-query-times.py
    │   ├── 04-chart-times.py
    │   └── README.md
    ├── stlclust/
    │   ├── cluster_titles.py
    │   ├── extract_stl.py
    │   └── fuzz_similarity.py
    └── topicmodel/
        ├── bok_model.py
        ├── bow_model.py
        ├── gensim_preprocess.py
        ├── gensim_word2vec.py
        ├── kea_preprocess.py
        ├── keywords_merge.py
        ├── lda_model.py
        ├── lsi_model.py
        ├── num_topics.py
        ├── viz_doctopic_distrib.py
        ├── viz_topics_scatter.py
        ├── viz_topics_wordcloud.py
        └── word2vec_cluster_plot.py