Repository: zhangxiangxiao/glyph Branch: master Commit: df6ef3262156 Files: 833 Total size: 789.9 KB Directory structure: gitextract_d_gx1n5i/ ├── LICENSE ├── README.md ├── data/ │ ├── 11st/ │ │ ├── construct_rr.py │ │ ├── create_post.py │ │ ├── create_review.py │ │ ├── segment_rr_word.lua │ │ └── segment_word.py │ ├── README.md │ ├── chinanews/ │ │ └── construct_topic.py │ ├── data/ │ │ └── README.txt │ ├── dianping/ │ │ ├── combine_gram_count.lua │ │ ├── construct_charbag.lua │ │ ├── construct_chargram.lua │ │ ├── construct_chartoken.lua │ │ ├── construct_code.lua │ │ ├── construct_pinyin.py │ │ ├── construct_reviews.lua │ │ ├── construct_string.lua │ │ ├── construct_tfidf.lua │ │ ├── construct_word.lua │ │ ├── construct_wordbag.lua │ │ ├── construct_wordgram.lua │ │ ├── construct_wordtoken.lua │ │ ├── convert_string_code.lua │ │ ├── count_chargram.lua │ │ ├── count_wordgram.lua │ │ ├── limit_code.lua │ │ ├── limit_csvlines.sh │ │ ├── queue.lua │ │ ├── remove_duplication.py │ │ ├── remove_null.sh │ │ ├── segment_roman_word.lua │ │ ├── segment_word.py │ │ ├── select_data.lua │ │ ├── shuffle_lines.sh │ │ ├── sort_gram_count.sh │ │ ├── sort_gram_list.sh │ │ ├── split_lines.sh │ │ └── split_train.lua │ ├── ifeng/ │ │ └── construct_topic.py │ ├── jd/ │ │ ├── count_data.lua │ │ ├── create_comment.py │ │ ├── limit_length.lua │ │ └── sort_data.sh │ ├── joint/ │ │ ├── combine_word.lua │ │ └── combine_word_list.lua │ ├── nytimes/ │ │ ├── construct_topic.py │ │ └── count_class.lua │ └── rakuten/ │ ├── construct_hepburn.py │ ├── create_review.py │ └── segment_word.py ├── doc/ │ └── dianping.md ├── embednet/ │ ├── archive/ │ │ ├── 11stbinary_temporal12length512feature256.sh │ │ ├── 11stbinary_temporal12length512feature256byte.sh │ │ ├── 11stbinary_temporal12length512feature256roman.sh │ │ ├── 11stbinary_temporal12length512feature256romanword.sh │ │ ├── 11stbinary_temporal12length512feature256word.sh │ │ ├── 11stbinary_temporal8length486feature256.sh │ │ ├── 
11stbinary_temporal8length486feature256byte.sh │ │ ├── 11stbinary_temporal8length486feature256roman.sh │ │ ├── 11stbinary_temporal8length486feature256romanword.sh │ │ ├── 11stbinary_temporal8length486feature256word.sh │ │ ├── 11stfull_temporal12length512feature256.sh │ │ ├── 11stfull_temporal12length512feature256byte.sh │ │ ├── 11stfull_temporal12length512feature256roman.sh │ │ ├── 11stfull_temporal12length512feature256romanword.sh │ │ ├── 11stfull_temporal12length512feature256word.sh │ │ ├── 11stfull_temporal8length486feature256.sh │ │ ├── 11stfull_temporal8length486feature256byte.sh │ │ ├── 11stfull_temporal8length486feature256roman.sh │ │ ├── 11stfull_temporal8length486feature256romanword.sh │ │ ├── 11stfull_temporal8length486feature256word.sh │ │ ├── amazonbinary_temporal12length512feature256.sh │ │ ├── amazonbinary_temporal12length512feature256word.sh │ │ ├── amazonbinary_temporal8length486feature256.sh │ │ ├── amazonbinary_temporal8length486feature256word.sh │ │ ├── amazonfull_temporal12length512feature256.sh │ │ ├── amazonfull_temporal12length512feature256word.sh │ │ ├── amazonfull_temporal8length486feature256.sh │ │ ├── amazonfull_temporal8length486feature256word.sh │ │ ├── chinanews_temporal12length512feature256.sh │ │ ├── chinanews_temporal12length512feature256byte.sh │ │ ├── chinanews_temporal12length512feature256roman.sh │ │ ├── chinanews_temporal12length512feature256romanword.sh │ │ ├── chinanews_temporal12length512feature256word.sh │ │ ├── chinanews_temporal8length486feature256.sh │ │ ├── chinanews_temporal8length486feature256byte.sh │ │ ├── chinanews_temporal8length486feature256roman.sh │ │ ├── chinanews_temporal8length486feature256romanword.sh │ │ ├── chinanews_temporal8length486feature256word.sh │ │ ├── dianping_temporal12length512feature256.sh │ │ ├── dianping_temporal12length512feature256byte.sh │ │ ├── dianping_temporal12length512feature256roman.sh │ │ ├── dianping_temporal12length512feature256romanword.sh │ │ ├── 
dianping_temporal12length512feature256word.sh │ │ ├── dianping_temporal8length486feature256.sh │ │ ├── dianping_temporal8length486feature256byte.sh │ │ ├── dianping_temporal8length486feature256roman.sh │ │ ├── dianping_temporal8length486feature256romanword.sh │ │ ├── dianping_temporal8length486feature256word.sh │ │ ├── ifeng_temporal12length512feature256.sh │ │ ├── ifeng_temporal12length512feature256byte.sh │ │ ├── ifeng_temporal12length512feature256roman.sh │ │ ├── ifeng_temporal12length512feature256romanword.sh │ │ ├── ifeng_temporal12length512feature256word.sh │ │ ├── ifeng_temporal8length486feature256.sh │ │ ├── ifeng_temporal8length486feature256byte.sh │ │ ├── ifeng_temporal8length486feature256roman.sh │ │ ├── ifeng_temporal8length486feature256romanword.sh │ │ ├── ifeng_temporal8length486feature256word.sh │ │ ├── jdbinary_temporal12length512feature256.sh │ │ ├── jdbinary_temporal12length512feature256byte.sh │ │ ├── jdbinary_temporal12length512feature256roman.sh │ │ ├── jdbinary_temporal12length512feature256romanword.sh │ │ ├── jdbinary_temporal12length512feature256word.sh │ │ ├── jdbinary_temporal8length486feature256.sh │ │ ├── jdbinary_temporal8length486feature256byte.sh │ │ ├── jdbinary_temporal8length486feature256roman.sh │ │ ├── jdbinary_temporal8length486feature256romanword.sh │ │ ├── jdbinary_temporal8length486feature256word.sh │ │ ├── jdfull_temporal12length512feature256.sh │ │ ├── jdfull_temporal12length512feature256byte.sh │ │ ├── jdfull_temporal12length512feature256roman.sh │ │ ├── jdfull_temporal12length512feature256romanword.sh │ │ ├── jdfull_temporal12length512feature256word.sh │ │ ├── jdfull_temporal8length486feature256.sh │ │ ├── jdfull_temporal8length486feature256byte.sh │ │ ├── jdfull_temporal8length486feature256roman.sh │ │ ├── jdfull_temporal8length486feature256romanword.sh │ │ ├── jdfull_temporal8length486feature256word.sh │ │ ├── jointbinary_temporal12length512feature256.sh │ │ ├── jointbinary_temporal12length512feature256byte.sh │ │ ├── 
jointbinary_temporal12length512feature256roman.sh │ │ ├── jointbinary_temporal12length512feature256romanword.sh │ │ ├── jointbinary_temporal12length512feature256word.sh │ │ ├── jointbinary_temporal8length486feature256.sh │ │ ├── jointbinary_temporal8length486feature256byte.sh │ │ ├── jointbinary_temporal8length486feature256roman.sh │ │ ├── jointbinary_temporal8length486feature256romanword.sh │ │ ├── jointbinary_temporal8length486feature256word.sh │ │ ├── jointfull_temporal12length512feature256.sh │ │ ├── jointfull_temporal12length512feature256byte.sh │ │ ├── jointfull_temporal12length512feature256roman.sh │ │ ├── jointfull_temporal12length512feature256romanword.sh │ │ ├── jointfull_temporal12length512feature256word.sh │ │ ├── jointfull_temporal8length486feature256.sh │ │ ├── jointfull_temporal8length486feature256byte.sh │ │ ├── jointfull_temporal8length486feature256roman.sh │ │ ├── jointfull_temporal8length486feature256romanword.sh │ │ ├── jointfull_temporal8length486feature256word.sh │ │ ├── nytimes_temporal12length512feature256.sh │ │ ├── nytimes_temporal12length512feature256word.sh │ │ ├── nytimes_temporal8length486feature256.sh │ │ ├── nytimes_temporal8length486feature256word.sh │ │ ├── rakutenbinary_temporal12length512feature256.sh │ │ ├── rakutenbinary_temporal12length512feature256byte.sh │ │ ├── rakutenbinary_temporal12length512feature256roman.sh │ │ ├── rakutenbinary_temporal12length512feature256romanword.sh │ │ ├── rakutenbinary_temporal12length512feature256word.sh │ │ ├── rakutenbinary_temporal8length486feature256.sh │ │ ├── rakutenbinary_temporal8length486feature256byte.sh │ │ ├── rakutenbinary_temporal8length486feature256roman.sh │ │ ├── rakutenbinary_temporal8length486feature256romanword.sh │ │ ├── rakutenbinary_temporal8length486feature256word.sh │ │ ├── rakutenfull_temporal12length512feature256.sh │ │ ├── rakutenfull_temporal12length512feature256byte.sh │ │ ├── rakutenfull_temporal12length512feature256roman.sh │ │ ├── 
rakutenfull_temporal12length512feature256romanword.sh │ │ ├── rakutenfull_temporal12length512feature256word.sh │ │ ├── rakutenfull_temporal8length486feature256.sh │ │ ├── rakutenfull_temporal8length486feature256byte.sh │ │ ├── rakutenfull_temporal8length486feature256roman.sh │ │ ├── rakutenfull_temporal8length486feature256romanword.sh │ │ └── rakutenfull_temporal8length486feature256word.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── unittest/ │ │ ├── data.lua │ │ ├── driver.lua │ │ ├── model.lua │ │ ├── model_cudnn.lua │ │ ├── model_cunn.lua │ │ ├── test.lua │ │ ├── test_cuda.lua │ │ ├── train.lua │ │ └── train_cuda.lua │ └── visualizer.lua ├── fasttext/ │ └── archive/ │ ├── 11stbinary_charbigram.sh │ ├── 11stbinary_charbigram_evaluation.sh │ ├── 11stbinary_charbigram_tuned.sh │ ├── 11stbinary_charpentagram.sh │ ├── 11stbinary_charpentagram_evaluation.sh │ ├── 11stbinary_charpentagram_tuned.sh │ ├── 11stbinary_charunigram.sh │ ├── 11stbinary_charunigram_evaluation.sh │ ├── 11stbinary_charunigram_tuned.sh │ ├── 11stbinary_wordbigram.sh │ ├── 11stbinary_wordbigram_evaluation.sh │ ├── 11stbinary_wordbigram_tuned.sh │ ├── 11stbinary_wordbigramroman.sh │ ├── 11stbinary_wordbigramroman_evaluation.sh │ ├── 11stbinary_wordbigramroman_tuned.sh │ ├── 11stbinary_wordpentagram.sh │ ├── 11stbinary_wordpentagram_evaluation.sh │ ├── 11stbinary_wordpentagram_tuned.sh │ ├── 11stbinary_wordpentagramroman.sh │ ├── 11stbinary_wordpentagramroman_evaluation.sh │ ├── 11stbinary_wordpentagramroman_tuned.sh │ ├── 11stbinary_wordunigram.sh │ ├── 11stbinary_wordunigram_evaluation.sh │ ├── 11stbinary_wordunigram_tuned.sh │ ├── 11stbinary_wordunigramroman.sh │ ├── 11stbinary_wordunigramroman_evaluation.sh │ ├── 11stbinary_wordunigramroman_tuned.sh │ ├── 11stfull_charbigram.sh │ ├── 11stfull_charbigram_evaluation.sh │ ├── 11stfull_charbigram_tuned.sh │ ├── 11stfull_charpentagram.sh │ ├── 11stfull_charpentagram_evaluation.sh │ ├── 11stfull_charpentagram_tuned.sh │ ├── 
11stfull_charunigram.sh │ ├── 11stfull_charunigram_evaluation.sh │ ├── 11stfull_charunigram_tuned.sh │ ├── 11stfull_wordbigram.sh │ ├── 11stfull_wordbigram_evaluation.sh │ ├── 11stfull_wordbigram_tuned.sh │ ├── 11stfull_wordbigramroman.sh │ ├── 11stfull_wordbigramroman_evaluation.sh │ ├── 11stfull_wordbigramroman_tuned.sh │ ├── 11stfull_wordpentagram.sh │ ├── 11stfull_wordpentagram_evaluation.sh │ ├── 11stfull_wordpentagram_tuned.sh │ ├── 11stfull_wordpentagramroman.sh │ ├── 11stfull_wordpentagramroman_evaluation.sh │ ├── 11stfull_wordpentagramroman_tuned.sh │ ├── 11stfull_wordunigram.sh │ ├── 11stfull_wordunigram_evaluation.sh │ ├── 11stfull_wordunigram_tuned.sh │ ├── 11stfull_wordunigramroman.sh │ ├── 11stfull_wordunigramroman_evaluation.sh │ ├── 11stfull_wordunigramroman_tuned.sh │ ├── amazonbinary_charbigram.sh │ ├── amazonbinary_charbigram_evaluation.sh │ ├── amazonbinary_charbigram_tuned.sh │ ├── amazonbinary_charpentagram.sh │ ├── amazonbinary_charpentagram_evaluation.sh │ ├── amazonbinary_charpentagram_tuned.sh │ ├── amazonbinary_charunigram.sh │ ├── amazonbinary_charunigram_evaluation.sh │ ├── amazonbinary_charunigram_tuned.sh │ ├── amazonbinary_wordbigram.sh │ ├── amazonbinary_wordbigram_evaluation.sh │ ├── amazonbinary_wordbigram_tuned.sh │ ├── amazonbinary_wordpentagram.sh │ ├── amazonbinary_wordpentagram_evaluation.sh │ ├── amazonbinary_wordpentagram_tuned.sh │ ├── amazonbinary_wordunigram.sh │ ├── amazonbinary_wordunigram_evaluation.sh │ ├── amazonbinary_wordunigram_tuned.sh │ ├── amazonfull_charbigram.sh │ ├── amazonfull_charbigram_evaluation.sh │ ├── amazonfull_charbigram_tuned.sh │ ├── amazonfull_charpentagram.sh │ ├── amazonfull_charpentagram_evaluation.sh │ ├── amazonfull_charpentagram_tuned.sh │ ├── amazonfull_charunigram.sh │ ├── amazonfull_charunigram_evaluation.sh │ ├── amazonfull_charunigram_tuned.sh │ ├── amazonfull_wordbigram.sh │ ├── amazonfull_wordbigram_evaluation.sh │ ├── amazonfull_wordbigram_tuned.sh │ ├── amazonfull_wordpentagram.sh 
│ ├── amazonfull_wordpentagram_evaluation.sh │ ├── amazonfull_wordpentagram_tuned.sh │ ├── amazonfull_wordunigram.sh │ ├── amazonfull_wordunigram_evaluation.sh │ ├── amazonfull_wordunigram_tuned.sh │ ├── chinanews_charbigram.sh │ ├── chinanews_charbigram_evaluation.sh │ ├── chinanews_charbigram_tuned.sh │ ├── chinanews_charpentagram.sh │ ├── chinanews_charpentagram_evaluation.sh │ ├── chinanews_charpentagram_tuned.sh │ ├── chinanews_charunigram.sh │ ├── chinanews_charunigram_evaluation.sh │ ├── chinanews_charunigram_tuned.sh │ ├── chinanews_wordbigram.sh │ ├── chinanews_wordbigram_evaluation.sh │ ├── chinanews_wordbigram_tuned.sh │ ├── chinanews_wordbigramroman.sh │ ├── chinanews_wordbigramroman_evaluation.sh │ ├── chinanews_wordbigramroman_tuned.sh │ ├── chinanews_wordpentagram.sh │ ├── chinanews_wordpentagram_evaluation.sh │ ├── chinanews_wordpentagram_tuned.sh │ ├── chinanews_wordpentagramroman.sh │ ├── chinanews_wordpentagramroman_evaluation.sh │ ├── chinanews_wordpentagramroman_tuned.sh │ ├── chinanews_wordunigram.sh │ ├── chinanews_wordunigram_evaluation.sh │ ├── chinanews_wordunigram_tuned.sh │ ├── chinanews_wordunigramroman.sh │ ├── chinanews_wordunigramroman_evaluation.sh │ ├── chinanews_wordunigramroman_tuned.sh │ ├── dianping_charbigram.sh │ ├── dianping_charbigram_evaluation.sh │ ├── dianping_charbigram_tuned.sh │ ├── dianping_charpentagram.sh │ ├── dianping_charpentagram_evaluation.sh │ ├── dianping_charpentagram_tuned.sh │ ├── dianping_charunigram.sh │ ├── dianping_charunigram_evaluation.sh │ ├── dianping_charunigram_tuned.sh │ ├── dianping_wordbigram.sh │ ├── dianping_wordbigram_evaluation.sh │ ├── dianping_wordbigram_tuned.sh │ ├── dianping_wordbigramroman.sh │ ├── dianping_wordbigramroman_evaluation.sh │ ├── dianping_wordbigramroman_tuned.sh │ ├── dianping_wordpentagram.sh │ ├── dianping_wordpentagram_evaluation.sh │ ├── dianping_wordpentagram_tuned.sh │ ├── dianping_wordpentagramroman.sh │ ├── dianping_wordpentagramroman_evaluation.sh │ ├── 
dianping_wordpentagramroman_tuned.sh │ ├── dianping_wordunigram.sh │ ├── dianping_wordunigram_evaluation.sh │ ├── dianping_wordunigram_tuned.sh │ ├── dianping_wordunigramroman.sh │ ├── dianping_wordunigramroman_evaluation.sh │ ├── dianping_wordunigramroman_tuned.sh │ ├── ifeng_charbigram.sh │ ├── ifeng_charbigram_evaluation.sh │ ├── ifeng_charbigram_tuned.sh │ ├── ifeng_charpentagram.sh │ ├── ifeng_charpentagram_evaluation.sh │ ├── ifeng_charpentagram_tuned.sh │ ├── ifeng_charunigram.sh │ ├── ifeng_charunigram_evaluation.sh │ ├── ifeng_charunigram_tuned.sh │ ├── ifeng_wordbigram.sh │ ├── ifeng_wordbigram_evaluation.sh │ ├── ifeng_wordbigram_tuned.sh │ ├── ifeng_wordbigramroman.sh │ ├── ifeng_wordbigramroman_evaluation.sh │ ├── ifeng_wordbigramroman_tuned.sh │ ├── ifeng_wordpentagram.sh │ ├── ifeng_wordpentagram_evaluation.sh │ ├── ifeng_wordpentagram_tuned.sh │ ├── ifeng_wordpentagramroman.sh │ ├── ifeng_wordpentagramroman_evaluation.sh │ ├── ifeng_wordpentagramroman_tuned.sh │ ├── ifeng_wordunigram.sh │ ├── ifeng_wordunigram_evaluation.sh │ ├── ifeng_wordunigram_tuned.sh │ ├── ifeng_wordunigramroman.sh │ ├── ifeng_wordunigramroman_evaluation.sh │ ├── ifeng_wordunigramroman_tuned.sh │ ├── jdbinary_charbigram.sh │ ├── jdbinary_charbigram_evaluation.sh │ ├── jdbinary_charbigram_tuned.sh │ ├── jdbinary_charpentagram.sh │ ├── jdbinary_charpentagram_evaluation.sh │ ├── jdbinary_charpentagram_tuned.sh │ ├── jdbinary_charunigram.sh │ ├── jdbinary_charunigram_evaluation.sh │ ├── jdbinary_charunigram_tuned.sh │ ├── jdbinary_wordbigram.sh │ ├── jdbinary_wordbigram_evaluation.sh │ ├── jdbinary_wordbigram_tuned.sh │ ├── jdbinary_wordbigramroman.sh │ ├── jdbinary_wordbigramroman_evaluation.sh │ ├── jdbinary_wordbigramroman_tuned.sh │ ├── jdbinary_wordpentagram.sh │ ├── jdbinary_wordpentagram_evaluation.sh │ ├── jdbinary_wordpentagram_tuned.sh │ ├── jdbinary_wordpentagramroman.sh │ ├── jdbinary_wordpentagramroman_evaluation.sh │ ├── jdbinary_wordpentagramroman_tuned.sh │ ├── 
jdbinary_wordunigram.sh │ ├── jdbinary_wordunigram_evaluation.sh │ ├── jdbinary_wordunigram_tuned.sh │ ├── jdbinary_wordunigramroman.sh │ ├── jdbinary_wordunigramroman_evaluation.sh │ ├── jdbinary_wordunigramroman_tuned.sh │ ├── jdfull_charbigram.sh │ ├── jdfull_charbigram_evaluation.sh │ ├── jdfull_charbigram_tuned.sh │ ├── jdfull_charpentagram.sh │ ├── jdfull_charpentagram_evaluation.sh │ ├── jdfull_charpentagram_tuned.sh │ ├── jdfull_charunigram.sh │ ├── jdfull_charunigram_evaluation.sh │ ├── jdfull_charunigram_tuned.sh │ ├── jdfull_wordbigram.sh │ ├── jdfull_wordbigram_evaluation.sh │ ├── jdfull_wordbigram_tuned.sh │ ├── jdfull_wordbigramroman.sh │ ├── jdfull_wordbigramroman_evaluation.sh │ ├── jdfull_wordbigramroman_tuned.sh │ ├── jdfull_wordpentagram.sh │ ├── jdfull_wordpentagram_evaluation.sh │ ├── jdfull_wordpentagram_tuned.sh │ ├── jdfull_wordpentagramroman.sh │ ├── jdfull_wordpentagramroman_evaluation.sh │ ├── jdfull_wordpentagramroman_tuned.sh │ ├── jdfull_wordunigram.sh │ ├── jdfull_wordunigram_evaluation.sh │ ├── jdfull_wordunigram_tuned.sh │ ├── jdfull_wordunigramroman.sh │ ├── jdfull_wordunigramroman_evaluation.sh │ ├── jdfull_wordunigramroman_tuned.sh │ ├── jointbinary_charbigram.sh │ ├── jointbinary_charbigram_evaluation.sh │ ├── jointbinary_charbigram_tuned.sh │ ├── jointbinary_charpentagram.sh │ ├── jointbinary_charpentagram_evaluation.sh │ ├── jointbinary_charpentagram_tuned.sh │ ├── jointbinary_charunigram.sh │ ├── jointbinary_charunigram_evaluation.sh │ ├── jointbinary_charunigram_tuned.sh │ ├── jointbinary_wordbigram.sh │ ├── jointbinary_wordbigram_evaluation.sh │ ├── jointbinary_wordbigram_tuned.sh │ ├── jointbinary_wordbigramroman.sh │ ├── jointbinary_wordbigramroman_evaluation.sh │ ├── jointbinary_wordbigramroman_tuned.sh │ ├── jointbinary_wordpentagram.sh │ ├── jointbinary_wordpentagram_evaluation.sh │ ├── jointbinary_wordpentagram_tuned.sh │ ├── jointbinary_wordpentagramroman.sh │ ├── jointbinary_wordpentagramroman_evaluation.sh │ ├── 
jointbinary_wordpentagramroman_tuned.sh │ ├── jointbinary_wordunigram.sh │ ├── jointbinary_wordunigram_evaluation.sh │ ├── jointbinary_wordunigram_tuned.sh │ ├── jointbinary_wordunigramroman.sh │ ├── jointbinary_wordunigramroman_evaluation.sh │ ├── jointbinary_wordunigramroman_tuned.sh │ ├── jointfull_charbigram.sh │ ├── jointfull_charbigram_evaluation.sh │ ├── jointfull_charbigram_tuned.sh │ ├── jointfull_charpentagram.sh │ ├── jointfull_charpentagram_evaluation.sh │ ├── jointfull_charpentagram_tuned.sh │ ├── jointfull_charunigram.sh │ ├── jointfull_charunigram_evaluation.sh │ ├── jointfull_charunigram_tuned.sh │ ├── jointfull_wordbigram.sh │ ├── jointfull_wordbigram_evaluation.sh │ ├── jointfull_wordbigram_tuned.sh │ ├── jointfull_wordbigramroman.sh │ ├── jointfull_wordbigramroman_evaluation.sh │ ├── jointfull_wordbigramroman_tuned.sh │ ├── jointfull_wordpentagram.sh │ ├── jointfull_wordpentagram_evaluation.sh │ ├── jointfull_wordpentagram_tuned.sh │ ├── jointfull_wordpentagramroman.sh │ ├── jointfull_wordpentagramroman_evaluation.sh │ ├── jointfull_wordpentagramroman_tuned.sh │ ├── jointfull_wordunigram.sh │ ├── jointfull_wordunigram_evaluation.sh │ ├── jointfull_wordunigram_tuned.sh │ ├── jointfull_wordunigramroman.sh │ ├── jointfull_wordunigramroman_evaluation.sh │ ├── jointfull_wordunigramroman_tuned.sh │ ├── nytimes_charbigram.sh │ ├── nytimes_charbigram_evaluation.sh │ ├── nytimes_charbigram_tuned.sh │ ├── nytimes_charpentagram.sh │ ├── nytimes_charpentagram_evaluation.sh │ ├── nytimes_charpentagram_tuned.sh │ ├── nytimes_charunigram.sh │ ├── nytimes_charunigram_evaluation.sh │ ├── nytimes_charunigram_tuned.sh │ ├── nytimes_wordbigram.sh │ ├── nytimes_wordbigram_evaluation.sh │ ├── nytimes_wordbigram_tuned.sh │ ├── nytimes_wordpentagram.sh │ ├── nytimes_wordpentagram_evaluation.sh │ ├── nytimes_wordpentagram_tuned.sh │ ├── nytimes_wordunigram.sh │ ├── nytimes_wordunigram_evaluation.sh │ ├── nytimes_wordunigram_tuned.sh │ ├── rakutenbinary_charbigram.sh │ 
├── rakutenbinary_charbigram_evaluation.sh │ ├── rakutenbinary_charbigram_tuned.sh │ ├── rakutenbinary_charpentagram.sh │ ├── rakutenbinary_charpentagram_evaluation.sh │ ├── rakutenbinary_charpentagram_tuned.sh │ ├── rakutenbinary_charunigram.sh │ ├── rakutenbinary_charunigram_evaluation.sh │ ├── rakutenbinary_charunigram_tuned.sh │ ├── rakutenbinary_wordbigram.sh │ ├── rakutenbinary_wordbigram_evaluation.sh │ ├── rakutenbinary_wordbigram_tuned.sh │ ├── rakutenbinary_wordbigramroman.sh │ ├── rakutenbinary_wordbigramroman_evaluation.sh │ ├── rakutenbinary_wordbigramroman_tuned.sh │ ├── rakutenbinary_wordpentagram.sh │ ├── rakutenbinary_wordpentagram_evaluation.sh │ ├── rakutenbinary_wordpentagram_tuned.sh │ ├── rakutenbinary_wordpentagramroman.sh │ ├── rakutenbinary_wordpentagramroman_evaluation.sh │ ├── rakutenbinary_wordpentagramroman_tuned.sh │ ├── rakutenbinary_wordunigram.sh │ ├── rakutenbinary_wordunigram_evaluation.sh │ ├── rakutenbinary_wordunigram_tuned.sh │ ├── rakutenbinary_wordunigramroman.sh │ ├── rakutenbinary_wordunigramroman_evaluation.sh │ ├── rakutenbinary_wordunigramroman_tuned.sh │ ├── rakutenfull_charbigram.sh │ ├── rakutenfull_charbigram_evaluation.sh │ ├── rakutenfull_charbigram_tuned.sh │ ├── rakutenfull_charpentagram.sh │ ├── rakutenfull_charpentagram_evaluation.sh │ ├── rakutenfull_charpentagram_tuned.sh │ ├── rakutenfull_charunigram.sh │ ├── rakutenfull_charunigram_evaluation.sh │ ├── rakutenfull_charunigram_tuned.sh │ ├── rakutenfull_wordbigram.sh │ ├── rakutenfull_wordbigram_evaluation.sh │ ├── rakutenfull_wordbigram_tuned.sh │ ├── rakutenfull_wordbigramroman.sh │ ├── rakutenfull_wordbigramroman_evaluation.sh │ ├── rakutenfull_wordbigramroman_tuned.sh │ ├── rakutenfull_wordpentagram.sh │ ├── rakutenfull_wordpentagram_evaluation.sh │ ├── rakutenfull_wordpentagram_tuned.sh │ ├── rakutenfull_wordpentagramroman.sh │ ├── rakutenfull_wordpentagramroman_evaluation.sh │ ├── rakutenfull_wordpentagramroman_tuned.sh │ ├── rakutenfull_wordunigram.sh 
│ ├── rakutenfull_wordunigram_evaluation.sh │ ├── rakutenfull_wordunigram_tuned.sh │ ├── rakutenfull_wordunigramroman.sh │ ├── rakutenfull_wordunigramroman_evaluation.sh │ └── rakutenfull_wordunigramroman_tuned.sh ├── glyphnet/ │ ├── archive/ │ │ ├── 11stbinary_spatial6temporal8length486feature256.sh │ │ ├── 11stbinary_spatial8temporal12length512feature256.sh │ │ ├── 11stfull_spatial6temporal8length486feature256.sh │ │ ├── 11stfull_spatial8temporal12length512feature256.sh │ │ ├── amazonbinary_spatial6temporal8length486feature256.sh │ │ ├── amazonbinary_spatial8temporal12length512feature256.sh │ │ ├── amazonfull_spatial6temporal8length486feature256.sh │ │ ├── amazonfull_spatial8temporal12length512feature256.sh │ │ ├── chinanews_spatial6temporal8length486feature256.sh │ │ ├── chinanews_spatial8temporal12length512feature256.sh │ │ ├── dianping_spatial6temporal8length486feature256.sh │ │ ├── dianping_spatial8temporal12length512feature256.sh │ │ ├── ifeng_spatial6temporal8length486feature256.sh │ │ ├── ifeng_spatial8temporal12length512feature256.sh │ │ ├── jdbinary_spatial6temporal8length486feature256.sh │ │ ├── jdbinary_spatial8temporal12length512feature256.sh │ │ ├── jdfull_spatial6temporal8length486feature256.sh │ │ ├── jdfull_spatial8temporal12length512feature256.sh │ │ ├── jointbinary_spatial6temporal8length486feature256.sh │ │ ├── jointbinary_spatial8temporal12length512feature256.sh │ │ ├── jointfull_spatial6temporal8length486feature256.sh │ │ ├── jointfull_spatial8temporal12length512feature256.sh │ │ ├── nytimes_spatial6temporal8length486feature256.sh │ │ ├── nytimes_spatial8temporal12length512feature256.sh │ │ ├── rakutenbinary_spatial6temporal8length486feature256.sh │ │ ├── rakutenbinary_spatial8temporal12length512feature256.sh │ │ ├── rakutenfull_spatial6temporal8length486feature256.sh │ │ └── rakutenfull_spatial8temporal12length512feature256.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── main.lua │ ├── model.lua │ ├── modules/ │ │ ├── 
TemporalConvolutionCudnn.lua │ │ ├── TemporalConvolutionMM.lua │ │ ├── TemporalMaxPoolingCudnn.lua │ │ └── TemporalMaxPoolingMM.lua │ ├── modules.lua │ ├── scroll.lua │ ├── scroll.ui │ ├── test.lua │ ├── train.lua │ ├── unittest/ │ │ ├── data.lua │ │ ├── driver.lua │ │ ├── model.lua │ │ ├── model_cuda.lua │ │ ├── model_cudnn.lua │ │ ├── modules_temporal.lua │ │ ├── modules_temporal_cudnn.lua │ │ ├── test.lua │ │ ├── test_cuda.lua │ │ ├── train.lua │ │ └── train_cuda.lua │ └── visualizer.lua ├── linearnet/ │ ├── archive/ │ │ ├── 11stbinary_charbag.sh │ │ ├── 11stbinary_charbagtfidf.sh │ │ ├── 11stbinary_chargram.sh │ │ ├── 11stbinary_chargramtfidf.sh │ │ ├── 11stbinary_wordbag.sh │ │ ├── 11stbinary_wordbagroman.sh │ │ ├── 11stbinary_wordbagtfidf.sh │ │ ├── 11stbinary_wordbagtfidfroman.sh │ │ ├── 11stbinary_wordgram.sh │ │ ├── 11stbinary_wordgramroman.sh │ │ ├── 11stbinary_wordgramtfidf.sh │ │ ├── 11stbinary_wordgramtfidfroman.sh │ │ ├── 11stfull_charbag.sh │ │ ├── 11stfull_charbagtfidf.sh │ │ ├── 11stfull_chargram.sh │ │ ├── 11stfull_chargramtfidf.sh │ │ ├── 11stfull_wordbag.sh │ │ ├── 11stfull_wordbagroman.sh │ │ ├── 11stfull_wordbagtfidf.sh │ │ ├── 11stfull_wordbagtfidfroman.sh │ │ ├── 11stfull_wordgram.sh │ │ ├── 11stfull_wordgramroman.sh │ │ ├── 11stfull_wordgramtfidf.sh │ │ ├── 11stfull_wordgramtfidfroman.sh │ │ ├── amazonbinary_charbag.sh │ │ ├── amazonbinary_charbagtfidf.sh │ │ ├── amazonbinary_chargram.sh │ │ ├── amazonbinary_chargramtfidf.sh │ │ ├── amazonbinary_wordbag.sh │ │ ├── amazonbinary_wordbagtfidf.sh │ │ ├── amazonbinary_wordgram.sh │ │ ├── amazonbinary_wordgramtfidf.sh │ │ ├── amazonfull_charbag.sh │ │ ├── amazonfull_charbagtfidf.sh │ │ ├── amazonfull_chargram.sh │ │ ├── amazonfull_chargramtfidf.sh │ │ ├── amazonfull_wordbag.sh │ │ ├── amazonfull_wordbagtfidf.sh │ │ ├── amazonfull_wordgram.sh │ │ ├── amazonfull_wordgramtfidf.sh │ │ ├── chinanews_charbag.sh │ │ ├── chinanews_charbagtfidf.sh │ │ ├── chinanews_chargram.sh │ │ ├── 
chinanews_chargramtfidf.sh │ │ ├── chinanews_wordbag.sh │ │ ├── chinanews_wordbagroman.sh │ │ ├── chinanews_wordbagtfidf.sh │ │ ├── chinanews_wordbagtfidfroman.sh │ │ ├── chinanews_wordgram.sh │ │ ├── chinanews_wordgramroman.sh │ │ ├── chinanews_wordgramtfidf.sh │ │ ├── chinanews_wordgramtfidfroman.sh │ │ ├── dianping_charbag.sh │ │ ├── dianping_charbagtfidf.sh │ │ ├── dianping_chargram.sh │ │ ├── dianping_chargramtfidf.sh │ │ ├── dianping_wordbag.sh │ │ ├── dianping_wordbagroman.sh │ │ ├── dianping_wordbagtfidf.sh │ │ ├── dianping_wordbagtfidfroman.sh │ │ ├── dianping_wordgram.sh │ │ ├── dianping_wordgramroman.sh │ │ ├── dianping_wordgramtfidf.sh │ │ ├── dianping_wordgramtfidfroman.sh │ │ ├── ifeng_charbag.sh │ │ ├── ifeng_charbagtfidf.sh │ │ ├── ifeng_chargram.sh │ │ ├── ifeng_chargramtfidf.sh │ │ ├── ifeng_wordbag.sh │ │ ├── ifeng_wordbagroman.sh │ │ ├── ifeng_wordbagtfidf.sh │ │ ├── ifeng_wordbagtfidfroman.sh │ │ ├── ifeng_wordgram.sh │ │ ├── ifeng_wordgramroman.sh │ │ ├── ifeng_wordgramtfidf.sh │ │ ├── ifeng_wordgramtfidfroman.sh │ │ ├── jdbinary_charbag.sh │ │ ├── jdbinary_charbagtfidf.sh │ │ ├── jdbinary_chargram.sh │ │ ├── jdbinary_chargramtfidf.sh │ │ ├── jdbinary_wordbag.sh │ │ ├── jdbinary_wordbagroman.sh │ │ ├── jdbinary_wordbagtfidf.sh │ │ ├── jdbinary_wordbagtfidfroman.sh │ │ ├── jdbinary_wordgram.sh │ │ ├── jdbinary_wordgramroman.sh │ │ ├── jdbinary_wordgramtfidf.sh │ │ ├── jdbinary_wordgramtfidfroman.sh │ │ ├── jdfull_charbag.sh │ │ ├── jdfull_charbagtfidf.sh │ │ ├── jdfull_chargram.sh │ │ ├── jdfull_chargramtfidf.sh │ │ ├── jdfull_wordbag.sh │ │ ├── jdfull_wordbagroman.sh │ │ ├── jdfull_wordbagtfidf.sh │ │ ├── jdfull_wordbagtfidfroman.sh │ │ ├── jdfull_wordgram.sh │ │ ├── jdfull_wordgramroman.sh │ │ ├── jdfull_wordgramtfidf.sh │ │ ├── jdfull_wordgramtfidfroman.sh │ │ ├── jointbinary_charbag.sh │ │ ├── jointbinary_charbagtfidf.sh │ │ ├── jointbinary_chargram.sh │ │ ├── jointbinary_chargramtfidf.sh │ │ ├── jointbinary_wordbag.sh │ │ ├── 
jointbinary_wordbagroman.sh │ │ ├── jointbinary_wordbagtfidf.sh │ │ ├── jointbinary_wordbagtfidfroman.sh │ │ ├── jointbinary_wordgram.sh │ │ ├── jointbinary_wordgramroman.sh │ │ ├── jointbinary_wordgramtfidf.sh │ │ ├── jointbinary_wordgramtfidfroman.sh │ │ ├── jointfull_charbag.sh │ │ ├── jointfull_charbagtfidf.sh │ │ ├── jointfull_chargram.sh │ │ ├── jointfull_chargramtfidf.sh │ │ ├── jointfull_wordbag.sh │ │ ├── jointfull_wordbagroman.sh │ │ ├── jointfull_wordbagtfidf.sh │ │ ├── jointfull_wordbagtfidfroman.sh │ │ ├── jointfull_wordgram.sh │ │ ├── jointfull_wordgramroman.sh │ │ ├── jointfull_wordgramtfidf.sh │ │ ├── jointfull_wordgramtfidfroman.sh │ │ ├── nytimes_charbag.sh │ │ ├── nytimes_charbagtfidf.sh │ │ ├── nytimes_chargram.sh │ │ ├── nytimes_chargramtfidf.sh │ │ ├── nytimes_wordbag.sh │ │ ├── nytimes_wordbagtfidf.sh │ │ ├── nytimes_wordgram.sh │ │ ├── nytimes_wordgramtfidf.sh │ │ ├── rakutenbinary_charbag.sh │ │ ├── rakutenbinary_charbagtfidf.sh │ │ ├── rakutenbinary_chargram.sh │ │ ├── rakutenbinary_chargramtfidf.sh │ │ ├── rakutenbinary_wordbag.sh │ │ ├── rakutenbinary_wordbagroman.sh │ │ ├── rakutenbinary_wordbagtfidf.sh │ │ ├── rakutenbinary_wordbagtfidfroman.sh │ │ ├── rakutenbinary_wordgram.sh │ │ ├── rakutenbinary_wordgramroman.sh │ │ ├── rakutenbinary_wordgramtfidf.sh │ │ ├── rakutenbinary_wordgramtfidfroman.sh │ │ ├── rakutenfull_charbag.sh │ │ ├── rakutenfull_charbagtfidf.sh │ │ ├── rakutenfull_chargram.sh │ │ ├── rakutenfull_chargramtfidf.sh │ │ ├── rakutenfull_wordbag.sh │ │ ├── rakutenfull_wordbagroman.sh │ │ ├── rakutenfull_wordbagtfidf.sh │ │ ├── rakutenfull_wordbagtfidfroman.sh │ │ ├── rakutenfull_wordgram.sh │ │ ├── rakutenfull_wordgramroman.sh │ │ ├── rakutenfull_wordgramtfidf.sh │ │ └── rakutenfull_wordgramtfidfroman.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── queue.lua │ ├── test.lua │ ├── train.lua │ └── unittest/ │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── test.lua │ └── train.lua ├── models/ │ 
├── README.txt │ ├── embednet/ │ │ └── README.txt │ ├── fasttext/ │ │ └── README.txt │ ├── glyphnet/ │ │ └── README.txt │ ├── linearnet/ │ │ └── README.txt │ └── onehotnet/ │ └── README.txt ├── onehotnet/ │ ├── archive/ │ │ ├── 11stbinary_onehot4temporal12length2048feature256.sh │ │ ├── 11stbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── 11stbinary_onehot4temporal8length1944feature256.sh │ │ ├── 11stbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── 11stfull_onehot4temporal12length2048feature256.sh │ │ ├── 11stfull_onehot4temporal12length2048feature256roman.sh │ │ ├── 11stfull_onehot4temporal8length1944feature256.sh │ │ ├── 11stfull_onehot4temporal8length1944feature256roman.sh │ │ ├── amazonbinary_onehot4temporal12length2048feature256.sh │ │ ├── amazonbinary_onehot4temporal8length1944feature256.sh │ │ ├── amazonfull_onehot4temporal12length2048feature256.sh │ │ ├── amazonfull_onehot4temporal8length1944feature256.sh │ │ ├── chinanews_onehot4temporal12length2048feature256.sh │ │ ├── chinanews_onehot4temporal12length2048feature256roman.sh │ │ ├── chinanews_onehot4temporal8length1944feature256.sh │ │ ├── chinanews_onehot4temporal8length1944feature256roman.sh │ │ ├── dianping_onehot4temporal12length2048feature256.sh │ │ ├── dianping_onehot4temporal12length2048feature256roman.sh │ │ ├── dianping_onehot4temporal8length1944feature256.sh │ │ ├── dianping_onehot4temporal8length1944feature256roman.sh │ │ ├── ifeng_onehot4temporal12length2048feature256.sh │ │ ├── ifeng_onehot4temporal12length2048feature256roman.sh │ │ ├── ifeng_onehot4temporal8length1944feature256.sh │ │ ├── ifeng_onehot4temporal8length1944feature256roman.sh │ │ ├── jdbinary_onehot4temporal12length2048feature256.sh │ │ ├── jdbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── jdbinary_onehot4temporal8length1944feature256.sh │ │ ├── jdbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── jdfull_onehot4temporal12length2048feature256.sh │ │ ├── 
jdfull_onehot4temporal12length2048feature256roman.sh │ │ ├── jdfull_onehot4temporal8length1944feature256.sh │ │ ├── jdfull_onehot4temporal8length1944feature256roman.sh │ │ ├── jointbinary_onehot4temporal12length2048feature256.sh │ │ ├── jointbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── jointbinary_onehot4temporal8length1944feature256.sh │ │ ├── jointbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── jointfull_onehot4temporal12length2048feature256.sh │ │ ├── jointfull_onehot4temporal12length2048feature256roman.sh │ │ ├── jointfull_onehot4temporal8length1944feature256.sh │ │ ├── jointfull_onehot4temporal8length1944feature256roman.sh │ │ ├── nytimes_onehot4temporal12length2048feature256.sh │ │ ├── nytimes_onehot4temporal8length1944feature256.sh │ │ ├── rakutenbinary_onehot4temporal12length2048feature256.sh │ │ ├── rakutenbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── rakutenbinary_onehot4temporal8length1944feature256.sh │ │ ├── rakutenbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── rakutenfull_onehot4temporal12length2048feature256.sh │ │ ├── rakutenfull_onehot4temporal12length2048feature256roman.sh │ │ ├── rakutenfull_onehot4temporal8length1944feature256.sh │ │ └── rakutenfull_onehot4temporal8length1944feature256roman.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ └── unittest/ │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── model_cuda.lua │ ├── model_cudnn.lua │ ├── test.lua │ ├── test_cuda.lua │ ├── train.lua │ └── train_cuda.lua └── unifont/ ├── createunifont.lua ├── unifont/ │ └── README.txt └── visualize.lua ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2017, Xiang Zhang All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # Glyph This repository is used to publish all the code used for the following article: [Xiang Zhang, Yann LeCun, Which Encoding is the Best for Text Classification in Chinese, English, Japanese and Korean?, arXiv 1708.02657](https://arxiv.org/abs/1708.02657) The code and datasets are completely released as of January 2018, including all the code for crawling, preprocessing and training on the datasets. However, the documentation may not be complete yet. 
That said, readers could refer to the `doc` directory for an example in reproducing all the results for the Dianping dataset, and extend that to other datasets in similar ways. ## Reproducibility Manifesto If anyone sees a number in our paper, there is a script one can execute to reproduce it. No responsibility should be imposed on the user to figure out any experimental parameter barried in the paper's content. ## Datasets The `data` directory contains the preprocessing scripts for all the datasets used in the paper. These datasets are released separately of their processing source code. See below for details. ### Summary The following table is a summary of the datasets. Most of them have millions of samples for training. | Dataset | Language | Classes | Train | Test | |----------------|--------------|---------|------------|-----------| | Dianping | Chinese | 2 | 2,000,000 | 500,000 | | JD full | Chinese | 5 | 3,000,000 | 250,000 | | JD binary | Chinese | 2 | 4,000,000 | 360,000 | | Rakuten full | Japanese | 5 | 4,000,000 | 500,000 | | Rakuten binary | Japanese | 2 | 3,400,000 | 400,000 | | 11st full | Korean | 5 | 750,000 | 100,000 | | 11st binary | Korean | 2 | 4,000,000 | 400,000 | | Amazon full | English | 5 | 3,000,000 | 650,000 | | Amazon binary | English | 2 | 3,600,000 | 400,000 | | Ifeng | Chinese | 5 | 800,000 | 50,000 | | Chinanews | Chinese | 7 | 1,400,000 | 112,000 | | NYTimes | English | 7 | 1,400,000 | 105,000 | | Joint full | Multilingual | 5 | 10,750,000 | 1,500,000 | | Joint binary | Multilingual | 2 | 15,000,000 | 1,560,000 | ### Download Datasets are released separtely of the source code via links from Google Drive. *These datasets should only be used for the purpose of research*. 
| Dataset | Train | Test | |----------------|--------------------------------|-------------------------------| | Dianping | [Link](https://goo.gl/uKPxyo) | [Link](https://goo.gl/2QZpLx) | | JD full | [Link](https://goo.gl/u3vsak) | [Link](https://goo.gl/hLZRky) | | JD binary | [Link](https://goo.gl/ZPj1ip) | [Link](https://goo.gl/bqiEfP) | | Rakuten full | [Link](https://goo.gl/A7y14i) | [Link](https://goo.gl/ve4mup) | | Rakuten binary | [Link](https://goo.gl/3kYQ2f) | [Link](https://goo.gl/m8FpeH) | | 11st full | [Link](https://goo.gl/F1oPBX) | [Link](https://goo.gl/ZpTLND) | | 11st binary | [Link](https://goo.gl/8Qi7ao) | [Link](https://goo.gl/nbBhFq) | | Amazon full | [Link](https://goo.gl/UzQWaj) | [Link](https://goo.gl/EXkzWs) | | Amazon binary | [Link](https://goo.gl/u7AxWS) | [Link](https://goo.gl/2fft8x) | | Ifeng | [Link](https://goo.gl/AtKsq4) | [Link](https://goo.gl/tLWojy) | | Chinanews | [Link](https://goo.gl/1p4kdx) | [Link](https://goo.gl/rxvhCJ) | | NYTimes | [Link](https://goo.gl/2hZeqd) | [Link](https://goo.gl/66EDa5) | | Joint full | [Link](https://goo.gl/AJfzLC) | [Link](https://goo.gl/mibMsV) | | Joint binary | [Link](https://goo.gl/YLMqNe) | [Link](https://goo.gl/WRXQuJ) | ## GNU Unifont The `glyphnet` scripts require the GNU Unifont character images to run. The file `unifont-8.0.01.t7b.xz` can be downloaded via [this link](https://goo.gl/aFxYHq). 
================================================ FILE: data/11st/construct_rr.py ================================================ #!/usr/bin/python3 ''' Convert Korean datasets to Revised Romanization of Korean (RR, MC2000) Copyright 2016 Xiang Zhang Usage: python3 construct_hepburn.py -i [input] -o [output] ''' # Input file INPUT = '../data/11st/sentiment/full_train.csv' # Output file OUTPUT = '../data/11st/sentiment/full_train_rr.csv' import argparse import csv import hanja import unidecode # Hangul romanization libraries from hangul_romanize import Transliter from hangul_romanize.rule import academic # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output transliter = Transliter(academic) convertRoman(transliter) def romanizeText(transliter, text): text = text.strip() if text != '': hangul_text = hanja.translate(text, 'substitution') return transliter.translit(hangul_text) return text # Convert the text in Chinese to pintin def convertRoman(transliter): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(unidecode.unidecode(romanizeText( transliter, row[i])).strip().replace('\n','\\n')) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/11st/create_post.py 
================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_post.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/11st/post/*.json.xz' OUTPUT = '../data/11st/sentiment/post.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) star = review.get('star', '') title = review.get('title', '') content = review.get('content', '') if star != '': n = n + 1 writer.writerow([star, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/11st/create_review.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_review.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/11st/review/*.json.xz' OUTPUT = '../data/11st/sentiment/review.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) star = review.get('star', '') title = review.get('title', '') content = review.get('content', '') if star != '': n = n + 1 writer.writerow([star, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/11st/segment_rr_word.lua ================================================ --[[ Create romananized word data from romanized data in csv for Korean Copyright 2016 Xiang Zhang Usage: th segment_rr_word.lua [input] [output] [list] [read] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/11st/sentiment/full_train_rr.csv' local output = arg[2] or '../data/11st/sentiment/full_train_rr_word.csv' local list = arg[3] or '../data/11st/sentiment/full_train_rr_word_list.csv' local read = (arg[4] == 'true') local word_index, word_total if read then print('Reading word index') word_index, word_total = joe.readWords(list) else print('Counting words') local word_count, word_freq = joe.splitWords(input) print('Sorting words by count') word_index, word_total = joe.sortWords(list, word_count, word_freq) end print('Constructing word index output') joe.constructWords(input, output, word_index, word_total) end function joe.readWords(list) local word_index = tds.Hash() local fd = io.open(list) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: '..n) io.flush() end local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') word_index[content[1]] = n end print('\rProcessed lines: '..n) 
fd:close() return word_index, n end function joe.splitWords(input) local word_count, word_freq = tds.Hash(), tds.Hash() local fd = io.open(input) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) field_set = {} for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") -- All punctuation characters except for hyphen "-" content[i] = content[i]:gsub( '([!"#$%%&\'()*+,./:;<=>?@%[\\%]^_`{|}~])', ' %1 ') for word in content[i]:gmatch('[%S]+') do word_count[word] = (word_count[word] or 0) + 1 if not field_set[word] then field_set[word] = true word_freq[word] = (word_freq[word] or 0) + 1 end end end end print('\rProcessed lines: '..n) fd:close() -- Normalizing word frequencies for key, value in pairs(word_freq) do word_freq[key] = value / n end return word_count, word_freq end function joe.sortWords(list, word_count, word_freq) -- Sort the list of words word_list = tds.Vec() for word, _ in pairs(word_count) do word_list[#word_list + 1] = word end word_list:sort(function (w, v) return word_count[w] > word_count[v] end) -- Create the word index word_index = tds.Hash() for index, word in ipairs(word_list) do word_index[word] = index end -- Write it to file fd = io.open(list, 'w') for index, word in ipairs(word_list) do fd:write('"', word:gsub("\n", "\\n"):gsub("\"", "\"\""), '","', word_count[word], '","', word_freq[word], '"\n') end return word_index, #word_list end function joe.constructWords(input, output, word_index, word_total) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) ofd:write('"', content[1], '"') for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") -- All punctuation characters except for 
hyphen "-" content[i] = content[i]:gsub( '([!"#$%%&\'()*+,./:;<=>?@%[\\%]^_`{|}~])', ' %1 ') local first_write = true ofd:write(',"') for word in content[i]:gmatch('[%S]+') do local index = word_index[word] or word_total + 1 if first_write then first_write = false ofd:write(index) else ofd:write(' ', index) end end ofd:write('"') end ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/11st/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Korean datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/11st/sentiment/full_train.csv' #Output file OUTPUT = '../data/11st/sentiment/full_train_word.csv' # List file LIST = '../data/11st/sentiment/full_train_word_list.csv' # Read already defined word list READ = False # Korean dictionary path for MeCab MECAB_DICT_PATH = '/home/xiang/.usr/lib/mecab/dic/mecab-ko-dic' import argparse import csv from konlpy.tag import Mecab # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = 
open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): mecab = Mecab(MECAB_DICT_PATH) # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = mecab.morphs(field) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): mecab = 
Mecab(MECAB_DICT_PATH) # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = mecab.morphs(field) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/README.md ================================================ # Datasets This directory contains the preprocessing scripts for all the datasets used in the paper. These datasets are released separately of their processing source code. 
================================================ FILE: data/chinanews/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/chinanews/article' OUTPUT = '../data/chinanews/topic/news.csv' CATEGORY_FILE = '../data/chinanews/category/category.json' def main(): global INPUT global OUTPUT global CATEGORY_FILE parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file directory', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument( '-c', '--category', help = 'Category file', default = CATEGORY_FILE) args = parser.parse_args() INPUT = args.input OUTPUT = args.output CATEGORY_FILE = args.category createData() def createData(): # Open the category file classes = dict() cfd = open(CATEGORY_FILE, encoding = 'utf-8') i = 1 for line in cfd: category = json.loads(line) classes[category['code']] = i i = i + 1 # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files for prefix in classes: files = glob.glob(INPUT + '/' + prefix + '_*.json.xz') index = classes[prefix] n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] n = n + 1 writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/data/README.txt ================================================ This directory should contain training and testing datasets. ================================================ FILE: data/dianping/combine_gram_count.lua ================================================ --[[ Combine sorted gram counts Copyright 2016 Xiang Zhang Usage: th combine_gram_count.lua [input_prefix] [output] [samples] [chunks] Comment: This program also outputs lines with counts as the firt unquoted csv value, so that one can use GNU sort easily. 
--]] local io = require('io') local math = require('math') local string = require('string') -- A Logic Named Joe local joe = {} function joe.main() local input_prefix = arg[1] or '../data/dianping/train_chargram_count_sort/' local output = arg[2] or '../data/dianping/train_chargram_count_combine.csv' local samples = arg[3] and tonumber(arg[3]) or 2000000 local chunks = arg[4] and tonumber(arg[4]) or 100 print('Combine chunks') joe.combineChunks(input_prefix, output, samples, chunks) end function joe.combineChunks(input_prefix, output, samples, chunks) local n = 0 local ofd = io.open(output, 'w') local current = {} for i = 1, chunks do local ifd = io.open(input_prefix..i..'.csv') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line ', n) io.flush() end local content = joe.parseCSVLine(line) if current[1] ~= content[1] then if current[1] ~= nil then ofd:write(current[3], ',"', current[1], '","', current[2]:gsub('"', '""'), '","', current[4] / samples, '","', current[3], '"\n') end current = content else current[3] = current[3] + content[3] current[4] = current[4] + content[4] end end ifd:close() end ofd:write(current[3], ',"', current[1], '","', current[2]:gsub('"', '""'), '","', current[4] / samples, '","', current[3], '"\n') ofd:close() print('\rProcessed lines: '..n) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_charbag.lua ================================================ --[[ Construct unicode character bag-of-element format from unicode serialization Copyright 2016 Xiang Zhang Usage: th construct_charbag.lua [input] [output] [list] [read] [limit] [replace] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output = arg[2] or '../data/dianping/train_charbag.t7b' local list = arg[3] or '../data/dianping/train_charbag_list.csv' local read = (arg[4] == 'true') local limit = arg[5] and tonumber(arg[5]) or 200000 local replace = arg[6] and tonumber(arg[6]) or 200001 print('Loading data from '..input) local data = torch.load(input) print('Counting character') local count, freq = joe.countBag(data, limit, replace) print('Total number of values: '..count) if read == true then print('Reading frequency from '..list) freq = joe.readList(list) else print('Outputing frequency list to '..list) joe.writeList(freq, list) end print('Constructing character bag data') local bag = joe.constructBag(data, count, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.writeList(freq, list) local fd = io.open(list, 'w') for i = 1, freq:size(1) do local char = (i <= 65536) and joe.utf8str(i - 1) or '' -- Do not print control characters if i < 11 or (i > 11 and i < 33) then char = '' end fd:write('"', i, '","', 
char:gsub('\n', '\\n'):gsub('"', '""'), '","', freq[i], '"\n') end end function joe.readList(list) local freq = {} local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) end return torch.Tensor(freq) end function joe.countBag(data, limit, replace) local code, code_value = data.code, data.code_value local count = 0 local freq = torch.zeros(math.max(limit, replace)) -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local char = code_value[code[i][j][k][1] + l - 1] if char > limit then char = replace end if not index[char] then count = count + 1 index[char] = 1 freq[char] = freq[char] + 1 else index[char] = index[char] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end -- Normalizing the frequency local sum = 0 for i = 1, #code do sum = sum + code[i]:size(1) end freq:div(sum) return count, freq end function joe.constructBag(data, count, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local char = code_value[code[i][j][k][1] + l - 1] 
if char > limit then char = replace end if not index[char] then count = count + 1 index[char] = 1 pointer[#pointer + 1] = char else index[char] = index[char] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_chargram.lua ================================================ --[[ Construct unicode character ngrams format from unicode serialization Copyright 2016 Xiang Zhang Usage: th construct_chargram.lua [input] [output] [list] [read] [gram] [limit] [replace] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output = arg[2] or '../data/dianping/train_chargram.t7b' local list = arg[3] or '../data/dianping/train_chargram_list.csv' local read = (arg[4] == nil) or(arg[4] == 'true') local gram = arg[5] and tonumber(arg[5]) or 5 local limit = arg[6] and tonumber(arg[6]) or 1000000 local replace = arg[7] and tonumber(arg[7]) or 1000001 print('Loading data from '..input) local data = torch.load(input) local freq, dict, ngrams if read == true then print('Reading frequency from '..list) freq, dict = joe.readList(list) else print('Constructing dictionary and frequency list') freq, dict, ngrams = joe.constructList(data, gram) print('Outputing frequency list to '..list) joe.writeList(freq, ngrams, list) end print('Counting character ngrams data') local count = joe.countBag(data, dict, gram, limit, replace) print('Total number of ngrams in data is '..count) print('Constructing character bag data') local bag = joe.constructBag(data, dict, count, gram, limit, replace) print('Saving to '..output) torch.save(output, 
bag) end function joe.constructList(data, gram) local count = tds.Hash() local docs = tds.Hash() local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end if not index[ngram] then docs[ngram] = (docs[ngram] or 0) + 1 index[ngram] = 0 end index[ngram] = index[ngram] + 1 count[ngram] = (count[ngram] or 0) + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end local ngrams = tds.Vec() for ngram, value in pairs(count) do ngrams[#ngrams + 1] = ngram end ngrams:sort(function(a, b) return count[a] > count[b] end) local sum = 0 for i = 1, #code do sum = sum + code[i]:size(1) end local dict = tds.Hash() local freq = torch.Tensor(#ngrams) for index, ngram in ipairs(ngrams) do dict[ngram] = index freq[index] = (docs[ngram] or 0) / sum end return freq, dict, ngrams end function joe.writeList(freq, ngrams, list) local fd = io.open(list, 'w') for i = 1, freq:size(1) do local ngram_string = '' for code in ngrams[i]:gmatch('[%S]+') do local code = tonumber(code) local char = (code <= 65536 and (code > 32 or code == 11)) and joe.utf8str(code - 1) or ' ' ngram_string = ngram_string..char end fd:write('"', ngrams[i], '","', ngram_string:gsub('\n', '\\n'):gsub('"', '""'), '","', freq[i], '"\n') end end function joe.readList(list) local freq_table = tds.Vec() local dict = tds.Hash() local fd = io.open(list) for line in fd:lines() do local 
content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq_table[#freq_table + 1] = tonumber(content[3]) dict[content[1]] = #freq_table end local freq = torch.Tensor(#freq_table) for i, v in ipairs(freq_table) do freq[i] = v end return freq, dict end function joe.countBag(data, dict, gram, limit, replace) local count = 0 local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then index[ngram_index] = 0 count = count + 1 end index[ngram_index] = index[ngram_index] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, dict, count, gram, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k 
= 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then count = count + 1 index[ngram_index] = 0 pointer[#pointer + 1] = ngram_index end index[ngram_index] = index[ngram_index] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = 
string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_chartoken.lua ================================================ --[[ Create chartoken format for fastText Copyright 2017 Xiang Zhang Usage: th construct_chartoken.lua [input] [output] --]] local bit32 = require('bit32') local io = require('io') local math = require('math') local string = require('string') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train.csv' local output = arg[2] or '../data/dianping/train_chartoken.txt' print('Construct token') joe.constructToken(input, output) end function joe.constructToken(input, output) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) ofd:write('__label__', class) for i = 2, #content do content[i] = content[i]:gsub('\\n', ' '):gsub( '[%z\001-\031\127]', ' '):gsub('^%s*(.-)%s*$', '%1') local sequence = joe.utf8to32(content[i]) for j, code in ipairs(sequence) do if code > 32 then ofd:write(' ', joe.utf8str(code)) end end end 
ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end -- UTF-8 decoding function -- Ref: http://lua-users.org/wiki/LuaUnicode function joe.utf8to32(utf8str) assert(type(utf8str) == 'string') local res, seq, val = {}, 0, nil for i = 1, #utf8str do local c = string.byte(utf8str, i) if seq == 0 then table.insert(res, val) seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or error('Invalid UTF-8 character sequence') val = bit32.band(c, 2^(8-seq) - 1) else val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F)) end seq = seq - 1 end table.insert(res, val) table.insert(res, 0) return res end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str(decimal) local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} if decimal < 128 then return string.char(decimal) 
end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end joe.main() return joe ================================================ FILE: data/dianping/construct_code.lua ================================================ --[[ Construct unicode serialization format from string serialization format Copyright 2015-2016 Xiang Zhang Usage: th construct_code.lua [input] [output] [limit] [replace] --]] local bit32 = require('bit32') local ffi = require('ffi') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_string.t7b' local output = arg[2] or '../data/dianping/train_code.t7b' local limit = arg[3] and tonumber(arg[3]) or 65536 local replace = arg[4] and tonumber(arg[4]) or 33 print('Loading data from '..input) local data = torch.load(input) print('Counting UTF-8 code') local count = joe.countCode(data) print('Total number of codes: '..count) print('Constructing UTF-8 code data') local code = joe.constructCode(data, count, limit, replace) print('Saving to '..output) torch.save(output, code) end function joe.countCode(data) local index, content = data.index, data.content local count = 0 -- Iterate through the classes for i = 1, #index do print('Processing for class '..i) -- Iterate through the samples for j = 1, index[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing text: ', j, '/', index[i]:size(1)) io.flush() end -- Iterate through the fields for k = 1, index[i][j]:size(1) do local text = ffi.string( torch.data(content:narrow(1, index[i][j][k][1], 1))) local sequence = joe.utf8to32(text) count = count + #sequence end end print('\rProcessed texts: '..index[i]:size(1)..'/'..index[i]:size(1)) end 
return count end function joe.constructCode(data, count, limit, replace) local index, content = data.index, data.content local code = {} local code_value = torch.LongTensor(count) local p = 1 -- Iterate through the classes for i = 1, #index do print('Processing for class '..i) code[i] = index[i]:clone():zero() -- Iterate through the samples for j = 1, index[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing text: ', j, '/', index[i]:size(1)) io.flush() end -- Iterate through the fields for k = 1, index[i][j]:size(1) do local text = ffi.string( torch.data(content:narrow(1, index[i][j][k][1], 1))) local sequence = joe.utf8to32(text) code[i][j][k][1] = p code[i][j][k][2] = #sequence for l = 1, #sequence do code_value[p + l - 1] = sequence[l] + 1 if limit and code_value[p + l - 1] > limit then code_value[p + l - 1] = replace end end p = p + #sequence end end print('\rProcessed texts: '..index[i]:size(1)..'/'..index[i]:size(1)) end return {code = code, code_value = code_value} end -- UTF-8 decoding function -- Ref: http://lua-users.org/wiki/LuaUnicode function joe.utf8to32(utf8str) assert(type(utf8str) == 'string') local res, seq, val = {}, 0, nil for i = 1, #utf8str do local c = string.byte(utf8str, i) if seq == 0 then table.insert(res, val) seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or error('Invalid UTF-8 character sequence') val = bit32.band(c, 2^(8-seq) - 1) else val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F)) end seq = seq - 1 end table.insert(res, val) table.insert(res, 0) return res end joe.main() return joe ================================================ FILE: data/dianping/construct_pinyin.py ================================================ #!/usr/bin/python3 ''' Convert Chinese datasets to Pinyin format Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -o [output] ''' #Input file INPUT = '../data/dianping/train.csv' #Output file OUTPUT = 
'../data/dianping/train_pinyin.csv' import argparse import csv import pypinyin import unidecode # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output convertPinyin() # Convert the text in Chinese to pintin def convertPinyin(): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(' '.join(map( str.strip, map(lambda s: s.replace('\n', '\\n'), map(unidecode.unidecode, pypinyin.lazy_pinyin( row[i], style = pypinyin.TONE2)))))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/dianping/construct_reviews.lua ================================================ --[[ Create reviews in csv format from original txt file Copyright 2015-2016 Xiang Zhang Usage: th construct_reviews [input] [output] --]] local cjson = require('cjson') local io = require('io') local math = require('math') local joe = {} function joe.main() local input = arg[1] or '../data/dianping/reviews.txt' local output = arg[2] or '../data/dianping/reviews.csv' local ifd = io.open(input) local ofd = io.open(output, "w") local n = 0 local valid = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n, ', valid: ', valid) io.flush() end -- Skip the first line if n > 1 then -- Break content to url and 
json local point = line:find('%^') local data = line:sub(point + 2):gsub("^%s*(.-)%s*$", "%1") -- Parse the data local parsed = cjson.decode(data) local content = parsed.content:gsub("^%s*(.-)%s*$", "%1") local rate = tonumber(parsed.rate) -- Record to csv if rate and rate >= 0 and #content > 0 then valid = valid + 1 content = content:gsub("\n", "\\n"):gsub("\"", "\"\"") ofd:write('"'..rate..'","'..content..'"\n') end end end ifd:close() ofd:close() print('\rProcessed lines: '..n..', valid: '..valid) end joe.main() return joe ================================================ FILE: data/dianping/construct_string.lua ================================================ --[[ Create string serialization format from csv files Copyright 2015-2016 Xiang Zhang Usage: th construct_string.lua [input] [output] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train.csv' local output = arg[2] or '../data/dianping/train_string.t7b' print('Counting samples') local count, bytes, fields = joe.countSamples(input) for i, v in ipairs(count) do print('Number of samples in class '..i..': '..v) end print('Total number of bytes: '..bytes) print('Number of text fields: '..fields) print('Constructing data') local data = joe.constructData(input, count, bytes, fields) print('Saving to '..output) torch.save(output, data) end function joe.countSamples(input) local count = {} local bytes = 0 local fields = nil local n = 0 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) count[class] = count[class] and count[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") bytes = bytes + content[i]:len() + 1 end fields = fields 
or #content - 1 if fields ~= #content - 1 then error('Number of fields is not '..fields..' at line '..n) end end print('\rProcessed lines: '..n) fd:close() return count, bytes, fields end function joe.constructData(input, count, bytes, fields) local data = torch.ByteTensor(bytes) local index = {} for i, v in ipairs(count) do index[i] = torch.LongTensor(v, fields, 2) end local progress = {} local n = 0 local p = 1 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") index[class][progress[class]][i - 1][1] = p index[class][progress[class]][i - 1][2] = content[i]:len() ffi.copy(torch.data(data:narrow(1, p, content[i]:len() + 1)), content[i]) p = p + content[i]:len() + 1 end end print('\rProcessed lines: '..n) fd:close() return {content = data, index = index} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_tfidf.lua ================================================ --[[ Construct tfidf format from bag format Copyright 2016 Xiang Zhang Usage: th construct_tfidf.lua [input] [output] [list] [limit] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_charbag.t7b' local output = arg[2] or '../data/dianping/train_charbagtfidf.t7b' local list = arg[3] or '../data/dianping/train_charbag_list.csv' local limit = arg[4] and tonumber(arg[4]) or 200000 print('Loading data from '..input) local data = torch.load(input) print('Loading frequency list from '..list) local freq = joe.readList(list) print('Frequency list length '..freq:size(1)) print('Constructing bag-of-elements TFIDF data') local tfidf = joe.constructTfidf(data, freq, limit) print('Saving to '..output) torch.save(output, tfidf) end function joe.readList(list) local freq = {} local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) end return torch.Tensor(freq) end function joe.constructTfidf(data, freq, limit) local bag, bag_index, bag_value = data.bag, data.bag_index, data.bag_value local tfidf_value = bag_value:clone() local freq = freq if freq:size(1) > limit then freq:narrow(1, limit + 1, freq:size(1) - limit):zero() elseif freq:size(1) < limit + 1 then local new_freq = 
freq.new(limit + 1):zero() new_freq:narrow(1, 1, freq:size(1)):copy(freq) freq = new_freq end freq:apply(function (x) return x > 0 and math.log(1/x) or 0 end) local indexed = freq:index(1, bag_index) tfidf_value:cmul(indexed) -- Iterate through the classes for i = 1, #bag do print('Processing for class '..i) -- Iterate through the samples for j = 1, bag[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing sample: ', j, '/', bag[i]:size(1)) io.flush() end if bag[i][j][2] > 0 and tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed samples: '..bag[i]:size(1)..'/'..bag[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = tfidf_value} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_word.lua ================================================ --[[ Create word serialization format from csv files Copyright 2015-2016 Xiang Zhang Usage: th construct_word.lua [input] [output] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.csv' local output = arg[2] or '../data/dianping/train_word.t7b' print('Counting samples') local count, length, fields = joe.countSamples(input) for i, v in ipairs(count) do print('Number of samples in class '..i..': '..v) end print('Total number of words: '..length) print('Number of text fields: '..fields) print('Constructing data') local data = joe.constructData(input, count, length, fields) print('Saving to '..output) torch.save(output, data) end function joe.countSamples(input) local count = {} local length = 0 local fields = nil local n = 0 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) count[class] = count[class] and count[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') local _, current_length = content[i]:gsub('(%d+)', '%1') length = length + current_length end fields = fields or #content - 1 if fields ~= #content - 1 then 
error('Number of fields is not '..fields..' at line '..n) end end print('\rProcessed lines: '..n) fd:close() return count, length, fields end function joe.constructData(input, count, length, fields) local data = torch.LongTensor(length) local index = {} for i, v in ipairs(count) do index[i] = torch.LongTensor(v, fields, 2) end local progress = {} local n = 0 local p = 1 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') index[class][progress[class]][i - 1][1] = p local current_length = 0 for word in content[i]:gmatch('%d+') do data[p] = tonumber(word) p = p + 1 end index[class][progress[class]][i - 1][2] = p - index[class][progress[class]][i - 1][1] end end print('\rProcessed lines: '..n) fd:close() return {code = index, code_value = data} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_wordbag.lua ================================================ --[[ Construct word bag-of-element format Copyright 2016 Xiang Zhang Usage: th construct_wordbag.lua [input] [output] [limit] [replace] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_wordbag.t7b' local limit = arg[3] and tonumber(arg[3]) or 200000 local replace = arg[4] and tonumber(arg[4]) or 200001 print('Loading data from '..input) local data = torch.load(input) print('Counting words') local count = joe.countBag(data, limit, replace) print('Total number of values: '..count) print('Constructing word bag data') local bag = joe.constructBag(data, count, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.countBag(data, limit, replace) local code, code_value = data.code, data.code_value local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local word = code_value[code[i][j][k][1] + l - 1] if word > limit then word = replace end if not index[word] then count = 
count + 1 index[word] = 1 else index[word] = index[word] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, count, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local word = code_value[code[i][j][k][1] + l - 1] if word > limit then word = replace end if not index[word] then count = count + 1 index[word] = 1 pointer[#pointer + 1] = word else index[word] = index[word] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.main() return joe ================================================ FILE: data/dianping/construct_wordgram.lua ================================================ --[[ Constructngrams format from serialization Copyright 2016 Xiang Zhang Usage: th construct_wordgram.lua [input] [output] [list] [gram] [limit] [replace] --]] local io = require('io') 
local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_wordgram.t7b' local list = arg[3] or '../data/dianping/train_wordgram_list.csv' local gram = arg[4] and tonumber(arg[4]) or 5 local limit = arg[5] and tonumber(arg[5]) or 1000000 local replace = arg[6] and tonumber(arg[6]) or 1000001 print('Loading data from '..input) local data = torch.load(input) print('Reading frequency from '..list) local freq, dict = joe.readList(list) print('Counting character ngrams data') local count = joe.countBag(data, dict, gram, limit, replace) print('Total number of ngrams in data is '..count) print('Constructing character bag data') local bag = joe.constructBag(data, dict, count, gram, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.readList(list) local freq_table = tds.Vec() local dict = tds.Hash() local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq_table[#freq_table + 1] = tonumber(content[3]) dict[content[1]] = #freq_table end local freq = torch.Tensor(#freq_table) for i, v in ipairs(freq_table) do freq[i] = v end return freq, dict end function joe.countBag(data, dict, gram, limit, replace) local count = 0 local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + 
l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then index[ngram_index] = 0 count = count + 1 end index[ngram_index] = index[ngram_index] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, dict, count, gram, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' 
'..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then count = count + 1 index[ngram_index] = 0 pointer[#pointer + 1] = ngram_index end index[ngram_index] = index[ngram_index] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append 
it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_wordtoken.lua ================================================ --[[ Construct word token format from csv files Copyright 2017 Xiang Zhang Usage: th construct_wordtoken [input] [list] [output] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.csv' local list = arg[2] or '../data/dianping/train_word_list.csv' local output = arg[3] or '../data/dianping/train_wordtoken.txt' print('Reading list from '..list) local word_list = joe.readList(list) print('Constructing word token') joe.constructToken(input, output, word_list) end function joe.readList(list) local word_list = tds.Vec() local fd = io.open(list) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) word_list[#word_list + 1] = content[1]:gsub('\\n', '\n'):gsub('[%z\001-\032\127]', ' '):gsub( '^%s*(.-)%s*$', '%1') end print('\rProcessed lines: '..n) fd:close() return word_list end function joe.constructToken(input, output, word_list) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = 
tonumber(content[1]) ofd:write('__label__', class) for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') for word in content[i]:gmatch('%d+') do local word_string = word_list[tonumber(word)] or '' ofd:write(' ', word_string) end end ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/convert_string_code.lua ================================================ --[[ Convert string serialization to code Copyright 2016 Xiang Zhang Usage: th convert_string_code.lua [input] [output] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_string.t7b' local output = arg[2] or '../data/dianping/train_string_code.t7b' print('Reading from '..input) local input_data = torch.load(input) print('Converting to code 
format') local output_data = joe.convert(input_data) print('Saving to '..output) torch.save(output, output_data) end function joe.convert(input_data) local output_data = {} output_data.code = input_data.index output_data.code_value = input_data.content return output_data end joe.main() return joe ================================================ FILE: data/dianping/count_chargram.lua ================================================ --[[ Parallelized chargram counting program Copyright Xiang Zhang 2016 Usage: th count_chargram.lua [input] [output_prefix] [grams] [chunks] [threads] [batch] [buffer] Comment: This program is a map-reduce like process. During map, each sample is separated into character-ngrams. During reduce, these character-ngrams are aggregated per-batch samples and output to file chunks. Which files chunk to put the gram is determined by a hash value of the gram string, therefore instances of the same gram always end up in the same file. This program is necessary because a linear aggregation program can easily overflow memory for several millions of samples. 
--]] local hash = require('hash') local io = require('io') local math = require('math') local tds = require('tds') local threads = require('threads') local torch = require('torch') local Queue = require('queue') -- Library configurations threads.serialization('threads.sharedserialize') -- A Logic Named Joe local joe = {} -- Constant values joe.SEED = 0 -- Main program entry function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output_prefix = arg[2] or '../data/dianping/train_chargram_count/' local num_grams = arg[3] and tonumber(arg[3]) or 5 local chunks = arg[4] and tonumber(arg[4]) or 100 local num_threads = arg[5] and tonumber(arg[5]) or 10 local batch = arg[6] and tonumber(arg[6]) or 100000 local buffer = arg[7] and tonumber(arg[7]) or 1000 print('Loading data from '..input) local data = torch.load(input) print('Opening output files with prefix '..output_prefix) local fds = {} for i = 1, chunks do fds[i] = io.open(output_prefix..tostring(i)..'.csv', 'w') end joe.fds = fds print('Setting finished threads to 0') joe.finished = 0 print('Creating record') joe.record = tds.Hash() print('Setting item counter to 0') joe.count = 0 print('Storing options') joe.batch = batch print('Creating queues') local queue = Queue(buffer) print('Creating mutex') local mutex = threads.Mutex() print('Creating '..num_threads..' 
threads') local init_thread = joe.initThread() local block = threads.Threads(num_threads, init_thread) block:specific(true) print('Deploying thread jobs') joe.deployThreads(data, num_grams, queue, mutex, block, num_threads) print('Entering main thread loop') while joe.finished < num_threads do local rpc = queue:pop() joe[rpc.func](unpack(rpc.arg)) end if math.fmod(joe.count, batch) ~= 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end print('Destroying mutex') mutex:free() print('Closing files') for _, fd in ipairs(fds) do fd:close() end print('Synchronizing and terminating the threads') block:synchronize() block:terminate() end -- Thread initialization callback function joe.initThread() return function () local torch = require('torch') local Queue = require('queue') end end -- Thread job deploying threads function joe.deployThreads(data, num_grams, queue, mutex, block, num_threads) local progress = torch.LongTensor(2) progress[1] = 1 progress[2] = 0 for i = 1, num_threads do print('Deploying job for thread '..i) local thread_job = joe.threadJob( data, num_grams, queue, mutex:id(), progress, i) block:addjob(i, thread_job) local rpc = queue:pop() while rpc.func ~= 'notifyDeploy' do joe[rpc.func](unpack(rpc.arg)) rpc = queue:pop() end print('rpc = notifyDeploy, thread = '..rpc.arg[1]) end end -- Write records to file function joe.writeRecord() for code, item in pairs(joe.record) do local chunk = hash.hash(code, joe.SEED, #joe.fds) + 1 joe.fds[chunk]:write( '"', code, '","', item[1]:gsub('\n', '\\n'):gsub('"', '""'), '","', item[2], '","', item[3], '"\n') end joe.record = tds.Hash() collectgarbage() end -- Thread job function joe.threadJob(data, num_grams, queue, mutex_id, progress, thread_id) local utf8str = joe.utf8str() return function() local math = require('math') local string = require('string') local threads = require('threads') local mutex = threads.Mutex(mutex_id) -- Notify the deployment queue:push{func = 'notifyDeploy', arg = 
{__threadid}} local code, code_value = data.code, data.code_value local class, item -- Obtain next sample local function nextSample() mutex:lock() if code[progress[1]] == nil then class = progress[1] item = progress[2] elseif code[progress[1]]:size(1) < progress[2] + 1 then progress[1] = progress[1] + 1 progress[2] = 1 class = progress[1] item = progress[2] else progress[2] = progress[2] + 1 class = progress[1] item = progress[2] end mutex:unlock() end local n = 0 nextSample() while code[class] ~= nil do n = n + 1 if math.fmod(n, 100) == 0 then queue:push{ func = 'print', arg = {__threadid, 'Processing class '..class..', item '..item.. ', total '..n}} collectgarbage() end local term_count, doc_count = {}, {} -- Iterate through the fields for i = 1, code[class][item]:size(1) do -- Iterate through the grams for j = 1, num_grams do -- Iterate through the positions for k = 1, code[class][item][i][2] - j + 1 do local code_string = tostring( code_value[code[class][item][i][1] + k - 1]) for l = 2, j do code_string = code_string..' '..tostring( code_value[code[class][item][i][1] + k - 1 + l - 1]) end if not term_count[code_string] then term_count[code_string] = 1 doc_count[code_string] = 1 else term_count[code_string] = term_count[code_string] + 1 end end end end -- Compress record to data local items = {} for code_string, _ in pairs(term_count) do local gram_string = '' for value in code_string:gmatch('[%S]+') do local value = tonumber(value) gram_string = gram_string.. 
((value <= 65536 and (value > 32 or value == 11)) and utf8str(value - 1) or ' ') end items[#items + 1] = { code_string, gram_string, term_count[code_string], doc_count[code_string]} end -- Send data to record queue:push{func = 'recordItem', arg = {__threadid, items}} nextSample() end -- Notify main thread that this thread has ended queue:push{func = 'notifyExit', arg = {__threadid}} end end -- Record item function joe.recordItem(thread_id, items) for _, item in pairs(items) do if joe.record[item[1]] then joe.record[item[1]][2] = joe.record[item[1]][2] + item[3] joe.record[item[1]][3] = joe.record[item[1]][3] + item[4] else joe.record[item[1]] = tds.Vec{item[2], item[3], item[4]} end end joe.count = joe.count + 1 -- Check write if math.fmod(joe.count, joe.batch) == 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end end -- Print information function joe.print(thread_id, message) print('rpc = print, thread = '..thread_id..', message = '..message) end -- Notify exit function joe.notifyExit(thread_id) joe.finished = joe.finished + 1 print('rpc = notifyExit, thread = '..thread_id.. 
', finished = '..joe.finished) end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str() local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} return function (decimal) local string = require('string') if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end end joe.main() return joe ================================================ FILE: data/dianping/count_wordgram.lua ================================================ --[[ Parallelized wordgram counting program Copyright Xiang Zhang 2016 Usage: th count_wordgram.lua [input] [output_prefix] [list] [grams] [chunks] [threads] [batch] [buffer] Comment: This program is a map-reduce like process. During map, each sample is separated into character-ngrams. During reduce, these character-ngrams are aggregated per-batch samples and output to file chunks. Which files chunk to put the gram is determined by a hash value of the gram string, therefore instances of the same gram always end up in the same file. This program is necessary because a linear aggregation program can easily overflow memory for several millions of samples. 
--]] local hash = require('hash') local io = require('io') local math = require('math') local tds = require('tds') local threads = require('threads') local torch = require('torch') local Queue = require('queue') -- Library configurations threads.serialization('threads.sharedserialize') -- A Logic Named Joe local joe = {} -- Constant values joe.SEED = 0 -- Main program entry function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output_prefix = arg[2] or '../data/dianping/train_wordgram_count/' local list = arg[3] or '../data/dianping/train_word_list.csv' local num_grams = arg[4] and tonumber(arg[4]) or 5 local chunks = arg[5] and tonumber(arg[5]) or 100 local num_threads = arg[6] and tonumber(arg[6]) or 10 local batch = arg[7] and tonumber(arg[7]) or 100000 local buffer = arg[8] and tonumber(arg[8]) or 1000 print('Loading data from '..input) local data = torch.load(input) print('Loading list from '..list) local freq, word_list = joe.readList(list) print('Opening output files with prefix '..output_prefix) local fds = {} for i = 1, chunks do fds[i] = io.open(output_prefix..tostring(i)..'.csv', 'w') end joe.fds = fds print('Setting finished threads to 0') joe.finished = 0 print('Creating record') joe.record = tds.Hash() print('Setting item counter to 0') joe.count = 0 print('Storing options') joe.batch = batch print('Creating queues') local queue = Queue(buffer) print('Creating mutex') local mutex = threads.Mutex() print('Creating '..num_threads..' 
threads') local init_thread = joe.initThread() local block = threads.Threads(num_threads, init_thread) block:specific(true) print('Deploying thread jobs') joe.deployThreads( data, word_list, num_grams, queue, mutex, block, num_threads) print('Entering main thread loop') while joe.finished < num_threads do local rpc = queue:pop() joe[rpc.func](unpack(rpc.arg)) end if math.fmod(joe.count, batch) ~= 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end print('Destroying mutex') mutex:free() print('Closing files') for _, fd in ipairs(fds) do fd:close() end print('Synchronizing and terminating the threads') block:synchronize() block:terminate() end -- Thread initialization callback function joe.initThread() return function () local torch = require('torch') local Queue = require('queue') end end -- Thread job deploying threads function joe.deployThreads( data, word_list, num_grams, queue, mutex, block, num_threads) local progress = torch.LongTensor(2) progress[1] = 1 progress[2] = 0 for i = 1, num_threads do print('Deploying job for thread '..i) local thread_job = joe.threadJob( data, word_list, num_grams, queue, mutex:id(), progress, i) block:addjob(i, thread_job) local rpc = queue:pop() while rpc.func ~= 'notifyDeploy' do joe[rpc.func](unpack(rpc.arg)) rpc = queue:pop() end print('rpc = notifyDeploy, thread = '..rpc.arg[1]) end end -- Write records to file function joe.writeRecord() for code, item in pairs(joe.record) do local chunk = hash.hash(code, joe.SEED, #joe.fds) + 1 joe.fds[chunk]:write( '"', code, '","', item[1]:gsub('\n', '\\n'):gsub('"', '""'), '","', item[2], '","', item[3], '"\n') end joe.record = tds.Hash() collectgarbage() end -- Thread job function joe.threadJob( data, word_list, num_grams, queue, mutex_id, progress, thread_id) local utf8str = joe.utf8str() return function() local math = require('math') local string = require('string') local threads = require('threads') local mutex = threads.Mutex(mutex_id) -- Notify the 
deployment queue:push{func = 'notifyDeploy', arg = {__threadid}} local code, code_value = data.code, data.code_value local class, item -- Obtain next sample local function nextSample() mutex:lock() if code[progress[1]] == nil then class = progress[1] item = progress[2] elseif code[progress[1]]:size(1) < progress[2] + 1 then progress[1] = progress[1] + 1 progress[2] = 1 class = progress[1] item = progress[2] else progress[2] = progress[2] + 1 class = progress[1] item = progress[2] end mutex:unlock() end local n = 0 nextSample() while code[class] ~= nil do n = n + 1 if math.fmod(n, 100) == 0 then queue:push{ func = 'print', arg = {__threadid, 'Processing class '..class..', item '..item.. ', total '..n}} collectgarbage() end local term_count, doc_count = {}, {} -- Iterate through the fields for i = 1, code[class][item]:size(1) do -- Iterate through the grams for j = 1, num_grams do -- Iterate through the positions for k = 1, code[class][item][i][2] - j + 1 do local code_string = tostring( code_value[code[class][item][i][1] + k - 1]) for l = 2, j do code_string = code_string..' '..tostring( code_value[code[class][item][i][1] + k - 1 + l - 1]) end if not term_count[code_string] then term_count[code_string] = 1 doc_count[code_string] = 1 else term_count[code_string] = term_count[code_string] + 1 end end end end -- Compress record to data local items = {} for code_string, _ in pairs(term_count) do local gram_string = '' for value in code_string:gmatch('[%S]+') do local value = tonumber(value) gram_string = gram_string..' 
'..(word_list[value] or '') end items[#items + 1] = { code_string, gram_string, term_count[code_string], doc_count[code_string]} end -- Send data to record queue:push{func = 'recordItem', arg = {__threadid, items}} nextSample() end -- Notify main thread that this thread has ended queue:push{func = 'notifyExit', arg = {__threadid}} end end -- Record item function joe.recordItem(thread_id, items) for _, item in pairs(items) do if joe.record[item[1]] then joe.record[item[1]][2] = joe.record[item[1]][2] + item[3] joe.record[item[1]][3] = joe.record[item[1]][3] + item[4] else joe.record[item[1]] = tds.Vec{item[2], item[3], item[4]} end end joe.count = joe.count + 1 -- Check write if math.fmod(joe.count, joe.batch) == 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end end -- Print information function joe.print(thread_id, message) print('rpc = print, thread = '..thread_id..', message = '..message) end -- Notify exit function joe.notifyExit(thread_id) joe.finished = joe.finished + 1 print('rpc = notifyExit, thread = '..thread_id.. 
', finished = '..joe.finished) end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str() local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} return function (decimal) local string = require('string') if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end end function joe.readList(list) local freq = {} local word_list = tds.Hash() local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) word_list[#freq] = content[1]:gsub('\\n', '\n') end return torch.Tensor(freq), word_list end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/limit_code.lua ================================================ --[[ Limit the maximum code value Copyright 2016 Xiang Zhang Usage: th limit_code.lua [input] [output] [limit] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_word_limit.t7b' local limit = arg[3] and tonumber(arg[3]) or 200000 print('Loading data from '..input) local data = torch.load(input) print('Limiting code to '..limit) local code = joe.limitCode(data, limit) print('Saving to '..output) torch.save(output, code) end function joe.limitCode(data, limit) local code, code_value = data.code, data.code_value local preserve = code_value:le(limit):long() local replace = code_value:gt(limit):long() code_value:cmul(preserve):add(replace:mul(limit + 1)) return {code = code, code_value = code_value} end joe.main() return joe ================================================ FILE: data/dianping/limit_csvlines.sh ================================================ #!/bin/bash # Limit csv files to designated number of lines # Copyright 2015 Xiang Zhang # # Usage: bash limit_csvlines.sh [input] [output] [limit] set -x; set -e; head -n ${3:-1000001} $1 > $2; ================================================ FILE: data/dianping/queue.lua ================================================ --[[ Multithreaded queue based on tds Copyright 2015 Xiang Zhang --]] local class = require('pl.class') 
local ffi = require('ffi')
local serialize = require('threads.sharedserialize')
local tds = require('tds')
local threads = require('threads')
local torch = require('torch')

-- Append an underscore to distinguish between metatable and class name
local Queue_ = torch.class('Queue')

-- Constructor
-- n: buffer size
-- State layout: self.data maps slot index -> serialized item string;
-- self.pointer is a LongTensor where [1] is the next write slot, [2] is
-- the next read slot, and [3] is the current number of buffered items.
-- A mutex plus two condition variables make this a bounded blocking ring
-- buffer usable across threads.
function Queue_:__init(size)
   self.data = tds.hash()
   self.pointer = torch.LongTensor(3):fill(1)
   self.pointer[3] = 0
   self.size = size or 10
   self.mutex = threads.Mutex()
   self.added_condition = threads.Condition()
   self.removed_condition = threads.Condition()
end

-- Blocking push: serialize the item, wait until the buffer has a free
-- slot, store it, then signal a waiting consumer.
function Queue_:push(item)
   -- Serialize before taking the lock to keep the critical section short.
   local storage = serialize.save(item)
   self.mutex:lock()
   while self.pointer[3] == self.size do
      self.removed_condition:wait(self.mutex)
   end
   self.data[self.pointer[1]] = storage:string()
   -- Advance the write slot circularly (1-based ring arithmetic).
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
end

-- Blocking pop: wait until an item is available, take it, signal a
-- waiting producer, and return the deserialized item.
function Queue_:pop()
   self.mutex:lock()
   while self.pointer[3] == 0 do
      self.added_condition:wait(self.mutex)
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   -- Advance the read slot circularly.
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   -- Deserialize after releasing the lock.
   local item = serialize.load(storage)
   return item
end

-- Non-blocking push: return nil immediately when the buffer appears full
-- (checked once without the lock, then re-checked under it); otherwise
-- behaves like push and returns the item to indicate success.
function Queue_:push_async(item)
   if self.pointer[3] == self.size then
      return
   end
   local storage = serialize.save(item)
   self.mutex:lock()
   -- Re-check under the lock: another producer may have filled the
   -- buffer since the unlocked test above.
   if self.pointer[3] == self.size then
      self.mutex:unlock()
      return
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
   return item
end

-- Non-blocking pop: return nil immediately when the buffer appears empty
-- (same double-check pattern as push_async); otherwise behaves like pop.
function Queue_:pop_async()
   if self.pointer[3] == 0 then
      return
   end
   self.mutex:lock()
   -- Re-check under the lock.
   if self.pointer[3] == 0 then
      self.mutex:unlock()
      return
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2],
self.size) + 1 self.pointer[3] = self.pointer[3] - 1 self.mutex:unlock() self.removed_condition:signal() local item = serialize.load(storage) return item end function Queue_:free() self.mutex:free() self.added_condition:free() self.removed_condition:free() end function Queue_:__write(f) local data = self.data f:writeLong(torch.pointer(data)) tds.C.tds_hash_retain(data) local pointer = self.pointer f:writeLong(torch.pointer(pointer)) pointer:retain() f:writeObject(self.size) f:writeObject(self.mutex:id()) f:writeObject(self.added_condition:id()) f:writeObject(self.removed_condition:id()) end function Queue_:__read(f) local data = f:readLong() data = ffi.cast('tds_hash&', data) ffi.gc(data, tds.C.tds_hash_free) self.data = data local pointer = f:readLong() pointer = torch.pushudata(pointer, 'torch.LongTensor') self.pointer = pointer self.size = f:readObject() self.mutex = threads.Mutex(f:readObject()) self.added_condition = threads.Condition(f:readObject()) self.removed_condition = threads.Condition(f:readObject()) end -- Return class name, not the underscored metatable return Queue ================================================ FILE: data/dianping/remove_duplication.py ================================================ #!/usr/bin/python3 ''' Remove duplication from csv format file Copyright 2015 Xiang Zhang Usage: python3 remove_duplication.py -i [input] -o [output] ''' # Python 3 compatibility from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals # Input file INPUT = '../data/dianping/reviews_nonull.csv' # Output file OUTPUT = '../data/dianping/reviews_nodup.csv' import argparse import csv # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = 
args.input OUTPUT = args.output removeDuplicate() # Deduplicate the text using python set def removeDuplicate(): # Open the files ifd = open(INPUT, newline = '', encoding = 'utf-8') ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 valid = 0 s = set() for row in reader: line = ' '.join(row[1:]) n = n + 1 if line not in s: valid = valid + 1 s.add(line) writer.writerow(row) if n % 10000 == 0: print('\rProcessing line: {}, valid: {}'.format(n, valid), end = '') print('\rProcessed lines: {}, valid: {}'.format(n, valid)) if __name__ == '__main__': main() ================================================ FILE: data/dianping/remove_null.sh ================================================ #!/bin/bash # Remove NULL character from file # Copyright 2015 Xiang Zhang # # Usage: bash remove_null.sh [input] [output] set -x; set -e; tr -d '\000' < $1 > $2; ================================================ FILE: data/dianping/segment_roman_word.lua ================================================ --[[ Create romananized word data from romanized data in csv Copyright 2016 Xiang Zhang Usage: th segment_roman_word.lua [input] [output] [list] [read] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_pinyin.csv' local output = arg[2] or '../data/dianping/train_pinyin_word.csv' local list = arg[3] or '../data/dianping/train_pinyin_word_list.csv' local read = (arg[4] == 'true') local word_index, word_total if read then print('Reading word index') word_index, word_total = joe.readWords(list) else print('Counting words') local word_count, word_freq = joe.splitWords(input) print('Sorting words by count') word_index, word_total = 
joe.sortWords(list, word_count, word_freq)
   end
   print('Constructing word index output')
   joe.constructWords(input, output, word_index, word_total)
end

-- Read a previously saved word list from a csv file.
-- Each row's first column is an escaped word; the word's line number
-- becomes its index (rows are presumably in rank order -- confirm against
-- joe.sortWords' output format).
-- Returns the word -> index hash and the total number of words read.
function joe.readWords(list)
   local word_index = tds.Hash()
   local fd = io.open(list)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: '..n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      -- Unescape newlines stored as the two-character sequence '\n'.
      content[1] = content[1]:gsub('\\n', '\n')
      word_index[content[1]] = n
   end
   print('\rProcessed lines: '..n)
   fd:close()
   return word_index, n
end

-- Scan the input csv and tally words.
-- Returns two hashes: word -> total occurrence count, and word -> document
-- frequency (fraction of input lines containing the word at least once).
-- Punctuation characters are padded with spaces so they tokenize as
-- standalone words.
function joe.splitWords(input)
   local word_count, word_freq = tds.Hash(), tds.Hash()
   local fd = io.open(input)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      -- Words already seen on this line, for document-frequency counting.
      -- Declared local: the original omitted 'local' and leaked a global.
      local field_set = {}
      for i = 2, #content do
         content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1")
         -- Surround punctuation with spaces so it splits into its own token.
         content[i] = content[i]:gsub('(%p)', ' %1 ')
         for word in content[i]:gmatch('[%S]+') do
            word_count[word] = (word_count[word] or 0) + 1
            if not field_set[word] then
               field_set[word] = true
               word_freq[word] = (word_freq[word] or 0) + 1
            end
         end
      end
   end
   print('\rProcessed lines: '..n)
   fd:close()
   -- Normalizing word frequencies
   for key, value in pairs(word_freq) do
      word_freq[key] = value / n
   end
   return word_count, word_freq
end

-- Sort words by descending count, write (word, count, frequency) rows to
-- the list csv, and return the word -> rank hash plus the vocabulary size.
function joe.sortWords(list, word_count, word_freq)
   -- Sort the list of words
   local word_list = tds.Vec()
   for word, _ in pairs(word_count) do
      word_list[#word_list + 1] = word
   end
   word_list:sort(function (w, v) return word_count[w] > word_count[v] end)
   -- Create the word index
   local word_index = tds.Hash()
   for index, word in ipairs(word_list) do
      word_index[word] = index
   end
   -- Write it to file
   local fd = io.open(list, 'w')
   for index, word in ipairs(word_list) do
      fd:write('"', word:gsub("\n", "\\n"):gsub("\"", "\"\""), '","',
               word_count[word], '","', word_freq[word], '"\n')
   end
   fd:close()
   return word_index, #word_list
end

function
joe.constructWords(input, output, word_index, word_total)
   -- Rewrite each csv row: keep the class field, replace every text field
   -- with the space-joined word indices (unknown words map to word_total + 1).
   local ifd = io.open(input)
   local ofd = io.open(output, 'w')
   local n = 0
   for line in ifd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      ofd:write('"', content[1], '"')
      for i = 2, #content do
         content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1")
         content[i] = content[i]:gsub('(%p)', ' %1 ')
         local first_write = true
         ofd:write(',"')
         for word in content[i]:gmatch('[%S]+') do
            local index = word_index[word] or word_total + 1
            if first_write then
               first_write = false
               ofd:write(index)
            else
               ofd:write(' ', index)
            end
         end
         ofd:write('"')
      end
      ofd:write('\n')
   end
   print('\rProcessed lines: '..n)
   ifd:close()
   ofd:close()
end

-- Parsing csv line
-- Ref: http://lua-users.org/wiki/LuaCsv
function joe.parseCSVLine(line,sep)
   local res = {}
   local pos = 1
   sep = sep or ','
   while true do
      local c = string.sub(line,pos,pos)
      if (c == "") then break end
      if (c == '"') then
         -- quoted value (ignore separator within)
         local txt = ""
         repeat
            local startp,endp = string.find(line,'^%b""',pos)
            txt = txt..string.sub(line,startp+1,endp-1)
            pos = endp + 1
            c = string.sub(line,pos,pos)
            if (c == '"') then txt = txt..'"' end
            -- check first char AFTER quoted string, if it is another
            -- quoted string without separator, then append it
            -- this is the way to "escape" the quote char in a quote.
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Chinese datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 segment_word.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/dianping/train.csv' #Output file OUTPUT = '../data/dianping/train_word.csv' # List file LIST = '../data/dianping/train_word_list.csv' # Read already defined word list READ = False import argparse import csv import jieba # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in 
reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = jieba.cut(field) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, 
quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = jieba.cut(field) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/dianping/select_data.lua ================================================ --[[ Select data from non-duplicate datasets Copyright 2015 Xiang Zhang Usage: th select_data.lua [count] [input] [output] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local count = arg[1] or '../data/dianping/reviews_count.csv' local input = arg[2] or '../data/dianping/reviews_nodup.csv' local output = arg[3] or '../data/dianping/data.csv' local map = {} local index = {} local cfd = io.open(count) for line in cfd:lines() do local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local target = tonumber(content[2]) local total = tonumber(content[3]) local choose = tonumber(content[4]) print('Constructing index '..class..'>'..target..': '..choose..'/'..total) map[class] = target index[class] = torch.ByteTensor(total):fill(1) local perm = torch.randperm(total) for i = 1, total - choose do index[class][perm[i]] = 0 end end cfd:close() local n = 0 local progress = {} local ifd = io.open(input) local ofd = io.open(output, 'w') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local target = map[class] progress[class] = progress[class] and 
progress[class] + 1 or 1 if index[class] and index[class][progress[class]] == 1 then ofd:write( '"', target, '"', (line:sub(content[1]:len() + 3) or ''), '\n') end end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/shuffle_lines.sh ================================================ #!/bin/bash # Shuffle lines in a text file # Copyright 2017 Xiang Zhang # # Usage: bash shuffle_lines.sh [input] [output] set -x; set -e; shuf $1 > $2; ================================================ FILE: data/dianping/sort_gram_count.sh ================================================ #!/bin/bash # Sort distributed grams file # Copyright 2016 Xiang Zhang # # Usage: bash sort_gram_count.sh [input_directory] [output_directory] [temporary] [memory] set -x; set -e; for file in $1/*.csv; do sort -S ${4:-50%} -t ',' -k1,1 -T ${3:-/scratch} $file > $2/`basename 
$file` done;

================================================
FILE: data/dianping/sort_gram_list.sh
================================================
#!/bin/bash

# Sort list of grams and cut the count
# Copyright 2016 Xiang Zhang
#
# Usage: bash sort_gram_list.sh [input] [output] [temporary] [memory]

set -x;
set -e;

# Fix: quote the file arguments so paths containing spaces work
sort -S ${4:-50%} -t ',' -k1,1nr -T ${3:-/scratch} "$1" | cut -f 2- -d ',' > "$2";

================================================
FILE: data/dianping/split_lines.sh
================================================
#!/bin/bash

# Split lines in a text file
# Copyright 2017 Xiang Zhang
#
# Usage: bash split_lines.sh [lines] [input] [output_prefix]
#
# Note: .txt postfix will be automatically added.

set -x;
set -e;

# Fix: quote the arguments so paths containing spaces work
split -d -a 1 --additional-suffix=.txt -l "$1" "$2" "$3";

================================================
FILE: data/dianping/split_train.lua
================================================
--[[
Split data into training and testing subsets
Copyright 2015 Xiang Zhang

Usage: th split_train [count] [input] [train] [test]
--]]

local io = require('io')
local math = require('math')
local torch = require('torch')

-- A Logic Named Joe
local joe = {}

-- Read per-class totals and test counts, build a random test mask per class,
-- then route each input row to the train or test file.
function joe.main()
   local count = arg[1] or '../data/dianping/data_count.csv'
   local input = arg[2] or '../data/dianping/data.csv'
   local train = arg[3] or '../data/dianping/train.csv'
   local test = arg[4] or '../data/dianping/test.csv'

   local index = {}
   local cfd = io.open(count)
   for line in cfd:lines() do
      local content = joe.parseCSVLine(line)
      local class = tonumber(content[1])
      local total = tonumber(content[2])
      local train_count = tonumber(content[3])
      local test_count = tonumber(content[4])
      print('Constructing index '..class..': '..train_count..
','..test_count..','..total) index[class] = torch.ByteTensor(total):zero() local perm = torch.randperm(total) for i = 1, test_count do index[class][perm[i]] = 1 end end cfd:close() local n = 0 local progress = {} local ifd = io.open(input) local trfd = io.open(train, 'w') local tefd = io.open(test, 'w') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 if index[class] and index[class][progress[class]] == 0 then trfd:write(line, '\n') else tefd:write(line, '\n') end end print('\rProcessed lines: '..n) ifd:close() trfd:close() tefd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/ifeng/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/ifeng/article' OUTPUT = '../data/ifeng/topic/news.csv' # Classes # 1: Mainlaind China Politics # 2: International # 3: Taiwan, Hong Kong and Macau Politics # 4: Military # 5: Society CLASSES = {'11528': 1, '11574': 2, '11490': 3, '7609': 3, '4550': 4, '7837': 5} def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files for prefix in CLASSES: files = glob.glob(INPUT + '/' + prefix + '_*.json.xz') index = CLASSES[prefix] n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] n = n + 1 writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/jd/count_data.lua ================================================ --[[ Count data for each class and length Copyright 2016 Xiang Zhang Usage: th count_data.lua [input] [output] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/jd/sentiment/comment_sorted_nonull.csv' local output = arg[2] or '../data/jd/sentiment/comment_sorted_count.t7b' print('Counting data') local count = joe.count(input) joe.count = count print('Saving to '..output) torch.save(output, count) print('Plotting result') joe.plot(count) end function joe.count(input) local count = {} local max_class = 0 local max_length = 0 local fd = io.open(input) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local length = 0 for i = 2, #content do length = length + content[i]:gsub("^%s*(.-)%s*$", "%1"):len() end count[class] = count[class] or {} count[class][length] = (count[class][length] or 0) + 1 if class > max_class then max_class = class end if length > max_length then max_length = length end end print('\rProcessed lines: '..n) print('total classes = '..max_class..', maximum length = '..max_length) fd:close() local result = torch.Tensor(max_class, max_length):zero() for class, class_count in pairs(count) do if class > 0 
then for length, length_count in pairs(class_count) do if length > 0 then result[class][length] = length_count end end end end return result end function joe.plot(count) require('gnuplot') local cumulated = count:cumsum(2) local plots = {} for class = 1, cumulated:size(1) do plots[class] = {tostring(class), cumulated[class], '-'} end local figure = gnuplot.figure() gnuplot.plot(unpack(plots)) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/jd/create_comment.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of comments Copyright 2016 Xiang Zhang Usage: python3 create_data.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/jd/comment/*.json.xz' OUTPUT = '../data/jd/sentiment/comment.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) score = int(review['content'].get('score', -1)) title = review['content'].get('title', '') content = review['content'].get('content', '') if score != -1: n = n + 1 writer.writerow([score, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/jd/limit_length.lua ================================================ --[[ Limit length for data Copyright 2016 Xiang Zhang Usage: th limit_length.lua [input] [output] [min] [max] --]] -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/jd/sentiment/comment_sorted_nonull.csv' local output = arg[2] or '../data/jd/sentiment/comment_sorted_limited.csv' local min = tonumber(arg[3] or 0) local max = tonumber(arg[4] or math.huge) print('Limiting data') joe.limit(input, output, min, max) end function joe.limit(input, output, min, max) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 local m = 0 for line in ifd:lines() do n = n + 1 local content = joe.parseCSVLine(line) local length = 0 for i = 2, #content do length = length + content[i]:gsub("^%s*(.-)%s*$", "%1"):len() end if length >= min and length <= max then m = m + 1 ofd:write(line, '\n') end if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n, ', Saved lines: ', m) io.flush() end end print('\rProcessed lines: '..n..', Saved lines: '..m) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" 
repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/jd/sort_data.sh ================================================ #!/bin/bash # Sort comma-separated file starting from the second field # Copyright 2016 Xiang Zhang # # Usage: bash sort_data.sh [input_file] [output_file] [temporary] [memory] set -x; set -e; sort -S ${4:-50%} -t ',' -k2 -u -T ${3:-/scratch} $1 > $2; ================================================ FILE: data/joint/combine_word.lua ================================================ --[[ Combine two word data together Copyright 2016 Xiang Zhang Usage: th combine_word_list.lua [input_1] [list_1] [input_2] [list_2] ... 
[output] [list] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = {} local input_list = {} for i = 1, math.floor(#arg / 2) - 1 do input[i] = arg[2 * i - 1] input_list[i] = arg[2 * i] end local output = arg[math.floor(#arg / 2) * 2 - 1] or '../data/joint/binary_train_word.csv' local output_list = arg[math.floor(#arg / 2) * 2] or '../data/joint/binary_train_word_list.csv' print('Loading output list from '..output_list) local list, count, freq, dict = joe.readList(output_list) print('Opening output file '..output) local ofd = io.open(output, 'w') for i = 1, #input do print('Loading input list from '..input_list[i]) local local_list, local_count, local_freq, local_dict = joe.readList(input_list[i]) print('Building input to output map') local map = joe.buildMap(local_list, dict) print('Processing data from '..input[i]) joe.processInput(input[i], map, ofd, list) end print('Closing output file '..output) ofd:close() end function joe.readList(file) local list = tds.Vec() local count = tds.Vec() local freq = tds.Vec() local dict = tds.Hash() local fd = io.open(file) for line in fd:lines() do local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') list:insert(content[1]) count:insert(tonumber(content[2])) freq:insert(tonumber(content[3])) dict[content[1]] = #list end fd:close() return list, count, freq, dict end function joe.buildMap(input_list, dict) local map = tds.Vec() for i = 1, #input_list do map[i] = dict[input_list[i]] end return map end function joe.processInput(input, map, ofd, list) local ifd = io.open(input) local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end -- Write class local content = joe.parseCSVLine(line) ofd:write('"', content[1], '"') -- Write title and comment for i = 2, #content do ofd:write(',"') for word in 
content[i]:gmatch('%d+') do ofd:write(map[tonumber(word)] or #list + 1, ' ') end ofd:write('"') end -- Write end of line ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/joint/combine_word_list.lua ================================================ --[[ Combine two word data together Copyright 2016 Xiang Zhang Usage: th combine_word_list.lua [list_1] [size_1] [list_2] [size_2] ... 
[output] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input_list = {} local input_size = {} for i = 1, math.floor(#arg / 2) do input_list[i] = arg[2 * i - 1] input_size[i] = arg[2 * i] end local output_list = arg[math.floor(#arg / 2) * 2 + 1] or '../data/joint/binary_train_word_list.csv' local word = {} for i = 1, #input_list do print('Loading list from '..input_list[i]) local list, count, freq, dict = joe.readInputList(input_list[i]) word[i] = {list = list, count = count, freq = freq, dict = dict} end print('Merging word lists') local list, count_table, freq_table, dict = joe.mergeWords(word, input_size) print('Writing merged word list to '..output_list) joe.writeOutputList(output_list, list, count_table, freq_table, dict) end function joe.readInputList(file) local list = tds.Vec() local count = tds.Vec() local freq = tds.Vec() local dict = tds.Hash() local fd = io.open(file) for line in fd:lines() do local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') list:insert(content[1]) count:insert(tonumber(content[2])) freq:insert(tonumber(content[3])) dict[content[1]] = #list end fd:close() return list, count, freq, dict end function joe.writeOutputList(file, list, count_table, freq_table, dict) local fd = io.open(file, 'w') for index, word in ipairs(list) do fd:write('"', word:gsub('\n', '\\n'):gsub('"', '""'), '","', count_table[word], '","', freq_table[word], '"\n') end fd:close() end function joe.mergeWords(word, size) local total_size = 0 for i, s in ipairs(size) do total_size = total_size + s end local list = tds.Vec() local count_table = tds.Hash() local freq_table = tds.Hash() for i, w in ipairs(word) do for j, v in ipairs(w.list) do if count_table[v] == nil then list:insert(v) count_table[v] = w.count[j] freq_table[v] = w.freq[j] * size[i] / total_size else count_table[v] = count_table[v] + w.count[j] 
freq_table[v] = freq_table[v] + w.freq[j] * size[i] / total_size end if math.fmod(j, 100000) == 0 then io.write('\rProcessing list ', i, ': ', j, '/', #w.list) io.flush() end end print('\rProcessed list '..i..': '..(#w.list)..'/'..(#w.list)) end print('Sorting merged word list') list:sort(function(a, b) return count_table[a] > count_table[b] end) print('Constructing merged word dictionary') local dict = tds.Hash() for i, w in ipairs(list) do dict[w] = i end return list, count_table, freq_table, dict end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/nytimes/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma import re import urllib.parse INPUT = '../data/nytimes/article' OUTPUT = '../data/nytimes/topic/news.csv' CLASS = '../data/nytimes/topic/class.csv' def main(): global INPUT global OUTPUT global CLASS parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file directory', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument( '-c', '--classes', help = 'Class file', default = CLASS) args = parser.parse_args() INPUT = args.input OUTPUT = args.output CLASS = args.classes createData() def createData(): # Open the category file classes = dict() count = 0 # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT + '/*.json.xz') n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] url = news.get('url', '') if url != '': path = urllib.parse.urlparse(url).path start_match = re.match(r'/\d\d\d\d/\d\d/\d\d/', path) end_match = re.match(r'/\d\d\d\d/\d\d/\d\d/[^/]+', path) if start_match != None and end_match != None: classname = path[start_match.end():end_match.end()] if classes.get(classname, None) == None: classes[classname] = count + 1 count = count + 1 index = classes[classname] writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) n = n + 1 ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() # Open the class file cfd = open(CLASS, 'w', newline = '', encoding = 'utf-8') class_writer = csv.writer( cfd, quoting = csv.QUOTE_ALL, lineterminator = '\n') for key in classes: class_writer.writerow([classes[key], key]) cfd.close() if __name__ == '__main__': main()
================================================ FILE: data/nytimes/count_class.lua ================================================
--[[
Count the number of samples for each class in a CSV dataset

Copyright 2016 Xiang Zhang

Usage: th count_class.lua [input] [output]
--]]

local torch = require('torch')

-- A Logic Named Joe
local joe = {}

-- Entry point: count samples per class in the input CSV and serialize the
-- resulting table to the output file.
function joe.main()
   local input = arg[1] or '../data/nytimes/topic/news_sorted.csv'
   local output = arg[2] or '../data/nytimes/topic/news_sorted_class.t7b'

   print('Counting data')
   local count = joe.count(input)

   -- NOTE(review): the original assigned the result table to joe.count here,
   -- clobbering the joe.count function after use. The assignment served no
   -- purpose and has been removed.
   print('Saving to '..output)
   torch.save(output, count)
end

-- Count the number of lines per class. The class index is read from the
-- first CSV field of each line. Returns a table mapping class -> count.
function joe.count(input)
   local count = {}
   local fd = io.open(input)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 100000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      local class = tonumber(content[1])
      -- The original also accumulated the total text length of each line into
      -- an unused local; that dead code has been dropped.
      count[class] = (count[class] or 0) + 1
   end
   print('\rProcessed lines: '..n)
   fd:close()
   return count
end

-- Parsing csv line
-- Ref: http://lua-users.org/wiki/LuaCsv
function joe.parseCSVLine(line, sep)
   local res = {}
   local pos = 1
   sep = sep or ','
   while true do
      local c = string.sub(line, pos, pos)
      if (c == "") then break end
      if (c == '"') then
         -- quoted value (ignore separator within)
         local txt = ""
         repeat
            local startp, endp = string.find(line, '^%b""', pos)
            txt = txt..string.sub(line, startp + 1, endp - 1)
            pos = endp + 1
            c = string.sub(line, pos, pos)
            if (c == '"') then txt = txt..'"' end
            -- check first char AFTER quoted string, if it is another
            -- quoted string without separator, then append it
            -- this is the way to "escape" the quote char in a quote.
         until (c ~= '"')
         table.insert(res, txt)
         assert(c == sep or c == "")
         pos = pos + 1
      else
         -- no quotes used, just look for the first separator
         local startp, endp = string.find(line, sep, pos)
         if (startp) then
            table.insert(res, string.sub(line, pos, startp - 1))
            pos = endp + 1
         else
            -- no separator found -> use rest of string and terminate
            table.insert(res, string.sub(line, pos))
            break
         end
      end
   end
   return res
end

joe.main()
return joe
================================================ FILE: data/rakuten/construct_hepburn.py ================================================ #!/usr/bin/python3 ''' Convert Japanese datasets to Hepburn Romanization Copyright 2016 Xiang Zhang Usage: python3 construct_hepburn.py -i [input] -o [output] ''' # Input file INPUT = '../data/rakuten/sentiment/full_train.csv' # Output file OUTPUT = '../data/rakuten/sentiment/full_train_hepburn.csv' import argparse import csv import MeCab import romkan import unidecode # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument(
'-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output mecab = MeCab.Tagger() convertRoman(mecab) def romanizeText(mecab, text): parsed = mecab.parse(text) result = list() for token in parsed.split('\n'): splitted = token.split('\t') if len(splitted) == 2: word = splitted[0] features = splitted[1].split(',') if len(features) > 7 and features[7] != '*': result.append(romkan.to_hepburn(features[7])) else: result.append(word) return result # Convert the text in Chinese to pintin def convertRoman(mecab): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(' '.join(map( str.strip, map(lambda s: s.replace('\n', '\\n'), map(unidecode.unidecode, romanizeText(mecab, row[i])))))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/rakuten/create_review.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_data.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/rakuten/review/*.json.xz' OUTPUT = '../data/rakuten/sentiment/review.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() 
INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) rate = review.get('rate', '') title = review.get('title', '') comment = review.get('comment', '') if rate != '': n = n + 1 writer.writerow([rate, title.replace('\n', '\\n'), comment.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/rakuten/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Japanese datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/rakuten/sentiment/full_train.csv' #Output file OUTPUT = '../data/rakuten/sentiment/full_train_word.csv' # List file LIST = '../data/rakuten/sentiment/full_train_word_list.csv' # Read already defined word list READ = False import argparse import csv import MeCab # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: 
print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): mecab = MeCab.Tagger() # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = list() parsed_result = mecab.parse(field) for token in parsed_result.split('\n'): splitted_token = token.split('\t') if len(splitted_token) == 2: word = splitted_token[0] field_list.append(word) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = 
csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): mecab = MeCab.Tagger() # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = list() parsed_result = mecab.parse(field) for token in parsed_result.split('\n'): splitted_token = token.split('\t') if len(splitted_token) == 2: word = splitted_token[0] field_list.append(word) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: doc/dianping.md ================================================ # Dianping This documentation contains information on how to reproduce all the results for the `Dianping` datasets in the paper. The root directory `/` in this documentation indicates the root directory of this repository. ## Download the dataset Original text data for training and testing are available via these two links: [`train.csv.xz`](https://goo.gl/uKPxyo) [`test.csv.xz`](https://goo.gl/2QZpLx). 
When you download them, make sure to put them in the `/data/data/dianping` directory and unxz so that you have `train.csv` and `test.csv` available. ## GlyphNet This section introduces how to prepare and run GlyphNet experiments. ### Prepare GNU Unifont Running the glyphnet training script requires the GNU Unifont character images. We have built these images into a Torch 7 binary serialization file and it can be downloaded via this link: [`unifont-8.0.1.t7b.xz`](https://goo.gl/aFxYHq). After downloading, put it in the `/unifont/unifont` directory and unxz so that you have `unifont-8.0.1.t7b` available. ### Build Byte Serialization Files The next step is to build the serialized code files. The first step is to build the string serialization files. Switch to the `/data/dianping` directory, then execute the following commands ```bash th construct_string.lua ../data/dianping/train.csv ../data/dianping/train_string.t7b th construct_string.lua ../data/dianping/test.csv ../data/dianping/test_string.t7b ``` These 2 commands will build byte serialization files for the samples in their original language. It assumes the texts are contained in a comma-separated-value format in which the first field is treated as the class index (starting from 1), and the remaining fields are all texts. The output files contain a lua table that has the following members * `index`: a table that contains index tensors for each class. For example `index[i]` is an n x m x 2 `LongTensor` that contains the starting position and length of byte string representing each sample in class i. We assume that class i contains n samples, and there are m text fields in the CSV file. * `content`: a `ByteTensor` that contains the serialization of the strings of all samples. Each string is ended with a 0 byte, which is not included in the length count in `index`.
### Build Unicode Serialization Files From this byte-level serialization, we will be able to construct serialization files that contain unicode values to be used in the `glyphnet` training scripts. To do this, execute the following 2 commands ```bash th construct_code.lua ../data/dianping/train_string.t7b ../data/dianping/train_code.t7b th construct_code.lua ../data/dianping/test_string.t7b ../data/dianping/test_code.t7b ``` Each of these code files contains a lua table that has 2 `LongTensor` members: `code` and `code_value`. They have a similar structure as the `index` and `content` members of the byte serialization files, but in this case they are for unicode values. ### Execute the Experiments Then, you can switch to `/glyphnet`, and execute the following scripts to run the training program for the large GlyphNet ```bash mkdir -p models/dianping/spatial8temporal12length512feature256 ./archive/dianping_spatial8temporal12length512feature256.sh ``` The first command simply creates a directory where checkpointing files will be written into during training. Note that the shell scripts also accept command-line parameters and pass them directly to the training program. The most useful ones are probably `-driver_visualize false` and `-driver_plot false`, that disable visualization and plotting so that you can run the training programs on a headless server. You can also use `-driver_resume true` to resume from checkpointed experiments. These parameters are available for all Torch 7 training programs. Similarly, the following commands execute the experiment for the small GlyphNet ```bash mkdir -p models/dianping/spatial6temporal8length486feature256 ./archive/dianping_spatial6temporal8length486feature256.sh ``` ## OnehotNet This section details how to execute OnehotNet experiments. Note that OnehotNet in this article is operating at byte-level for either the original text or the romanized text. In the case of romanized text, it is the same as character-level.
### Byte-Level OnehotNet for Original Text To train OnehotNet for the original text, we only need the previously built byte serialization files. If you do not have them, see previous sections for using `construct_string.lua` data processing scripts. #### Execute the Experiments Assuming your current working directory is `/onehotnet`, the following commands execute experiments for large OnehotNet on the original text samples. ```bash mkdir -p models/dianping/onehot4temporal12length2048feature256 ./archive/dianping_onehot4temporal12length2048feature256.sh ``` Similarly, the small OnehotNet experiments can be done using the following commands ```bash mkdir -p models/dianping/onehot4temporal8length1944feature256 ./archive/dianping_onehot4temporal8length1944feature256.sh ``` ### Character-Level OnehotNet for Romanized Text This section details how to execute OnehotNet for romanized text. But before that, we need to build the romanized data first. #### Build Romanized Text Serialization Files The first step is to convert the original text into a romanization format. This is done in this project automatically using the [`pypinyin`](https://github.com/mozillazg/python-pinyin) package (version 0.12 for the results in the paper). You also want to install [`jieba`](https://github.com/fxsjy/jieba) (version 0.38 for the results in the paper) so that `pypinyin` can use it for word segmentation. All these packages were installed in a Python 3 environment. Switch the working directory to `/data/dianping`, the following commands convert the original text to a romanization format for the Dianping dataset. ```bash python3 construct_pinyin.py -i ../data/dianping/train.csv -o ../data/dianping/train_pinyin.csv python3 construct_pinyin.py -i ../data/dianping/test.csv -o ../data/dianping/test_pinyin.csv ``` Then, we can use `construct_string.lua` again for constructing the byte serialization of romanized texts.
```bash th construct_string.lua ../data/dianping/train_pinyin.csv ../data/dianping/train_pinyin_string.t7b th construct_string.lua ../data/dianping/test_pinyin.csv ../data/dianping/test_pinyin_string.t7b ``` #### Execute the Experiments Assuming your current working directory is `/onehotnet`, the following commands execute experiments for large OnehotNet on the romanized text samples. ```bash mkdir -p models/dianping/onehot4temporal12length2048feature256roman ./archive/dianping_onehot4temporal12length2048feature256roman.sh ``` Similarly, the small OnehotNet experiments can be done using the following commands ```bash mkdir -p models/dianping/onehot4temporal8length1944feature256roman ./archive/dianping_onehot4temporal8length1944feature256roman.sh ``` ## EmbedNet This section introduces how to build the data files and execute experiments for EmbedNet. ### Character-Level EmbedNet for Original Text Since we already built the serialization data files for unicode characters for GlyphNet, we can directly use them. The only step required is to run the commands for training the models. Assuming the current working directory is `/embednet`, the following commands will start the training process for large character-level EmbedNet. ```bash mkdir -p models/dianping/temporal12length512feature256 ./archive/dianping_temporal12length512feature256.sh ``` And for small character-level EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256 ./archive/dianping_temporal8length486feature256.sh ``` ### Byte-Level EmbedNet for Original Text This section details how to train byte-level EmbedNet for the original text. #### Convert Byte Serialization Files Since the EmbedNet training program assumes the data files contain a table of 2 members `code` and `code_value`, we need to change the variable names in the string serialization files to match this.
This can be done in `/data/dianping` by executing the following commands ```bash th convert_string_code.lua ../data/dianping/train_string.t7b ../data/dianping/train_string_code.t7b th convert_string_code.lua ../data/dianping/test_string.t7b ../data/dianping/test_string_code.t7b ``` #### Execute the Experiments Assuming the current working director is `/embednet`, the following commands start the training process for the large byte-level EmbedNet ```bash mkdir -p models/dianping/temporal12length512feature256byte ./archive/dianping_temporal12length512feature256byte.sh ``` And for small byte-level EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256byte ./archive/dianping_temporal8length486feature256byte.sh ``` ### Character-Level EmbedNet for Romanized Text Note that characters for romanized text is the same as bytes. Therefore, the steps are exactly the same as the byte-level EmbedNet, except for romanized text instead of original text. #### Convert Byte Serialization Files In `/data/dianping`, execute the following commands ```bash th convert_string_code.lua ../data/dianping/train_pinyin_string.t7b ../data/dianping/train_pinyin_string_code.t7b th convert_string_code.lua ../data/dianping/test_pinyin_string.t7b ../data/dianping/test_pinyin_string_code.t7b ``` #### Execute the Experiments Assuming the current working director is `/embednet`, the following commands start the training process for the large character-level EmbedNet for romanized text ```bash mkdir -p models/dianping/temporal12length512feature256roman ./archive/dianping_temporal12length512feature256roman.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256roman ./archive/dianping_temporal8length486feature256roman.sh ``` ### Word-Level Embednet for Original Text This section introduces how to segment word from the text, build the word serialization files, and execute the commands. 
#### Build Word Serialization Files for Original Text The first step for building the word serialization files is to segment the words. This is done by executing a Python 3 script as follows, assuming you have the [`jieba`](https://github.com/fxsjy/jieba) package installed (version 0.38 for the results in the paper) and the working directory is `/data/dianping`. ```bash python3 segment_word.py -i ../data/dianping/train.csv -o ../data/dianping/train_word.csv -l ../data/dianping/train_word_list.csv python3 segment_word.py -i ../data/dianping/test.csv -o ../data/dianping/test_word.csv -l ../data/dianping/train_word_list.csv -r ``` The first command generate 2 data files. `train_word.csv` is a file containing sequences of indices of segmented words from the original text fields, whereas `train_word_list.csv` contains the list of words. The second command read the same list of words generated from the training data (therefore the `-r` option) and use that list to build sequences for the testing data. This is done deliberately so that new words not in the training data are not considered for classification results. The second step is to build the word serialization files from the segmentation results. ```bash th construct_word.lua ../data/dianping/train_word.csv ../data/dianping/train_word.t7b th construct_word.lua ../data/dianping/test_word.csv ../data/dianping/test_word.t7b ``` #### Execute the Experiments When we have `train_word.t7b` and `test_word.t7b`, we can start executing the experiments for word-level EmbedNet models. 
Assume that the current directory is `/embednet`, the following commands start the training process for the large word-level EmbedNet for original text ```bash mkdir -p models/dianping/temporal12length512feature256word ./archive/dianping_temporal12length512feature256word.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256word ./archive/dianping_temporal8length486feature256word.sh ``` ### Word-Level EmbedNet for Romanized Text Similar to the original text, romanized text also requires word segmentation before being able to pass through the EmbedNet training program. #### Build Word Serialization Files for Romanized Text Word segmentation for romanized text is pretty simple. Assume you are in `/data/dianping`, the following commands do the job ```bash th segment_roman_word.lua ../data/dianping/train_pinyin.csv ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv th segment_roman_word.lua ../data/dianping/test_pinyin.csv ../data/dianping/test_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv true ``` Note the additional `true` argument in the second command-line to inform the script to use the training word list for constructing the indices for the testing data. Then, word serialization files can be built from the segmentation results using the following commands. ```bash th construct_word.lua ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word.t7b th construct_word.lua ../data/dianping/test_pinyin_word.csv ../data/dianping/test_pinyin_word.t7b ``` #### Execute the Experiments When we have `train_pinyin_word.t7b` and `test_pinyin_word.t7b`, we can start executing the experiments for word-level EmbedNet models.
Assume that the current directory is `/embednet`, the following commands start the training process for the large word-level EmbedNet for romanized text ```bash mkdir -p models/dianping/temporal12length512feature256romanword ./archive/dianping_temporal12length512feature256romanword.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256romanword ./archive/dianping_temporal8length486feature256romanword.sh ``` ## Linear Model This section details how to reproduce the results for linear models. ### Character-Level 1-Gram Linear Model for Original Text To run the linear model using bag-of-character features, we need to build the feature serialization files first. #### Build Character-Level 1-Gram Feature Serialization Files To build the character-level 1-gram feature serialization files, execute the following commands from `/data/dianping`. ```bash th construct_charbag.lua ../data/dianping/train_code.t7b ../data/dianping/train_charbag.t7b ../data/dianping/train_charbag_list.csv th construct_charbag.lua ../data/dianping/test_code.t7b ../data/dianping/test_charbag.t7b ../data/dianping/train_charbag_list.csv true ``` The first command creates a file `train_charbag.t7b`, which contains a table that has the following members * `bag`: a table where `bag[i]` contains a n-by-2 `LongTensor`. It contains the beginning index and length of values in `bag_index` and `bag_value` for each sample. * `bag_index`: a 1-D `LongTensor` that contains the character indices of all samples. * `bag_value`: a 1-D `DoubleTensor` that contains the frequency of the corresponding character indices. The second command creates the feature serialization file for testing data, but using the same character index that was created from training data. The additional `true` parameter means to read from list rather than create a new one. All of the feature serialization files for linear models have the same data structure design.
To prepare feature serialization files for the TFIDF variant of bag-of-character linear model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_charbag.t7b ../data/dianping/train_charbagtfidf.t7b ../data/dianping/train_charbag_list.csv th construct_tfidf.lua ../data/dianping/test_charbag.t7b ../data/dianping/test_charbagtfidf.t7b ../data/dianping/train_charbag_list.csv ``` Note that constructing serialization files for testing data still uses the character frequency list from training data. #### Execute the Experiments To execute the experiment for character-level 1-gram linear model, execute the following commands from `/linearnet` ```bash mkdir -p models/dianping/charbag ./archive/dianping_charbag.sh ``` To execute the experiment for the TFIDF version, execute the following command from `/linearnet` ```bash mkdir -p models/dianping/charbagtfidf ./archive/dianping_charbagtfidf.sh ``` ### Character-Level 5-Gram Linear Model for Original Text Before being able to execute the 5-gram experiments, we have to build the feature serialization files first. #### Build Character-Level 5-Gram Feature Serialization Files In this work, 5-gram features actually mean features of grams from 1 to 5. It is usually infeasible to store all of these features in memory, and building the features could take a significant amount of time. Therefore, we build a list of grams ranked by their frequency via a multi-threaded program first, and then build the 5-gram feature serialization files using it.
To build the list of character grams, execute the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_chargram_count th count_chargram.lua ../data/dianping/train_code.t7b ../data/dianping/train_chargram_count/ mkdir -p ../data/dianping/train_chargram_count_sort ./sort_gram_count.sh ../data/dianping/train_chargram_count ../data/dianping/train_chargram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_chargram_count_sort/ ../data/dianping/train_chargram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_chargram_count_combine.csv ../data/dianping/train_chargram_list.csv ./limit_csvlines.sh ../data/dianping/train_chargram_list.csv ../data/dianping/train_chargram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of character grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the character-level 5-gram feature serialization files using the following commands from `/data/dianping` ```bash th construct_chargram.lua ../data/dianping/train_code.t7b ../data/dianping/train_chargram.t7b ../data/dianping/train_chargram_list_limit.csv th construct_chargram.lua ../data/dianping/test_code.t7b ../data/dianping/test_chargram.t7b ../data/dianping/train_chargram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data.
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_chargram.t7b ../data/dianping/train_chargramtfidf.t7b ../data/dianping/train_chargram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_chargram.t7b ../data/dianping/test_chargramtfidf.t7b ../data/dianping/train_chargram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for character-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/chargram ./archive/dianping_chargram.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/chargramtfidf ./archive/dianping_chargramtfidf.sh ``` ### Word-Level 1-Gram Linear Model for Original Text This section first introduces how to build bag-of-word features, and then details how to execute the experiments. #### Build Word-Level 1-Gram Feature Serialization Files The following commands from `/data/dianping` can create the word-level 1-gram features for linear model ```bash th construct_wordbag.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordbag.t7b 200000 200001 th construct_wordbag.lua ../data/dianping/test_word.t7b ../data/dianping/test_wordbag.t7b 200000 200001 ``` This is possible because the word segmentation process previously done for word-level EmbedNet already sorts the words by its frequency from the training data. The program also automatically limit the number of features to 200000 and replace all other features to the 200001-th one. 
To construct the TFIDF feature, simply execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_wordbag.t7b ../data/dianping/train_wordbagtfidf.t7b ../data/dianping/train_word_list.csv 200000 th construct_tfidf.lua ../data/dianping/test_wordbag.t7b ../data/dianping/test_wordbagtfidf.t7b ../data/dianping/train_word_list.csv 200000 ``` #### Execute the Experiments From `/linearnet`, the following commands execute the experiment for bag-of-word model ```bash mkdir -p models/dianping/wordbag ./archive/dianping_wordbag.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordbagtfidf ./archive/dianping_wordbagtfidf.sh ``` ### Word-Level 5-Gram Linear Model for Original Text This section introduces how to build word-level 5-gram feature serialization files and how to execute the experiments. #### Build Word-Level 5-Gram Feature Serialization Files Similar to the character-level 5-gram features, we need a multi-threaded program to build the list of grams first before being able to build the feature serialization files. 
The list can be built by executing the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_wordgram_count th count_wordgram.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordgram_count/ ../data/dianping/train_word_list.csv mkdir -p ../data/dianping/train_wordgram_count_sort ./sort_gram_count.sh ../data/dianping/train_wordgram_count ../data/dianping/train_wordgram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_wordgram_count_sort/ ../data/dianping/train_wordgram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_wordgram_count_combine.csv ../data/dianping/train_wordgram_list.csv ./limit_csvlines.sh ../data/dianping/train_wordgram_list.csv ../data/dianping/train_wordgram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of word grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the word-level 5-gram feature serialization files using the following commands from `/data/dianping` ```bash th construct_wordgram.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordgram.t7b ../data/dianping/train_wordgram_list_limit.csv th construct_wordgram.lua ../data/dianping/test_word.t7b ../data/dianping/test_wordgram.t7b ../data/dianping/train_wordgram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data.
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_wordgram.t7b ../data/dianping/train_wordgramtfidf.t7b ../data/dianping/train_wordgram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_wordgram.t7b ../data/dianping/test_wordgramtfidf.t7b ../data/dianping/train_wordgram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for word-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/wordgram ./archive/dianping_wordgram.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordgramtfidf ./archive/dianping_wordgramtfidf.sh ``` ### Word-Level 1-Gram Linear Model for Romanized Text This section first introduces how to build bag-of-word features for romanized text, and then details how to execute the experiments. #### Build Word-Level 1-Gram Feature Serialization Files The following commands from `/data/dianping` can create the word-level 1-gram features for romanized text ```bash th construct_wordbag.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordbag.t7b 200000 200001 th construct_wordbag.lua ../data/dianping/test_pinyin_word.t7b ../data/dianping/test_pinyin_wordbag.t7b 200000 200001 ``` This is possible because the word segmentation process previously done for romanized word-level EmbedNet already sorts the words by their frequency from the training data. The program also automatically limits the number of features to 200000 and replaces all other features with the 200001-th one. 
To construct the TFIDF feature, simply execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_pinyin_wordbag.t7b ../data/dianping/train_pinyin_wordbagtfidf.t7b ../data/dianping/train_pinyin_word_list.csv 200000 th construct_tfidf.lua ../data/dianping/test_pinyin_wordbag.t7b ../data/dianping/test_pinyin_wordbagtfidf.t7b ../data/dianping/train_pinyin_word_list.csv 200000 ``` #### Execute the Experiments From `/linearnet`, the following commands execute the experiment for bag-of-word model for romanized text ```bash mkdir -p models/dianping/wordbagroman ./archive/dianping_wordbagroman.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordbagtfidfroman ./archive/dianping_wordbagtfidfroman.sh ``` ### Word-Level 5-Gram Linear Model for Romanized Text This section introduces how to build word-level 5-gram feature serialization files for romanized text and how to execute the experiments. #### Build Word-Level 5-Gram Feature Serialization Files Similar to the character-level 5-gram features, we need a multi-threaded program to build the list of grams first before being able to build the feature serialization files. 
The list can be built by executing the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_pinyin_wordgram_count th count_wordgram.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordgram_count/ ../data/dianping/train_pinyin_word_list.csv mkdir -p ../data/dianping/train_pinyin_wordgram_count_sort ./sort_gram_count.sh ../data/dianping/train_pinyin_wordgram_count ../data/dianping/train_pinyin_wordgram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_pinyin_wordgram_count_sort/ ../data/dianping/train_pinyin_wordgram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_pinyin_wordgram_count_combine.csv ../data/dianping/train_pinyin_wordgram_list.csv ./limit_csvlines.sh ../data/dianping/train_pinyin_wordgram_list.csv ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of word grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the word-level 5-gram feature serialization files for romanized text using the following commands from `/data/dianping` ```bash th construct_wordgram.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv th construct_wordgram.lua ../data/dianping/test_pinyin_word.t7b ../data/dianping/test_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data. 
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgramtfidf.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_pinyin_wordgram.t7b ../data/dianping/test_pinyin_wordgramtfidf.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for word-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/wordgramroman ./archive/dianping_wordgramroman.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordgramtfidfroman ./archive/dianping_wordgramtfidfroman.sh ``` ## fastText This section introduces how to build the token files and run experiments for the fastText models. Note that before being able to execute the experiments in this section, you must make sure that you have [fastText](https://github.com/facebookresearch/fastText) installed and there is `fasttext` command in your `PATH`. ### Character-Level fastText for Original Text We first build the token files for character-level fastText, and then detail how to execute the experiments. #### Build Character-Level Token Files To build the character token files from the original text files, execute the following commands from `/data/dianping` ```bash th construct_chartoken.lua ../data/dianping/train.csv ../data/dianping/train_chartoken.txt th construct_chartoken.lua ../data/dianping/test.csv ../data/dianping/test_chartoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. 
```bash ./shuffle_lines.sh ../data/dianping/train_chartoken.txt ../data/dianping/train_chartoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_chartoken_shuffle.txt ../data/dianping/train_chartoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_chartoken_shuffle_split_0.txt` and `train_chartoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the character-level 1-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charunigram_evaluation ./archive/dianping_charunigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the character-level 1-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charunigram_tuned ./archive/dianping_charunigram_tuned.sh ``` To execute the character-level 2-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charbigram_evaluation ./archive/dianping_charbigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the character-level 2-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charbigram_tuned ./archive/dianping_charbigram_tuned.sh ``` To execute the character-level 5-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charpentagram_evaluation ./archive/dianping_charpentagram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. 
To execute the character-level 5-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charpentagram_tuned ./archive/dianping_charpentagram_tuned.sh ``` ### Word-Level fastText for Original Text We first build the token files for word-level fastText, and then detail how to execute the experiments. #### Build Word-Level Token Files To build the word token files from the original text files, execute the following commands from `/data/dianping` ```bash th construct_wordtoken.lua ../data/dianping/train_word.csv ../data/dianping/train_word_list.csv ../data/dianping/train_wordtoken.txt th construct_wordtoken.lua ../data/dianping/test_word.csv ../data/dianping/train_word_list.csv ../data/dianping/test_wordtoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. ```bash ./shuffle_lines.sh ../data/dianping/train_wordtoken.txt ../data/dianping/train_wordtoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_wordtoken_shuffle.txt ../data/dianping/train_wordtoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_wordtoken_shuffle_split_0.txt` and `train_wordtoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the word-level 1-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigram_evaluation ./archive/dianping_wordunigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. 
To execute the word-level 1-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigram_tuned ./archive/dianping_wordunigram_tuned.sh ``` To execute the word-level 2-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigram_evaluation ./archive/dianping_wordbigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 2-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigram_tuned ./archive/dianping_wordbigram_tuned.sh ``` To execute the word-level 5-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagram_evaluation ./archive/dianping_wordpentagram_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 5-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagram_tuned ./archive/dianping_wordpentagram_tuned.sh ``` ### Word-Level fastText for Romanized Text We first build the token files for word-level fastText on romanized text, and then detail how to execute the experiments. 
#### Build Word-Level Token Files To build the word token files from the romanized text files, execute the following commands from `/data/dianping` ```bash th construct_wordtoken.lua ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv ../data/dianping/train_pinyin_wordtoken.txt th construct_wordtoken.lua ../data/dianping/test_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv ../data/dianping/test_pinyin_wordtoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. ```bash ./shuffle_lines.sh ../data/dianping/train_pinyin_wordtoken.txt ../data/dianping/train_pinyin_wordtoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_pinyin_wordtoken_shuffle.txt ../data/dianping/train_pinyin_wordtoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_pinyin_wordtoken_shuffle_split_0.txt` and `train_pinyin_wordtoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the word-level 1-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigramroman_evaluation ./archive/dianping_wordunigramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 1-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigramroman_tuned ./archive/dianping_wordunigramroman_tuned.sh ``` To execute the word-level 2-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigramroman_evaluation ./archive/dianping_wordbigramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. 
You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the word-level 2-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigramroman_tuned ./archive/dianping_wordbigramroman_tuned.sh ``` To execute the word-level 5-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagramroman_evaluation ./archive/dianping_wordpentagramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the word-level 5-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagramroman_tuned ./archive/dianping_wordpentagramroman_tuned.sh ``` ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256 -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file 
data/11st/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; 
================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256 -driver_variation small -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256romanword.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set 
-e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256word -driver_dimension 200002 
-train_data_file data/11st/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256 -driver_variation small -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_rr_byte.t7b -test_data_replace 257 
-test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal12length512feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/amazon/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal8length486feature256 -driver_variation small -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/amazon/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal12length512feature256 -train_data_file 
data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/amazon/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal8length486feature256 -driver_variation small -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/amazon/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b -driver_location models/chinanews/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/chinanews/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/chinanews/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256romanword 
-driver_dimension 200002 -train_data_file data/chinanews/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/chinanews/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b -driver_location models/chinanews/temporal8length486feature256 -driver_variation small "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/chinanews/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; 
================================================ FILE: embednet/archive/chinanews_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/chinanews/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/chinanews/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/chinanews/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: 
embednet/archive/dianping_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/dianping/train_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/dianping/train_pinyin_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_pinyin_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file 
data/dianping/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/dianping/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256byte -driver_dimension 257 -train_data_file data/dianping/train_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived 
program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256roman -driver_dimension 257 -train_data_file data/dianping/train_pinyin_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_pinyin_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256romanword -driver_dimension 200002 -train_data_file data/dianping/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256word -driver_dimension 200002 -train_data_file data/dianping/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b -driver_location models/ifeng/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/ifeng/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/ifeng/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/ifeng/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/ifeng/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/ifeng/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b -driver_location models/ifeng/temporal8length486feature256 -driver_variation small "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/ifeng/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/ifeng/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/ifeng/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/ifeng/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; 
set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b -driver_location models/jdbinary/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_pinyin_word_limit.t7b -train_data_replace 
200002 -test_data_file data/jd/sentiment/binary_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256 -driver_variation small -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: 
embednet/archive/jdbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: 
embednet/archive/jdfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b -driver_location models/jdfull/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256 -driver_variation small -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_byte.t7b -train_data_replace 257 
-train_data_shift 1 -test_data_file data/jd/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/jd/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_location models/jointbinary/temporal12length512feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/joint/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/joint/binary_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: 
embednet/archive/jointbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256romanword -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/binary_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/joint/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_location models/jointbinary/temporal8length486feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 
2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/temporal8length486feature256byte -driver_dimension 257 -train_data_file data/joint/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/joint/binary_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal8length486feature256romanword -driver_variation small -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/binary_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/temporal8length486feature256word -driver_dimension 200002 -train_data_file data/joint/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_location models/jointfull/temporal12length512feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/joint/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256roman -driver_dimension 257 
-train_data_file data/joint/full_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256romanword -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/full_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/joint/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_location models/jointfull/temporal8length486feature256 -driver_steps 
400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/joint/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/joint/full_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256romanword -driver_variation small -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/full_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; 
================================================ FILE: embednet/archive/jointfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/joint/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/nytimes_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal12length512feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: embednet/archive/nytimes_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/nytimes/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/nytimes/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/nytimes_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal8length486feature256 -driver_variation small -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: embednet/archive/nytimes_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/nytimes/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/nytimes/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file 
data/rakuten/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/rakuten/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256 -driver_variation small -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; 
================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256byte.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256word.sh ================================================ 
#!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256 -driver_variation small -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-driver_location models/rakutenfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/config.lua ================================================ --[[ Configuration for EmbedNet Copyright Xiang Zhang 2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_code.t7b' 
config.train_data.batch = 16 config.train_data.replace = 65537 config.train_data.shift = 0 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_code.t7b' config.test_data.batch = 16 config.test_data.replace = 65537 config.test_data.shift = 0 -- Model configurations config.model = {} config.model.cudnn = true -- Model variations configuration config.variation = {} -- Large model configuration local embedding = {} embedding[1] = {name = 'nn.LookupTable', nIndex = 65537, nOutput = 256, paddingValue = config.train_data.replace} embedding[2] = {name = 'nn.Transpose', permutations = {{2, 3}}} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[16] 
= {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[18] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true} temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024} temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[31] = {name = 'nn.LogSoftMax'} config.variation['large'] = {embedding = embedding, temporal = temporal, length = 512} -- Small model configuration local embedding = {} embedding[1] = {name = 'nn.LookupTable', nIndex = 65537, nOutput = 256, paddingValue = config.train_data.replace} embedding[2] = {name = 'nn.Transpose', permutations = {{2, 3}}} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = 
{name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true} temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024} temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[21] = {name = 'nn.LogSoftMax'} config.variation['small'] = {embedding = embedding, temporal = temporal, length = 486} -- Trainer settings config.train = {} config.train.momentum = 0.9 config.train.decay = 1e-5 -- These are just multipliers to config.driver.rate -- For every config.driver.schedule * config.driver.steps config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024} -- Tester settings config.test = {} -- Visualizer settings config.visualizer = {} config.visualizer.width = 1200 config.visualizer.scale = 4 config.visualizer.height = 64 -- Driver configurations config.driver = {} config.driver.type = 'torch.CudaTensor' config.driver.device = 1 
config.driver.loss = 'nn.ClassNLLCriterion' config.driver.variation = 'large' config.driver.dimension = 65537 config.driver.steps = 100000 config.driver.epoches = 100 config.driver.schedule = 8 config.driver.rate = 1e-5 config.driver.interval = 5 config.driver.location = 'models/dianping/temporal12length512feature256' config.driver.plot = true config.driver.visualize = true config.driver.debug = false config.driver.resume = false -- Main configuration config.joe = {} return config ================================================ FILE: embednet/data.lua ================================================ --[[ Data class for Embedding Net Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local parent = require('glyphnet/data') local Data = class(parent) -- Constructor for Data -- config: configuration table -- .file: file for data -- .batch: batch of data -- .replace: the code to for replacing padding space function Data:_init(config) self.data = torch.load(config.file) self.length = config.length or 512 self.batch = config.batch or 16 self.replace = config.replace or 65537 self.shift = config.shift or 0 end function Data:initSample(sample, label) local sample = sample or torch.Tensor(self.batch, self.length) local label = label or torch.Tensor(self.batch) sample:fill(self.replace) return sample, label end function Data:index(sample, class, item) local code, code_value = self.data.code, self.data.code_value local position = 1 for field = 1, code[class][item]:size(1) do -- Break if current position is larger than sample length if position > sample:size(1) then break end -- Determine the actual length local length = code[class][item][field][2] if position + length - 1 > sample:size(1) then length = sample:size(1) - position + 1 end -- Copy the data over if length > 0 then sample:narrow(1, position, length):copy( code_value:narrow(1, code[class][item][field][1], length)):add( self.shift) end -- Increment the position value 
position = position + length end return sample end return Data ================================================ FILE: embednet/driver.lua ================================================ --[[ Driver for EmbedNet training Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local parent = require('glyphnet/driver') local Driver = class(parent) -- Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.embedding = self.options.variation[self.variation].embedding self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length self.dimension = self.options.driver.dimension print('Driver adjusting data index dimension to '..self.dimension) self.options.model.embedding[1].nIndex = self.dimension self.options.model.embedding[1].paddingValue = self.options.train_data.replace end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Embedding model' self.embedding_visualizer = self.embedding_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.embedding_visualizer:drawSequential(self.model.embedding) self.temporal_visualizer:drawSequential(self.model.temporal) end return Driver ================================================ FILE: embednet/model.lua ================================================ --[[ Model for EmbedNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local nn = require('nn') local parent = require('glyphnet/model') local Model = class(parent) -- 
Model constructor -- config: configuration table -- .embedding: configuration table of the embedding model -- .temporal: configuration table of the temporal model -- .file: the model file to load -- .pretrain: whether the keep the embedding pretrained -- .embedding_file: the file for pretrained embedding model -- .cudnn: whether to use NVidia CUDNN function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.embedding = self:makeCleanSequential(model.embedding) self.temporal = self:makeCleanSequential(model.temporal) else if config.embedding_file then self.embedding = self:makeCleanSequential( torch.load(config.embedding_file)) else self.embedding = self:createCleanSequential(config.embedding) self:initSequential(self.embedding) end self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.temporal) end -- Saving configurations self.pretrain = config.pretrain self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() end function Model:forward(input) self.feature = self.embedding:forward(input) self.output = self.temporal:forward(self.feature) return self.output end function Model:backward(input, grad_output) self.grad_feature = self.temporal:backward(self.feature, grad_output) if self.pretrain then return self.grad_feature else self.grad_input = self.embedding:backward(input, self.grad_feature) return self.grad_input end end function Model:getParameters() return nn.Module.getParameters(self) end function Model:parameters() local parameters, gradients = {}, {} if not self.pretrain then local embedding_parameters, embedding_gradients = self.embedding:parameters() for i = 1, #embedding_parameters do parameters[#parameters + 1] = embedding_parameters[i] gradients[#gradients + 1] = embedding_gradients[i] end end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = 
temporal_parameters[i]
      gradients[#gradients + 1] = temporal_gradients[i]
   end
   return parameters, gradients
end

-- Set or query the tensor type. Converting to 'torch.CudaTensor' rebuilds
-- both sub-models as CUDA sequentials (loading cunn on demand); converting
-- away rebuilds them as clean sequentials. Returns the current tensor type.
function Model:type(tensortype)
   if tensortype ~= nil and tensortype ~= self.tensortype then
      if tensortype == 'torch.CudaTensor' then
         require('cunn')
         self.embedding = self:makeCudaSequential(self.embedding)
         self.temporal = self:makeCudaSequential(self.temporal)
      else
         self.embedding = self:makeCleanSequential(self.embedding)
         self.temporal = self:makeCleanSequential(self.temporal)
      end
      self.embedding:type(tensortype)
      self.temporal:type(tensortype)
      self.tensortype = tensortype
   end
   return self.tensortype
end

-- Propagate train/test mode to both sub-models.
function Model:setMode(mode)
   self:setModeSequential(self.embedding, mode)
   self:setModeSequential(self.temporal, mode)
end

-- Save clean (state-cleared, non-CUDA) copies of both sub-models to file as
-- {embedding = ..., temporal = ...}.
function Model:save(file)
   local embedding = self:clearSequential(
      self:makeCleanSequential(self.embedding))
   local temporal = self:clearSequential(
      self:makeCleanSequential(self.temporal))
   torch.save(file, {embedding = embedding, temporal = temporal})
end

-- Initialize a lookup table: normal weights with std scaled by the inverse
-- square root of the output dimension; the padding row (if any) is zeroed.
Model.initModule['nn.LookupTable'] = function (self, m)
   m.weight:normal(0, math.sqrt(1 / m.weight:size(2)))
   if m.paddingValue > 0 then
      m.weight[m.paddingValue]:zero()
   end
end

-- nn.Transpose has no parameters; nothing to initialize.
Model.initModule['nn.Transpose'] = function (self, m)
end

-- Neither module type distinguishes between train and test modes.
Model.setModeModule['train']['nn.LookupTable'] = function (self, m)
end
Model.setModeModule['train']['nn.Transpose'] = function (self, m)
end
Model.setModeModule['test']['nn.LookupTable'] = function(self, m)
end
Model.setModeModule['test']['nn.Transpose'] = function(self, m)
end

-- Create fresh modules from configuration tables.
Model.createCleanModule['nn.LookupTable'] = function (self, m)
   return nn.LookupTable(m.nIndex, m.nOutput, m.paddingValue)
end
Model.createCleanModule['nn.Transpose'] = function (self, m)
   return nn.Transpose(unpack(m.permutations))
end

-- Clone modules into clean equivalents, copying weights where present.
Model.makeCleanModule['nn.LookupTable'] = function(self, m)
   local new = nn.LookupTable(
      m.weight:size(1), m.weight:size(2), m.paddingValue)
   new.weight:copy(m.weight)
   return new
end
Model.makeCleanModule['nn.Transpose'] = function (self, m)
   return 
nn.Transpose(unpack(m.permutations))
end

-- Clone modules into CUDA-ready equivalents, copying weights where present.
Model.makeCudaModule['nn.LookupTable'] = function (self, m)
   local new = nn.LookupTable(
      m.weight:size(1), m.weight:size(2), m.paddingValue)
   new.weight:copy(m.weight)
   return new
end
Model.makeCudaModule['nn.Transpose'] = function (self, m)
   return nn.Transpose(unpack(m.permutations))
end

return Model

================================================ FILE: embednet/unittest/data.lua ================================================

--[[
Unit test for EmbedNet data component
Copyright 2016 Xiang Zhang
--]]

local Data = require('data')

-- A Logic Named Joe
local joe = {}

-- Test harness entry: runs joe:init() if defined, then every method whose
-- name matches '*Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build a testing data object from config.lua with length forced to 512.
function joe:init()
   local config = dofile('config.lua')
   config.train_data.length = 512
   config.test_data.length = 512
   print('Creating testing data object')
   local data = Data(config.test_data)
   self.config = config
   self.data = data
end

-- Print up to count rows of a batch, one 'label: token token ...' per line.
function joe:printSample(sample, label, count)
   local count = count or sample:size(1)
   for i = 1, count do
      io.write(label[i], ':')
      for j = 1, sample:size(2) do
         io.write(' ', sample[i][j])
      end
      io.write('\n')
   end
   io.flush()
end

-- Fetch two batches, reusing the first batch's tensors for the second call.
function joe:getBatchTest()
   local data = self.data
   print('Getting a batch')
   local sample, label = data:getBatch()
   self:printSample(sample, label)
   print('Getting a second batch')
   sample, label = data:getBatch(sample, label)
   self:printSample(sample, label)
end

-- Iterate over the whole dataset, printing each batch's count and labels.
function joe:iteratorTest()
   local data = self.data
   for sample, label, count in data:iterator() do
      io.write(count, ':')
      for i = 1, count do
         io.write(' ', label[i])
      end
      io.write('\n')
      io.flush()
   end
end

joe.main()
return joe

================================================ FILE: embednet/unittest/driver.lua ================================================

--[[ Unit test for EmbedNet driver component 
Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_code.t7b' config.test_data.file = 'data/dianping/unittest_code.t7b' config.driver.debug = true config.driver.device = 3 config.driver.steps = 10 config.driver.epoches = 5 local driver = Driver(config, config.driver) self.config = config self.driver = driver end function joe:driverTest() local driver = self.driver print('Testing driver') driver:run() end joe.main() return joe ================================================ FILE: embednet/unittest/model.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. 
params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(2, 512):mul(65537):ceil() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()) print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/model_cudnn.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local cutorch = require('cutorch') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() 
if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal config.model.cudnn = true local model = Model(config.model) model:cuda() print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(16, 512):mul(65537):ceil():cuda() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) cutorch.synchronize() sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()):cuda() print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) cutorch.synchronize() sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in 
ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/model_cunn.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local cutorch = require('cutorch') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal config.model.cudnn = nil local model = Model(config.model) model:cuda() print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. 
params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(16, 512):mul(65537):ceil():cuda() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) cutorch.synchronize() sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()):cuda() print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) cutorch.synchronize() sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/test.lua ================================================ --[[ Unit test for EmbedNet test component Copyright 2015-2016 Xiang Zhang --]] local Test = require('test') local nn = require('nn') local os = require('os') 
local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.test_data.batch = 2 print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Create loss') local loss = nn[config.driver.loss:sub(4)]() print('Create tester') local test = Test(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.test = test self.config = config end function joe:testTest() local test = self.test local callback = self:callback() print('Running tests') test:run(callback) end function joe:callback() return function (test, i) print('cnt: '..test.total_count..', err: '..test.total_error.. ', lss: '..test.total_objective..', obj: '..test.objective.. ', crr: '..test.error..', dat: '..test.time.data.. 
', fwd: '..test.time.forward..', upd: '..test.time.update) end end joe.main() return joe ================================================ FILE: embednet/unittest/test_cuda.lua ================================================ --[[ Unit test for EmbedNet test component Copyright 2016 Xiang Zhang --]] local Test = require('test') local cutorch = require('cutorch') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Setting device to '..config.driver.device) cutorch.setDevice(config.driver.device) print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) model:cuda() print('Create loss') local loss = nn[config.driver.loss:sub(4)]() loss:cuda() print('Create tester') local test = Test(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.test = test self.config = config end function joe:testTest() local test = self.test local callback = self:callback() print('Running tests') test:run(callback) end function joe:callback() return function (test, i) print('cnt: '..test.total_count..', err: '..test.total_error.. ', lss: '..test.total_objective..', obj: '..test.objective.. ', crr: '..test.error..', dat: '..test.time.data.. 
', fwd: '..test.time.forward..', upd: '..test.time.update) end end joe.main() return joe ================================================ FILE: embednet/unittest/train.lua ================================================ --[[ Unit test for EmbedNet train component Copyright 2015-2016 Xiang Zhang --]] local Train = require('train') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.test_data.batch = 2 print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Create loss') local loss = nn[config.driver.loss:sub(4)]() print('Create trainer') for i, v in pairs(config.train.rates) do config.train.rates[i] = v * config.driver.rate end local train = Train(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.train = train self.config = config end function joe:trainTest() local train = self.train local callback = self:callback() print('Running for 10 steps') train:run(100, callback) end function joe:callback() self.time = os.time() return function (train, i) if os.difftime(os.time(), self.time) >= 5 then print('stp: '..train.step..', rat: '..train.rate.. ', err: '..train.error..', obj: '..train.objective.. ', dat: '..train.time.data..', fwd: '..train.time.forward.. 
', bwd: '..train.time.backward..', upd: '..train.time.update) self.time = os.time() end end end joe.main() return joe ================================================ FILE: embednet/unittest/train_cuda.lua ================================================ --[[ Unit test for EmbedNet train component Copyright 2015-2016 Xiang Zhang --]] local Train = require('train') local cutorch = require('cutorch') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Setting device to '..config.driver.device) cutorch.setDevice(config.driver.device) print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) model:cuda() print('Create loss') local loss = nn[config.driver.loss:sub(4)]() loss:cuda() print('Create trainer') for i, v in pairs(config.train.rates) do config.train.rates[i] = v * config.driver.rate end local train = Train(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.train = train self.config = config end function joe:trainTest() local train = self.train local callback = self:callback() print('Running for 100000 steps') train:run(100000, callback) end function joe:callback() self.time = os.time() return function (train, i) if os.difftime(os.time(), self.time) >= 5 then print('stp: '..train.step..', rat: '..train.rate.. ', err: '..train.error..', obj: '..train.objective.. 
', dat: '..train.time.data..', fwd: '..train.time.forward..
            ', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()
return joe

================================================ FILE: embednet/visualizer.lua ================================================

--[[
Visualization module for EmbedNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')

local parent = require('glyphnet/visualizer')

-- EmbedNet visualizer: identical to the GlyphNet visualizer except that
-- nn.LookupTable is drawn the same way the parent draws nn.Linear (both are
-- weight matrices).
local Visualizer = class(parent)

Visualizer.drawModule['nn.LookupTable'] = Visualizer.drawModule['nn.Linear']

return Visualizer

================================================ FILE: fasttext/archive/11stbinary_charbigram.sh ================================================

#!/bin/bash

# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

# Echo commands and abort on first failure.
set -x;
set -e;

LOCATION=models/11stbinary/charbigram;
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt;

# Train a supervised fastText character-bigram model, then report accuracy on
# both the training and the test sets.
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================ FILE: fasttext/archive/11stbinary_charbigram_evaluation.sh ================================================

#!/bin/bash

# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

# Echo commands and abort on first failure.
set -x;
set -e;

LOCATION=models/11stbinary/charbigram_evaluation;
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt;

# Epoch-count evaluation: train on split 0 and evaluate on held-out split 1.
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charbigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram; 
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/11stbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin 
$TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman.sh ================================================ #!/bin/bash 
# Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/11stbinary/wordpentagramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test 
$LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/11stbinary/wordunigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test 
$LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram_tuned.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # 
Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; 
TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 
-bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/11stfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 
-minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/11stfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; 
TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 
10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/amazonbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; 
TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 
10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/amazonfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 
Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; 
TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 
-thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/amazonfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/chinanews_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/chinanews/charbigram; TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt; TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/chinanews_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/chinanews/charbigram_evaluation; TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; 
================================================
FILE: fasttext/archive/chinanews_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charbigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram_tuned;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman_evaluation;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman_tuned;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram_tuned;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman_evaluation;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman_tuned;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordunigram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordunigram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;
================================================ FILE: fasttext/archive/dianping_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigram_tuned; TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman_evaluation; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 
-minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman_tuned; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram_evaluation.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram; 
TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 
-epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram_tuned.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/ifeng_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line 
for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 
-epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/ifeng/wordunigramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jdbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for 
experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 
-wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; 
TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 
-wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ 
FILE: fasttext/archive/jdbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 
-lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ 
FILE: fasttext/archive/jdbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: 
bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman_tuned; 
TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; 
TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin 
$TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram_tuned; 
TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised 
-input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram_evaluation; 
TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 
-wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jdfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 
Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 
-wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised 
-input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin 
$TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 
-thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/jointbinary/wordbigramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 
2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointfull/charbigram; TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt; 
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charbigram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charbigram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;
================================================
FILE: fasttext/archive/jointfull_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;
================================================
FILE: fasttext/archive/nytimes_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram_evaluation;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram_tuned;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram_evaluation;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram_tuned;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram_tuned.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 
-thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/rakutenbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/rakutenbinary/wordpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 
0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line 
for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman; 
TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; 
TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 
-lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram_evaluation; 
TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model 
-dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/rakutenfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin 
$TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram_evaluation.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman_tuned; 
TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; 
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/rakutenfull/wordunigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: glyphnet/archive/11stbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/11stbinary/spatial6temporal8length486feature256 
-train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/spatial8temporal12length512feature256 -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/11stfull/spatial6temporal8length486feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/spatial8temporal12length512feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # 
Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/amazonbinary/spatial6temporal8length486feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/spatial8temporal12length512feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/amazonfull/spatial6temporal8length486feature256 -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/spatial8temporal12length512feature256 -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/chinanews_spatial6temporal8length486feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/chinanews/spatial6temporal8length486feature256 -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/chinanews_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/spatial8temporal12length512feature256 -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/dianping_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/spatial6temporal8length486feature256 "$@"; ================================================ FILE: glyphnet/archive/dianping_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: glyphnet/archive/ifeng_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/ifeng/spatial6temporal8length486feature256 -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/ifeng_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/spatial8temporal12length512feature256 -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jdbinary/spatial6temporal8length486feature256 -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/spatial8temporal12length512feature256 -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdfull_spatial6temporal8length486feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jdfull/spatial6temporal8length486feature256 -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/spatial8temporal12length512feature256 -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jointbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/spatial6temporal8length486feature256 -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/spatial8temporal12length512feature256 -train_data_file data/joint/binary_train_code.t7b -test_data_file 
data/joint/binary_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointfull/spatial6temporal8length486feature256 -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/spatial8temporal12length512feature256 -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/nytimes_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/nytimes/spatial6temporal8length486feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/nytimes_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set 
-e; qlua main.lua -driver_location models/nytimes/spatial8temporal12length512feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/rakutenbinary/spatial6temporal8length486feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/spatial8temporal12length512feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/rakutenfull/spatial6temporal8length486feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenfull_spatial8temporal12length512feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/spatial8temporal12length512feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/config.lua ================================================ --[[ Configuration for GlyphNet Copyright Xiang Zhang 2015-2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_code.t7b' config.train_data.unifont = 'unifont/unifont-8.0.01.t7b' config.train_data.batch = 16 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_code.t7b' config.test_data.unifont = 'unifont/unifont-8.0.01.t7b' config.test_data.batch = 16 -- Model configurations config.model = {} config.model.cudnn = true config.model.group = 16 -- Model variations configuration config.variation = {} -- Large network configuration local spatial = {} spatial[1] = {name = 'nn.SpatialConvolution', nInputPlane = 1, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[3] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[5] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0} spatial[6] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[8] = {name = 'nn.SpatialConvolution', nInputPlane = 128, 
-- NOTE(review): this span continues spatial[8] of the large GlyphNet variation;
-- the opening half of the constructor ('nn.SpatialConvolution',
-- nInputPlane = 128,) sits immediately before this block in the file.
nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[10] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0}
spatial[11] = {name = 'nn.SpatialConvolution', nInputPlane = 128, nOutputPlane = 256, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[13] = {name = 'nn.SpatialConvolution', nInputPlane = 256, nOutputPlane = 256, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[15] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0}
-- BUG FIX: was 'bachMode'; nn.Reshape's option is spelled 'batchMode'.
-- The temporal stack below (temporal[26]) already uses the correct spelling,
-- so with the typo the spatial reshape silently fell back to auto-detection.
spatial[16] = {name = 'nn.Reshape', size = 1024, batchMode = true}
spatial[17] = {name = 'nn.Linear', inputSize = 1024, outputSize = 1024}
spatial[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[19] = {name = 'nn.Linear', inputSize = 1024, outputSize = 256}
spatial[20] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}

-- Temporal (sequence-level) stack of the large variation: six
-- conv/conv/pool groups at frame size 256, then reshape + classifier.
local temporal = {}
temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[16] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[18] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true}
temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024}
temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true}
temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2}
temporal[31] = {name = 'nn.LogSoftMax'}
config.variation['large'] = {spatial = spatial, temporal = temporal, length = 512}

-- Small network configuration
local spatial = {}
spatial[1] = {name = 'nn.SpatialConvolution', nInputPlane = 1, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 2, padH = 2}
spatial[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[3] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[5] = {name = 'nn.SpatialMaxPooling', kW = 3, kH = 3, dW = 3, dH = 3, padW = 0, padH = 0}
spatial[6] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[8] = {name = 'nn.SpatialConvolution', nInputPlane = 128, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[10] = {name = 'nn.SpatialMaxPooling', kW = 3, kH = 3, dW = 3, dH = 3, padW = 0, padH = 0}
-- BUG FIX: was 'bachMode' (same nn.Reshape option typo as in the large
-- variation above; 'batchMode' is the spelling nn.Reshape accepts).
spatial[11] = {name = 'nn.Reshape', size = 512, batchMode = true}
spatial[12] = {name = 'nn.Linear', inputSize = 512, outputSize = 256}
spatial[13] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[14] = {name = 'nn.Linear', inputSize = 256, outputSize = 256}
spatial[15] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}

-- Temporal stack of the small variation: three conv/conv/pool groups at
-- frame size 256 (pooling by 3), then reshape + classifier.
local temporal = {}
temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true}
temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024}
temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true}
temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2}
temporal[21] = {name = 'nn.LogSoftMax'}
config.variation['small'] = {spatial = spatial, temporal = temporal, length = 486}

-- Trainer settings
config.train = {}
config.train.momentum = 0.9
config.train.decay = 1e-5
-- These are just multipliers to config.driver.rate
-- For every config.driver.schedule * config.driver.steps
config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024}

-- Tester settings
config.test = {}

-- Visualizer settings
config.visualizer = {}
config.visualizer.width = 1200
config.visualizer.scale = 4
config.visualizer.height = 64

-- Driver configurations
config.driver = {}
config.driver.type = 'torch.CudaTensor'
config.driver.device = 1
config.driver.loss = 'nn.ClassNLLCriterion'
config.driver.variation = 'large'
config.driver.steps = 100000
-- sic: key is spelled 'epoches' and is read by that name elsewhere; keep it.
config.driver.epoches = 100
config.driver.schedule = 8
config.driver.rate = 1e-5
config.driver.interval = 5
config.driver.location = 'models/dianping/spatial8temporal12length512feature256'
config.driver.plot = true
config.driver.visualize = true
config.driver.debug = false
config.driver.resume = false

-- Main configuration
config.joe = {}

return config
================================================
FILE: glyphnet/data.lua
================================================
--[[
Data program for GlyphNet
Copyright 2015-2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')

local Data = class()

-- Constructor for Data
-- config: configuration table
--   .file: the data file location
--   .unifont: the unifont data location
--   .length: the text length in the data
--   .batch: the batch size
function Data:_init(config)
   self.data = torch.load(config.file)
   self.unifont = torch.load(config.unifont or 'unifont/unifont-8.0.01.t7b')
   self.length = config.length or 512
   self.batch = config.batch or 16
end

-- Number of classes, taken from the length of the code table.
function Data:getClasses()
   return #self.data.code
end

-- Fill one randomly-sampled batch. Class and item are drawn uniformly;
-- the optional sample/label tensors are reused when given.
function Data:getBatch(sample, label)
   local code, code_value = self.data.code, self.data.code_value
   local sample, label = self:initSample(sample, label)
   -- Loop over batch dimension
   for i = 1, sample:size(1) do
      local class = torch.random(#code)
      local item = torch.random(code[class]:size(1))
      -- Assign sample
      self:index(sample[i], class, item)
      -- Assign label
      label[i] = class
   end
   return sample, label
end

-- Sequential iterator over the whole dataset, class by class. Each call of
-- the returned closure yields (sample, label, count) where count <= batch is
-- the number of valid rows in the batch; yields nil when exhausted.
function Data:iterator(sample, label)
   local code, code_value = self.data.code, self.data.code_value
   local sample, label = self:initSample(sample, label)
   local class = 1
   local item = 1
   local count = 0
   return function ()
      if code[class] == nil then return end
      sample, label = self:initSample(sample, label)
      count = 0
      for i = 1, sample:size(1) do
         if item > code[class]:size(1) then
            -- Current class exhausted; move to the next one.
            class = class + 1
            item = 1
            if code[class] == nil then
               -- No more classes: return the partial batch if non-empty.
               if count > 0 then break else return end
            end
         end
         self:index(sample[i], class, item)
         label[i] = class
         count = count + 1
         item = item + 1
      end
      return sample, label, count
   end
end

-- Allocate (or reuse) a zeroed sample tensor of shape
-- (batch, length, glyph height, glyph width) and a label tensor of size batch.
function Data:initSample(sample, label)
   local height, width = self.unifont:size(3), self.unifont:size(2)
   local sample = sample or torch.Tensor(self.batch, self.length, height, width)
   local label = label or torch.Tensor(self.batch)
   sample:zero()
   return sample, label
end

-- Render one text item into `sample` by copying unifont glyph rows.
-- Each field of code[class][item] is an (offset, length) pair into code_value;
-- copying stops when the sample length is filled.
function Data:index(sample, class, item)
   local code, code_value = self.data.code, self.data.code_value
   local position = 1
   for field = 1, code[class][item]:size(1) do
      -- Break if current position is larger than sample length
      if position > sample:size(1) then break end
      -- Determine the actual length
      local length = code[class][item][field][2]
      if position + length - 1 > sample:size(1) then
         length = sample:size(1) - position + 1
      end
      -- Copy the data over
      sample:narrow(1, position, length):index(
         self.unifont, 1,
         code_value:narrow(1, code[class][item][field][1], length))
      position = position + length
   end
   return sample
end

return Data
================================================
FILE: glyphnet/driver.lua
================================================
--[[
Driver for GlyphNet training
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local nn = require('nn')
local os = require('os')
local paths = require('paths')
local torch = require('torch')

local Data = require('data')
local Model = require('model')
local Train = require('train')
local Test = require('test')

local Driver = class()

-- Constructor for driver
-- options: configuration table for other classes
-- config: configuration table for driver
--   .type: tensor type to do computation
--   .device: device id for CUDA. Only valid for .type = 'torch.CudaTensor'
--   .loss: the loss class to be used
--   .variation: the variation of the model
--   .steps: number of steps for each epoch
--   .epoches: number of epoches
--   .rate: initial learning rate
--   .schedule: rate change schedule
--   .interval: print time interval
--   .location: save location
--   .plot: whether to plot the result
--   .visualize: whether to visualize the models
--   .debug: whether to do debugging
--   .resume: whether to do resumption
function Driver:_init(options, config)
   local config = config or {}
   self.type = config.type or 'torch.DoubleTensor'
   self.device = config.device or 1
   self.loss = config.loss or 'nn.ClassNLLCriterion'
   self.variation = config.variation or 'large'
   self.steps = config.steps or 100000
   self.epoches = config.epoches or 100
   self.rate = config.rate or 1e-3
   self.schedule = config.schedule or 8
   self.interval = config.interval or 5
   self.location = config.location or '.'
   self.plot = config.plot
   self.visualize = config.visualize
   self.debug = config.debug
   self.resume = config.resume
   self.options = options

   -- Convert the rate multipliers into a step-indexed rate table:
   -- multiplier i takes effect at step (i-1) * steps * schedule + 1.
   local rates = {}
   for i, v in pairs(self.options.train.rates) do
      rates[(i - 1) * self.steps * self.schedule + 1] = v * self.rate
      self.options.train.rates = rates
   end

   -- CUDA settings
   if self.type == 'torch.CudaTensor' then
      local cutorch = require('cutorch')
      print('Driver setting device to '..self.device)
      cutorch.setDevice(self.device)
   end

   -- Initialize random seed
   math.randomseed(os.time())
   torch.manualSeed(os.time())

   -- Handle model variation
   self:initVariation()

   -- Load data
   print('Driver loading training data')
   self.train_data = Data(self.options.train_data)
   print('Driver loading testing data')
   self.test_data = Data(self.options.test_data)

   -- Handle final output number of classes. Assuming last module is nn.Linear.
local num_class = self.train_data:getClasses() for i = #self.options.model.temporal, 1, -1 do if self.options.model.temporal[i].name == 'nn.Linear' then print('Driver adjusting number of classes in model to '..num_class) self.options.model.temporal[i].outputSize = num_class break end end -- Handle resumption if self.resume then local record_file = paths.concat(self.location, 'record.t7b') print('Driver loading resumption from '..record_file) self.record = torch.load(record_file) local model_file = paths.concat( self.location, 'model_'..#self.record..'.t7b') print('Driver loading model from '..model_file) self.options.model.file = model_file self.model = Model(self.options.model) local state_file = paths.concat( self.location, 'state_'..#self.record..'.t7b') print('Driver loading training state from '..state_file) self.options.train.state = torch.load(state_file) print('Driver setting train step to '..(#self.record * self.steps)) self.options.train.step = #self.record * self.steps for i = 1, #self.record do self:printResult(i) end if self.plot then self:plotRecord() end else self.record = {} print('Driver loading model') self.model = Model(self.options.model) end print('Driver setting model type to '..self.type) self.model:type(self.type) print('Driver loading trainer') self.trainer_loss = nn[self.loss:sub(4)]() self.trainer_loss:type(self.type) self.trainer = Train( self.train_data, self.model, self.trainer_loss, self.options.train) print('Driver loading tester for training data') self.train_tester_loss = nn[self.loss:sub(4)]() self.train_tester_loss:type(self.type) self.train_tester = Test( self.train_data, self.model, self.train_tester_loss, self.options.test) print('Driver loading tester for testing data') self.test_tester_loss = nn[self.loss:sub(4)]() self.test_tester_loss:type(self.type) self.test_tester = Test( self.test_data, self.model, self.test_tester_loss, self.options.test) if self.visualize then self:visualizeModel() end self.time = os.time() end -- 
Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.spatial = self.options.variation[self.variation].spatial self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length end -- Run the training process function Driver:run() local begin_epoch = #self.record + 1 local end_epoch = #self.record + self.epoches for i = begin_epoch, end_epoch do print('Driver setting model to training mode') self.model:setModeTrain() print('Driver training for epoch '..i) self.trainer:run( self.steps, function(train, step) self:logTrain(train, step) end) if self.visualize then self:visualizeModel() end print('Driver setting model to testing mode') self.model:setModeTest() print('Driver testing on training data for epoch '..i) self.train_tester:run(function(test, step) self:logTest(test, step) end) print('Driver testing on testing data for epoch '..i) self.test_tester:run(function(test, step) self:logTest(test, step) end) print('Driver saving for epoch '..i) self:save() self:printResult() if self.plot then self:plotRecord() end end end -- Save the record and the model function Driver:save() local epoch = epoch or #self.record + 1 -- Make a backup for the record print('Driver backing up record.t7b') local record_file = paths.concat(self.location, 'record.t7b') os.rename(record_file, record_file..'.backup') -- Save the new record print('Driver saving new records to '..record_file) self.record[epoch] = { train_loss = self.train_tester.total_objective, test_loss = self.test_tester.total_objective, train_error = self.train_tester.total_error, test_error = self.test_tester.total_error } torch.save(record_file, self.record) -- Save the model local model_file = 
paths.concat(self.location, 'model_'..epoch..'.t7b') print('Driver saving model to '..model_file) self.model:save(model_file) -- Save the training state local state_file = paths.concat(self.location, 'state_'..epoch..'.t7b') print('Driver saving training state to '..state_file) torch.save(state_file, self.trainer.state:type(torch.getdefaulttensortype())) end -- Print current result function Driver:printResult(epoch) local epoch = epoch or #self.record print('Driver epoch = '..epoch.. ', train_error = '..self.record[epoch].train_error.. ', test_error = '..self.record[epoch].test_error.. ', train_loss = '..self.record[epoch].train_loss.. ', test_loss = '..self.record[epoch].test_loss) end -- Plot the record function Driver:plotRecord() require('gnuplot') self.error_figure = self.error_figure or gnuplot.figure() self.loss_figure = self.loss_figure or gnuplot.figure() local epoch = torch.linspace(1, #self.record, #self.record) local train_error = torch.Tensor(epoch:size()) local test_error = torch.Tensor(epoch:size()) local train_loss = torch.Tensor(epoch:size()) local test_loss = torch.Tensor(epoch:size()) for i = 1, #self.record do train_error[i] = self.record[i].train_error test_error[i] = self.record[i].test_error train_loss[i] = self.record[i].train_loss test_loss[i] = self.record[i].test_loss end gnuplot.figure(self.error_figure) gnuplot.plot({'Training error', epoch, train_error}, {'Testing error', epoch, test_error}) gnuplot.title('Training and testing error') gnuplot.figure(self.loss_figure) gnuplot.plot({'Training loss', epoch, train_loss}, {'Testing loss', epoch, test_loss}) gnuplot.title('Training and testing loss') end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Spatial model' self.spatial_visualizer = self.spatial_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or 
Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.spatial_visualizer:drawSequential(self.model.spatial) self.temporal_visualizer:drawSequential(self.model.temporal) end -- Log training function Driver:logTrain(train, step) -- If it is not time to log, return if os.difftime(os.time(), self.time) < self.interval then return end local message = 'Train step = '..train.step.. ', rate = '..string.format('%.2e', train.rate).. ', error = '..string.format('%.2e', train.error).. ', loss = '..string.format('%.2e', train.objective).. ', data = '..string.format('%.2e', train.time.data).. ', forward = '..string.format('%.2e', train.time.forward).. ', backward = '..string.format('%.2e', train.time.backward).. ', update = '..string.format('%.2e', train.time.update) if self.debug then message = message.. ', input = ['..string.format("%.2e",train.input:min()).. ' '..string.format("%.2e",train.input:max()).. ' '..string.format("%.2e",train.input:mean()).. ' '..string.format("%.2e",train.input:std())..']'.. ', params = ['..string.format("%.2e",train.params:min()).. ' '..string.format("%.2e",train.params:max()).. ' '..string.format("%.2e",train.params:mean()).. ' '..string.format("%.2e",train.params:std())..']'.. ', grads = ['..string.format("%.2e",train.grads:min()).. ' '..string.format("%.2e",train.grads:max()).. ' '..string.format("%.2e",train.grads:mean()).. ' '..string.format("%.2e",train.grads:std())..']'.. ', state = ['..string.format("%.2e",train.state:min()).. ' '..string.format("%.2e",train.state:max()).. ' '..string.format("%.2e",train.state:mean()).. ' '..string.format("%.2e",train.state:std())..']' if self.visualize then self:visualizeModel() end end print(message) self.time = os.time() end -- Log testing function Driver:logTest(test) -- If it not time to log, return if os.difftime(os.time(), self.time) < self.interval then return end local message = 'Test count = '..test.total_count.. ', error = '..string.format('%.2e', test.error).. 
', loss = '..string.format('%.2e', test.objective).. ', total_error = '..string.format('%.2e', test.total_error).. ', total_loss = '..string.format('%.2e', test.total_objective).. ', data = '..string.format('%.2e', test.time.data).. ', forward = '..string.format('%.2e', test.time.forward).. ', update = '..string.format('%.2e', test.time.update) if self.debug then message = message.. ', input = ['..string.format("%.2e",test.input:min()).. ' '..string.format("%.2e",test.input:max()).. ' '..string.format("%.2e",test.input:mean()).. ' '..string.format("%.2e",test.input:std())..']' end print(message) self.time = os.time() end return Driver ================================================ FILE: glyphnet/main.lua ================================================ --[[ Main program for GlyphNet training Copyright 2015 Xiang Zhang --]] local torch = require('torch') local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main(arg) -- Load the configuration local config = dofile('config.lua') -- Build parameter table based on configuration local params = joe.buildArgumentTable(config) -- Parse arguments based on configuration config = joe.parseArguments(arg, params, config) -- Build the driver local driver = Driver(config, config.driver) -- Start the driver driver:run() end function joe.buildArgumentTable(config, params, prefix) local params = params or {} local prefix = prefix or '' for key, val in pairs(config) do if type(key) == 'string' then local val_type = type(val) if val_type == 'string' or val_type == 'number' then params[prefix..key] = val elseif val_type == 'boolean' then params[prefix..key] = tostring(val) elseif val_type == 'table' then params = joe.buildArgumentTable(val, params, prefix..key..'_') else print('Joe argument '..prefix..key..' type unsupported') end else print('Joe argument key '..prefix..tostring(key)..' 
not a string') end end return params end function joe.parseArguments(arg, params, config) local cmd = torch.CmdLine() for key, val in pairs(params) do cmd:option('-'..key, val) end local parsed = cmd:parse(arg) return joe.parseArgumentTable(config, parsed) end function joe.parseArgumentTable(config, params, prefix) local prefix = prefix or '' for key, val in pairs(config) do if type(key) == 'string' then local val_type = type(val) if val_type == 'string' or val_type == 'number' then config[key] = params[prefix..key] or val elseif val_type == 'boolean' then if params[prefix..key] == 'true' then config[key] = true elseif params[prefix..key] == 'false' then config[key] = false else error('Argument '..prefix..key..' must be true or false') end elseif val_type == 'table' then config[key] = joe.parseArgumentTable(val, params, prefix..key..'_') end end end return config end -- Call the main program joe.main(arg) ================================================ FILE: glyphnet/model.lua ================================================ --[[ Model for GlyphNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local cudnn local nn = require('nn') local torch = require('torch') local Modules = require('modules') local Model = class() -- Model constructor -- config: configuration table -- .spatial: configuration table of the spatial network -- .temporal: configuration table of the temporal network -- .file: (optional) the model file -- .cudnn: (optional) whether to use NVidia cudnn -- .group: (optional) number of spatial network groups function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.spatial = self:makeCleanSequential(model.spatial) self.temporal = self:makeCleanSequential(model.temporal) else self.spatial = self:createCleanSequential(config.spatial) self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.spatial) self:initSequential(self.temporal) end -- Saving 
configurations self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() -- Initialize intermediate values self.feature = torch.Tensor() self.feature_cache = torch.Tensor() self.grad_feature = torch.Tensor() self.grad_input = torch.Tensor() -- Initialize groups self:initGroup(config.group) end function Model:initGroup(group) local group = group or 1 -- Clean current network group if self.group then self.group = nil collectgarbage() end -- Create new group self.group = {} for i = 1, group do self.group[i] = self.spatial:clone( 'weight', 'bias', 'gradWeight', 'gradBias') end end function Model:forward(input) -- Do forward propagation for spatial model group local input_group = input:view( #self.group, -1, 1, input:size(3), input:size(4)) local feature = self.group[1]:forward(input_group[1]) self.feature_cache:resize(#self.group, feature:size(1), feature:size(2)) self.feature_cache[1]:copy(feature) for i = 2, #self.group do local feature = self.group[i]:forward(input_group[i]) self.feature_cache[i]:copy(feature) end -- Do forward propagation for temporal model self.feature:resize( input:size(1), self.feature_cache:size(3), input:size(2)):copy( self.feature_cache:view( input:size(1), input:size(2), self.feature_cache:size(3)):transpose( 2, 3)) self.output = self.temporal:forward(self.feature) return self.output end function Model:backward(input, grad_output) -- Do backward propagation for temporal model local grad_feature = self.temporal:backward(self.feature, grad_output) self.grad_feature:resizeAs(self.feature_cache):view( input:size(1), input:size(2), self.feature_cache:size(3)):copy( grad_feature:transpose(2, 3)):div(input:size(2)) -- Do backward propagation for spatial model group local input_group = input:view( #self.group, -1, 1, input:size(3), input:size(4)) self.grad_input:resizeAs(input) local grad_input_group = self.grad_input:view( #self.group, -1, 1, input:size(3), input:size(4)) for i = 1, #self.group do local 
grad_input = self.group[i]:backward( input_group[i], self.grad_feature[i]) grad_input_group[i]:copy(grad_input) end return self.grad_input end function Model:getParameters() local parameters, gradients = nn.Module.getParameters(self) self:initGroup(#self.group) return parameters, gradients end function Model:parameters() local parameters, gradients = {}, {} local spatial_parameters, spatial_gradients = self.spatial:parameters() for i = 1, #spatial_parameters do parameters[#parameters + 1] = spatial_parameters[i] gradients[#gradients + 1] = spatial_gradients[i] end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = temporal_parameters[i] gradients[#gradients + 1] = temporal_gradients[i] end return parameters, gradients end function Model:type(tensortype) if tensortype ~= nil and tensortype ~= self.tensortype then if tensortype == 'torch.CudaTensor' then require('cunn') self.spatial = self:makeCudaSequential(self.spatial) self.temporal = self:makeCudaSequential(self.temporal) else self.spatial = self:makeCleanSequential(self.spatial) self.temporal = self:makeCleanSequential(self.temporal) end self.spatial:type(tensortype) self.temporal:type(tensortype) self.feature = self.feature:type(tensortype) self.feature_cache = self.feature_cache:type(tensortype) self.grad_feature = self.grad_feature:type(tensortype) self.grad_input = self.grad_input:type(tensortype) self.tensortype = tensortype self:initGroup(#self.group) end return self.tensortype end function Model:cuda() return self:type('torch.CudaTensor') end function Model:double() return self:type('torch.DoubleTensor') end function Model:float() return self:type('torch.FloatTensor') end function Model:setMode(mode) self:setModeSequential(self.temporal, mode) self:setModeSequential(self.spatial, mode) for i = 1, #self.group do self:setModeSequential(self.group[i], mode) end end function Model:setModeTrain() self:setMode('train') end 
function Model:setModeTest()
   self:setMode('test')
end

-- Save a cleaned (intermediate buffers stripped) copy of both sub-networks.
function Model:save(file)
   local spatial = self:clearSequential(
      self:makeCleanSequential(self.spatial))
   local temporal = self:clearSequential(
      self:makeCleanSequential(self.temporal))
   torch.save(file, {spatial = spatial, temporal = temporal})
end

-- Clear sequential model: recursively replace every tensor field except
-- 'weight' and 'bias' with an empty tensor so the saved file stays small.
function Model:clearSequential(sequential)
   local function recursiveClear(key, param)
      local param = param
      if torch.type(param) == 'table' then
         for k, v in pairs(param) do
            param[k] = recursiveClear(k, v)
         end
      elseif torch.isTensor(param) and key ~= 'weight' and key ~= 'bias' then
         param = param.new()
      end
      return param
   end
   for _, m in ipairs(sequential.modules) do
      for k, v in pairs(m) do
         m[k] = recursiveClear(k, v)
      end
   end
   return sequential
end

-- Initialize sequential using microsoft (He et al.) initialization,
-- dispatched per module type through Model.initModule.
function Model:initSequential(sequential)
   for _, m in ipairs(sequential.modules) do
      self.initModule[torch.type(m)](self, m)
   end
end

-- Setting the mode of sequential modules via the setModeModule dispatch table.
function Model:setModeSequential(sequential, mode)
   for _, m in ipairs(sequential.modules) do
      self.setModeModule[mode][torch.type(m)](self, m)
   end
end

-- Create a clean sequential from a configuration table of module specs.
function Model:createCleanSequential(config)
   local new = nn.Sequential()
   for _, m in ipairs(config) do
      new:add(self.createCleanModule[m.name](self, m))
   end
   return new
end

-- Make a clean sequential from an existing one (e.g. a cudnn-backed model).
function Model:makeCleanSequential(sequential)
   local new = nn.Sequential()
   for _, m in ipairs(sequential.modules) do
      new:add(self.makeCleanModule[torch.type(m)](self, m))
   end
   return new
end

-- Make a CUDA sequential, loading cudnn lazily when requested.
function Model:makeCudaSequential(sequential)
   if self.cudnn then cudnn = require('cudnn') end
   local new = nn.Sequential()
   for _, m in ipairs(sequential.modules) do
      new:add(self.makeCudaModule[torch.type(m)](self, m))
   end
   return new
end

-- Initialize modules (no-op for parameterless modules)
Model.initModule = {}
Model.initModule['nn.LogSoftMax'] = function (self, m) end
Model.initModule['nn.Threshold'] = function (self, m) end
Model.initModule['nn.Reshape'] = function (self, m) end
Model.initModule['nn.Dropout'] = function (self, m) end
-- NOTE(review): these Gaussian fans use weight:size(1) (the output dimension
-- for nn.Linear / nn.SpatialConvolution) -- i.e. the fan-out variant of He
-- initialization; confirm this matches the intended "microsoft" scheme.
Model.initModule['nn.Linear'] = function (self, m)
   m.bias:zero()
   m.weight:normal(0, math.sqrt(2 / m.weight:size(1)))
end
Model.initModule['nn.SpatialConvolution'] = function (self, m)
   m.bias:zero()
   m.weight:normal(
      0, math.sqrt(2 / m.weight:size(1) / m.weight:size(3) / m.weight:size(4)))
end
Model.initModule['nn.SpatialMaxPooling'] = function (self, m) end
Model.initModule['nn.TemporalConvolutionMM'] = function (self, m)
   m.bias:zero()
   m.weight:normal(0, math.sqrt(2 / m.weight:size(1) / m.weight:size(3)))
end
Model.initModule['nn.TemporalMaxPoolingMM'] = function (self, m) end

-- Set module mode to train; only nn.Dropout actually changes behavior.
Model.setModeModule = {}
Model.setModeModule['train'] = {}
Model.setModeModule['train']['nn.LogSoftMax'] = function (self, m) end
Model.setModeModule['train']['cudnn.LogSoftMax'] =
   Model.setModeModule['train']['nn.LogSoftMax']
Model.setModeModule['train']['nn.Threshold'] = function (self, m) end
Model.setModeModule['train']['nn.Reshape'] = function (self, m) end
Model.setModeModule['train']['nn.Dropout'] = function (self, m)
   m.train = true
end
Model.setModeModule['train']['nn.Linear'] = function (self, m) end
Model.setModeModule['train']['nn.SpatialConvolution'] = function (self, m) end
Model.setModeModule['train']['cudnn.SpatialConvolution'] =
   Model.setModeModule['train']['nn.SpatialConvolution']
Model.setModeModule['train']['nn.SpatialMaxPooling'] = function (self, m) end
Model.setModeModule['train']['cudnn.SpatialMaxPooling'] =
   Model.setModeModule['train']['nn.SpatialMaxPooling']
Model.setModeModule['train']['nn.TemporalConvolutionMM'] =
   function (self, m) end
Model.setModeModule['train']['cudnn.TemporalConvolutionCudnn'] =
   function (self, m) end
Model.setModeModule['train']['nn.TemporalMaxPoolingMM'] =
   function (self, m) end
Model.setModeModule['train']['cudnn.TemporalMaxPoolingCudnn'] =
   Model.setModeModule['train']['nn.TemporalMaxPoolingMM']

-- Set module mode to test
Model.setModeModule['test'] = {}
Model.setModeModule['test']['nn.LogSoftMax'] = function (self, m) end
Model.setModeModule['test']['cudnn.LogSoftMax'] =
   Model.setModeModule['test']['nn.LogSoftMax']
Model.setModeModule['test']['nn.Threshold'] = function (self, m) end
Model.setModeModule['test']['nn.Reshape'] = function (self, m) end
Model.setModeModule['test']['nn.Dropout'] = function (self, m)
   m.train = false
end
Model.setModeModule['test']['nn.Linear'] = function (self, m) end
Model.setModeModule['test']['nn.SpatialConvolution'] = function (self, m) end
Model.setModeModule['test']['cudnn.SpatialConvolution'] =
   Model.setModeModule['test']['nn.SpatialConvolution']
Model.setModeModule['test']['nn.SpatialMaxPooling'] = function (self, m) end
Model.setModeModule['test']['cudnn.SpatialMaxPooling'] =
   Model.setModeModule['test']['nn.SpatialMaxPooling']
Model.setModeModule['test']['nn.TemporalConvolutionMM'] =
   function (self, m) end
Model.setModeModule['test']['cudnn.TemporalConvolutionCudnn'] =
   function (self, m) end
Model.setModeModule['test']['nn.TemporalMaxPoolingMM'] =
   function (self, m) end
Model.setModeModule['test']['cudnn.TemporalMaxPoolingCudnn'] =
   Model.setModeModule['test']['nn.TemporalMaxPoolingMM']

-- Create clean modules from configuration specs (fresh parameters)
Model.createCleanModule = {}
Model.createCleanModule['nn.LogSoftMax'] = function (self, m)
   return nn.LogSoftMax()
end
Model.createCleanModule['nn.Threshold'] = function (self, m)
   return nn.Threshold(m.th, m.v, m.ip)
end
Model.createCleanModule['nn.Reshape'] = function (self, m)
   return nn.Reshape(m.size, m.batchMode)
end
Model.createCleanModule['nn.Dropout'] = function (self, m)
   return nn.Dropout(m.p, not m.v2, m.inplace)
end
Model.createCleanModule['nn.Linear'] = function (self, m)
   return nn.Linear(m.inputSize, m.outputSize, m.bias)
end
Model.createCleanModule['nn.SpatialConvolution'] = function (self, m)
   return nn.SpatialConvolution(
      m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.createCleanModule['nn.SpatialMaxPooling'] = function (self, m)
   return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.createCleanModule['nn.TemporalConvolutionMM'] = function (self, m)
   return nn.TemporalConvolutionMM(
      m.inputFrameSize, m.outputFrameSize, m.kW, m.dW, m.padW)
end
Model.createCleanModule['nn.TemporalMaxPoolingMM'] = function (self, m)
   return nn.TemporalMaxPoolingMM(m.kW, m.dW)
end

-- Make clean modules: unlike createCleanModule, these receive a LIVE module
-- and return a plain-nn replica, copying 'weight' and 'bias' where the
-- module has them. cudnn types alias the corresponding nn constructor.
Model.makeCleanModule = {}
Model.makeCleanModule['nn.LogSoftMax'] = function (self, m)
   return nn.LogSoftMax()
end
Model.makeCleanModule['cudnn.LogSoftMax'] =
   Model.makeCleanModule['nn.LogSoftMax']
Model.makeCleanModule['nn.Threshold'] = function (self, m)
   -- Reads the live module's fields (threshold/val/inplace), not the
   -- config-style keys used by createCleanModule.
   return nn.Threshold(m.threshold, m.val, m.inplace)
end
Model.makeCleanModule['nn.Reshape'] = function (self, m)
   return nn.Reshape(m.size, m.batchMode)
end
Model.makeCleanModule['nn.Dropout'] = function (self, m)
   return nn.Dropout(m.p, not m.v2, m.inplace)
end
Model.makeCleanModule['nn.Linear'] = function (self, m)
   -- weight is (outputSize x inputSize); reconstruct sizes from it.
   local new = nn.Linear(m.weight:size(2), m.weight:size(1), m.bias)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['nn.SpatialConvolution'] = function (self, m)
   local new = nn.SpatialConvolution(
      m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['cudnn.SpatialConvolution'] =
   Model.makeCleanModule['nn.SpatialConvolution']
Model.makeCleanModule['nn.SpatialMaxPooling'] = function (self, m)
   return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.makeCleanModule['cudnn.SpatialMaxPooling'] =
   Model.makeCleanModule['nn.SpatialMaxPooling']
Model.makeCleanModule['nn.TemporalConvolutionMM'] = function (self, m)
   -- TemporalConvolutionMM stores its geometry as input_feature /
   -- output_feature / kernel / stride / pad (see its __init).
   local new = nn.TemporalConvolutionMM(
      m.input_feature, m.output_feature, m.kernel, m.stride, m.pad)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['cudnn.TemporalConvolutionCudnn'] = function (self, m) local new = nn.TemporalConvolutionMM( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCleanModule['nn.TemporalMaxPoolingMM'] = function (self, m) return nn.TemporalMaxPoolingMM(m.kW, m.dW) end Model.makeCleanModule['cudnn.TemporalMaxPoolingCudnn'] = Model.makeCleanModule['nn.TemporalMaxPoolingMM'] -- Make CUDA modules Model.makeCudaModule = {} Model.makeCudaModule['nn.LogSoftMax'] = function (self, m) if self.cudnn and cudnn.LogSoftMax then return cudnn.LogSoftMax() else return nn.LogSoftMax() end end Model.makeCudaModule['cudnn.LogSoftMax'] = Model.makeCudaModule['nn.LogSoftMax'] Model.makeCudaModule['nn.Threshold'] = function (self, m) return nn.Threshold(m.threshold, m.val, m.inplace) end Model.makeCudaModule['nn.Reshape'] = function (self, m) return nn.Reshape(m.size, m.batchMode) end Model.makeCudaModule['nn.Dropout'] = function (self, m) return nn.Dropout(m.p, not m.v2, m.inplace) end Model.makeCudaModule['nn.Linear'] = function (self, m) local new = nn.Linear(m.weight:size(2), m.weight:size(1), m.bias) new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['nn.SpatialConvolution'] = function (self, m) local new if self.cudnn then new = cudnn.SpatialConvolution( m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) else new = nn.SpatialConvolution( m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['cudnn.SpatialConvolution'] = Model.makeCudaModule['nn.SpatialConvolution'] Model.makeCudaModule['nn.SpatialMaxPooling'] = function (self, m) if self.cudnn then return cudnn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) else return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) end end Model.makeCudaModule['cudnn.SpatialMaxPooling'] = 
Model.makeCudaModule['nn.SpatialMaxPooling'] Model.makeCudaModule['nn.TemporalConvolutionMM'] = function (self, m) local new if self.cudnn then new = cudnn.TemporalConvolutionCudnn( m.input_feature, m.output_feature, m.kernel, m.stride, m.pad) else new = nn.TemporalConvolutionMM( m.input_feature, m.output_feature, m.kernel, m.stride, m.pad) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['cudnn.TemporalConvolutionCudnn'] = function (self, m) local new if self.cudnn then new = cudnn.TemporalConvolutionCudnn( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) else new = nn.TemporalConvolutionMM( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['nn.TemporalMaxPoolingMM'] = function (self, m) if self.cudnn then return cudnn.TemporalMaxPoolingCudnn(m.kW, m.dW) else return nn.TemporalMaxPoolingMM(m.kW, m.dW) end end Model.makeCudaModule['cudnn.TemporalMaxPoolingCudnn'] = Model.makeCudaModule['nn.TemporalMaxPoolingMM'] return Model ================================================ FILE: glyphnet/modules/TemporalConvolutionCudnn.lua ================================================ --[[ Temporal max pooling module .with data order consistent with MM Copyright 2016 Xiang Zhang --]] local TemporalConvolutionCudnn, parent = torch.class('cudnn.TemporalConvolutionCudnn', 'cudnn.SpatialConvolution') function TemporalConvolutionCudnn:__init( input_feature, output_feature, kW, dW, padW) parent.__init(self, input_feature, output_feature, kW, 1, dW, 1, padW, 0) end function TemporalConvolutionCudnn:updateOutput(input) local input_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) end local output = parent.updateOutput(self, input_view) if input:dim() ~= output:dim() then if input:dim() == 2 then self.output = output:view(output:size(1), 
output:size(3)) else self.output = output:view(output:size(1), output:size(2), output:size(4)) end end return self.output end function TemporalConvolutionCudnn:updateGradInput(input, grad_output) local input_view local grad_output_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) grad_output_view = grad_output:view( grad_output:size(1), 1, grad_output:size(2)) self.output = self.output:view( self.output:size(1), 1, self.output:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) grad_output_view = grad_output:view( grad_output:size(1), grad_output:size(2), 1, grad_output:size(3)) self.output = self.output:view( self.output:size(1), self.output:size(2), 1, self.output:size(3)) end local grad_input = parent.updateGradInput(self, input_view, grad_output_view) if self.gradInput:dim() ~= input:dim() then if input:dim() == 2 then self.output = self.output:view( self.output:size(1), self.output:size(3)) self.gradInput = grad_input:view(grad_input:size(1), grad_input:size(3)) else self.output = self.output:view( self.output:size(1), self.output:size(2), self.output:size(4)) self.gradInput = grad_input:view( grad_input:size(1), grad_input:size(2), grad_input:size(4)) end end return self.gradInput end function TemporalConvolutionCudnn:accGradParameters(input, grad_output, scale) local input_view local grad_output_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) grad_output_view = grad_output:view( grad_output:size(1), 1, grad_output:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) grad_output_view = grad_output:view( grad_output:size(1), grad_output:size(2), 1, grad_output:size(3)) end parent.accGradParameters(self, input_view, grad_output_view, scale) end function TemporalConvolutionCudnn:__tostring__() return string.format( '%s(%d -> %d, %d, %d, %d)', torch.type(self), self.nInputPlane, self.nOutputPlane, self.kW, self.dW, 
self.padW) end ================================================ FILE: glyphnet/modules/TemporalConvolutionMM.lua ================================================ --[[ Temporal convolution module that supports padding Copyright 2016 Xiang Zhang --]] local TemporalConvolutionMM, parent = torch.class('nn.TemporalConvolutionMM', 'nn.Module') function TemporalConvolutionMM:__init( input_feature, output_feature, kernel, stride, pad) parent.__init(self) self.input_feature = input_feature self.output_feature = output_feature self.kernel = kernel self.stride = stride or 1 self.pad = pad or 0 self.weight = torch.Tensor(output_feature, input_feature, kernel) self.bias = torch.Tensor(output_feature) self.gradWeight = torch.Tensor(output_feature, input_feature, kernel) self.gradBias = torch.Tensor(output_feature) self.pad_cache = torch.Tensor() self.unfold_cache = torch.Tensor() self.interlace_cache = torch.Tensor() self.weight_cache = torch.Tensor( self.weight:size(2), self.weight:size(1), self.weight:size(3)) self.reverse_index = torch.LongTensor(self.kernel) for i = 1, self.kernel do self.reverse_index[i] = self.kernel - i + 1 end self:reset() end function TemporalConvolutionMM:reset(stdv) if stdv then stdv = stdv * math.sqrt(3) else stdv = 1/math.sqrt(self.kernel * self.input_feature) end self.weight:uniform(-stdv, stdv) self.bias:uniform(-stdv, stdv) end function TemporalConvolutionMM:updateOutput(input) if input:dim() ~= 2 and input:dim() ~= 3 then error('Input dimension must be 2 or 3') end -- Create temporary input cache that is to be unfolded if input:dim() == 2 then self.pad_cache:resize( input:size(1), input:size(2) + 2 * self.pad):zero():narrow( 2, self.pad + 1, input:size(2)):copy(input) else self.pad_cache:resize( input:size(1), input:size(2), input:size(3) + 2 * self.pad):zero():narrow( 3, self.pad + 1, input:size(3)):copy(input) end -- Unfold the input cache local unfolded = self.pad_cache:unfold( self.pad_cache:dim(), self.kernel, self.stride):transpose( 
self.pad_cache:dim(), self.pad_cache:dim() + 1) self.unfold_cache:resizeAs(unfolded):copy(unfolded) -- Do matrix multiplication if input:dim() == 2 then self.output:resize( self.output_feature, self.unfold_cache:size(3)):copy( self.bias:view(-1, 1):expandAs(self.output)) self.output:addmm( 1, self.output, 1, self.weight:view(self.weight:size(1), -1), self.unfold_cache:view(-1, self.unfold_cache:size(3))) else self.output:resize( self.unfold_cache:size(1), self.output_feature, self.unfold_cache:size(4)):copy( self.bias:view(1, -1, 1):expandAs(self.output)) local weight = self.weight:view( 1, self.weight:size(1), self.weight:size(2) * self.weight:size(3)):expand( self.unfold_cache:size(1), self.weight:size(1), self.weight:size(2) * self.weight:size(3)) self.output:baddbmm( 1, self.output, 1, weight, self.unfold_cache:view( self.unfold_cache:size(1), -1, self.unfold_cache:size(4))) end return self.output end function TemporalConvolutionMM:updateGradInput(input, grad_output) -- Reverse the weight on the kernel dimension self.weight_cache:indexCopy( 3, self.reverse_index, self.weight:transpose(1, 2)) -- Resize the initialize the interlace cache if input:dim() == 2 then self.interlace_cache:resize( grad_output:size(1), self.stride * (grad_output:size(2) - 1) + 1):zero() self.interlace_cache:narrow( 2, 1, self.interlace_cache:size(2) - 1):unfold( 2, self.stride, self.stride):select(3, 1):copy( grad_output:narrow(2, 1, grad_output:size(2) - 1)) self.interlace_cache:select(2, self.interlace_cache:size(2)):copy( grad_output:select(2, grad_output:size(2))) else self.interlace_cache:resize( grad_output:size(1), grad_output:size(2), self.stride * (grad_output:size(3) - 1) + 1):zero() self.interlace_cache:narrow( 3, 1, self.interlace_cache:size(3) - 1):unfold( 3, self.stride, self.stride):select(4, 1):copy( grad_output:narrow(3, 1, grad_output:size(3) - 1)) self.interlace_cache:select(3, self.interlace_cache:size(3)):copy( grad_output:select(3, grad_output:size(3))) end -- 
Resize and initialize the padded cache if input:dim() == 2 then self.pad_cache:resize( grad_output:size(1), input:size(2) + self.kernel - 1) local length = math.min( self.pad_cache:size(2), self.interlace_cache:size(2)) self.pad_cache:zero():narrow( 2, (self.pad_cache:size(2) - length) / 2 + 1, length):copy( self.interlace_cache:narrow( 2, (self.interlace_cache:size(2) - length) / 2 + 1, length)) else self.pad_cache:resize( grad_output:size(1), grad_output:size(2), input:size(3) + self.kernel - 1) local length = math.min( self.pad_cache:size(3), self.interlace_cache:size(3)) self.pad_cache:zero():narrow( 3, (self.pad_cache:size(3) - length) / 2 + 1, length):copy( self.interlace_cache:narrow( 3, (self.interlace_cache:size(3) - length) / 2 + 1, length)) end -- Unfold the output cache local unfolded = self.pad_cache:unfold( self.pad_cache:dim(), self.kernel, 1):transpose( self.pad_cache:dim(), self.pad_cache:dim() + 1) self.unfold_cache:resizeAs(unfolded):copy(unfolded) -- Do matrix multiplication self.gradInput:resizeAs(input):zero() if input:dim() == 2 then self.gradInput:addmm( 1, self.gradInput, 1, self.weight_cache:view(self.weight:size(2), -1), self.unfold_cache:view(-1, self.unfold_cache:size(3))) else local weight = self.weight_cache:view( 1, self.weight:size(2), self.weight:size(1) * self.weight:size(3)):expand( unfolded:size(1), self.weight:size(2), self.weight:size(1) * self.weight:size(3)) self.gradInput:baddbmm( 1, self.gradInput, 1, weight, self.unfold_cache:view( self.unfold_cache:size(1), -1, self.unfold_cache:size(4))) end return self.gradInput end function TemporalConvolutionMM:accGradParameters(input, grad_output, scale) local scale = scale or 1 -- Create temporary input cache that is to be unfolded if input:dim() == 2 then self.pad_cache:resize( input:size(1), input:size(2) + 2 * self.pad):zero():narrow( 2, self.pad + 1, input:size(2)):copy(input) else self.pad_cache:resize( input:size(1), input:size(2), input:size(3) + 2 * self.pad):zero():narrow( 
3, self.pad + 1, input:size(3)):copy(input)
   end
   -- Unfold the padded input cache. Note the transpose swaps the last two
   -- dimensions (frame vs. kernel) -- a different layout than updateOutput,
   -- as required by the gradient accumulation matrix product below.
   local unfolded = self.pad_cache:unfold(
      self.pad_cache:dim(), self.kernel, self.stride):transpose(
      self.pad_cache:dim() - 1, self.pad_cache:dim())
   self.unfold_cache:resizeAs(unfolded):copy(unfolded)
   -- Accumulate weight/bias gradients via matrix multiplication.
   local grad_weight = self.gradWeight:view(self.weight:size(1), -1)
   if input:dim() == 2 then
      grad_weight:addmm(
         1, grad_weight, scale, grad_output,
         self.unfold_cache:view(unfolded:size(1), -1))
      self.gradBias:add(scale, grad_output:sum(2))
   else
      -- Batched case: prefer the fused addbmm when the tensor type
      -- provides it; otherwise fall back to a per-sample addmm loop.
      if grad_weight.addbmm then
         grad_weight:addbmm(
            1, grad_weight, scale, grad_output,
            self.unfold_cache:view(
               self.unfold_cache:size(1), self.unfold_cache:size(2), -1))
      else
         for i = 1, grad_output:size(1) do
            grad_weight:addmm(
               1, grad_weight, scale, grad_output:select(1, i),
               self.unfold_cache:select(1, i):view(
                  self.unfold_cache:size(2), -1))
         end
      end
      self.gradBias:add(scale, grad_output:sum(3):sum(1))
   end
end

TemporalConvolutionMM.sharedAccUpdateGradParameters =
   TemporalConvolutionMM.accUpdateGradParameters

function TemporalConvolutionMM:__tostring__()
   return string.format(
      '%s(%d -> %d, %d, %d, %d)', torch.type(self), self.input_feature,
      self.output_feature, self.kernel, self.stride, self.pad)
end

================================================ FILE: glyphnet/modules/TemporalMaxPoolingCudnn.lua ================================================

--[[
Temporal max pooling module with data order consistent with MM
Copyright 2016 Xiang Zhang
--]]

local TemporalMaxPoolingCudnn, parent =
   torch.class('cudnn.TemporalMaxPoolingCudnn', 'cudnn.SpatialMaxPooling')

-- Temporal pooling implemented as spatial pooling with a singleton height.
function TemporalMaxPoolingCudnn:__init(kW, dW, padW)
   parent.__init(self, kW, 1, dW, 1, padW, 0)
end

-- Forward: insert a singleton dimension, run the spatial parent, then view
-- the result back to the temporal layout when the parent changed the rank.
function TemporalMaxPoolingCudnn:updateOutput(input)
   local input_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
   end
   local output = parent.updateOutput(self, input_view)
   if self.output:dim() ~= input:dim() then
      if input:dim() == 2 then
         self.output = output:view(output:size(1), output:size(3))
      else
         self.output = output:view(
            output:size(1), output:size(2), output:size(4))
      end
   end
   return self.output
end

-- Backward: temporarily re-expand self.output (and the gradients) to the
-- spatial layout the parent expects, then view everything back.
function TemporalMaxPoolingCudnn:updateGradInput(input, grad_output)
   local input_view
   local grad_output_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
      grad_output_view = grad_output:view(
         grad_output:size(1), 1, grad_output:size(2))
      self.output = self.output:view(
         self.output:size(1), 1, self.output:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
      grad_output_view = grad_output:view(
         grad_output:size(1), grad_output:size(2), 1, grad_output:size(3))
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), 1, self.output:size(3))
   end
   local grad_input = parent.updateGradInput(self, input_view, grad_output_view)
   if self.gradInput:dim() ~= input:dim() then
      if input:dim() == 2 then
         -- BUG FIX: original read 'self.utput:size(1)' (undefined field),
         -- which raised an error in the 2-dim backward path.
         self.output = self.output:view(
            self.output:size(1), self.output:size(3))
         self.gradInput = grad_input:view(
            grad_input:size(1), grad_input:size(3))
      else
         self.output = self.output:view(
            self.output:size(1), self.output:size(2), self.output:size(4))
         self.gradInput = grad_input:view(
            grad_input:size(1), grad_input:size(2), grad_input:size(4))
      end
   end
   return self.gradInput
end

function TemporalMaxPoolingCudnn:__tostring__()
   return string.format('%s(%d, %d)', torch.type(self), self.kW, self.dW)
end

================================================ FILE: glyphnet/modules/TemporalMaxPoolingMM.lua ================================================

--[[
Temporal max pooling module with data order consistent with MM
Copyright 2016 Xiang Zhang
--]]

local TemporalMaxPoolingMM, parent =
   torch.class('nn.TemporalMaxPoolingMM', 'nn.SpatialMaxPooling')

-- Temporal pooling implemented as spatial pooling with a singleton height.
function TemporalMaxPoolingMM:__init(kW, dW)
   parent.__init(self, kW, 1, dW, 1)
end

-- Forward: same singleton-dimension trick as the cudnn variant; here the
-- output is unconditionally viewed back to the temporal layout.
function TemporalMaxPoolingMM:updateOutput(input)
   local input_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
   end
   local output = parent.updateOutput(self, input_view)
   if input:dim() == 2 then
      self.output = output:view(output:size(1), output:size(3))
   else
      self.output = output:view(output:size(1), output:size(2), output:size(4))
   end
   return self.output
end

-- Backward: re-expand output/gradients for the spatial parent, then view back.
function TemporalMaxPoolingMM:updateGradInput(input, grad_output)
   local input_view
   local grad_output_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
      grad_output_view = grad_output:view(
         grad_output:size(1), 1, grad_output:size(2))
      self.output = self.output:view(
         self.output:size(1), 1, self.output:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
      grad_output_view = grad_output:view(
         grad_output:size(1), grad_output:size(2), 1, grad_output:size(3))
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), 1, self.output:size(3))
   end
   local grad_input = parent.updateGradInput(self, input_view, grad_output_view)
   if input:dim() == 2 then
      -- BUG FIX: original read 'self.utput:size(1)' (undefined field),
      -- which raised an error in the 2-dim backward path.
      self.output = self.output:view(
         self.output:size(1), self.output:size(3))
      self.gradInput = grad_input:view(grad_input:size(1), grad_input:size(3))
   else
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), self.output:size(4))
      self.gradInput = grad_input:view(
         grad_input:size(1), grad_input:size(2), grad_input:size(4))
   end
   return self.gradInput
end

function TemporalMaxPoolingMM:__tostring__()
   return string.format('%s(%d, %d)', torch.type(self), self.kW, self.dW)
end

================================================ FILE: glyphnet/modules.lua ================================================

--[[
Additional modules for GlyphNet
Copyright 2016 Xiang Zhang
--]]

-- cudnn is optional; 'status' records whether it loaded.
local status, cudnn = pcall(require, 'cudnn')
local nn = require('nn')

-- NOTE(review): dofile paths are relative to the working directory --
-- presumably scripts are run from the glyphnet/ directory; confirm.

-- nn.TemporalConvolutionMM
if not nn.TemporalConvolutionMM then
   dofile('modules/TemporalConvolutionMM.lua')
end

-- nn.TemporalMaxPoolingMM
if not nn.TemporalMaxPoolingMM then
   dofile('modules/TemporalMaxPoolingMM.lua')
end

-- cudnn.TemporalConvolutionCudnn
-- BUG FIX: the original guard tested 'not cudnn.TemporalMaxPoolingCudnn',
-- so the convolution module was never loaded once the pooling module
-- existed; it must test for the convolution class itself.
if status == true and not cudnn.TemporalConvolutionCudnn then
   dofile('modules/TemporalConvolutionCudnn.lua')
end

-- cudnn.TemporalMaxPoolingCudnn
if status == true and not cudnn.TemporalMaxPoolingCudnn then
   dofile('modules/TemporalMaxPoolingCudnn.lua')
end

return nn

================================================ FILE: glyphnet/scroll.lua ================================================

--[[
The scrollable UI
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')

local Scroll = class()

-- Initialize a scroll interface
-- width: (optional) the pixel width of the scollable area. Default is 600.
-- title: (optional) title for the window
function Scroll:_init(width,title)
   require('qtuiloader')
   require('qtwidget')
   require('qttorch')
   self.file = 'scroll.ui'
   self.win = qtuiloader.load(self.file)
   self.frame = self.win.frame
   self.painter = qt.QtLuaPainter(self.frame)
   self.width = width or 600
   self.height = 0
   self.fontSize = 15
   self.x = 0
   self.y = 0
   self.border = 1
   self:resize(self.width, self.height)
   self:setFontSize(self.fontSize)
   if title then self:setTitle(title) end
   self:show()
end

-- Resize the window to designated width and height
function Scroll:resize(width, height)
   self.width = width or self.width
   self.height = height or self.height
   self.frame.size = qt.QSize{width = self.width,height = self.height}
end

-- Set the font size used for subsequent text drawing.
-- NOTE(review): when size is nil the painter falls back to 15 but
-- self.fontSize is still set to nil -- confirm callers always pass a size.
function Scroll:setFontSize(size)
   self.painter:setfontsize(size or 15)
   self.fontSize = size
end

-- Set border width
function Scroll:setBorder(width)
   self.border = width
end

-- Draw text
function Scroll:drawText(text)
   -- Drawing text must happen on a new line
   if self.x ~= 0 then
      self.x = 0
      self.y = self.height
   end
   -- Determine height and resize if necessary
   if self.height < self.y+self.fontSize+1 then
self:resize(self.width,self.y+self.fontSize+1+self.border) end -- Draw the yellow main text self.painter:gbegin() self.painter:moveto(self.x,self.y+self.fontSize-1) self.painter:setcolor(1,1,0,1) self.painter:show(text) self.painter:stroke() self.painter:gend() -- Draw the black shadow text self.painter:gbegin() self.painter:moveto(self.x,self.y+self.fontSize+1-1) self.painter:setcolor(0,0,0,1) self.painter:show(text) self.painter:stroke() self.painter:gend() -- Move the cursor to next line self.x = 0 if self.height < self.y+self.fontSize+1+self.border then self:resize(self.width,self.y+self.fontSize+1+self.border) end self.y = self.height end -- Draw image function Scroll:drawImage(im, scale) -- Get the image height and width local scale = scale or 1 local height, width if im:dim() == 2 then height = im:size(1) * scale width = im:size(2) * scale elseif im:dim() == 3 then height = im:size(2) * scale width = im:size(3) * scale else error("Image must be 2-dim or 3-dim data") end -- Determine whether a new line is needed if self.x ~= 0 and self.x + width > self.width then self.x = 0 self.y = self.height end -- Determine whether need to resize the document area if self.y + height > self.height then self:resize(self.width, self.y + height + self.border) end -- Draw the image self.painter:gbegin() self.painter:image(self.x, self.y, width, height, qt.QImage.fromTensor(im)) self.painter:stroke() self.painter:gend() -- Move the cursor self.x = self.x + width + self.border end -- Draw a new line function Scroll:drawEndOfLine() self.x = 0 self.y = self.height end -- Hint for heights function Scroll:hintImageHeight(im, scale) -- Get the image height and width local scale = scale or 1 local height, width if im:dim() == 2 then height = im:size(1) * scale width = im:size(2) * scale elseif im:dim() == 3 then height = im:size(2) * scale width = im:size(3) * scale else error("Image must be 2-dim or 3-dim data") end -- Determine whether a new line is needed if self.x ~= 0 and self.x 
+ width > self.width then return self.height else return self.y end end -- Show the window function Scroll:show() self.win:show() end -- Hide the window function Scroll:hide() self.win:hide() end -- Save to file function Scroll:save(file) self.painter:write(file) end -- Set window title function Scroll:setTitle(title) self.win:setWindowTitle(title) end -- Reset the drawing area function Scroll:clear() self:resize(self.width,0) self.x = 0 self.y = 0 end return Scroll ================================================ FILE: glyphnet/scroll.ui ================================================ window 0 0 640 480 Scrollable Window 0 0 0 false 0 0 600 440 0 0 ================================================ FILE: glyphnet/test.lua ================================================ --[[ Tester for GlyphNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local torch = require('torch') local sys = require('sys') local Test = class() -- Constructor for Test -- data: the data object -- model: the model object -- loss: the loss object -- config: configuration table function Test:_init(data, model, loss, config) self.data = data self.model = model self.loss = loss self.time = {} end -- Run for all the data -- callback: (optional) a callback function to execute after each step function Test:run(callback) self.total_error = 0 self.total_objective = 0 self.total_count = 0 self.clock = sys.clock() for input, label, count in self.data:iterator() do self:runStep(input, label, count) if callback then callback(self) end self.clock = sys.clock() end end -- Run for one minibatch step function Test:runStep(input, label, count) -- Get a batch of data self.input_untyped, self.label_untyped = input, label self.input = self.input or self.input_untyped:type(self.model:type()) self.input:copy(self.input_untyped) self.label = self.label or self.label_untyped:type(self.model:type()) self.label:copy(self.label_untyped) self.count = count if 
self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.data = sys.clock() - self.clock
   -- Forward propagation
   self.clock = sys.clock()
   self.output = self.model:forward(self.input)
   self.objective = self.loss:forward(self.output, self.label)
   -- Some losses return a 1-element tensor instead of a number; unwrap it.
   -- BUG FIX: original indexed 'self.objectve' (typo) which is always nil,
   -- so this branch raised an error whenever it was taken.
   if type(self.objective) ~= 'number' then
      self.objective = self.objective[1]
   end
   self.max, self.decision = self.output:type(
      torch.getdefaulttensortype()):max(2)
   self.max = self.max:squeeze()
   -- Only the first 'count' samples of the batch are real data.
   self.decision = self.decision:squeeze():narrow(1, 1, count):type(
      torch.getdefaulttensortype())
   self.error = torch.ne(
      self.decision, self.label_untyped:narrow(1, 1, count)):type(
      torch.getdefaulttensortype()):sum() / count
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.forward = sys.clock() - self.clock
   -- Update the results as running (count-weighted) averages.
   self.clock = sys.clock()
   self.total_objective = (self.total_objective * self.total_count +
                              self.objective * count) /
      (self.total_count + count)
   self.total_error = (self.total_error * self.total_count +
                          self.error * count) /
      (self.total_count + count)
   self.total_count = self.total_count + count
   self.time.update = sys.clock() - self.clock
end

return Test

================================================ FILE: glyphnet/train.lua ================================================

--[[
Trainer for GlyphNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')
local sys = require('sys')

local Train = class()

-- Constructor for Train
-- data: the data object
-- model: the model object
-- loss: the loss object
-- config: configuration table with optional keys rates (step -> learning
--   rate schedule), step, momentum, decay, recapture, state (momentum
--   state tensor to resume from)
function Train:_init(data, model, loss, config)
   self.data = data
   self.model = model
   self.loss = loss
   self.rates = config.rates or {1e-3}
   self.step = config.step or 0
   self.momentum = config.momentum or 0
   self.decay = config.decay or 0
   self.recapture = config.recapture
   self.params, self.grads = self.model:getParameters()
   if config.state then
      self.state = config.state:type(self.model:type())
   else
      self.state = self.grads:clone():zero()
   end
   -- Find current learning rate: the rate whose schedule step is the
   -- largest one not exceeding the current step.
   local max_step = 1
   self.rate = self.rates[1]
   for step, rate in pairs(self.rates) do
      if step <= self.step and step > max_step then
         max_step = step
         self.rate = rate
      end
   end
   self.time = {}
end

-- Run for a number of steps
-- steps: number of steps
-- callback: (optional) a callback function to execute after each step
function Train:run(steps, callback)
   -- Re-flatten parameters in case the model was rebuilt externally.
   if self.recapture then
      self.params, self.grads = self.model:getParameters()
   end
   for i = 1, steps do
      self.step = self.step + 1
      self:runStep()
      if callback then callback(self, i) end
   end
end

-- Run for one minibatch step
function Train:runStep()
   -- Get a batch of data
   self.clock = sys.clock()
   self.input_untyped, self.label_untyped = self.data:getBatch(
      self.input_untyped, self.label_untyped)
   self.input = self.input or self.input_untyped:type(self.model:type())
   self.input:copy(self.input_untyped)
   self.label = self.label or self.label_untyped:type(self.model:type())
   self.label:copy(self.label_untyped)
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.data = sys.clock() - self.clock
   -- Forward propagation
   self.clock = sys.clock()
   self.output = self.model:forward(self.input)
   self.objective = self.loss:forward(self.output, self.label)
   -- BUG FIX: same 'self.objectve' typo as in test.lua -- the unwrap of a
   -- tensor-valued loss always indexed nil and errored.
   if type(self.objective) ~= 'number' then
      self.objective = self.objective[1]
   end
   self.max, self.decision = self.output:type(
      torch.getdefaulttensortype()):max(2)
   self.max = self.max:squeeze()
   self.decision = self.decision:squeeze():type(torch.getdefaulttensortype())
   self.error = torch.ne(self.decision, self.label_untyped):type(
      torch.getdefaulttensortype()):sum() / self.label:size(1)
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.forward = sys.clock() - self.clock
   -- Backward propagation
   self.clock = sys.clock()
   self.grads:zero()
   self.grad_output = self.loss:backward(self.output, self.label)
   self.grad_input =
self.model:backward(self.input, self.grad_output) if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end self.time.backward = sys.clock() - self.clock -- Update the step self.clock = sys.clock() self:sgd() if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end self.time.update = sys.clock() - self.clock end function Train:sgd() self.rate = self.rates[self.step] or self.rate if self.momentum and self.momentum > 0 then self.state:mul(self.momentum):add(self.grads:mul(-self.rate)) self.params:mul(1 - self.rate * self.decay):add(self.state) else self.params:mul(1 - self.rate * self.decay):add( self.grads:mul(-self.rate)) end end return Train ================================================ FILE: glyphnet/unittest/data.lua ================================================ --[[ Unit test for GlyphNet data program Copyright 2015-2016 Xiang Zhang --]] local Data = require('data') local image = require('image') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe.init() local config = {} config.file = 'data/dianping/test_code.t7b' config.unifont = 'unifont/unifont-8.0.01.t7b' config.length = 512 config.batch = 16 joe.config = config joe.data = Data(config) end function joe.getBatchTest() local data = joe.data local sample, label = data:getBatch() print('Size of sample: ') print(sample:size()) print('Size of label: ') print(label:size()) io.write('Labels:') for i = 1, label:size(1) do io.write(' ', label[i]) end io.write('\n') image.display{image = sample[1]:narrow(1, 1, 100), nrow = 10, zoom = 4} joe.sample = sample joe.label = label end function joe.iteratorTest() local data = joe.data local window local total = 0 for sample, label, count in data:iterator() do total = total + count 
io.write(total, ',', count, ':')
      -- NOTE(review): sample[1][1] is redisplayed on every iteration of i;
      -- presumably this just refreshes the window -- confirm whether
      -- sample[i] was intended.
      for i = 1, count do
         window = image.display{
            image = sample[1][1], nrow = 10, zoom = 4, win = window}
         io.write(' ', label[i])
      end
      io.write('\n')
      io.flush()
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/driver.lua
================================================
--[[
Unit test for GlyphNet driver component
Copyright 2016 Xiang Zhang
--]]

local Driver = require('driver')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Load config.lua, point it at the unit-test data, and construct a Driver
-- with a short debug schedule.
function joe:init()
   local config = dofile('config.lua')
   print('Creating driver')
   config.train_data.file = 'data/dianping/unittest_code.t7b'
   config.test_data.file = 'data/dianping/unittest_code.t7b'
   config.driver.debug = true
   config.driver.device = 3
   config.driver.steps = 10
   config.driver.epoches = 30
   config.driver.schedule = 4
   config.driver.variation = 'small'
   config.driver.location = '/tmp'
   local driver = Driver(config, config.driver)
   self.config = config
   self.driver = driver
end

-- Print the configured learning-rate schedule, then run the driver.
function joe:driverTest()
   local driver = self.driver
   print('Training schedule')
   for step, rate in pairs(driver.options.train.rates) do
      print(step, rate)
   end
   print('Testing driver')
   driver:run()
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local os = require('os')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Construct the model from config.lua and flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   local model = Model(config.model)
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the spatial and temporal submodels, plus the storage pointers of the
-- weight-shared spatial group so sharing can be eyeballed.
function joe:printModel(model)
   local model = model or self.model
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Forward and backward one random batch, recording all intermediates.
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(4, 512, 16, 16)
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   local output = model:forward(input)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size())
   print('Running backward propagation')
   local grad_input = model:backward(input, grad_output)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

function
joe:modeTest()
   -- Toggle testing/training mode and report each submodule's train flag.
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Setting to training mode')
   model:setModeTrain()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model_cuda.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local os = require('os')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the model with cudnn disabled, move it to the configured GPU, and
-- flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   config.model.cudnn = nil
   print('Changing device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   local model = Model(config.model)
   model:cuda()
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the model structure and shared-storage pointers of the spatial group.
function joe:printModel(model)
   local model = model or self.model
   print('Type of model: '..model:type())
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Timed forward/backward on one random CUDA batch (synchronized timings).
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(16, 512, 16, 16):type(model:type())
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   cutorch.synchronize()
   sys.tic()
   local output = model:forward(input)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size()):type(model:type())
   print('Running backward propagation')
   cutorch.synchronize()
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

-- Toggle testing/training mode and report each submodule's train flag.
function joe:modeTest()
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in
ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Setting to training mode')
   model:setModeTrain()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model_cudnn.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local os = require('os')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the model with cudnn enabled, move it to the configured GPU, and
-- flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   config.model.cudnn = true
   print('Changing device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   local model = Model(config.model)
   model:cuda()
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the model structure and shared-storage pointers of the spatial group.
function joe:printModel(model)
   local model = model or self.model
   print('Type of model: '..model:type())
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Timed forward/backward on one random CUDA batch (synchronized timings).
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(16, 512, 16, 16):type(model:type())
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   cutorch.synchronize()
   sys.tic()
   local output = model:forward(input)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size()):type(model:type())
   print('Running backward propagation')
   cutorch.synchronize()
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

-- Toggle testing/training mode and report each submodule's train flag.
function joe:modeTest()
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in
ipairs(model.temporal.modules) do print(i, torch.type(m), m.train) end print('Spatial mode:') for i, m in ipairs(model.spatial.modules) do print(i, torch.type(m), m.train) end print('Setting to training mode') model:setModeTrain() print('Temporal mode:') for i, m in ipairs(model.temporal.modules) do print(i, torch.type(m), m.train) end print('Spatial mode:') for i, m in ipairs(model.spatial.modules) do print(i, torch.type(m), m.train) end end joe.main() return joe ================================================ FILE: glyphnet/unittest/modules_temporal.lua ================================================ --[[ Unit test for modules Copyright 2016 Xiang Zhang --]] local nn = require('modules') local cunn = require('cunn') local cutorch = require('cutorch') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init(joe) end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local device = 1 cutorch.setDevice(device) print('Device set to '..device) self.jacobian = nn.Jacobian end function joe:noBatchCPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad) print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length) print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad) input_pad:narrow(2, pad + 1, 
input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( temporal.weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()) print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()) local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) local jacobian = self.jacobian local err = jacobian.testJacobian(temporal, input) print('Error of jacobian test: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.weight, temporal.gradWeight) print('Error of jacobian test for weight: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.bias, temporal.gradBias) print('Error of jacobian test for bias: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.weight) print('Error of jacobian test for weight update: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.bias) print('Error of jacobian test for bias update: '..err) for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'weight', 'gradWeight')) do print('Error of jacobian test for '..t..' all update: '..err) end for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'bias', 'gradBias')) do print('Error of jacobian test for '..t..' 
all update: '..err) end end function joe:noBatchCPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchCPU(kernel, stride, pad) end end end end function joe:batchCPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad) print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length) print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(batch, input_feature, input_length + 2 * pad) input_pad:narrow(3, pad + 1, input_length):copy(input) local weight = temporal.weight:view( 1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) for i = 1, output:size(3) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 3, input_begin, kernel):contiguous():view( batch, 1, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, input_chunk):sum(4):sum(3):squeeze() output_slice:add( 1, temporal.bias:view(1, output_feature):expandAs(output_slice)) print('Error of output slice '..i..': '.. 
output_slice:add(-1, output:select(3, i)):abs():mean()) end local grad_output = torch.rand(output:size()) print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( batch, output_feature, input_length + kernel - 1):zero() local interlace_length = stride * (grad_output:size(3) - 1) + 1 local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2 for i = 1, grad_output:size(3) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(3) then grad_output_pad:select(3, grad_output_pad_begin):copy( grad_output:select(3, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()) local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(3) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 3, grad_output_pad_begin, kernel):contiguous():view( batch, output_feature, 1, kernel):expand( batch, output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse:view(1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel), grad_output_pad_chunk):sum(4):sum(2):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean()) end local input_unfold = input_pad:unfold(3, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.bmm( grad_output, input_unfold:select(4, i):transpose(2, 3)):sum( 1):squeeze() print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(3):sum(1) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) local jacobian = self.jacobian local err = jacobian.testJacobian(temporal, input) print('Error of jacobian test: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.weight, temporal.gradWeight) print('Error of jacobian test for weight: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.bias, temporal.gradBias) print('Error of jacobian test for bias: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.weight) print('Error of jacobian test for weight update: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.bias) print('Error of jacobian test for bias update: '..err) for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'weight', 'gradWeight')) do print('Error of jacobian test for '..t..' all update: '..err) end for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'bias', 'gradBias')) do print('Error of jacobian test for '..t..' 
all update: '..err) end end function joe:batchCPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:batchCPU(kernel, stride, pad) end end end end function joe:noBatchGPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad):cuda() input_pad:narrow(2, pad + 1, input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( temporal.weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. 
output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:noBatchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchGPU(kernel, stride, pad) end end end end function joe:batchGPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros( batch, input_feature, input_length + 2 * pad):cuda() input_pad:narrow(3, pad + 1, input_length):copy(input) local weight = temporal.weight:view( 1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) for i = 1, output:size(3) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 3, input_begin, kernel):contiguous():view( batch, 1, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, 
input_chunk):sum(4):sum(3):squeeze() output_slice:add( 1, temporal.bias:view(1, output_feature):expandAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(3, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( batch, output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(3) - 1) + 1 local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2 for i = 1, grad_output:size(3) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(3) then grad_output_pad:select(3, grad_output_pad_begin):copy( grad_output:select(3, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(3) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 3, grad_output_pad_begin, kernel):contiguous():view( batch, output_feature, 1, kernel):expand( batch, output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse:view(1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel), grad_output_pad_chunk):sum(4):sum(2):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean()) end local input_unfold = input_pad:unfold(3, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.bmm( grad_output, input_unfold:select(4, i):transpose(2, 3)):sum( 1):squeeze() print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(3):sum(1) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:batchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:batchGPU(kernel, stride, pad) end end end end joe.main() return joe ================================================ FILE: glyphnet/unittest/modules_temporal_cudnn.lua ================================================ --[[ Unit test for modules Copyright 2016 Xiang Zhang --]] local nn = require('modules') local cudnn = require('cudnn') local cunn = require('cunn') local cutorch = require('cutorch') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init(joe) end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local device = 2 cutorch.setDevice(device) print('Device set to '..device) self.jacobian = nn.Jacobian end function joe:noBatchGPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = cudnn.TemporalConvolutionCudnn( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local weight = temporal.weight:view(output_feature, input_feature, kernel) 
local grad_weight = temporal.gradWeight:view( output_feature, input_feature, kernel) local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad):cuda() input_pad:narrow(2, pad + 1, input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = 
i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, grad_weight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:noBatchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchGPU(kernel, stride, pad) end end end end function joe:batchGPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local temporal_weight = temporal.weight:view(input_feature, output_feature, kernel) local temporal_grad_weight = temporal.gradWeight:view( input_feature, output_feature, kernel) local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros( batch, input_feature, input_length + 2 * pad):cuda() 
input_pad:narrow(3, pad + 1, input_length):copy(input)
-- Broadcast the weight across the batch dimension for the naive forward
-- reference below.
local weight = temporal_weight:view(
   1, output_feature, input_feature, kernel):expand(
   batch, output_feature, input_feature, kernel)
-- Forward reference: for every output frame, multiply the matching padded
-- input window by the weight, reduce over kernel and input feature
-- dimensions, add the bias, and compare with the module's output.
for i = 1, output:size(3) do
   local input_begin = (i - 1) * stride + 1
   local input_chunk = input_pad:narrow(
      3, input_begin, kernel):contiguous():view(
      batch, 1, input_feature, kernel):expand(
      batch, output_feature, input_feature, kernel)
   local output_slice = torch.cmul(
      weight, input_chunk):sum(4):sum(3):squeeze()
   output_slice:add(
      1, temporal.bias:view(1, output_feature):expandAs(output_slice))
   print('Error of output slice '..i..': '..
            output_slice:add(-1, output:select(3, i)):abs():mean())
end
local grad_output = torch.rand(output:size()):cuda()
print('Executing backward propagation')
local grad_input = temporal:backward(input, grad_output)
print('Input gradient size: ')
print(grad_input:size())
-- Build an interlaced, zero-padded copy of grad_output so the input
-- gradient can be checked as a correlation with the reversed kernel.
local grad_output_pad = torch.Tensor(
   batch, output_feature, input_length + kernel - 1):zero():cuda()
local interlace_length = stride * (grad_output:size(3) - 1) + 1
local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2
for i = 1, grad_output:size(3) do
   local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift
   if grad_output_pad_begin >= 1 and
   grad_output_pad_begin <= grad_output_pad:size(3) then
      grad_output_pad:select(3, grad_output_pad_begin):copy(
         grad_output:select(3, i))
   end
end
-- Reverse the kernel taps: backward propagation correlates the output
-- gradient with the flipped filter.
local weight_reverse = torch.Tensor(temporal_weight:size()):cuda()
local weight_index = torch.LongTensor(kernel)
for i = 1, weight_index:size(1) do
   weight_index[i] = kernel - i + 1
end
weight_reverse:indexCopy(3, weight_index, temporal_weight)
for i = 1, grad_input:size(3) do
   local grad_output_pad_begin = i
   local grad_output_pad_chunk = grad_output_pad:narrow(
      3, grad_output_pad_begin, kernel):contiguous():view(
      batch, output_feature, 1, kernel):expand(
      batch, output_feature, input_feature, kernel)
   local grad_input_slice = torch.cmul(
      weight_reverse:view(1, output_feature,
         input_feature, kernel):expand(
         batch, output_feature, input_feature, kernel),
      grad_output_pad_chunk):sum(4):sum(2):squeeze()
   print('Error of input gradient slice '..i..': '..
            grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean())
end
-- Weight gradient reference: unfold the padded input so kernel tap i lines
-- up with the output frames it produced, then accumulate the per-sample
-- products over the batch.
local input_unfold = input_pad:unfold(3, kernel, stride)
for i = 1, temporal_weight:size(3) do
   local grad_weight_slice = torch.bmm(
      grad_output, input_unfold:select(4, i):transpose(2, 3)):sum(
      1):squeeze()
   print('Error of weight gradient slice '..i..': '..grad_weight_slice:add(
            -1, temporal_grad_weight:select(3, i)):abs():mean())
end
-- Bias gradient is the output gradient summed over time and batch.
local grad_bias = grad_output:sum(3):sum(1)
print('Error of bias gradient: '..grad_bias:add(
         -1, temporal.gradBias):abs():mean())
end

-- Run the batched GPU test across a grid of kernel/stride/pad settings.
function joe:batchGPUTest()
   for _, kernel in ipairs({3, 5}) do
      for _, stride in ipairs({1, 2, 3, 5}) do
         for _, pad in ipairs({0, 1, 2, 3, 5}) do
            self:batchGPU(kernel, stride, pad)
         end
      end
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/test.lua
================================================
--[[
Unit test for GlyphNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')

local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model, loss and tester from config.lua and store them on joe.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with the printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a progress-printing callback for the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
               ', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/test_cuda.lua
================================================
--[[
Unit test for GlyphNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')

local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model, CUDA loss and tester from config.lua.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with the printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a progress-printing callback for the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
               ', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/train.lua
================================================
--[[
Unit test for GlyphNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')

local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model, loss and trainer from config.lua and store them on joe.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create trainer')
   -- NOTE(review): config.train.rates appears to be a step-indexed
   -- learning-rate schedule -- confirm against train.lua.
   config.train.rates[4] = 1e-5
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 10 steps with the printing callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 10 steps')
   train:run(10, callback)
end

-- Return a progress-printing callback for the trainer.
function joe:callback()
   return function (train, i)
      print('stp: '..train.step..', rat: '..train.rate..
               ', obj: '..train.objective..', dat: '..train.time.data..
               ', fwd: '..train.time.forward..', bwd: '..train.time.backward..
               ', upd: '..train.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/train_cuda.lua
================================================
--[[
Unit test for GlyphNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')

local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model, CUDA loss and trainer from config.lua, then print
-- initial parameter/gradient/state statistics.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create trainer')
   -- NOTE(review): config.train.rates appears to be a step-indexed
   -- learning-rate schedule -- confirm against train.lua.
   config.train.rates[79] = 1e-5
   config.train.rates[85] = config.train.rates[1]
   local train = Train(data, model, loss, config.train)
   print('pmn: '..train.params:mean()..', psd: '..train.params:std()..
            ', gmn: '..train.grads:mean()..', gsd: '..train.grads:std()..
            ', smn: '..train.state:mean()..', ssd: '..train.state:std())
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100 steps with the printing callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 100 steps')
   train:run(100, callback)
end

-- Return a progress-printing callback for the trainer.
function joe:callback()
   return function (train, i)
      print('stp: '..train.step..', rat: '..train.rate..', err: '..train.error..
               ', obj: '..train.objective..', dat: '..train.time.data..
               ', fwd: '..train.time.forward..', bwd: '..train.time.backward..
', upd: '..train.time.update..', pmn: '..train.params:mean().. ', psd: '..train.params:std()..', gmn: '..train.grads:mean().. ', gsd: '..train.grads:std()..', smn: '..train.state:mean().. ', ssd: '..train.state:std()) end end joe.main() return joe ================================================ FILE: glyphnet/visualizer.lua ================================================ --[[ Visualization module for glyphnet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local Scroll = require('scroll') local Visualizer = class() -- Constructor -- config: configuration table -- .width: (optional) width of scrollable window -- .scale: (optional) scale of visualizing weights -- .title: (optional) title of the scrollable window -- .height: (optional) maximum height of visualization for a module function Visualizer:_init(config) local config = config or {} local config = config or {} self.width = config.width or 800 self.scale = config.scale or 4 self.title = config.title or "Visualizer" self.height = config.height or 64 self.win = Scroll(self.width, self.title) end -- Save wrapper function Visualizer:save(...) return self.win:save(...) 
end

-- Visualize the weights of a sequential model
-- model: the sequential model
function Visualizer:drawSequential(model)
   self.win:clear()
   for i, m in ipairs(model.modules) do
      self.win:drawText(tostring(i)..": "..tostring(m))
      if self.drawModule[torch.type(m)] then
         self.drawModule[torch.type(m)](self, m)
      end
   end
end

-- Draw an image with height hints
-- im: the image tensor to draw
-- y_zero: vertical position where the current module's drawing started
-- max: (optional) normalization maximum, defaults to im:max()
-- min: (optional) normalization minimum, defaults to im:min()
-- Returns false (and draws nothing) once the module's drawing would exceed
-- self.height below y_zero; true otherwise.
function Visualizer:drawImage(im, y_zero, max, min)
   local win = self.win
   local y = win:hintImageHeight(im, self.scale)
   if y - y_zero > self.height then
      return false
   end
   local max = max or im:max()
   local min = min or im:min()
   -- Normalize to [0, 1] for display; guard against a constant image.
   local normalized = torch.Tensor(im:size()):copy(im):add(-min)
   if max - min > 0 then
      normalized:div(max - min)
   end
   win:drawImage(normalized, self.scale)
   return true
end

-- A table for reading modules
Visualizer.drawModule = {}

-- Draw each output row of a linear layer, followed by its bias.
Visualizer.drawModule['nn.Linear'] = function (self, m)
   local weight = m.weight
   local y_zero = self.win.y
   for i = 1, m.weight:size(1) do
      local w = weight[i]:view(1, weight:size(2))
      if not self:drawImage(w, y_zero) then
         return
      end
   end
   self:drawImage(m.bias:view(1, m.bias:size(1)), y_zero)
end

-- Draw spatial convolution kernels: one image per output plane for RGB
-- input, otherwise one image per input/output plane pair; then the bias.
-- (Removed the unused locals 'height' and 'width'.)
Visualizer.drawModule['nn.SpatialConvolution'] = function (self, m)
   local weight = m.weight:view(m.nOutputPlane, m.nInputPlane, m.kH, m.kW)
   local y_zero = self.win.y
   -- Share one normalization range across all kernels of the module.
   local max = weight:max()
   local min = weight:min()
   if m.nInputPlane == 3 then
      for i = 1, m.nOutputPlane do
         local w = weight[i]
         if not self:drawImage(w, y_zero, max, min) then
            return
         end
      end
   else
      for i = 1, m.nOutputPlane do
         for j = 1, m.nInputPlane do
            local w = weight[i][j]
            if not self:drawImage(w, y_zero, max, min) then
               return
            end
         end
      end
   end
   self:drawImage(m.bias:view(1, m.nOutputPlane), y_zero)
end
Visualizer.drawModule['nn.SpatialConvolutionMM'] =
   Visualizer.drawModule['nn.SpatialConvolution']
Visualizer.drawModule['cudnn.SpatialConvolution'] =
   Visualizer.drawModule['nn.SpatialConvolution']

-- Draw temporal convolution kernels, one image per output feature.
Visualizer.drawModule['nn.TemporalConvolutionMM'] = function (self, m)
   local weight =
      m.weight:view(m.output_feature, m.input_feature, m.kernel)
   local y_zero = self.win.y
   local max = weight:max()
   local min = weight:min()
   for i = 1, m.output_feature do
      -- Transpose so the kernel (time) axis runs horizontally.
      local w = weight[i]:transpose(2, 1)
      if not self:drawImage(w, y_zero, max, min) then
         return
      end
   end
end

-- Draw cudnn temporal convolution kernels, one image per output plane.
Visualizer.drawModule['cudnn.TemporalConvolutionCudnn'] = function (self, m)
   local weight = m.weight:view(m.nOutputPlane, m.nInputPlane, m.kW)
   local y_zero = self.win.y
   local max = weight:max()
   local min = weight:min()
   for i = 1, m.nOutputPlane do
      local w = weight[i]:transpose(2, 1)
      if not self:drawImage(w, y_zero, max, min) then
         return
      end
   end
end

return Visualizer

================================================
FILE: linearnet/archive/11stbinary_charbag.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/charbag -train_data_file data/11st/sentiment/binary_train_charbag.t7b -test_data_file data/11st/sentiment/binary_test_charbag.t7b "$@";

================================================
FILE: linearnet/archive/11stbinary_charbagtfidf.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/charbagtfidf -train_data_file data/11st/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_charbagtfidf.t7b "$@";

================================================
FILE: linearnet/archive/11stbinary_chargram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/chargram -train_data_file
data/11st/sentiment/binary_train_chargram.t7b -test_data_file data/11st/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/chargramtfidf -train_data_file data/11st/sentiment/binary_train_chargramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbag -train_data_file data/11st/sentiment/binary_train_wordbag.t7b -test_data_file data/11st/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagroman -train_data_file data/11st/sentiment/binary_train_rr_wordbag.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagtfidf 
-train_data_file data/11st/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagtfidfroman -train_data_file data/11st/sentiment/binary_train_rr_wordbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgram -train_data_file data/11st/sentiment/binary_train_wordgram.t7b -test_data_file data/11st/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgramroman -train_data_file data/11st/sentiment/binary_train_rr_wordgram.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/11stbinary/wordgramtfidf -train_data_file data/11st/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgramtfidfroman -train_data_file data/11st/sentiment/binary_train_rr_wordgramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/charbag -train_data_file data/11st/sentiment/full_train_charbag.t7b -test_data_file data/11st/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/charbagtfidf -train_data_file data/11st/sentiment/full_train_charbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set 
-x; set -e; th main.lua -driver_location models/11stfull/chargram -train_data_file data/11st/sentiment/full_train_chargram.t7b -test_data_file data/11st/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/chargramtfidf -train_data_file data/11st/sentiment/full_train_chargramtfidf.t7b -test_data_file data/11st/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbag -train_data_file data/11st/sentiment/full_train_wordbag.t7b -test_data_file data/11st/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbagroman -train_data_file data/11st/sentiment/full_train_rr_wordbag.t7b -test_data_file data/11st/sentiment/full_test_rr_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/11stfull/wordbagtfidf -train_data_file data/11st/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbagtfidfroman -train_data_file data/11st/sentiment/full_train_rr_wordbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_rr_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgram -train_data_file data/11st/sentiment/full_train_wordgram.t7b -test_data_file data/11st/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgramroman -train_data_file data/11st/sentiment/full_train_rr_wordgram.t7b -test_data_file data/11st/sentiment/full_test_rr_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th 
main.lua -driver_location models/11stfull/wordgramtfidf -train_data_file data/11st/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/11st/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgramtfidfroman -train_data_file data/11st/sentiment/full_train_rr_wordgramtfidf.t7b -test_data_file data/11st/sentiment/full_test_rr_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/charbag -train_data_file data/amazon/binary_train_charbag.t7b -test_data_file data/amazon/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/charbagtfidf -train_data_file data/amazon/binary_train_charbagtfidf.t7b -test_data_file data/amazon/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
th main.lua -driver_location models/amazonbinary/chargram -train_data_file data/amazon/binary_train_chargram.t7b -test_data_file data/amazon/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/chargramtfidf -train_data_file data/amazon/binary_train_chargramtfidf.t7b -test_data_file data/amazon/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordbag -train_data_file data/amazon/binary_train_wordbag.t7b -test_data_file data/amazon/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordbagtfidf -train_data_file data/amazon/binary_train_wordbagtfidf.t7b -test_data_file data/amazon/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/amazonbinary/wordgram -train_data_file data/amazon/binary_train_wordgram.t7b -test_data_file data/amazon/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordgramtfidf -train_data_file data/amazon/binary_train_wordgramtfidf.t7b -test_data_file data/amazon/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/charbag -train_data_file data/amazon/full_train_charbag.t7b -test_data_file data/amazon/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/charbagtfidf -train_data_file data/amazon/full_train_charbagtfidf.t7b -test_data_file data/amazon/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/chargram -train_data_file 
data/amazon/full_train_chargram.t7b -test_data_file data/amazon/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/chargramtfidf -train_data_file data/amazon/full_train_chargramtfidf.t7b -test_data_file data/amazon/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordbag -train_data_file data/amazon/full_train_wordbag.t7b -test_data_file data/amazon/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordbagtfidf -train_data_file data/amazon/full_train_wordbagtfidf.t7b -test_data_file data/amazon/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordgram -train_data_file data/amazon/full_train_wordgram.t7b -test_data_file 
data/amazon/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordgramtfidf -train_data_file data/amazon/full_train_wordgramtfidf.t7b -test_data_file data/amazon/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/charbag -train_data_file data/chinanews/topic/train_charbag.t7b -test_data_file data/chinanews/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/charbagtfidf -train_data_file data/chinanews/topic/train_charbagtfidf.t7b -test_data_file data/chinanews/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/chargram -train_data_file data/chinanews/topic/train_chargram.t7b -test_data_file data/chinanews/topic/test_chargram.t7b -model_size 
1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/chargramtfidf -train_data_file data/chinanews/topic/train_chargramtfidf.t7b -test_data_file data/chinanews/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbag -train_data_file data/chinanews/topic/train_wordbag.t7b -test_data_file data/chinanews/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagroman -train_data_file data/chinanews/topic/train_pinyin_wordbag.t7b -test_data_file data/chinanews/topic/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagtfidf -train_data_file data/chinanews/topic/train_wordbagtfidf.t7b -test_data_file data/chinanews/topic/test_wordbagtfidf.t7b "$@"; 
================================================ FILE: linearnet/archive/chinanews_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagtfidfroman -train_data_file data/chinanews/topic/train_pinyin_wordbagtfidf.t7b -test_data_file data/chinanews/topic/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgram -train_data_file data/chinanews/topic/train_wordgram.t7b -test_data_file data/chinanews/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramroman -train_data_file data/chinanews/topic/train_pinyin_wordgram.t7b -test_data_file data/chinanews/topic/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramtfidf -train_data_file data/chinanews/topic/train_wordgramtfidf.t7b -test_data_file data/chinanews/topic/test_wordgramtfidf.t7b 
-model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramtfidfroman -train_data_file data/chinanews/topic/train_pinyin_wordgramtfidf.t7b -test_data_file data/chinanews/topic/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] # NOTE(review): siblings pass explicit options; made explicit here for consistency (assumes main.lua defaults were dianping charbag -- confirm) set -x; set -e; th main.lua -driver_location models/dianping/charbag -train_data_file data/dianping/train_charbag.t7b -test_data_file data/dianping/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/charbagtfidf -train_data_file data/dianping/train_charbagtfidf.t7b -test_data_file data/dianping/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/chargram -train_data_file data/dianping/train_chargram.t7b -test_data_file data/dianping/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_chargramtfidf.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/chargramtfidf -train_data_file data/dianping/train_chargramtfidf.t7b -test_data_file data/dianping/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbag -train_data_file data/dianping/train_wordbag.t7b -test_data_file data/dianping/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbagroman -train_data_file data/dianping/train_pinyin_wordbag.t7b -test_data_file data/dianping/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbagtfidf -train_data_file data/dianping/train_wordbagtfidf.t7b -test_data_file data/dianping/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] 
set -x; set -e; th main.lua -driver_location models/dianping/wordbagtfidfroman -train_data_file data/dianping/train_pinyin_wordbagtfidf.t7b -test_data_file data/dianping/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgram -train_data_file data/dianping/train_wordgram.t7b -test_data_file data/dianping/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgramroman -train_data_file data/dianping/train_pinyin_wordgram.t7b -test_data_file data/dianping/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgramtfidf -train_data_file data/dianping/train_wordgramtfidf.t7b -test_data_file data/dianping/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/dianping/wordgramtfidfroman -train_data_file data/dianping/train_pinyin_wordgramtfidf.t7b -test_data_file data/dianping/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/charbag -train_data_file data/ifeng/topic/train_charbag.t7b -test_data_file data/ifeng/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/charbagtfidf -train_data_file data/ifeng/topic/train_charbagtfidf.t7b -test_data_file data/ifeng/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/chargram -train_data_file data/ifeng/topic/train_chargram.t7b -test_data_file data/ifeng/topic/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/chargramtfidf -train_data_file 
data/ifeng/topic/train_chargramtfidf.t7b -test_data_file data/ifeng/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbag -train_data_file data/ifeng/topic/train_wordbag.t7b -test_data_file data/ifeng/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagroman -train_data_file data/ifeng/topic/train_pinyin_wordbag.t7b -test_data_file data/ifeng/topic/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagtfidf -train_data_file data/ifeng/topic/train_wordbagtfidf.t7b -test_data_file data/ifeng/topic/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagtfidfroman -train_data_file data/ifeng/topic/train_pinyin_wordbagtfidf.t7b -test_data_file 
data/ifeng/topic/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgram -train_data_file data/ifeng/topic/train_wordgram.t7b -test_data_file data/ifeng/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramroman -train_data_file data/ifeng/topic/train_pinyin_wordgram.t7b -test_data_file data/ifeng/topic/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramtfidf -train_data_file data/ifeng/topic/train_wordgramtfidf.t7b -test_data_file data/ifeng/topic/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramtfidfroman -train_data_file data/ifeng/topic/train_pinyin_wordgramtfidf.t7b -test_data_file 
data/ifeng/topic/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/charbag -train_data_file data/jd/sentiment/binary_train_charbag.t7b -test_data_file data/jd/sentiment/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/charbagtfidf -train_data_file data/jd/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/jd/sentiment/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/chargram -train_data_file data/jd/sentiment/binary_train_chargram.t7b -test_data_file data/jd/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/chargramtfidf -train_data_file data/jd/sentiment/binary_train_chargramtfidf.t7b -test_data_file 
data/jd/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbag -train_data_file data/jd/sentiment/binary_train_wordbag.t7b -test_data_file data/jd/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordbag.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagtfidf -train_data_file data/jd/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/jd/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagtfidfroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordbagtfidf.t7b -test_data_file 
data/jd/sentiment/binary_test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgram -train_data_file data/jd/sentiment/binary_train_wordgram.t7b -test_data_file data/jd/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordgram.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramtfidf -train_data_file data/jd/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/jd/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramtfidfroman -train_data_file 
data/jd/sentiment/binary_train_pinyin_wordgramtfidf.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/charbag -train_data_file data/jd/sentiment/full_train_charbag.t7b -test_data_file data/jd/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/charbagtfidf -train_data_file data/jd/sentiment/full_train_charbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/chargram -train_data_file data/jd/sentiment/full_train_chargram.t7b -test_data_file data/jd/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/chargramtfidf -train_data_file 
data/jd/sentiment/full_train_chargramtfidf.t7b -test_data_file data/jd/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbag -train_data_file data/jd/sentiment/full_train_wordbag.t7b -test_data_file data/jd/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagroman -train_data_file data/jd/sentiment/full_train_pinyin_wordbag.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagtfidf -train_data_file data/jd/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagtfidfroman -train_data_file 
data/jd/sentiment/full_train_pinyin_wordbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgram -train_data_file data/jd/sentiment/full_train_wordgram.t7b -test_data_file data/jd/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgramroman -train_data_file data/jd/sentiment/full_train_pinyin_wordgram.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgramtfidf -train_data_file data/jd/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/jd/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/jdfull/wordgramtfidfroman -train_data_file data/jd/sentiment/full_train_pinyin_wordgramtfidf.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/charbag -train_data_file data/joint/binary_train_charbag.t7b -test_data_file data/joint/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/charbagtfidf -train_data_file data/joint/binary_train_charbagtfidf.t7b -test_data_file data/joint/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/chargram -train_data_file data/joint/binary_train_chargram.t7b -test_data_file data/joint/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/jointbinary/chargramtfidf -train_data_file data/joint/binary_train_chargramtfidf.t7b -test_data_file data/joint/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbag -train_data_file data/joint/binary_train_wordbag.t7b -test_data_file data/joint/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagroman -train_data_file data/joint/binary_train_roman_wordbag.t7b -test_data_file data/joint/binary_test_roman_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagtfidf -train_data_file data/joint/binary_train_wordbagtfidf.t7b -test_data_file data/joint/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagtfidfroman -train_data_file 
data/joint/binary_train_roman_wordbagtfidf.t7b -test_data_file data/joint/binary_test_roman_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgram -train_data_file data/joint/binary_train_wordgram.t7b -test_data_file data/joint/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramroman -train_data_file data/joint/binary_train_roman_wordgram.t7b -test_data_file data/joint/binary_test_roman_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramtfidf -train_data_file data/joint/binary_train_wordgramtfidf.t7b -test_data_file data/joint/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramtfidfroman 
-train_data_file data/joint/binary_train_roman_wordgramtfidf.t7b -test_data_file data/joint/binary_test_roman_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/charbag -train_data_file data/joint/full_train_charbag.t7b -test_data_file data/joint/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/charbagtfidf -train_data_file data/joint/full_train_charbagtfidf.t7b -test_data_file data/joint/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/chargram -train_data_file data/joint/full_train_chargram.t7b -test_data_file data/joint/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/chargramtfidf -train_data_file data/joint/full_train_chargramtfidf.t7b 
-test_data_file data/joint/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbag -train_data_file data/joint/full_train_wordbag.t7b -test_data_file data/joint/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagroman -train_data_file data/joint/full_train_roman_wordbag.t7b -test_data_file data/joint/full_test_roman_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagtfidf -train_data_file data/joint/full_train_wordbagtfidf.t7b -test_data_file data/joint/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagtfidfroman -train_data_file data/joint/full_train_roman_wordbagtfidf.t7b -test_data_file data/joint/full_test_roman_wordbagtfidf.t7b "$@"; 
================================================ FILE: linearnet/archive/jointfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgram -train_data_file data/joint/full_train_wordgram.t7b -test_data_file data/joint/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramroman -train_data_file data/joint/full_train_roman_wordgram.t7b -test_data_file data/joint/full_test_roman_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramtfidf -train_data_file data/joint/full_train_wordgramtfidf.t7b -test_data_file data/joint/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramtfidfroman -train_data_file data/joint/full_train_roman_wordgramtfidf.t7b -test_data_file data/joint/full_test_roman_wordgramtfidf.t7b -model_size 1000001
"$@"; ================================================ FILE: linearnet/archive/nytimes_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/charbag -train_data_file data/nytimes/topic/train_charbag.t7b -test_data_file data/nytimes/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/charbagtfidf -train_data_file data/nytimes/topic/train_charbagtfidf.t7b -test_data_file data/nytimes/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/chargram -train_data_file data/nytimes/topic/train_chargram.t7b -test_data_file data/nytimes/topic/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/nytimes_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/chargramtfidf -train_data_file data/nytimes/topic/train_chargramtfidf.t7b -test_data_file data/nytimes/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: 
linearnet/archive/nytimes_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordbag -train_data_file data/nytimes/topic/train_wordbag.t7b -test_data_file data/nytimes/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordbagtfidf -train_data_file data/nytimes/topic/train_wordbagtfidf.t7b -test_data_file data/nytimes/topic/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordgram -train_data_file data/nytimes/topic/train_wordgram.t7b -test_data_file data/nytimes/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/nytimes_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordgramtfidf -train_data_file data/nytimes/topic/train_wordgramtfidf.t7b -test_data_file data/nytimes/topic/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_charbag.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/charbag -train_data_file data/rakuten/sentiment/binary_train_charbag.t7b -test_data_file data/rakuten/sentiment/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/charbagtfidf -train_data_file data/rakuten/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/chargram -train_data_file data/rakuten/sentiment/binary_train_chargram.t7b -test_data_file data/rakuten/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/chargramtfidf -train_data_file data/rakuten/sentiment/binary_train_chargramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; 
================================================ FILE: linearnet/archive/rakutenbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbag -train_data_file data/rakuten/sentiment/binary_train_wordbag.t7b -test_data_file data/rakuten/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordbag.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagtfidf -train_data_file data/rakuten/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagtfidfroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordbagtfidf.t7b -test_data_file 
data/rakuten/sentiment/binary_test_hepburn_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgram -train_data_file data/rakuten/sentiment/binary_train_wordgram.t7b -test_data_file data/rakuten/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgramroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordgram.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgramtfidf -train_data_file data/rakuten/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/rakutenbinary/wordgramtfidfroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/charbag -train_data_file data/rakuten/sentiment/full_train_charbag.t7b -test_data_file data/rakuten/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/charbagtfidf -train_data_file data/rakuten/sentiment/full_train_charbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/chargram -train_data_file data/rakuten/sentiment/full_train_chargram.t7b -test_data_file data/rakuten/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/chargramtfidf -train_data_file data/rakuten/sentiment/full_train_chargramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbag -train_data_file data/rakuten/sentiment/full_train_wordbag.t7b -test_data_file data/rakuten/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordbag.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagtfidf -train_data_file data/rakuten/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang 
Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagtfidfroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgram -train_data_file data/rakuten/sentiment/full_train_wordgram.t7b -test_data_file data/rakuten/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordgram.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramtfidf -train_data_file data/rakuten/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramtfidfroman.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramtfidfroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/config.lua ================================================ --[[ Configuration for LinearNet Copyright 2016 Xiang Zhang --]] -- Name space local config = {} -- Training data configuration config.train_data = {} config.train_data.file = 'data/dianping/train_charbag.t7b' -- Testing data configuration config.test_data = {} config.test_data.file = 'data/dianping/test_charbag.t7b' -- Model configuration config.model = {} config.model.size = 200001 config.model.dimension = 2 config.model.decay = 1e-5 -- Trainer configuration config.train = {} config.train.rate = 1e-3 -- Tester configuration config.test = {} -- Driver configuration config.driver = {} config.driver.loss = 'nn.ClassNLLCriterion' config.driver.threads = 10 config.driver.buffer = 100 config.driver.steps = 100000 config.driver.epoches = 1000 config.driver.interval = 5 config.driver.location = 'models/dianping/charbag' config.driver.initialization = 1e-2 config.driver.plot = true config.driver.debug = false config.driver.resume = false return config ================================================ FILE: linearnet/data.lua ================================================ --[[ Data class for LinearNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local torch = require('torch') local Data = class() -- Constructor for Data -- config: configuration table -- .file: the data file location -- data_table: if present, will use the data_table instead of load from file 
function Data:_init(config, data_table) self.data = data_table or torch.load(config.file) end function Data:getClasses() return #self.data.bag end function Data:getSample(sample, label) local bag, bag_index, bag_value = self.data.bag, self.data.bag_index, self.data.bag_value -- Sample a non-empty example local class = torch.random(#bag) local item = torch.random(bag[class]:size(1)) while bag[class][item][2] == 0 do class = torch.random(#bag) item = torch.random(bag[class]:size(1)) end local start = bag[class][item][1] local length = bag[class][item][2] local sample = sample or torch.Tensor(bag[class][item][2] ,2) sample:resize(bag[class][item][2], 2) sample:select(2, 1):copy(bag_index:narrow(1, start, length)) sample:select(2, 2):copy(bag_value:narrow(1, start, length)) local label = label or torch.Tensor(1) label[1] = class return sample, label end -- Iterator function Data:iterator(sample, label) local bag, bag_index, bag_value = self.data.bag, self.data.bag_index, self.data.bag_value local sample = sample or torch.Tensor(1, 2) local label = label or torch.Tensor(1) local class = 1 local item = 0 local count = 0 return function() item = item + 1 if item > bag[class]:size(1) then class = class + 1 item = 1 if bag[class] == nil then return end end while bag[class][item][2] == 0 do item = item + 1 if item > bag[class]:size(1) then class = class + 1 item = 1 if bag[class] == nil then return end end end local start = bag[class][item][1] local length = bag[class][item][2] sample:resize(length, 2) sample:select(2, 1):copy(bag_index:narrow(1, start, length)) sample:select(2, 2):copy(bag_value:narrow(1, start, length)) label[1] = class return sample, label end end -- Get data table for share function Data:getTable() return self.data end return Data ================================================ FILE: linearnet/driver.lua ================================================ --[[ Driver for LinearNet using HogWILD! 
Copyright 2015 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local nn = require('nn') local os = require('os') local paths = require('paths') local threads = require('threads') local torch = require('torch') local Data = require('data') local Model = require('model') local Queue = require('queue') local Train = require('train') local Test = require('test') -- Library configurations threads.serialization('threads.sharedserialize') local Driver = class() -- Constructor for driver -- options: configuration table for others -- config: configuration table -- .loss: the loss used for classification task -- .threads: number of threads -- .buffer: buffer size for RPC queues -- .steps: steps for each training run -- .epoches: number of testing epoches before stopping -- .interval: print time interval -- .location: save location -- .initialization: initialization parameter for model -- .plot: whether to plot the output -- .debug: whether to debug -- .resume: whether to resume function Driver:_init(options, config) local config = config or {} self.loss = config.loss or 'nn.ClassNLLCriterion' self.threads = config.threads or 10 self.buffer = config.buffer or 100 self.steps = config.steps or 100000 self.epoches = config.epoches or 1000 self.interval = config.interval or 5 self.location = config.location or '.' self.initialization = config.initialization or 1e-2 self.plot = config.plot self.debug = config.debug self.resume = config.resume self.options = options or {} self.config = config math.randomseed(os.time()) torch.manualSeed(os.time()) print('Driver loading training data') self.train_data = Data(self.options.train_data) print('Driver loading testing data') self.test_data = Data(self.options.test_data) self.options.model.dimension = self.train_data:getClasses() print('Driver changed model output dimension to '..
self.options.model.dimension) if self.resume then local record_file = paths.concat(self.location, 'record.t7b') print('Driver loading resumption record from '..record_file) self.record = torch.load(record_file) local model_file = paths.concat( self.location, 'model_'..#self.record..'.t7b') print('Driver loading model from '..model_file) self.model = Model(self.options.model) self.model:load(model_file) if self.record[#self.record].progress then if self.record[#self.record].progress:size(1) == self.threads then self.progress = self.record[#self.record].progress:clone() else print('Driver resumption number of threads change.') self.progress = torch.LongTensor(self.threads):zero() local total = self.record[#self.record].progress:sum() while self.progress:sum() < total do local thread = math.random(self.threads) self.progress[thread] = self.progress[thread] + self.steps end end else print('Driver resumption progress vector not found') self.progress = torch.LongTensor(self.threads):zero() end print('Driver progress = '..self.progress:sum()) for i = 1, #self.record do self:printResult(i) end if self.plot then self:plotRecord() end else self.record = {} print('Driver loading model') self.model = Model(self.options.model) print('Driver initializing model') self.model:reset(self.initialization) self.progress = torch.LongTensor(self.threads):zero() if self.plot then require('gnuplot') end end print('Driver loading tester for training data') self.train_test = Test( self.train_data, self.model, nn[self.loss:sub(4)](), self.options.test) print('Driver loading tester for testing data') self.test_test = Test( self.test_data, self.model, nn[self.loss:sub(4)](), self.options.test) print('Driver building RPC queues') self.master_queue = Queue(self.buffer) self.slave_queues = {} for i = 1, self.threads do self.slave_queues[i] = Queue(self.buffer) end print('Driver creating thread block') local init_thread = self:initThread() self.block = threads.Threads(self.threads, init_thread) 
-- Closing statements of Driver:_init (the constructor header is above this
-- chunk): use per-thread ("specific") job dispatch, stamp the wall-clock time
-- used for log throttling, and derive the global step count from progress.
self.block:specific(true)
self.time = os.time()
self.step = self.progress:sum()
end

-- Run the training process
-- Alternates epochs of background training (worker threads deployed below)
-- with full testing passes on training and testing data, checkpointing and
-- printing/plotting results after each epoch, then shuts the workers down.
function Driver:run()
   self:deployThreads()
   local begin_epoch = #self.record + 1
   local end_epoch = #self.record + self.epoches
   for i = begin_epoch, end_epoch do
      print('Driver testing on training data for epoch '..i)
      self.train_test:run(function (test, step) self:logTest(test, step) end)
      print('Driver testing on testing data for epoch '..i)
      self.test_test:run(function (test, step) self:logTest(test, step) end)
      self:save()
      self:printResult()
      if self.plot then self:plotRecord() end
   end
   -- Ask every worker to exit, then join and tear down the thread block
   for i = 1, self.threads do
      print('Driver sending RPC to exit thread '..i)
      self.slave_queues[i]:push{func = 'exit', arg = {}}
   end
   self.block:synchronize()
   self.block:terminate()
end

-- Deploy threads in sequential order to prevent io and memory jam
-- Blocks until each thread reports 'notifyDeploy'; any other RPC arriving in
-- the meantime is dispatched by name to the matching Driver method.
function Driver:deployThreads()
   for i = 1, self.threads do
      print('Driver deploying job for threads '..i)
      local thread_job = self:threadJob(i)
      self.block:addjob(i, thread_job)
      local rpc = self.master_queue:pop()
      while rpc.func ~= 'notifyDeploy' do
         self[rpc.func](self, unpack(rpc.arg))
         rpc = self.master_queue:pop()
      end
      print('Driver rpc = notifyDeploy, thread = '..rpc.arg[1])
   end
end

-- Thread initialization callback
-- Returns a closure run once inside each worker thread: requires libraries
-- locally and seeds both Lua and Torch RNGs per thread (__threadid is
-- provided by the threads library inside worker threads).
function Driver:initThread()
   return function ()
      local math = require('math')
      local nn = require('nn')
      local os = require('os')
      local torch = require('torch')
      local Queue = require('queue')
      math.randomseed(os.time() + __threadid)
      torch.manualSeed(os.time() + __threadid)
   end
end

-- Thread job callback
-- id: worker thread index.
-- Captures only shareable state (options, raw data table, shared modules,
-- RPC queues, resumed step count) as upvalues for the returned closure, which
-- trains in chunks of `steps` and reports progress to the main thread.
function Driver:threadJob(id)
   local options = self.options
   local steps = self.steps
   local data_table = self.train_data:getTable()
   local modules = self.model:getModules()
   local loss = self.loss
   local master_queue = self.master_queue
   local slave_queue = self.slave_queues[id]
   local progress = self.progress[id]
   return function()
      local os = require('os')
      local nn = require('nn')
      local torch = require('torch')
      local Data = require('data')
      local Model = require('model')
      local Train = require('train')
      local train_data = Data(options.train_data, data_table)
      local model = Model(options.model, modules)
      options.train.step = progress
      -- loss:sub(4) strips a leading 'nn.' prefix from the loss class name
      -- (assumes self.loss looks like 'nn.SomeCriterion' — see config)
      local train = Train(train_data, model, nn[loss:sub(4)](), options.train)
      master_queue:push{func = 'notifyDeploy', arg = {__threadid}}
      local exit = false
      while not exit do
         train:run(steps)
         -- Tell main thread to update progress
         master_queue:push{
            func = 'updateProgress',
            arg = {__threadid, train.step, train.objective}}
         -- Handle RPC requests from main thread
         local rpc = slave_queue:pop_async()
         while rpc do
            if rpc.func == 'exit' then exit = true end
            rpc = slave_queue:pop_async()
         end
      end
   end
end

-- Update progress
-- RPC target executed on the main thread: records the reporting worker's
-- current step and logs the aggregate progress across all workers.
function Driver:updateProgress(thread, step, objective)
   self.progress[thread] = step
   print('Driver rpc = updateProgress, thread = '..thread..', objective = '..
      objective..', progress = '..self.progress[thread]..', total = '..
      self.progress:sum())
end

-- Log for testing
-- test: the running Test instance; step: current test step.
-- Throttled to at most one message per self.interval seconds. Also drains any
-- pending worker RPCs so progress updates keep flowing during long tests.
function Driver:logTest(test, step)
   if os.difftime(os.time(), self.time) >= self.interval then
      local message = 'Test step = '..step..
         ', total_error = '..test.total_error..
         ', total_objective = '..test.total_objective..
         ', label = '..test.label[1]..
         ', decision = '..test.decision[1]
      if self.debug then
         local weight = {
            weight = test.model.linear.weight, bias = test.model.linear.bias}
         for key, w in pairs(weight) do
            message = message..', '..key..':mean() = '..w:mean()..', '..
               key..':std() = '..w:std()
         end
      end
      print(message)
      -- Handle rpc
      local rpc = self.master_queue:pop_async()
      while rpc do
         self[rpc.func](self, unpack(rpc.arg))
         rpc = self.master_queue:pop_async()
      end
      self.time = os.time()
   end
end

-- Save for model
-- epoch: record index to write; defaults to appending a new entry.
-- Backs up the previous record file, then saves the updated record table and
-- a per-epoch snapshot of the model.
function Driver:save(epoch)
   local epoch = epoch or #self.record + 1
   -- Make a backup for the record
   print('Driver backing up record.t7b')
   local record_file = paths.concat(self.location, 'record.t7b')
   os.rename(record_file, record_file..'.backup')
   -- Save the new record
   print('Driver saving new records to '..record_file)
   self.record[epoch] = {
      train_loss = self.train_test.total_objective,
      test_loss = self.test_test.total_objective,
      train_error = self.train_test.total_error,
      test_error = self.test_test.total_error,
      progress = self.progress:clone()
   }
   torch.save(record_file, self.record)
   -- Save the model
   local model_file = paths.concat(self.location, 'model_'..epoch..'.t7b')
   print('Driver saving model to '..model_file)
   self.model:save(model_file)
end

-- Print current result
-- epoch: record index to print; defaults to the latest recorded epoch.
function Driver:printResult(epoch)
   local epoch = epoch or #self.record
   print('Driver epoch = '..epoch..
      ', train_error = '..self.record[epoch].train_error..
      ', test_error = '..self.record[epoch].test_error..
      ', train_loss = '..self.record[epoch].train_loss..
      ', test_loss = '..self.record[epoch].test_loss)
end

-- Plot the record
-- Draws error and loss curves over all recorded epochs in two gnuplot
-- figures, which are created lazily and reused across calls.
function Driver:plotRecord()
   require('gnuplot')
   self.error_figure = self.error_figure or gnuplot.figure()
   self.loss_figure = self.loss_figure or gnuplot.figure()
   local epoch = torch.linspace(1, #self.record, #self.record)
   local train_error = torch.Tensor(epoch:size())
   local test_error = torch.Tensor(epoch:size())
   local train_loss = torch.Tensor(epoch:size())
   local test_loss = torch.Tensor(epoch:size())
   for i = 1, #self.record do
      train_error[i] = self.record[i].train_error
      test_error[i] = self.record[i].test_error
      train_loss[i] = self.record[i].train_loss
      test_loss[i] = self.record[i].test_loss
   end
   gnuplot.figure(self.error_figure)
   gnuplot.plot({'Training error', epoch, train_error},
      {'Testing error', epoch, test_error})
   gnuplot.title('Training and testing error')
   gnuplot.figure(self.loss_figure)
   gnuplot.plot({'Training loss', epoch, train_loss},
      {'Testing loss', epoch, test_loss})
   gnuplot.title('Training and testing loss')
end

return Driver

================================================ FILE: linearnet/model.lua ================================================

--[[
Model class for LinearNet, using SparseLinear
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local nn = require('nn')
local torch = require('torch')

local Model = class()

-- Constructor for model
-- config: configuration table
-- .size: size of input index
-- .dimension: dimension of output
-- .decay: weight decay. Optional.
-- modules: share weights with the given modules. Optional.
function Model:_init(config, modules)
   self.size = config.size
   self.dimension = config.dimension
   self.decay = config.decay or 0
   if modules then
      -- Share parameters with the provided linear module
      self.linear = modules.linear:clone('weight', 'bias')
   else
      self.linear = nn.SparseLinear(self.size, self.dimension)
   end
   self.sequential = nn.Sequential()
   self.sequential:add(self.linear)
   self.sequential:add(nn.LogSoftMax())
end

-- Forward propagation
-- input: a sparse sample accepted by nn.SparseLinear
-- Returns the log-softmax output of the linear layer.
function Model:forward(input)
   return self.sequential:forward(input)
end

-- Backward propagation
-- input: the same sample passed to forward
-- grad_output: gradient with respect to the model output
-- Applies L2 weight decay only to the weight columns referenced by the
-- sparse input indices (column 1 of input), plus the whole bias.
function Model:backward(input, grad_output)
   local grad_input = self.sequential:backward(input, grad_output)
   -- Apply weight decay to linear module
   if self.decay > 0 then
      self.linear_index = self.linear_index or torch.LongTensor(input:size(1))
      self.linear_index:resize(input:size(1)):copy(input:select(2, 1))
      self.linear_decay = self.linear_decay or self.linear.gradWeight:new()
      self.linear_decay:index(self.linear.weight, 2, self.linear_index)
      self.linear.gradWeight:indexAdd(
         2, self.linear_index, self.linear_decay:mul(self.decay))
      self.linear.gradBias:add(self.decay, self.linear.bias)
   end
   return grad_input
end

-- Update parameters
function Model:updateParameters(rate)
   return self.linear:updateParameters(rate)
end

-- Zero grad parameters
function Model:zeroGradParameters()
   return self.linear:zeroGradParameters()
end

-- Set the type
-- tensortype: target tensor type string; defaults to the current weight type.
-- Returns the effective tensor type.
function Model:type(tensortype)
   local tensortype = tensortype or self.linear.weight:type()
   -- Fixed: the guard previously compared against the undefined global
   -- `tensor_type` (always nil), so the conversion ran unconditionally.
   if tensortype ~= self.linear.weight:type() then
      self.linear:type(tensortype)
   end
   return tensortype
end

-- Reset the weights
-- sigma: standard deviation for normal initialization of the weights;
-- the bias is zeroed.
function Model:reset(sigma)
   self.linear.weight:normal(0, sigma)
   self.linear.bias:zero()
end

-- Get the modules
function Model:getModules()
   return {linear = self.linear}
end

-- Share given modules
function Model:shareModules(modules)
   self.linear:share(modules.linear, 'weight', 'bias')
end

-- Save to file
function Model:save(file)
   torch.save(file, self.linear)
end

-- Load from file
-- Copies parameters from the serialized module instead of replacing
-- self.linear, so existing shared references stay valid.
function Model:load(file)
   local linear = torch.load(file)
   self.linear.weight:copy(linear.weight)
   self.linear.bias:copy(linear.bias)
end

return Model

================================================ FILE: linearnet/queue.lua ================================================

--[[
Multithreaded queue based on tds
Copyright 2015 Xiang Zhang
--]]

local class = require('pl.class')
local ffi = require('ffi')
local serialize = require('threads.sharedserialize')
local tds = require('tds')
local threads = require('threads')
local torch = require('torch')

-- Append an underscore to distinguish between metatable and class name
local Queue_ = torch.class('Queue')

-- Constructor
-- size: buffer size (number of slots), defaults to 10
-- pointer[1] = next write slot, pointer[2] = next read slot,
-- pointer[3] = current number of stored items.
function Queue_:__init(size)
   self.data = tds.hash()
   self.pointer = torch.LongTensor(3):fill(1)
   self.pointer[3] = 0
   self.size = size or 10
   self.mutex = threads.Mutex()
   self.added_condition = threads.Condition()
   self.removed_condition = threads.Condition()
end

-- Blocking push: waits on removed_condition while the buffer is full.
-- Items are serialized to a string so they can cross thread boundaries.
function Queue_:push(item)
   local storage = serialize.save(item)
   self.mutex:lock()
   while self.pointer[3] == self.size do
      self.removed_condition:wait(self.mutex)
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
end

-- Blocking pop: waits on added_condition while the buffer is empty.
function Queue_:pop()
   self.mutex:lock()
   while self.pointer[3] == 0 do
      self.added_condition:wait(self.mutex)
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   local item = serialize.load(storage)
   return item
end

-- Non-blocking push: returns nil if the buffer is full, the item on success.
function Queue_:push_async(item)
   if self.pointer[3] == self.size then return end
   local storage = serialize.save(item)
   self.mutex:lock()
   -- Re-check under the lock; the unlocked check above is only a fast path
   if self.pointer[3] == self.size then
      self.mutex:unlock()
      return
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
   return item
end

-- Non-blocking pop: returns nil if the buffer is empty, the item on success.
function Queue_:pop_async()
   if self.pointer[3] == 0 then return end
   self.mutex:lock()
   -- Re-check under the lock; the unlocked check above is only a fast path
   if self.pointer[3] == 0 then
      self.mutex:unlock()
      return
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   local item = serialize.load(storage)
   return item
end

-- Free the underlying mutex and condition variables.
function Queue_:free()
   self.mutex:free()
   self.added_condition:free()
   self.removed_condition:free()
end

-- Custom serializer so a Queue can be shared with worker threads: the tds
-- hash and pointer tensor are shared by raw pointer (with refcounts bumped),
-- and the synchronization primitives are passed by id.
function Queue_:__write(f)
   local data = self.data
   f:writeLong(torch.pointer(data))
   tds.C.tds_hash_retain(data)
   local pointer = self.pointer
   f:writeLong(torch.pointer(pointer))
   pointer:retain()
   f:writeObject(self.size)
   f:writeObject(self.mutex:id())
   f:writeObject(self.added_condition:id())
   f:writeObject(self.removed_condition:id())
end

-- Custom deserializer matching __write: reattaches the shared hash and
-- pointer tensor and reconstructs the synchronization primitives from ids.
function Queue_:__read(f)
   local data = f:readLong()
   data = ffi.cast('tds_hash&', data)
   ffi.gc(data, tds.C.tds_hash_free)
   self.data = data
   local pointer = f:readLong()
   pointer = torch.pushudata(pointer, 'torch.LongTensor')
   self.pointer = pointer
   self.size = f:readObject()
   self.mutex = threads.Mutex(f:readObject())
   self.added_condition = threads.Condition(f:readObject())
   self.removed_condition = threads.Condition(f:readObject())
end

-- Return class name, not the underscored metatable
return Queue

================================================ FILE: linearnet/test.lua ================================================

--[[
Tester for LinearNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')

local Test = class()

-- Constructor
-- data: the data instance
-- model: the model instance
-- loss: the loss instance
-- config: configuration table
function Test:_init(data, model, loss, config)
   self.data = data
   self.model = model
   self.loss = loss
   self.type = model:type()
end

-- Run the tester
-- callback: a function to execute after each step
-- Iterates once over the whole dataset, maintaining running means of the
-- loss objective and the classification error.
function Test:run(callback)
   self.total_objective = 0
   -- NOTE(review): initialized to 1 (worst-case error) but overwritten on the
   -- first step, since self.step is 0 in the running-mean update below.
   self.total_error = 1
   self.step = 0
   for sample, label in self.data:iterator() do
      self:runStep(sample, label)
      self.step = self.step + 1
      if callback then callback(self, self.step) end
   end
end

-- Run for one step
-- sample, label: one example produced by the data iterator.
function Test:runStep(sample, label)
   -- Get sample
   self.sample, self.label = sample, label
   -- Forward propagation
   self.output = self.model:forward(self.sample)
   self.objective = self.loss:forward(self.output, self.label)
   -- Compute decision: predicted class is the argmax of the output
   self.max, self.decision = self.output:max(1)
   self.error = (self.decision[1] == self.label[1]) and 0 or 1
   -- Accumulate errors as running means over steps seen so far
   self.total_objective = (self.total_objective * self.step + self.objective)
      / (self.step + 1)
   self.total_error = (self.total_error * self.step + self.error)
      / (self.step + 1)
end

return Test

================================================ FILE: linearnet/train.lua ================================================

--[[
Training class for LinearNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local nn = require('nn')

local Train = class()

-- Constructor
-- data: the data instance
-- model: the model instance
-- loss: the loss instance
-- config: the configuration table
-- .rate: learning rate
-- .step: current finished steps.
Starting from 0 function Train:_init(data, model, loss, config) self.data = data self.model = model self.loss = loss local config = config or {} self.rate = config.rate or 1e-3 self.step = config.step or 0 self.type = model:type() end -- Run for a number of steps -- steps: number of steps to run -- callback: a function to execute after each step function Train:run(steps, callback) for i = 1, steps do self:runStep() self.step = self.step + 1 if callback then callback(self, i) end end end -- Run for one step function Train:runStep() -- Get sample self.sample, self.label = self.data:getSample(self.sample, self.label) -- Forward propagation self.output = self.model:forward(self.sample) self.objective = self.loss:forward(self.output, self.label) -- Backward propagation self.grad_output = self.loss:backward(self.output, self.label) self.grad_input = self.model:backward(self.sample, self.grad_output) -- Update parameters self.model:updateParameters(self.rate) self.model:zeroGradParameters() end return Train ================================================ FILE: linearnet/unittest/data.lua ================================================ --[[ Unit test for LinearNet data program Copyright 2016 Xiang Zhang --]] local Data = require('data') local math = require('math') local string = require('string') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' self.config = config print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) end function joe:getSampleTest() local data = self.data print('Getting 10 samples') for i = 1, 10 do local sample, label = data:getSample(sample, label) 
io.write(label[1], ' ', sample:size(1)) for j = 1, sample:size(1) do io.write(' ', sample[j][1], ':', string.format('%.2g', sample[j][2])) end io.write('\n') io.flush() end end function joe:iteratorTest() local data = self.data print('Iterating through data') local count = 0 for sample, label in data:iterator() do io.write(label[1], ' ', sample:size(1)) count = count + 1 if math.fmod(count, 16) == 0 then io.write('\n') io.flush() else io.write(', ') end end if math.fmod(count, 16) ~= 0 then io.write('\n') io.flush() end end joe.main() return joe ================================================ FILE: linearnet/unittest/driver.lua ================================================ --[[ Unit test for driver Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_charbag.t7b' config.test_data.file = 'data/dianping/unittest_charbag.t7b' config.driver.steps = 10000 config.driver.epoches = 30 config.driver.interval = 1 config.driver.location = '/tmp' config.driver.debug = true local driver = Driver(config, config.driver) self.config = config self.driver = driver end function joe:driverTest() local driver = self.driver print('Testing driver') driver:run() end joe.main() return joe ================================================ FILE: linearnet/unittest/model.lua ================================================ --[[ Unit test for LinearNet model program Copyright 2016 Xiang Zhang --]] local Model = require('model') local math = require('math') local string = require('string') local sys = require('sys') local Data = require('data') -- A Logic 
Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-3) print(self.model.linear.weight:std()) end function joe:propagationTest() local data = self.data local model = self.model local weight = self.model.linear.weight local bias = self.model.linear.bias print('Testing forward and backward propagation for 10 times') for i = 1, 10 do print('Zero gradient of parameters') sys.tic() model:zeroGradParameters() sys.toc(true) local sample, label = data:getSample() print(tostring(i)..', sample '..sample:size(1)..', label '..label[1]) print('Forward propagating') sys.tic() local output = model:forward(sample) sys.toc(true) print('output '..output:dim()..', '..output:size(1)) print('Backward propagating') local grad_output = torch.rand(output:size()) sys.tic() local grad_input = model:backward(sample, grad_output) sys.toc(true) print('grad_input '..tostring(grad_input)) print('Update parameters') sys.tic() model:updateParameters(1e-3) sys.toc(true) print('weight mean '..weight:mean()..', std '..weight:std().. 
', bias mean '..bias:mean()..', std '..bias:std()) end end function joe:shareModuleTest() local model = self.model local linear = model.linear:clone() print(torch.pointer(model.linear.weight:storage()), torch.pointer(linear.weight:storage()), torch.pointer(model.linear.bias:storage()), torch.pointer(linear.bias:storage())) model:shareModules({linear = linear}) print(torch.pointer(model.linear.weight:storage()), torch.pointer(linear.weight:storage()), torch.pointer(model.linear.bias:storage()), torch.pointer(linear.bias:storage())) end function joe:saveTest() local model = self.model local weight, bias = model.linear.weight, model.linear.bias print('weight mean '..weight:mean()..', std '..weight:std().. ', bias mean '..bias:mean()..', std '..bias:std()) print('Saving model to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Resetting model with sigma 1e-2') model:reset(1e-2) print('weight mean '..weight:mean()..', std '..weight:std().. ', bias mean '..bias:mean()..', std '..bias:std()) print('Loading model from /tmp/model.t7b') model:load('/tmp/model.t7b') print('weight mean '..weight:mean()..', std '..weight:std().. 
', bias mean '..bias:mean()..', std '..bias:std()) end joe.main() return joe ================================================ FILE: linearnet/unittest/test.lua ================================================ --[[ Unit test for LinearNet tester Copyright 2016 Xiang Zhang --]] local Test = require('test') local math = require('math') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-2) print('Loading the loss') self.loss = nn[config.driver.loss:sub(4)]() print(self.loss) print('Loading the tester') self.test = Test(self.data, self.model, self.loss) end function joe:runTest() local callback = function(test, step) print('stp = '..step.. ', lss = '..test.total_objective.. ', err = '..test.total_error.. ', obj = '..test.objective.. ', lbl = '..test.label[1].. 
', dcs = '..test.decision[1]) end print('Starting test') self.test:run(callback) end joe.main() return joe ================================================ FILE: linearnet/unittest/train.lua ================================================ --[[ Unit test for LinearNet trainer Copyright 2016 Xiang Zhang --]] local Train = require('train') local math = require('math') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-2) print('Loading the loss') self.loss = nn[config.driver.loss:sub(4)]() print(self.loss) print('Loading the trainer') self.train = Train(self.data, self.model, self.loss) end function joe:runTest() local callback = function(train, step) local model = train.model if math.fmod(step, 1000) == 0 then local max, decision = train.output:max(1) print('stp = '..step.. ', lbl = '..train.label[1].. ', dcs = '..decision[1].. ', obj = '..train.objective.. ', wmn = '..model.linear.weight:mean().. ', wsd = '..model.linear.weight:std().. ', bmn = '..model.linear.bias:mean().. ', bsd = '..model.linear.bias:std()) end end local steps = 1000000 local train = self.train print('Training for '..steps..' 
steps') train:run(steps, callback) end joe.main() return joe ================================================ FILE: models/README.txt ================================================ This directory should contain trained models and checkpoints. ================================================ FILE: models/embednet/README.txt ================================================ This directory should contain trained models and checkpoints for embednet. ================================================ FILE: models/fasttext/README.txt ================================================ This directory should contain trained models and checkpoints for fasttext. ================================================ FILE: models/glyphnet/README.txt ================================================ This directory should contain trained models and checkpoints for glyphnet. ================================================ FILE: models/linearnet/README.txt ================================================ This directory should contain trained models and checkpoints for linearnet. ================================================ FILE: models/onehotnet/README.txt ================================================ This directory should contain trained models and checkpoints for onehotnet. 
================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/binary_train.t7b -test_data_file data/11st/sentiment/binary_test.t7b -driver_location models/11stbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/binary_train_rr.t7b -test_data_file data/11st/sentiment/binary_test_rr.t7b -driver_location models/11stbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/binary_train.t7b -test_data_file data/11st/sentiment/binary_test.t7b -driver_location models/11stbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small 
-train_data_file data/11st/sentiment/binary_train_rr.t7b -test_data_file data/11st/sentiment/binary_test_rr.t7b -driver_location models/11stbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/full_train.t7b -test_data_file data/11st/sentiment/full_test.t7b -driver_location models/11stfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/full_train_rr.t7b -test_data_file data/11st/sentiment/full_test_rr.t7b -driver_location models/11stfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/full_train.t7b -test_data_file data/11st/sentiment/full_test.t7b -driver_location models/11stfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/full_train_rr.t7b -test_data_file data/11st/sentiment/full_test_rr.t7b -driver_location models/11stfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/amazonbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/amazon/binary_train.t7b -test_data_file data/amazon/binary_test.t7b -driver_location models/amazonbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/amazonbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/amazon/binary_train.t7b -test_data_file data/amazon/binary_test.t7b -driver_location models/amazonbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/amazonfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/amazon/full_train.t7b -test_data_file data/amazon/full_test.t7b -driver_location models/amazonfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: 
onehotnet/archive/amazonfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/amazon/full_train.t7b -test_data_file data/amazon/full_test.t7b -driver_location models/amazonfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train.t7b -test_data_file data/chinanews/topic/test.t7b -driver_location models/chinanews/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_pinyin.t7b -test_data_file data/chinanews/topic/test_pinyin.t7b -driver_location models/chinanews/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/chinanews/topic/train.t7b -test_data_file data/chinanews/topic/test.t7b -driver_location 
models/chinanews/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/chinanews/topic/train_pinyin.t7b -test_data_file data/chinanews/topic/test_pinyin.t7b -driver_location models/chinanews/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/onehot4temporal12length2048feature256roman -train_data_file data/dianping/train_pinyin_string.t7b -test_data_file data/dianping/test_pinyin_string.t7b "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: 
onehotnet/archive/dianping_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/onehot4temporal8length1944feature256roman -train_data_file data/dianping/train_pinyin_string.t7b -test_data_file data/dianping/test_pinyin_string.t7b "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train.t7b -test_data_file data/ifeng/topic/test.t7b -driver_location models/ifeng/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_pinyin.t7b -test_data_file data/ifeng/topic/test_pinyin.t7b -driver_location models/ifeng/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/ifeng/topic/train.t7b -test_data_file data/ifeng/topic/test.t7b -driver_location 
models/ifeng/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/ifeng/topic/train_pinyin.t7b -test_data_file data/ifeng/topic/test_pinyin.t7b -driver_location models/ifeng/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train.t7b -test_data_file data/jd/sentiment/binary_test.t7b -driver_location models/jdbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train_pinyin.t7b -test_data_file data/jd/sentiment/binary_test_pinyin.t7b -driver_location models/jdbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-driver_variation small -train_data_file data/jd/sentiment/binary_train.t7b -test_data_file data/jd/sentiment/binary_test.t7b -driver_location models/jdbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/binary_train_pinyin.t7b -test_data_file data/jd/sentiment/binary_test_pinyin.t7b -driver_location models/jdbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train.t7b -test_data_file data/jd/sentiment/full_test.t7b -driver_location models/jdfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train_pinyin.t7b -test_data_file data/jd/sentiment/full_test_pinyin.t7b -driver_location models/jdfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/full_train.t7b -test_data_file data/jd/sentiment/full_test.t7b -driver_location models/jdfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/full_train_pinyin.t7b -test_data_file data/jd/sentiment/full_test_pinyin.t7b -driver_location models/jdfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train.t7b -test_data_file data/joint/binary_test.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train_roman.t7b -test_data_file data/joint/binary_test_roman.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal12length2048feature256roman "$@"; 
================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train.t7b -test_data_file data/joint/binary_test.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train_roman.t7b -test_data_file data/joint/binary_test_roman.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/full_train.t7b -test_data_file data/joint/full_test.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-train_data_file data/joint/full_train_roman.t7b -test_data_file data/joint/full_test_roman.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train.t7b -test_data_file data/joint/full_test.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train_roman.t7b -test_data_file data/joint/full_test_roman.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/nytimes_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/nytimes/topic/train.t7b -test_data_file data/nytimes/topic/test.t7b -driver_location models/nytimes/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/nytimes_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/nytimes/topic/train.t7b -test_data_file data/nytimes/topic/test.t7b -driver_location models/nytimes/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/binary_train.t7b -test_data_file data/rakuten/sentiment/binary_test.t7b -driver_location models/rakutenbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/binary_train_hepburn.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn.t7b -driver_location models/rakutenbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/binary_train.t7b -test_data_file data/rakuten/sentiment/binary_test.t7b -driver_location models/rakutenbinary/onehot4temporal8length1944feature256 
"$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/binary_train_hepburn.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn.t7b -driver_location models/rakutenbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/full_train.t7b -test_data_file data/rakuten/sentiment/full_test.t7b -driver_location models/rakutenfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/full_train_hepburn.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn.t7b -driver_location models/rakutenfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; 
set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/full_train.t7b -test_data_file data/rakuten/sentiment/full_test.t7b -driver_location models/rakutenfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/full_train_hepburn.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn.t7b -driver_location models/rakutenfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/config.lua ================================================ --[[ Configuration for EmbedNet Copyright Xiang Zhang 2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_string.t7b' config.train_data.batch = 16 config.train_data.size = 256 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_string.t7b' config.test_data.batch = 16 config.test_data.size = 256 -- Model configurations config.model = {} config.model.cudnn = true -- Model variations configuration config.variation = {} -- Large model configuration local onehot = {} onehot[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} onehot[6] = {name = 
'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[16] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[18] = {name = 
'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true} temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024} temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[31] = {name = 'nn.LogSoftMax'} config.variation['large'] = {onehot = onehot, temporal = temporal, length = 2048} -- Small model configuration local onehot = {} onehot[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} onehot[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} 
onehot[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true} temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024} temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[21] = {name = 'nn.LogSoftMax'} config.variation['small'] = {onehot = onehot, temporal = temporal, length = 1944} -- Trainer settings config.train = {} config.train.momentum = 0.9 config.train.decay = 1e-5 -- These are just 
multipliers to config.driver.rate -- For every config.driver.schedule * config.driver.steps config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024} -- Tester settings config.test = {} -- Visualizer settings config.visualizer = {} config.visualizer.width = 1200 config.visualizer.scale = 4 config.visualizer.height = 64 -- Driver configurations config.driver = {} config.driver.type = 'torch.CudaTensor' config.driver.device = 1 config.driver.loss = 'nn.ClassNLLCriterion' config.driver.variation = 'large' config.driver.steps = 100000 config.driver.epoches = 100 config.driver.schedule = 8 config.driver.rate = 1e-5 config.driver.interval = 5 config.driver.location = 'models/dianping/onehot4temporal12length2048feature256' config.driver.plot = true config.driver.visualize = true config.driver.debug = false config.driver.resume = false -- Main configuration config.joe = {} return config ================================================ FILE: onehotnet/data.lua ================================================ --[[ Data class for OnehotNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local parent = require('glyphnet/data') local Data = class(parent) -- Constructor for Data -- config: configuration table -- .file: file for data -- .batch: batch of data -- .size: size of the quantization function Data:_init(config) local data = torch.load(config.file) self.data = {code = data.index, code_value = data.content} self.length = config.length or 2048 self.size = config.size or 256 self.batch = config.batch or 16 end function Data:initSample(sample, label) local sample = sample or torch.Tensor(self.batch, self.size, self.length) local label = label or torch.Tensor(self.batch) sample:zero() return sample, label end function Data:index(sample, class, item) local code, code_value = self.data.code, self.data.code_value local position = 1 for field = 1, code[class][item]:size(1) do -- Break if current 
position is larger than sample length if position > sample:size(2) then break end for char = 1, code[class][item][field][2] + 1 do -- Break if current position is larger than sample length if position > sample:size(2) then break end local char_index = code[class][item][field][1] + char - 1 sample[code_value[char_index] + 1][position] = 1 position = position + 1 end end return sample end return Data ================================================ FILE: onehotnet/driver.lua ================================================ --[[ Driver for OnehotNet training Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local parent = require('glyphnet/driver') local Driver = class(parent) -- Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.onehot = self.options.variation[self.variation].onehot self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. 
self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Onehot model' self.onehot_visualizer = self.onehot_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.onehot_visualizer:drawSequential(self.model.onehot) self.temporal_visualizer:drawSequential(self.model.temporal) end return Driver ================================================ FILE: onehotnet/model.lua ================================================ --[[ Model for OnehotNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local nn = require('nn') local parent = require('glyphnet/model') local Model = class(parent) -- Model constructor -- config: configuration table -- .onehot: configuration table of the onehot model -- .temporal: configuration table of the temporal model -- .file: the model file to load -- .cudnn: whether to use NVidia CUDNN function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.onehot = self:makeCleanSequential(model.onehot) self.temporal = self:makeCleanSequential(model.temporal) else self.onehot = self:createCleanSequential(config.onehot) self:initSequential(self.onehot) self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.temporal) end -- Saving configurations self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() end function Model:forward(input) self.feature = self.onehot:forward(input) self.output = self.temporal:forward(self.feature) return self.output end function 
Model:backward(input, grad_output) self.grad_feature = self.temporal:backward(self.feature, grad_output) self.grad_input = self.onehot:backward(input, self.grad_feature) return self.grad_input end function Model:getParameters() return nn.Module.getParameters(self) end function Model:parameters() local parameters, gradients = {}, {} if not self.pretrain then local onehot_parameters, onehot_gradients = self.onehot:parameters() for i = 1, #onehot_parameters do parameters[#parameters + 1] = onehot_parameters[i] gradients[#gradients + 1] = onehot_gradients[i] end end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = temporal_parameters[i] gradients[#gradients + 1] = temporal_gradients[i] end return parameters, gradients end function Model:type(tensortype) if tensortype ~= nil and tensortype ~= self.tensortype then if tensortype == 'torch.CudaTensor' then require('cunn') self.onehot = self:makeCudaSequential(self.onehot) self.temporal = self:makeCudaSequential(self.temporal) else self.onehot = self:makeCleanSequential(self.onehot) self.temporal = self:makeCleanSequential(self.temporal) end self.onehot:type(tensortype) self.temporal:type(tensortype) self.tensortype = tensortype end return self.tensortype end function Model:setMode(mode) self:setModeSequential(self.onehot, mode) self:setModeSequential(self.temporal, mode) end function Model:save(file) local onehot = self:clearSequential( self:makeCleanSequential(self.onehot)) local temporal = self:clearSequential( self:makeCleanSequential(self.temporal)) torch.save(file, {onehot = onehot, temporal = temporal}) end return Model ================================================ FILE: onehotnet/unittest/data.lua ================================================ --[[ Unit test for OnehotNet data component Copyright 2016 Xiang Zhang --]] local Data = require('data') local image = require('image') -- A Logic Named Joe local joe = {} function 
joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.length = 2048 config.test_data.length = 2048 print('Creating testing data object') local data = Data(config.test_data) self.config = config self.data = data end function joe:getBatchTest() local data = self.data print('Getting a batch') local sample, label = data:getBatch() local win = image.display{image = sample[1]:narrow(2, 1, 512), zoom = 3} print('Getting a second batch') sample, label = data:getBatch(sample, label) win = image.display{ win = win, image = sample[1]:narrow(2, 1, 512), zoom = 3} end function joe:iteratorTest() local data = self.data local win for sample, label, count in data:iterator() do win = image.display{ win = win, image = sample[1]:narrow(2, 1, 512), zoom = 3} io.write(count, ':') for i = 1, count do io.write(' ', label[i]) end io.write('\n') io.flush() end end joe.main() return joe ================================================ FILE: onehotnet/unittest/driver.lua ================================================ --[[ Unit test for OnehotNet driver component Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_string.t7b' config.test_data.file = 'data/dianping/unittest_string.t7b' config.driver.debug = true config.driver.device = 3 config.driver.steps = 10 config.driver.epoches = 5 
local driver = Driver(config, config.driver)
   self.config = config
   self.driver = driver
end

-- Exercise the driver end-to-end via driver:run().
function joe:driverTest()
   local driver = self.driver
   print('Testing driver')
   driver:run()
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' variation of the model and print its two submodels.
function joe:init()
   local config = dofile('config.lua')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048)
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size())
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model_cuda.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' model variation on GPU with cudnn disabled.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   config.model.cudnn = false
   local model = Model(config.model)
   model:cuda()
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random CUDA input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
            params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048):cuda()
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size()):cuda()
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in
ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model_cudnn.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' model variation on GPU with cudnn enabled.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   config.model.cudnn = true
   local model = Model(config.model)
   model:cuda()
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random CUDA input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048):cuda()
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size()):cuda()
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/test.lua
================================================

--[[
Unit test for OnehotNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A
-- Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model and loss, then a Test object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with a progress-printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a callback that prints accumulated counts, errors, objectives
-- and timings of the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/test_cuda.lua
================================================

--[[
Unit test for OnehotNet test component
Copyright 2016 Xiang Zhang
--]]

local Test = require('test')
local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model and CUDA loss, then a Test object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with a progress-printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a callback that prints accumulated counts, errors, objectives
-- and timings of the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/train.lua
================================================

--[[
Unit test for OnehotNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model and loss, scale the configured learning rates by
-- config.driver.rate, then construct a Train object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create trainer')
   for i, v in pairs(config.train.rates) do
      config.train.rates[i] = v * config.driver.rate
   end
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100 steps with a throttled progress callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   -- Fix: the message used to say 'Running for 10 steps' although the call
   -- below has always run 100 steps (cf. train_cuda.lua, whose message
   -- matches its 100000-step run). Make the log match the code.
   print('Running for 100 steps')
   train:run(100, callback)
end

-- Return a callback that prints training progress at most once every
-- 5 seconds (throttled via os.time/os.difftime).
function joe:callback()
   self.time = os.time()
   return function (train, i)
      if os.difftime(os.time(), self.time) >= 5 then
         print('stp: '..train.step..', rat: '..train.rate..
                  ', err: '..train.error..', obj: '..train.objective..
                  ', dat: '..train.time.data..', fwd: '..train.time.forward..
', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/train_cuda.lua
================================================

--[[
Unit test for OnehotNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')
local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model and CUDA loss, scale the configured learning
-- rates by config.driver.rate, then construct a Train object.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create trainer')
   for i, v in pairs(config.train.rates) do
      config.train.rates[i] = v * config.driver.rate
   end
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100000 steps with a throttled progress callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 100000 steps')
   train:run(100000, callback)
end

-- Return a callback that prints training progress at most once every
-- 5 seconds (throttled via os.time/os.difftime).
function joe:callback()
   self.time = os.time()
   return function (train, i)
      if os.difftime(os.time(), self.time) >= 5 then
         print('stp: '..train.step..', rat: '..train.rate..
                  ', err: '..train.error..', obj: '..train.objective..
', dat: '..train.time.data..', fwd: '..train.time.forward..
                  ', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()

return joe

================================================
FILE: unifont/createunifont.lua
================================================

--[[
Create unifont database from png file
Copyright 2015 Xiang Zhang

Usage: qlua createunifont.lua [input] [output]
--]]

local image = require('image')
local io = require('io')
local math = require("math")
local torch = require("torch")

-- A Logic Named Joe
local joe = {}

-- Slice the unifont sprite-sheet png into a num x height x width tensor of
-- glyph bitmaps and save it with torch.save.
-- Optional positional arguments: input png, output t7b, glyphs per sheet
-- row, grid origin (startx, starty), glyph width/height, glyph count.
function joe.main()
   local input = arg[1] or 'unifont/unifont-8.0.01.png'
   local output = arg[2] or 'unifont/unifont-8.0.01.t7b'
   local row = arg[3] and tonumber(arg[3]) or 256
   local startx = arg[4] and tonumber(arg[4]) or 33
   local starty = arg[5] and tonumber(arg[5]) or 65
   local width = arg[6] and tonumber(arg[6]) or 16
   local height = arg[7] and tonumber(arg[7]) or width
   local num = arg[8] and tonumber(arg[8]) or 65536
   print('Loading data from '..input)
   local im = image.load(input)
   -- Invert pixel values (x -> 1 - x) on channel 1; presumably so glyph
   -- ink becomes 1 -- verify against the source png.
   im = im[1]:double():mul(-1):add(1)
   local data = torch.Tensor(num, height, width)
   for i = 1, num do
      -- Glyph i occupies a width x height cell in row-major grid order.
      local x = startx + math.fmod(i - 1, row) * width
      local y = starty + math.floor((i - 1)/row) * height
      data[i]:copy(im[{{y, y + height - 1},{x, x + width - 1}}])
      if math.fmod(i, 1000) == 0 then
         io.write('\rProcessing character: '..i..'/'..num)
         joe.win = image.display({image = data[i], win = joe.win, zoom = 8})
      end
   end
   joe.win = image.display({image = data[num], win = joe.win, zoom = 8})
   print('\rProcessed characters: '..num..'/'..num)
   print('Saving to '..output)
   torch.save(output, data)
end

joe.main()

================================================
FILE: unifont/unifont/README.txt
================================================

This directory contains GNU Unifont data

================================================
FILE: unifont/visualize.lua
================================================

--[[ Visualizing argument
string using GNU Unifont
Copyright 2015 Xiang Zhang
--]]

local bit32 = require('bit32')
local image = require('image')
local torch = require('torch')

local joe = {}

-- Decode the first command-line argument as UTF-8 and display the
-- corresponding unifont glyphs side by side.
function joe.main()
   local input = arg[1]
   local unifont = arg[2] or 'unifont/unifont-8.0.01.t7b'
   print('Loading unifont from '..unifont)
   local data = torch.load(unifont)
   local sequence = joe.utf8to32(input)
   local im = torch.Tensor(data:size(2), data:size(3) * #sequence)
   for i, c in ipairs(sequence) do
      -- Glyph tensor is indexed by code point + 1 (Lua is 1-based).
      im:narrow(2, 1 + (i-1)*data:size(3), data:size(3)):copy(data[c + 1])
   end
   print('Visualizing')
   image.display({image = im, zoom = 4})
end

-- Decode a UTF-8 string into a table of code points (Lua 5.2 bit32 ops).
-- Ref: http://lua-users.org/wiki/LuaUnicode
-- The insert at the top of the loop flushes the previously completed code
-- point (a no-op on the first character, since val is nil); the inserts
-- after the loop flush the final code point and append a trailing 0.
function joe.utf8to32(utf8str)
   assert(type(utf8str) == "string")
   local res, seq, val = {}, 0, nil
   for i = 1, #utf8str do
      local c = string.byte(utf8str, i)
      if seq == 0 then
         table.insert(res, val)
         -- Determine sequence length from the lead byte; 5- and 6-byte
         -- forms are deliberately commented out below.
         seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or
            c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or
            error("invalid UTF-8 character sequence")
         -- Keep the payload bits of the lead byte.
         val = bit32.band(c, 2^(8-seq) - 1)
      else
         -- Continuation byte: shift in its low 6 bits.
         val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F))
      end
      seq = seq - 1
   end
   table.insert(res, val)
   table.insert(res, 0)
   return res
end

joe.main()