Repository: zhangxiangxiao/glyph Branch: master Commit: df6ef3262156 Files: 833 Total size: 789.9 KB Directory structure: gitextract_d_gx1n5i/ ├── LICENSE ├── README.md ├── data/ │ ├── 11st/ │ │ ├── construct_rr.py │ │ ├── create_post.py │ │ ├── create_review.py │ │ ├── segment_rr_word.lua │ │ └── segment_word.py │ ├── README.md │ ├── chinanews/ │ │ └── construct_topic.py │ ├── data/ │ │ └── README.txt │ ├── dianping/ │ │ ├── combine_gram_count.lua │ │ ├── construct_charbag.lua │ │ ├── construct_chargram.lua │ │ ├── construct_chartoken.lua │ │ ├── construct_code.lua │ │ ├── construct_pinyin.py │ │ ├── construct_reviews.lua │ │ ├── construct_string.lua │ │ ├── construct_tfidf.lua │ │ ├── construct_word.lua │ │ ├── construct_wordbag.lua │ │ ├── construct_wordgram.lua │ │ ├── construct_wordtoken.lua │ │ ├── convert_string_code.lua │ │ ├── count_chargram.lua │ │ ├── count_wordgram.lua │ │ ├── limit_code.lua │ │ ├── limit_csvlines.sh │ │ ├── queue.lua │ │ ├── remove_duplication.py │ │ ├── remove_null.sh │ │ ├── segment_roman_word.lua │ │ ├── segment_word.py │ │ ├── select_data.lua │ │ ├── shuffle_lines.sh │ │ ├── sort_gram_count.sh │ │ ├── sort_gram_list.sh │ │ ├── split_lines.sh │ │ └── split_train.lua │ ├── ifeng/ │ │ └── construct_topic.py │ ├── jd/ │ │ ├── count_data.lua │ │ ├── create_comment.py │ │ ├── limit_length.lua │ │ └── sort_data.sh │ ├── joint/ │ │ ├── combine_word.lua │ │ └── combine_word_list.lua │ ├── nytimes/ │ │ ├── construct_topic.py │ │ └── count_class.lua │ └── rakuten/ │ ├── construct_hepburn.py │ ├── create_review.py │ └── segment_word.py ├── doc/ │ └── dianping.md ├── embednet/ │ ├── archive/ │ │ ├── 11stbinary_temporal12length512feature256.sh │ │ ├── 11stbinary_temporal12length512feature256byte.sh │ │ ├── 11stbinary_temporal12length512feature256roman.sh │ │ ├── 11stbinary_temporal12length512feature256romanword.sh │ │ ├── 11stbinary_temporal12length512feature256word.sh │ │ ├── 11stbinary_temporal8length486feature256.sh │ │ ├── 
11stbinary_temporal8length486feature256byte.sh │ │ ├── 11stbinary_temporal8length486feature256roman.sh │ │ ├── 11stbinary_temporal8length486feature256romanword.sh │ │ ├── 11stbinary_temporal8length486feature256word.sh │ │ ├── 11stfull_temporal12length512feature256.sh │ │ ├── 11stfull_temporal12length512feature256byte.sh │ │ ├── 11stfull_temporal12length512feature256roman.sh │ │ ├── 11stfull_temporal12length512feature256romanword.sh │ │ ├── 11stfull_temporal12length512feature256word.sh │ │ ├── 11stfull_temporal8length486feature256.sh │ │ ├── 11stfull_temporal8length486feature256byte.sh │ │ ├── 11stfull_temporal8length486feature256roman.sh │ │ ├── 11stfull_temporal8length486feature256romanword.sh │ │ ├── 11stfull_temporal8length486feature256word.sh │ │ ├── amazonbinary_temporal12length512feature256.sh │ │ ├── amazonbinary_temporal12length512feature256word.sh │ │ ├── amazonbinary_temporal8length486feature256.sh │ │ ├── amazonbinary_temporal8length486feature256word.sh │ │ ├── amazonfull_temporal12length512feature256.sh │ │ ├── amazonfull_temporal12length512feature256word.sh │ │ ├── amazonfull_temporal8length486feature256.sh │ │ ├── amazonfull_temporal8length486feature256word.sh │ │ ├── chinanews_temporal12length512feature256.sh │ │ ├── chinanews_temporal12length512feature256byte.sh │ │ ├── chinanews_temporal12length512feature256roman.sh │ │ ├── chinanews_temporal12length512feature256romanword.sh │ │ ├── chinanews_temporal12length512feature256word.sh │ │ ├── chinanews_temporal8length486feature256.sh │ │ ├── chinanews_temporal8length486feature256byte.sh │ │ ├── chinanews_temporal8length486feature256roman.sh │ │ ├── chinanews_temporal8length486feature256romanword.sh │ │ ├── chinanews_temporal8length486feature256word.sh │ │ ├── dianping_temporal12length512feature256.sh │ │ ├── dianping_temporal12length512feature256byte.sh │ │ ├── dianping_temporal12length512feature256roman.sh │ │ ├── dianping_temporal12length512feature256romanword.sh │ │ ├── 
dianping_temporal12length512feature256word.sh │ │ ├── dianping_temporal8length486feature256.sh │ │ ├── dianping_temporal8length486feature256byte.sh │ │ ├── dianping_temporal8length486feature256roman.sh │ │ ├── dianping_temporal8length486feature256romanword.sh │ │ ├── dianping_temporal8length486feature256word.sh │ │ ├── ifeng_temporal12length512feature256.sh │ │ ├── ifeng_temporal12length512feature256byte.sh │ │ ├── ifeng_temporal12length512feature256roman.sh │ │ ├── ifeng_temporal12length512feature256romanword.sh │ │ ├── ifeng_temporal12length512feature256word.sh │ │ ├── ifeng_temporal8length486feature256.sh │ │ ├── ifeng_temporal8length486feature256byte.sh │ │ ├── ifeng_temporal8length486feature256roman.sh │ │ ├── ifeng_temporal8length486feature256romanword.sh │ │ ├── ifeng_temporal8length486feature256word.sh │ │ ├── jdbinary_temporal12length512feature256.sh │ │ ├── jdbinary_temporal12length512feature256byte.sh │ │ ├── jdbinary_temporal12length512feature256roman.sh │ │ ├── jdbinary_temporal12length512feature256romanword.sh │ │ ├── jdbinary_temporal12length512feature256word.sh │ │ ├── jdbinary_temporal8length486feature256.sh │ │ ├── jdbinary_temporal8length486feature256byte.sh │ │ ├── jdbinary_temporal8length486feature256roman.sh │ │ ├── jdbinary_temporal8length486feature256romanword.sh │ │ ├── jdbinary_temporal8length486feature256word.sh │ │ ├── jdfull_temporal12length512feature256.sh │ │ ├── jdfull_temporal12length512feature256byte.sh │ │ ├── jdfull_temporal12length512feature256roman.sh │ │ ├── jdfull_temporal12length512feature256romanword.sh │ │ ├── jdfull_temporal12length512feature256word.sh │ │ ├── jdfull_temporal8length486feature256.sh │ │ ├── jdfull_temporal8length486feature256byte.sh │ │ ├── jdfull_temporal8length486feature256roman.sh │ │ ├── jdfull_temporal8length486feature256romanword.sh │ │ ├── jdfull_temporal8length486feature256word.sh │ │ ├── jointbinary_temporal12length512feature256.sh │ │ ├── jointbinary_temporal12length512feature256byte.sh │ │ ├── 
jointbinary_temporal12length512feature256roman.sh │ │ ├── jointbinary_temporal12length512feature256romanword.sh │ │ ├── jointbinary_temporal12length512feature256word.sh │ │ ├── jointbinary_temporal8length486feature256.sh │ │ ├── jointbinary_temporal8length486feature256byte.sh │ │ ├── jointbinary_temporal8length486feature256roman.sh │ │ ├── jointbinary_temporal8length486feature256romanword.sh │ │ ├── jointbinary_temporal8length486feature256word.sh │ │ ├── jointfull_temporal12length512feature256.sh │ │ ├── jointfull_temporal12length512feature256byte.sh │ │ ├── jointfull_temporal12length512feature256roman.sh │ │ ├── jointfull_temporal12length512feature256romanword.sh │ │ ├── jointfull_temporal12length512feature256word.sh │ │ ├── jointfull_temporal8length486feature256.sh │ │ ├── jointfull_temporal8length486feature256byte.sh │ │ ├── jointfull_temporal8length486feature256roman.sh │ │ ├── jointfull_temporal8length486feature256romanword.sh │ │ ├── jointfull_temporal8length486feature256word.sh │ │ ├── nytimes_temporal12length512feature256.sh │ │ ├── nytimes_temporal12length512feature256word.sh │ │ ├── nytimes_temporal8length486feature256.sh │ │ ├── nytimes_temporal8length486feature256word.sh │ │ ├── rakutenbinary_temporal12length512feature256.sh │ │ ├── rakutenbinary_temporal12length512feature256byte.sh │ │ ├── rakutenbinary_temporal12length512feature256roman.sh │ │ ├── rakutenbinary_temporal12length512feature256romanword.sh │ │ ├── rakutenbinary_temporal12length512feature256word.sh │ │ ├── rakutenbinary_temporal8length486feature256.sh │ │ ├── rakutenbinary_temporal8length486feature256byte.sh │ │ ├── rakutenbinary_temporal8length486feature256roman.sh │ │ ├── rakutenbinary_temporal8length486feature256romanword.sh │ │ ├── rakutenbinary_temporal8length486feature256word.sh │ │ ├── rakutenfull_temporal12length512feature256.sh │ │ ├── rakutenfull_temporal12length512feature256byte.sh │ │ ├── rakutenfull_temporal12length512feature256roman.sh │ │ ├── 
rakutenfull_temporal12length512feature256romanword.sh │ │ ├── rakutenfull_temporal12length512feature256word.sh │ │ ├── rakutenfull_temporal8length486feature256.sh │ │ ├── rakutenfull_temporal8length486feature256byte.sh │ │ ├── rakutenfull_temporal8length486feature256roman.sh │ │ ├── rakutenfull_temporal8length486feature256romanword.sh │ │ └── rakutenfull_temporal8length486feature256word.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── unittest/ │ │ ├── data.lua │ │ ├── driver.lua │ │ ├── model.lua │ │ ├── model_cudnn.lua │ │ ├── model_cunn.lua │ │ ├── test.lua │ │ ├── test_cuda.lua │ │ ├── train.lua │ │ └── train_cuda.lua │ └── visualizer.lua ├── fasttext/ │ └── archive/ │ ├── 11stbinary_charbigram.sh │ ├── 11stbinary_charbigram_evaluation.sh │ ├── 11stbinary_charbigram_tuned.sh │ ├── 11stbinary_charpentagram.sh │ ├── 11stbinary_charpentagram_evaluation.sh │ ├── 11stbinary_charpentagram_tuned.sh │ ├── 11stbinary_charunigram.sh │ ├── 11stbinary_charunigram_evaluation.sh │ ├── 11stbinary_charunigram_tuned.sh │ ├── 11stbinary_wordbigram.sh │ ├── 11stbinary_wordbigram_evaluation.sh │ ├── 11stbinary_wordbigram_tuned.sh │ ├── 11stbinary_wordbigramroman.sh │ ├── 11stbinary_wordbigramroman_evaluation.sh │ ├── 11stbinary_wordbigramroman_tuned.sh │ ├── 11stbinary_wordpentagram.sh │ ├── 11stbinary_wordpentagram_evaluation.sh │ ├── 11stbinary_wordpentagram_tuned.sh │ ├── 11stbinary_wordpentagramroman.sh │ ├── 11stbinary_wordpentagramroman_evaluation.sh │ ├── 11stbinary_wordpentagramroman_tuned.sh │ ├── 11stbinary_wordunigram.sh │ ├── 11stbinary_wordunigram_evaluation.sh │ ├── 11stbinary_wordunigram_tuned.sh │ ├── 11stbinary_wordunigramroman.sh │ ├── 11stbinary_wordunigramroman_evaluation.sh │ ├── 11stbinary_wordunigramroman_tuned.sh │ ├── 11stfull_charbigram.sh │ ├── 11stfull_charbigram_evaluation.sh │ ├── 11stfull_charbigram_tuned.sh │ ├── 11stfull_charpentagram.sh │ ├── 11stfull_charpentagram_evaluation.sh │ ├── 11stfull_charpentagram_tuned.sh │ ├── 
11stfull_charunigram.sh │ ├── 11stfull_charunigram_evaluation.sh │ ├── 11stfull_charunigram_tuned.sh │ ├── 11stfull_wordbigram.sh │ ├── 11stfull_wordbigram_evaluation.sh │ ├── 11stfull_wordbigram_tuned.sh │ ├── 11stfull_wordbigramroman.sh │ ├── 11stfull_wordbigramroman_evaluation.sh │ ├── 11stfull_wordbigramroman_tuned.sh │ ├── 11stfull_wordpentagram.sh │ ├── 11stfull_wordpentagram_evaluation.sh │ ├── 11stfull_wordpentagram_tuned.sh │ ├── 11stfull_wordpentagramroman.sh │ ├── 11stfull_wordpentagramroman_evaluation.sh │ ├── 11stfull_wordpentagramroman_tuned.sh │ ├── 11stfull_wordunigram.sh │ ├── 11stfull_wordunigram_evaluation.sh │ ├── 11stfull_wordunigram_tuned.sh │ ├── 11stfull_wordunigramroman.sh │ ├── 11stfull_wordunigramroman_evaluation.sh │ ├── 11stfull_wordunigramroman_tuned.sh │ ├── amazonbinary_charbigram.sh │ ├── amazonbinary_charbigram_evaluation.sh │ ├── amazonbinary_charbigram_tuned.sh │ ├── amazonbinary_charpentagram.sh │ ├── amazonbinary_charpentagram_evaluation.sh │ ├── amazonbinary_charpentagram_tuned.sh │ ├── amazonbinary_charunigram.sh │ ├── amazonbinary_charunigram_evaluation.sh │ ├── amazonbinary_charunigram_tuned.sh │ ├── amazonbinary_wordbigram.sh │ ├── amazonbinary_wordbigram_evaluation.sh │ ├── amazonbinary_wordbigram_tuned.sh │ ├── amazonbinary_wordpentagram.sh │ ├── amazonbinary_wordpentagram_evaluation.sh │ ├── amazonbinary_wordpentagram_tuned.sh │ ├── amazonbinary_wordunigram.sh │ ├── amazonbinary_wordunigram_evaluation.sh │ ├── amazonbinary_wordunigram_tuned.sh │ ├── amazonfull_charbigram.sh │ ├── amazonfull_charbigram_evaluation.sh │ ├── amazonfull_charbigram_tuned.sh │ ├── amazonfull_charpentagram.sh │ ├── amazonfull_charpentagram_evaluation.sh │ ├── amazonfull_charpentagram_tuned.sh │ ├── amazonfull_charunigram.sh │ ├── amazonfull_charunigram_evaluation.sh │ ├── amazonfull_charunigram_tuned.sh │ ├── amazonfull_wordbigram.sh │ ├── amazonfull_wordbigram_evaluation.sh │ ├── amazonfull_wordbigram_tuned.sh │ ├── amazonfull_wordpentagram.sh 
│ ├── amazonfull_wordpentagram_evaluation.sh │ ├── amazonfull_wordpentagram_tuned.sh │ ├── amazonfull_wordunigram.sh │ ├── amazonfull_wordunigram_evaluation.sh │ ├── amazonfull_wordunigram_tuned.sh │ ├── chinanews_charbigram.sh │ ├── chinanews_charbigram_evaluation.sh │ ├── chinanews_charbigram_tuned.sh │ ├── chinanews_charpentagram.sh │ ├── chinanews_charpentagram_evaluation.sh │ ├── chinanews_charpentagram_tuned.sh │ ├── chinanews_charunigram.sh │ ├── chinanews_charunigram_evaluation.sh │ ├── chinanews_charunigram_tuned.sh │ ├── chinanews_wordbigram.sh │ ├── chinanews_wordbigram_evaluation.sh │ ├── chinanews_wordbigram_tuned.sh │ ├── chinanews_wordbigramroman.sh │ ├── chinanews_wordbigramroman_evaluation.sh │ ├── chinanews_wordbigramroman_tuned.sh │ ├── chinanews_wordpentagram.sh │ ├── chinanews_wordpentagram_evaluation.sh │ ├── chinanews_wordpentagram_tuned.sh │ ├── chinanews_wordpentagramroman.sh │ ├── chinanews_wordpentagramroman_evaluation.sh │ ├── chinanews_wordpentagramroman_tuned.sh │ ├── chinanews_wordunigram.sh │ ├── chinanews_wordunigram_evaluation.sh │ ├── chinanews_wordunigram_tuned.sh │ ├── chinanews_wordunigramroman.sh │ ├── chinanews_wordunigramroman_evaluation.sh │ ├── chinanews_wordunigramroman_tuned.sh │ ├── dianping_charbigram.sh │ ├── dianping_charbigram_evaluation.sh │ ├── dianping_charbigram_tuned.sh │ ├── dianping_charpentagram.sh │ ├── dianping_charpentagram_evaluation.sh │ ├── dianping_charpentagram_tuned.sh │ ├── dianping_charunigram.sh │ ├── dianping_charunigram_evaluation.sh │ ├── dianping_charunigram_tuned.sh │ ├── dianping_wordbigram.sh │ ├── dianping_wordbigram_evaluation.sh │ ├── dianping_wordbigram_tuned.sh │ ├── dianping_wordbigramroman.sh │ ├── dianping_wordbigramroman_evaluation.sh │ ├── dianping_wordbigramroman_tuned.sh │ ├── dianping_wordpentagram.sh │ ├── dianping_wordpentagram_evaluation.sh │ ├── dianping_wordpentagram_tuned.sh │ ├── dianping_wordpentagramroman.sh │ ├── dianping_wordpentagramroman_evaluation.sh │ ├── 
dianping_wordpentagramroman_tuned.sh │ ├── dianping_wordunigram.sh │ ├── dianping_wordunigram_evaluation.sh │ ├── dianping_wordunigram_tuned.sh │ ├── dianping_wordunigramroman.sh │ ├── dianping_wordunigramroman_evaluation.sh │ ├── dianping_wordunigramroman_tuned.sh │ ├── ifeng_charbigram.sh │ ├── ifeng_charbigram_evaluation.sh │ ├── ifeng_charbigram_tuned.sh │ ├── ifeng_charpentagram.sh │ ├── ifeng_charpentagram_evaluation.sh │ ├── ifeng_charpentagram_tuned.sh │ ├── ifeng_charunigram.sh │ ├── ifeng_charunigram_evaluation.sh │ ├── ifeng_charunigram_tuned.sh │ ├── ifeng_wordbigram.sh │ ├── ifeng_wordbigram_evaluation.sh │ ├── ifeng_wordbigram_tuned.sh │ ├── ifeng_wordbigramroman.sh │ ├── ifeng_wordbigramroman_evaluation.sh │ ├── ifeng_wordbigramroman_tuned.sh │ ├── ifeng_wordpentagram.sh │ ├── ifeng_wordpentagram_evaluation.sh │ ├── ifeng_wordpentagram_tuned.sh │ ├── ifeng_wordpentagramroman.sh │ ├── ifeng_wordpentagramroman_evaluation.sh │ ├── ifeng_wordpentagramroman_tuned.sh │ ├── ifeng_wordunigram.sh │ ├── ifeng_wordunigram_evaluation.sh │ ├── ifeng_wordunigram_tuned.sh │ ├── ifeng_wordunigramroman.sh │ ├── ifeng_wordunigramroman_evaluation.sh │ ├── ifeng_wordunigramroman_tuned.sh │ ├── jdbinary_charbigram.sh │ ├── jdbinary_charbigram_evaluation.sh │ ├── jdbinary_charbigram_tuned.sh │ ├── jdbinary_charpentagram.sh │ ├── jdbinary_charpentagram_evaluation.sh │ ├── jdbinary_charpentagram_tuned.sh │ ├── jdbinary_charunigram.sh │ ├── jdbinary_charunigram_evaluation.sh │ ├── jdbinary_charunigram_tuned.sh │ ├── jdbinary_wordbigram.sh │ ├── jdbinary_wordbigram_evaluation.sh │ ├── jdbinary_wordbigram_tuned.sh │ ├── jdbinary_wordbigramroman.sh │ ├── jdbinary_wordbigramroman_evaluation.sh │ ├── jdbinary_wordbigramroman_tuned.sh │ ├── jdbinary_wordpentagram.sh │ ├── jdbinary_wordpentagram_evaluation.sh │ ├── jdbinary_wordpentagram_tuned.sh │ ├── jdbinary_wordpentagramroman.sh │ ├── jdbinary_wordpentagramroman_evaluation.sh │ ├── jdbinary_wordpentagramroman_tuned.sh │ ├── 
jdbinary_wordunigram.sh │ ├── jdbinary_wordunigram_evaluation.sh │ ├── jdbinary_wordunigram_tuned.sh │ ├── jdbinary_wordunigramroman.sh │ ├── jdbinary_wordunigramroman_evaluation.sh │ ├── jdbinary_wordunigramroman_tuned.sh │ ├── jdfull_charbigram.sh │ ├── jdfull_charbigram_evaluation.sh │ ├── jdfull_charbigram_tuned.sh │ ├── jdfull_charpentagram.sh │ ├── jdfull_charpentagram_evaluation.sh │ ├── jdfull_charpentagram_tuned.sh │ ├── jdfull_charunigram.sh │ ├── jdfull_charunigram_evaluation.sh │ ├── jdfull_charunigram_tuned.sh │ ├── jdfull_wordbigram.sh │ ├── jdfull_wordbigram_evaluation.sh │ ├── jdfull_wordbigram_tuned.sh │ ├── jdfull_wordbigramroman.sh │ ├── jdfull_wordbigramroman_evaluation.sh │ ├── jdfull_wordbigramroman_tuned.sh │ ├── jdfull_wordpentagram.sh │ ├── jdfull_wordpentagram_evaluation.sh │ ├── jdfull_wordpentagram_tuned.sh │ ├── jdfull_wordpentagramroman.sh │ ├── jdfull_wordpentagramroman_evaluation.sh │ ├── jdfull_wordpentagramroman_tuned.sh │ ├── jdfull_wordunigram.sh │ ├── jdfull_wordunigram_evaluation.sh │ ├── jdfull_wordunigram_tuned.sh │ ├── jdfull_wordunigramroman.sh │ ├── jdfull_wordunigramroman_evaluation.sh │ ├── jdfull_wordunigramroman_tuned.sh │ ├── jointbinary_charbigram.sh │ ├── jointbinary_charbigram_evaluation.sh │ ├── jointbinary_charbigram_tuned.sh │ ├── jointbinary_charpentagram.sh │ ├── jointbinary_charpentagram_evaluation.sh │ ├── jointbinary_charpentagram_tuned.sh │ ├── jointbinary_charunigram.sh │ ├── jointbinary_charunigram_evaluation.sh │ ├── jointbinary_charunigram_tuned.sh │ ├── jointbinary_wordbigram.sh │ ├── jointbinary_wordbigram_evaluation.sh │ ├── jointbinary_wordbigram_tuned.sh │ ├── jointbinary_wordbigramroman.sh │ ├── jointbinary_wordbigramroman_evaluation.sh │ ├── jointbinary_wordbigramroman_tuned.sh │ ├── jointbinary_wordpentagram.sh │ ├── jointbinary_wordpentagram_evaluation.sh │ ├── jointbinary_wordpentagram_tuned.sh │ ├── jointbinary_wordpentagramroman.sh │ ├── jointbinary_wordpentagramroman_evaluation.sh │ ├── 
jointbinary_wordpentagramroman_tuned.sh │ ├── jointbinary_wordunigram.sh │ ├── jointbinary_wordunigram_evaluation.sh │ ├── jointbinary_wordunigram_tuned.sh │ ├── jointbinary_wordunigramroman.sh │ ├── jointbinary_wordunigramroman_evaluation.sh │ ├── jointbinary_wordunigramroman_tuned.sh │ ├── jointfull_charbigram.sh │ ├── jointfull_charbigram_evaluation.sh │ ├── jointfull_charbigram_tuned.sh │ ├── jointfull_charpentagram.sh │ ├── jointfull_charpentagram_evaluation.sh │ ├── jointfull_charpentagram_tuned.sh │ ├── jointfull_charunigram.sh │ ├── jointfull_charunigram_evaluation.sh │ ├── jointfull_charunigram_tuned.sh │ ├── jointfull_wordbigram.sh │ ├── jointfull_wordbigram_evaluation.sh │ ├── jointfull_wordbigram_tuned.sh │ ├── jointfull_wordbigramroman.sh │ ├── jointfull_wordbigramroman_evaluation.sh │ ├── jointfull_wordbigramroman_tuned.sh │ ├── jointfull_wordpentagram.sh │ ├── jointfull_wordpentagram_evaluation.sh │ ├── jointfull_wordpentagram_tuned.sh │ ├── jointfull_wordpentagramroman.sh │ ├── jointfull_wordpentagramroman_evaluation.sh │ ├── jointfull_wordpentagramroman_tuned.sh │ ├── jointfull_wordunigram.sh │ ├── jointfull_wordunigram_evaluation.sh │ ├── jointfull_wordunigram_tuned.sh │ ├── jointfull_wordunigramroman.sh │ ├── jointfull_wordunigramroman_evaluation.sh │ ├── jointfull_wordunigramroman_tuned.sh │ ├── nytimes_charbigram.sh │ ├── nytimes_charbigram_evaluation.sh │ ├── nytimes_charbigram_tuned.sh │ ├── nytimes_charpentagram.sh │ ├── nytimes_charpentagram_evaluation.sh │ ├── nytimes_charpentagram_tuned.sh │ ├── nytimes_charunigram.sh │ ├── nytimes_charunigram_evaluation.sh │ ├── nytimes_charunigram_tuned.sh │ ├── nytimes_wordbigram.sh │ ├── nytimes_wordbigram_evaluation.sh │ ├── nytimes_wordbigram_tuned.sh │ ├── nytimes_wordpentagram.sh │ ├── nytimes_wordpentagram_evaluation.sh │ ├── nytimes_wordpentagram_tuned.sh │ ├── nytimes_wordunigram.sh │ ├── nytimes_wordunigram_evaluation.sh │ ├── nytimes_wordunigram_tuned.sh │ ├── rakutenbinary_charbigram.sh │ 
├── rakutenbinary_charbigram_evaluation.sh │ ├── rakutenbinary_charbigram_tuned.sh │ ├── rakutenbinary_charpentagram.sh │ ├── rakutenbinary_charpentagram_evaluation.sh │ ├── rakutenbinary_charpentagram_tuned.sh │ ├── rakutenbinary_charunigram.sh │ ├── rakutenbinary_charunigram_evaluation.sh │ ├── rakutenbinary_charunigram_tuned.sh │ ├── rakutenbinary_wordbigram.sh │ ├── rakutenbinary_wordbigram_evaluation.sh │ ├── rakutenbinary_wordbigram_tuned.sh │ ├── rakutenbinary_wordbigramroman.sh │ ├── rakutenbinary_wordbigramroman_evaluation.sh │ ├── rakutenbinary_wordbigramroman_tuned.sh │ ├── rakutenbinary_wordpentagram.sh │ ├── rakutenbinary_wordpentagram_evaluation.sh │ ├── rakutenbinary_wordpentagram_tuned.sh │ ├── rakutenbinary_wordpentagramroman.sh │ ├── rakutenbinary_wordpentagramroman_evaluation.sh │ ├── rakutenbinary_wordpentagramroman_tuned.sh │ ├── rakutenbinary_wordunigram.sh │ ├── rakutenbinary_wordunigram_evaluation.sh │ ├── rakutenbinary_wordunigram_tuned.sh │ ├── rakutenbinary_wordunigramroman.sh │ ├── rakutenbinary_wordunigramroman_evaluation.sh │ ├── rakutenbinary_wordunigramroman_tuned.sh │ ├── rakutenfull_charbigram.sh │ ├── rakutenfull_charbigram_evaluation.sh │ ├── rakutenfull_charbigram_tuned.sh │ ├── rakutenfull_charpentagram.sh │ ├── rakutenfull_charpentagram_evaluation.sh │ ├── rakutenfull_charpentagram_tuned.sh │ ├── rakutenfull_charunigram.sh │ ├── rakutenfull_charunigram_evaluation.sh │ ├── rakutenfull_charunigram_tuned.sh │ ├── rakutenfull_wordbigram.sh │ ├── rakutenfull_wordbigram_evaluation.sh │ ├── rakutenfull_wordbigram_tuned.sh │ ├── rakutenfull_wordbigramroman.sh │ ├── rakutenfull_wordbigramroman_evaluation.sh │ ├── rakutenfull_wordbigramroman_tuned.sh │ ├── rakutenfull_wordpentagram.sh │ ├── rakutenfull_wordpentagram_evaluation.sh │ ├── rakutenfull_wordpentagram_tuned.sh │ ├── rakutenfull_wordpentagramroman.sh │ ├── rakutenfull_wordpentagramroman_evaluation.sh │ ├── rakutenfull_wordpentagramroman_tuned.sh │ ├── rakutenfull_wordunigram.sh 
│ ├── rakutenfull_wordunigram_evaluation.sh │ ├── rakutenfull_wordunigram_tuned.sh │ ├── rakutenfull_wordunigramroman.sh │ ├── rakutenfull_wordunigramroman_evaluation.sh │ └── rakutenfull_wordunigramroman_tuned.sh ├── glyphnet/ │ ├── archive/ │ │ ├── 11stbinary_spatial6temporal8length486feature256.sh │ │ ├── 11stbinary_spatial8temporal12length512feature256.sh │ │ ├── 11stfull_spatial6temporal8length486feature256.sh │ │ ├── 11stfull_spatial8temporal12length512feature256.sh │ │ ├── amazonbinary_spatial6temporal8length486feature256.sh │ │ ├── amazonbinary_spatial8temporal12length512feature256.sh │ │ ├── amazonfull_spatial6temporal8length486feature256.sh │ │ ├── amazonfull_spatial8temporal12length512feature256.sh │ │ ├── chinanews_spatial6temporal8length486feature256.sh │ │ ├── chinanews_spatial8temporal12length512feature256.sh │ │ ├── dianping_spatial6temporal8length486feature256.sh │ │ ├── dianping_spatial8temporal12length512feature256.sh │ │ ├── ifeng_spatial6temporal8length486feature256.sh │ │ ├── ifeng_spatial8temporal12length512feature256.sh │ │ ├── jdbinary_spatial6temporal8length486feature256.sh │ │ ├── jdbinary_spatial8temporal12length512feature256.sh │ │ ├── jdfull_spatial6temporal8length486feature256.sh │ │ ├── jdfull_spatial8temporal12length512feature256.sh │ │ ├── jointbinary_spatial6temporal8length486feature256.sh │ │ ├── jointbinary_spatial8temporal12length512feature256.sh │ │ ├── jointfull_spatial6temporal8length486feature256.sh │ │ ├── jointfull_spatial8temporal12length512feature256.sh │ │ ├── nytimes_spatial6temporal8length486feature256.sh │ │ ├── nytimes_spatial8temporal12length512feature256.sh │ │ ├── rakutenbinary_spatial6temporal8length486feature256.sh │ │ ├── rakutenbinary_spatial8temporal12length512feature256.sh │ │ ├── rakutenfull_spatial6temporal8length486feature256.sh │ │ └── rakutenfull_spatial8temporal12length512feature256.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── main.lua │ ├── model.lua │ ├── modules/ │ │ ├── 
TemporalConvolutionCudnn.lua │ │ ├── TemporalConvolutionMM.lua │ │ ├── TemporalMaxPoolingCudnn.lua │ │ └── TemporalMaxPoolingMM.lua │ ├── modules.lua │ ├── scroll.lua │ ├── scroll.ui │ ├── test.lua │ ├── train.lua │ ├── unittest/ │ │ ├── data.lua │ │ ├── driver.lua │ │ ├── model.lua │ │ ├── model_cuda.lua │ │ ├── model_cudnn.lua │ │ ├── modules_temporal.lua │ │ ├── modules_temporal_cudnn.lua │ │ ├── test.lua │ │ ├── test_cuda.lua │ │ ├── train.lua │ │ └── train_cuda.lua │ └── visualizer.lua ├── linearnet/ │ ├── archive/ │ │ ├── 11stbinary_charbag.sh │ │ ├── 11stbinary_charbagtfidf.sh │ │ ├── 11stbinary_chargram.sh │ │ ├── 11stbinary_chargramtfidf.sh │ │ ├── 11stbinary_wordbag.sh │ │ ├── 11stbinary_wordbagroman.sh │ │ ├── 11stbinary_wordbagtfidf.sh │ │ ├── 11stbinary_wordbagtfidfroman.sh │ │ ├── 11stbinary_wordgram.sh │ │ ├── 11stbinary_wordgramroman.sh │ │ ├── 11stbinary_wordgramtfidf.sh │ │ ├── 11stbinary_wordgramtfidfroman.sh │ │ ├── 11stfull_charbag.sh │ │ ├── 11stfull_charbagtfidf.sh │ │ ├── 11stfull_chargram.sh │ │ ├── 11stfull_chargramtfidf.sh │ │ ├── 11stfull_wordbag.sh │ │ ├── 11stfull_wordbagroman.sh │ │ ├── 11stfull_wordbagtfidf.sh │ │ ├── 11stfull_wordbagtfidfroman.sh │ │ ├── 11stfull_wordgram.sh │ │ ├── 11stfull_wordgramroman.sh │ │ ├── 11stfull_wordgramtfidf.sh │ │ ├── 11stfull_wordgramtfidfroman.sh │ │ ├── amazonbinary_charbag.sh │ │ ├── amazonbinary_charbagtfidf.sh │ │ ├── amazonbinary_chargram.sh │ │ ├── amazonbinary_chargramtfidf.sh │ │ ├── amazonbinary_wordbag.sh │ │ ├── amazonbinary_wordbagtfidf.sh │ │ ├── amazonbinary_wordgram.sh │ │ ├── amazonbinary_wordgramtfidf.sh │ │ ├── amazonfull_charbag.sh │ │ ├── amazonfull_charbagtfidf.sh │ │ ├── amazonfull_chargram.sh │ │ ├── amazonfull_chargramtfidf.sh │ │ ├── amazonfull_wordbag.sh │ │ ├── amazonfull_wordbagtfidf.sh │ │ ├── amazonfull_wordgram.sh │ │ ├── amazonfull_wordgramtfidf.sh │ │ ├── chinanews_charbag.sh │ │ ├── chinanews_charbagtfidf.sh │ │ ├── chinanews_chargram.sh │ │ ├── 
chinanews_chargramtfidf.sh │ │ ├── chinanews_wordbag.sh │ │ ├── chinanews_wordbagroman.sh │ │ ├── chinanews_wordbagtfidf.sh │ │ ├── chinanews_wordbagtfidfroman.sh │ │ ├── chinanews_wordgram.sh │ │ ├── chinanews_wordgramroman.sh │ │ ├── chinanews_wordgramtfidf.sh │ │ ├── chinanews_wordgramtfidfroman.sh │ │ ├── dianping_charbag.sh │ │ ├── dianping_charbagtfidf.sh │ │ ├── dianping_chargram.sh │ │ ├── dianping_chargramtfidf.sh │ │ ├── dianping_wordbag.sh │ │ ├── dianping_wordbagroman.sh │ │ ├── dianping_wordbagtfidf.sh │ │ ├── dianping_wordbagtfidfroman.sh │ │ ├── dianping_wordgram.sh │ │ ├── dianping_wordgramroman.sh │ │ ├── dianping_wordgramtfidf.sh │ │ ├── dianping_wordgramtfidfroman.sh │ │ ├── ifeng_charbag.sh │ │ ├── ifeng_charbagtfidf.sh │ │ ├── ifeng_chargram.sh │ │ ├── ifeng_chargramtfidf.sh │ │ ├── ifeng_wordbag.sh │ │ ├── ifeng_wordbagroman.sh │ │ ├── ifeng_wordbagtfidf.sh │ │ ├── ifeng_wordbagtfidfroman.sh │ │ ├── ifeng_wordgram.sh │ │ ├── ifeng_wordgramroman.sh │ │ ├── ifeng_wordgramtfidf.sh │ │ ├── ifeng_wordgramtfidfroman.sh │ │ ├── jdbinary_charbag.sh │ │ ├── jdbinary_charbagtfidf.sh │ │ ├── jdbinary_chargram.sh │ │ ├── jdbinary_chargramtfidf.sh │ │ ├── jdbinary_wordbag.sh │ │ ├── jdbinary_wordbagroman.sh │ │ ├── jdbinary_wordbagtfidf.sh │ │ ├── jdbinary_wordbagtfidfroman.sh │ │ ├── jdbinary_wordgram.sh │ │ ├── jdbinary_wordgramroman.sh │ │ ├── jdbinary_wordgramtfidf.sh │ │ ├── jdbinary_wordgramtfidfroman.sh │ │ ├── jdfull_charbag.sh │ │ ├── jdfull_charbagtfidf.sh │ │ ├── jdfull_chargram.sh │ │ ├── jdfull_chargramtfidf.sh │ │ ├── jdfull_wordbag.sh │ │ ├── jdfull_wordbagroman.sh │ │ ├── jdfull_wordbagtfidf.sh │ │ ├── jdfull_wordbagtfidfroman.sh │ │ ├── jdfull_wordgram.sh │ │ ├── jdfull_wordgramroman.sh │ │ ├── jdfull_wordgramtfidf.sh │ │ ├── jdfull_wordgramtfidfroman.sh │ │ ├── jointbinary_charbag.sh │ │ ├── jointbinary_charbagtfidf.sh │ │ ├── jointbinary_chargram.sh │ │ ├── jointbinary_chargramtfidf.sh │ │ ├── jointbinary_wordbag.sh │ │ ├── 
jointbinary_wordbagroman.sh │ │ ├── jointbinary_wordbagtfidf.sh │ │ ├── jointbinary_wordbagtfidfroman.sh │ │ ├── jointbinary_wordgram.sh │ │ ├── jointbinary_wordgramroman.sh │ │ ├── jointbinary_wordgramtfidf.sh │ │ ├── jointbinary_wordgramtfidfroman.sh │ │ ├── jointfull_charbag.sh │ │ ├── jointfull_charbagtfidf.sh │ │ ├── jointfull_chargram.sh │ │ ├── jointfull_chargramtfidf.sh │ │ ├── jointfull_wordbag.sh │ │ ├── jointfull_wordbagroman.sh │ │ ├── jointfull_wordbagtfidf.sh │ │ ├── jointfull_wordbagtfidfroman.sh │ │ ├── jointfull_wordgram.sh │ │ ├── jointfull_wordgramroman.sh │ │ ├── jointfull_wordgramtfidf.sh │ │ ├── jointfull_wordgramtfidfroman.sh │ │ ├── nytimes_charbag.sh │ │ ├── nytimes_charbagtfidf.sh │ │ ├── nytimes_chargram.sh │ │ ├── nytimes_chargramtfidf.sh │ │ ├── nytimes_wordbag.sh │ │ ├── nytimes_wordbagtfidf.sh │ │ ├── nytimes_wordgram.sh │ │ ├── nytimes_wordgramtfidf.sh │ │ ├── rakutenbinary_charbag.sh │ │ ├── rakutenbinary_charbagtfidf.sh │ │ ├── rakutenbinary_chargram.sh │ │ ├── rakutenbinary_chargramtfidf.sh │ │ ├── rakutenbinary_wordbag.sh │ │ ├── rakutenbinary_wordbagroman.sh │ │ ├── rakutenbinary_wordbagtfidf.sh │ │ ├── rakutenbinary_wordbagtfidfroman.sh │ │ ├── rakutenbinary_wordgram.sh │ │ ├── rakutenbinary_wordgramroman.sh │ │ ├── rakutenbinary_wordgramtfidf.sh │ │ ├── rakutenbinary_wordgramtfidfroman.sh │ │ ├── rakutenfull_charbag.sh │ │ ├── rakutenfull_charbagtfidf.sh │ │ ├── rakutenfull_chargram.sh │ │ ├── rakutenfull_chargramtfidf.sh │ │ ├── rakutenfull_wordbag.sh │ │ ├── rakutenfull_wordbagroman.sh │ │ ├── rakutenfull_wordbagtfidf.sh │ │ ├── rakutenfull_wordbagtfidfroman.sh │ │ ├── rakutenfull_wordgram.sh │ │ ├── rakutenfull_wordgramroman.sh │ │ ├── rakutenfull_wordgramtfidf.sh │ │ └── rakutenfull_wordgramtfidfroman.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── queue.lua │ ├── test.lua │ ├── train.lua │ └── unittest/ │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── test.lua │ └── train.lua ├── models/ │ 
├── README.txt │ ├── embednet/ │ │ └── README.txt │ ├── fasttext/ │ │ └── README.txt │ ├── glyphnet/ │ │ └── README.txt │ ├── linearnet/ │ │ └── README.txt │ └── onehotnet/ │ └── README.txt ├── onehotnet/ │ ├── archive/ │ │ ├── 11stbinary_onehot4temporal12length2048feature256.sh │ │ ├── 11stbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── 11stbinary_onehot4temporal8length1944feature256.sh │ │ ├── 11stbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── 11stfull_onehot4temporal12length2048feature256.sh │ │ ├── 11stfull_onehot4temporal12length2048feature256roman.sh │ │ ├── 11stfull_onehot4temporal8length1944feature256.sh │ │ ├── 11stfull_onehot4temporal8length1944feature256roman.sh │ │ ├── amazonbinary_onehot4temporal12length2048feature256.sh │ │ ├── amazonbinary_onehot4temporal8length1944feature256.sh │ │ ├── amazonfull_onehot4temporal12length2048feature256.sh │ │ ├── amazonfull_onehot4temporal8length1944feature256.sh │ │ ├── chinanews_onehot4temporal12length2048feature256.sh │ │ ├── chinanews_onehot4temporal12length2048feature256roman.sh │ │ ├── chinanews_onehot4temporal8length1944feature256.sh │ │ ├── chinanews_onehot4temporal8length1944feature256roman.sh │ │ ├── dianping_onehot4temporal12length2048feature256.sh │ │ ├── dianping_onehot4temporal12length2048feature256roman.sh │ │ ├── dianping_onehot4temporal8length1944feature256.sh │ │ ├── dianping_onehot4temporal8length1944feature256roman.sh │ │ ├── ifeng_onehot4temporal12length2048feature256.sh │ │ ├── ifeng_onehot4temporal12length2048feature256roman.sh │ │ ├── ifeng_onehot4temporal8length1944feature256.sh │ │ ├── ifeng_onehot4temporal8length1944feature256roman.sh │ │ ├── jdbinary_onehot4temporal12length2048feature256.sh │ │ ├── jdbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── jdbinary_onehot4temporal8length1944feature256.sh │ │ ├── jdbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── jdfull_onehot4temporal12length2048feature256.sh │ │ ├── 
jdfull_onehot4temporal12length2048feature256roman.sh │ │ ├── jdfull_onehot4temporal8length1944feature256.sh │ │ ├── jdfull_onehot4temporal8length1944feature256roman.sh │ │ ├── jointbinary_onehot4temporal12length2048feature256.sh │ │ ├── jointbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── jointbinary_onehot4temporal8length1944feature256.sh │ │ ├── jointbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── jointfull_onehot4temporal12length2048feature256.sh │ │ ├── jointfull_onehot4temporal12length2048feature256roman.sh │ │ ├── jointfull_onehot4temporal8length1944feature256.sh │ │ ├── jointfull_onehot4temporal8length1944feature256roman.sh │ │ ├── nytimes_onehot4temporal12length2048feature256.sh │ │ ├── nytimes_onehot4temporal8length1944feature256.sh │ │ ├── rakutenbinary_onehot4temporal12length2048feature256.sh │ │ ├── rakutenbinary_onehot4temporal12length2048feature256roman.sh │ │ ├── rakutenbinary_onehot4temporal8length1944feature256.sh │ │ ├── rakutenbinary_onehot4temporal8length1944feature256roman.sh │ │ ├── rakutenfull_onehot4temporal12length2048feature256.sh │ │ ├── rakutenfull_onehot4temporal12length2048feature256roman.sh │ │ ├── rakutenfull_onehot4temporal8length1944feature256.sh │ │ └── rakutenfull_onehot4temporal8length1944feature256roman.sh │ ├── config.lua │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ └── unittest/ │ ├── data.lua │ ├── driver.lua │ ├── model.lua │ ├── model_cuda.lua │ ├── model_cudnn.lua │ ├── test.lua │ ├── test_cuda.lua │ ├── train.lua │ └── train_cuda.lua └── unifont/ ├── createunifont.lua ├── unifont/ │ └── README.txt └── visualize.lua ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2017, Xiang Zhang All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # Glyph This repository is used to publish all the code used for the following article: [Xiang Zhang, Yann LeCun, Which Encoding is the Best for Text Classification in Chinese, English, Japanese and Korean?, arXiv 1708.02657](https://arxiv.org/abs/1708.02657) The code and datasets are completely released as of January 2018, including all the code for crawling, preprocessing and training on the datasets. However, the documentation may not be complete yet. 
That said, readers could refer to the `doc` directory for an example in reproducing all the results for the Dianping dataset, and extend that to other datasets in similar ways. ## Reproducibility Manifesto If anyone sees a number in our paper, there is a script one can execute to reproduce it. No responsibility should be imposed on the user to figure out any experimental parameter barried in the paper's content. ## Datasets The `data` directory contains the preprocessing scripts for all the datasets used in the paper. These datasets are released separately of their processing source code. See below for details. ### Summary The following table is a summary of the datasets. Most of them have millions of samples for training. | Dataset | Language | Classes | Train | Test | |----------------|--------------|---------|------------|-----------| | Dianping | Chinese | 2 | 2,000,000 | 500,000 | | JD full | Chinese | 5 | 3,000,000 | 250,000 | | JD binary | Chinese | 2 | 4,000,000 | 360,000 | | Rakuten full | Japanese | 5 | 4,000,000 | 500,000 | | Rakuten binary | Japanese | 2 | 3,400,000 | 400,000 | | 11st full | Korean | 5 | 750,000 | 100,000 | | 11st binary | Korean | 2 | 4,000,000 | 400,000 | | Amazon full | English | 5 | 3,000,000 | 650,000 | | Amazon binary | English | 2 | 3,600,000 | 400,000 | | Ifeng | Chinese | 5 | 800,000 | 50,000 | | Chinanews | Chinese | 7 | 1,400,000 | 112,000 | | NYTimes | English | 7 | 1,400,000 | 105,000 | | Joint full | Multilingual | 5 | 10,750,000 | 1,500,000 | | Joint binary | Multilingual | 2 | 15,000,000 | 1,560,000 | ### Download Datasets are released separtely of the source code via links from Google Drive. *These datasets should only be used for the purpose of research*. 
| Dataset | Train | Test | |----------------|--------------------------------|-------------------------------| | Dianping | [Link](https://goo.gl/uKPxyo) | [Link](https://goo.gl/2QZpLx) | | JD full | [Link](https://goo.gl/u3vsak) | [Link](https://goo.gl/hLZRky) | | JD binary | [Link](https://goo.gl/ZPj1ip) | [Link](https://goo.gl/bqiEfP) | | Rakuten full | [Link](https://goo.gl/A7y14i) | [Link](https://goo.gl/ve4mup) | | Rakuten binary | [Link](https://goo.gl/3kYQ2f) | [Link](https://goo.gl/m8FpeH) | | 11st full | [Link](https://goo.gl/F1oPBX) | [Link](https://goo.gl/ZpTLND) | | 11st binary | [Link](https://goo.gl/8Qi7ao) | [Link](https://goo.gl/nbBhFq) | | Amazon full | [Link](https://goo.gl/UzQWaj) | [Link](https://goo.gl/EXkzWs) | | Amazon binary | [Link](https://goo.gl/u7AxWS) | [Link](https://goo.gl/2fft8x) | | Ifeng | [Link](https://goo.gl/AtKsq4) | [Link](https://goo.gl/tLWojy) | | Chinanews | [Link](https://goo.gl/1p4kdx) | [Link](https://goo.gl/rxvhCJ) | | NYTimes | [Link](https://goo.gl/2hZeqd) | [Link](https://goo.gl/66EDa5) | | Joint full | [Link](https://goo.gl/AJfzLC) | [Link](https://goo.gl/mibMsV) | | Joint binary | [Link](https://goo.gl/YLMqNe) | [Link](https://goo.gl/WRXQuJ) | ## GNU Unifont The `glyphnet` scripts require the GNU Unifont character images to run. The file `unifont-8.0.01.t7b.xz` can be downloaded via [this link](https://goo.gl/aFxYHq). 
================================================ FILE: data/11st/construct_rr.py ================================================ #!/usr/bin/python3 ''' Convert Korean datasets to Revised Romanization of Korean (RR, MC2000) Copyright 2016 Xiang Zhang Usage: python3 construct_hepburn.py -i [input] -o [output] ''' # Input file INPUT = '../data/11st/sentiment/full_train.csv' # Output file OUTPUT = '../data/11st/sentiment/full_train_rr.csv' import argparse import csv import hanja import unidecode # Hangul romanization libraries from hangul_romanize import Transliter from hangul_romanize.rule import academic # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output transliter = Transliter(academic) convertRoman(transliter) def romanizeText(transliter, text): text = text.strip() if text != '': hangul_text = hanja.translate(text, 'substitution') return transliter.translit(hangul_text) return text # Convert the text in Chinese to pintin def convertRoman(transliter): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(unidecode.unidecode(romanizeText( transliter, row[i])).strip().replace('\n','\\n')) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/11st/create_post.py 
================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_post.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/11st/post/*.json.xz' OUTPUT = '../data/11st/sentiment/post.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) star = review.get('star', '') title = review.get('title', '') content = review.get('content', '') if star != '': n = n + 1 writer.writerow([star, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/11st/create_review.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_review.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/11st/review/*.json.xz' OUTPUT = '../data/11st/sentiment/review.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) star = review.get('star', '') title = review.get('title', '') content = review.get('content', '') if star != '': n = n + 1 writer.writerow([star, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/11st/segment_rr_word.lua ================================================ --[[ Create romananized word data from romanized data in csv for Korean Copyright 2016 Xiang Zhang Usage: th segment_rr_word.lua [input] [output] [list] [read] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/11st/sentiment/full_train_rr.csv' local output = arg[2] or '../data/11st/sentiment/full_train_rr_word.csv' local list = arg[3] or '../data/11st/sentiment/full_train_rr_word_list.csv' local read = (arg[4] == 'true') local word_index, word_total if read then print('Reading word index') word_index, word_total = joe.readWords(list) else print('Counting words') local word_count, word_freq = joe.splitWords(input) print('Sorting words by count') word_index, word_total = joe.sortWords(list, word_count, word_freq) end print('Constructing word index output') joe.constructWords(input, output, word_index, word_total) end function joe.readWords(list) local word_index = tds.Hash() local fd = io.open(list) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: '..n) io.flush() end local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') word_index[content[1]] = n end print('\rProcessed lines: '..n) 
fd:close() return word_index, n end function joe.splitWords(input) local word_count, word_freq = tds.Hash(), tds.Hash() local fd = io.open(input) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) field_set = {} for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") -- All punctuation characters except for hyphen "-" content[i] = content[i]:gsub( '([!"#$%%&\'()*+,./:;<=>?@%[\\%]^_`{|}~])', ' %1 ') for word in content[i]:gmatch('[%S]+') do word_count[word] = (word_count[word] or 0) + 1 if not field_set[word] then field_set[word] = true word_freq[word] = (word_freq[word] or 0) + 1 end end end end print('\rProcessed lines: '..n) fd:close() -- Normalizing word frequencies for key, value in pairs(word_freq) do word_freq[key] = value / n end return word_count, word_freq end function joe.sortWords(list, word_count, word_freq) -- Sort the list of words word_list = tds.Vec() for word, _ in pairs(word_count) do word_list[#word_list + 1] = word end word_list:sort(function (w, v) return word_count[w] > word_count[v] end) -- Create the word index word_index = tds.Hash() for index, word in ipairs(word_list) do word_index[word] = index end -- Write it to file fd = io.open(list, 'w') for index, word in ipairs(word_list) do fd:write('"', word:gsub("\n", "\\n"):gsub("\"", "\"\""), '","', word_count[word], '","', word_freq[word], '"\n') end return word_index, #word_list end function joe.constructWords(input, output, word_index, word_total) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) ofd:write('"', content[1], '"') for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") -- All punctuation characters except for 
hyphen "-" content[i] = content[i]:gsub( '([!"#$%%&\'()*+,./:;<=>?@%[\\%]^_`{|}~])', ' %1 ') local first_write = true ofd:write(',"') for word in content[i]:gmatch('[%S]+') do local index = word_index[word] or word_total + 1 if first_write then first_write = false ofd:write(index) else ofd:write(' ', index) end end ofd:write('"') end ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/11st/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Korean datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/11st/sentiment/full_train.csv' #Output file OUTPUT = '../data/11st/sentiment/full_train_word.csv' # List file LIST = '../data/11st/sentiment/full_train_word_list.csv' # Read already defined word list READ = False # Korean dictionary path for MeCab MECAB_DICT_PATH = '/home/xiang/.usr/lib/mecab/dic/mecab-ko-dic' import argparse import csv from konlpy.tag import Mecab # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = 
open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): mecab = Mecab(MECAB_DICT_PATH) # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = mecab.morphs(field) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): mecab = 
Mecab(MECAB_DICT_PATH) # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = mecab.morphs(field) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/README.md ================================================ # Datasets This directory contains the preprocessing scripts for all the datasets used in the paper. These datasets are released separately of their processing source code. 
================================================ FILE: data/chinanews/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/chinanews/article' OUTPUT = '../data/chinanews/topic/news.csv' CATEGORY_FILE = '../data/chinanews/category/category.json' def main(): global INPUT global OUTPUT global CATEGORY_FILE parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file directory', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument( '-c', '--category', help = 'Category file', default = CATEGORY_FILE) args = parser.parse_args() INPUT = args.input OUTPUT = args.output CATEGORY_FILE = args.category createData() def createData(): # Open the category file classes = dict() cfd = open(CATEGORY_FILE, encoding = 'utf-8') i = 1 for line in cfd: category = json.loads(line) classes[category['code']] = i i = i + 1 # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files for prefix in classes: files = glob.glob(INPUT + '/' + prefix + '_*.json.xz') index = classes[prefix] n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] n = n + 1 writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/data/README.txt ================================================ This directory should contain training and testing datasets. ================================================ FILE: data/dianping/combine_gram_count.lua ================================================ --[[ Combine sorted gram counts Copyright 2016 Xiang Zhang Usage: th combine_gram_count.lua [input_prefix] [output] [samples] [chunks] Comment: This program also outputs lines with counts as the firt unquoted csv value, so that one can use GNU sort easily. 
--]] local io = require('io') local math = require('math') local string = require('string') -- A Logic Named Joe local joe = {} function joe.main() local input_prefix = arg[1] or '../data/dianping/train_chargram_count_sort/' local output = arg[2] or '../data/dianping/train_chargram_count_combine.csv' local samples = arg[3] and tonumber(arg[3]) or 2000000 local chunks = arg[4] and tonumber(arg[4]) or 100 print('Combine chunks') joe.combineChunks(input_prefix, output, samples, chunks) end function joe.combineChunks(input_prefix, output, samples, chunks) local n = 0 local ofd = io.open(output, 'w') local current = {} for i = 1, chunks do local ifd = io.open(input_prefix..i..'.csv') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line ', n) io.flush() end local content = joe.parseCSVLine(line) if current[1] ~= content[1] then if current[1] ~= nil then ofd:write(current[3], ',"', current[1], '","', current[2]:gsub('"', '""'), '","', current[4] / samples, '","', current[3], '"\n') end current = content else current[3] = current[3] + content[3] current[4] = current[4] + content[4] end end ifd:close() end ofd:write(current[3], ',"', current[1], '","', current[2]:gsub('"', '""'), '","', current[4] / samples, '","', current[3], '"\n') ofd:close() print('\rProcessed lines: '..n) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_charbag.lua ================================================ --[[ Construct unicode character bag-of-element format from unicode serialization Copyright 2016 Xiang Zhang Usage: th construct_charbag.lua [input] [output] [list] [read] [limit] [replace] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output = arg[2] or '../data/dianping/train_charbag.t7b' local list = arg[3] or '../data/dianping/train_charbag_list.csv' local read = (arg[4] == 'true') local limit = arg[5] and tonumber(arg[5]) or 200000 local replace = arg[6] and tonumber(arg[6]) or 200001 print('Loading data from '..input) local data = torch.load(input) print('Counting character') local count, freq = joe.countBag(data, limit, replace) print('Total number of values: '..count) if read == true then print('Reading frequency from '..list) freq = joe.readList(list) else print('Outputing frequency list to '..list) joe.writeList(freq, list) end print('Constructing character bag data') local bag = joe.constructBag(data, count, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.writeList(freq, list) local fd = io.open(list, 'w') for i = 1, freq:size(1) do local char = (i <= 65536) and joe.utf8str(i - 1) or '' -- Do not print control characters if i < 11 or (i > 11 and i < 33) then char = '' end fd:write('"', i, '","', 
char:gsub('\n', '\\n'):gsub('"', '""'), '","', freq[i], '"\n') end end function joe.readList(list) local freq = {} local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) end return torch.Tensor(freq) end function joe.countBag(data, limit, replace) local code, code_value = data.code, data.code_value local count = 0 local freq = torch.zeros(math.max(limit, replace)) -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local char = code_value[code[i][j][k][1] + l - 1] if char > limit then char = replace end if not index[char] then count = count + 1 index[char] = 1 freq[char] = freq[char] + 1 else index[char] = index[char] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end -- Normalizing the frequency local sum = 0 for i = 1, #code do sum = sum + code[i]:size(1) end freq:div(sum) return count, freq end function joe.constructBag(data, count, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local char = code_value[code[i][j][k][1] + l - 1] 
if char > limit then char = replace end if not index[char] then count = count + 1 index[char] = 1 pointer[#pointer + 1] = char else index[char] = index[char] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_chargram.lua ================================================ --[[ Construct unicode character ngrams format from unicode serialization Copyright 2016 Xiang Zhang Usage: th construct_chargram.lua [input] [output] [list] [read] [gram] [limit] [replace] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output = arg[2] or '../data/dianping/train_chargram.t7b' local list = arg[3] or '../data/dianping/train_chargram_list.csv' local read = (arg[4] == nil) or(arg[4] == 'true') local gram = arg[5] and tonumber(arg[5]) or 5 local limit = arg[6] and tonumber(arg[6]) or 1000000 local replace = arg[7] and tonumber(arg[7]) or 1000001 print('Loading data from '..input) local data = torch.load(input) local freq, dict, ngrams if read == true then print('Reading frequency from '..list) freq, dict = joe.readList(list) else print('Constructing dictionary and frequency list') freq, dict, ngrams = joe.constructList(data, gram) print('Outputing frequency list to '..list) joe.writeList(freq, ngrams, list) end print('Counting character ngrams data') local count = joe.countBag(data, dict, gram, limit, replace) print('Total number of ngrams in data is '..count) print('Constructing character bag data') local bag = joe.constructBag(data, dict, count, gram, limit, replace) print('Saving to '..output) torch.save(output, 
bag) end function joe.constructList(data, gram) local count = tds.Hash() local docs = tds.Hash() local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end if not index[ngram] then docs[ngram] = (docs[ngram] or 0) + 1 index[ngram] = 0 end index[ngram] = index[ngram] + 1 count[ngram] = (count[ngram] or 0) + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end local ngrams = tds.Vec() for ngram, value in pairs(count) do ngrams[#ngrams + 1] = ngram end ngrams:sort(function(a, b) return count[a] > count[b] end) local sum = 0 for i = 1, #code do sum = sum + code[i]:size(1) end local dict = tds.Hash() local freq = torch.Tensor(#ngrams) for index, ngram in ipairs(ngrams) do dict[ngram] = index freq[index] = (docs[ngram] or 0) / sum end return freq, dict, ngrams end function joe.writeList(freq, ngrams, list) local fd = io.open(list, 'w') for i = 1, freq:size(1) do local ngram_string = '' for code in ngrams[i]:gmatch('[%S]+') do local code = tonumber(code) local char = (code <= 65536 and (code > 32 or code == 11)) and joe.utf8str(code - 1) or ' ' ngram_string = ngram_string..char end fd:write('"', ngrams[i], '","', ngram_string:gsub('\n', '\\n'):gsub('"', '""'), '","', freq[i], '"\n') end end function joe.readList(list) local freq_table = tds.Vec() local dict = tds.Hash() local fd = io.open(list) for line in fd:lines() do local 
content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq_table[#freq_table + 1] = tonumber(content[3]) dict[content[1]] = #freq_table end local freq = torch.Tensor(#freq_table) for i, v in ipairs(freq_table) do freq[i] = v end return freq, dict end function joe.countBag(data, dict, gram, limit, replace) local count = 0 local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then index[ngram_index] = 0 count = count + 1 end index[ngram_index] = index[ngram_index] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, dict, count, gram, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k 
= 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then count = count + 1 index[ngram_index] = 0 pointer[#pointer + 1] = ngram_index end index[ngram_index] = index[ngram_index] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = 
string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_chartoken.lua ================================================ --[[ Create chartoken format for fastText Copyright 2017 Xiang Zhang Usage: th construct_chartoken.lua [input] [output] --]] local bit32 = require('bit32') local io = require('io') local math = require('math') local string = require('string') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train.csv' local output = arg[2] or '../data/dianping/train_chartoken.txt' print('Construct token') joe.constructToken(input, output) end function joe.constructToken(input, output) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) ofd:write('__label__', class) for i = 2, #content do content[i] = content[i]:gsub('\\n', ' '):gsub( '[%z\001-\031\127]', ' '):gsub('^%s*(.-)%s*$', '%1') local sequence = joe.utf8to32(content[i]) for j, code in ipairs(sequence) do if code > 32 then ofd:write(' ', joe.utf8str(code)) end end end 
ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end -- UTF-8 decoding function -- Ref: http://lua-users.org/wiki/LuaUnicode function joe.utf8to32(utf8str) assert(type(utf8str) == 'string') local res, seq, val = {}, 0, nil for i = 1, #utf8str do local c = string.byte(utf8str, i) if seq == 0 then table.insert(res, val) seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or error('Invalid UTF-8 character sequence') val = bit32.band(c, 2^(8-seq) - 1) else val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F)) end seq = seq - 1 end table.insert(res, val) table.insert(res, 0) return res end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str(decimal) local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} if decimal < 128 then return string.char(decimal) 
end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end joe.main() return joe ================================================ FILE: data/dianping/construct_code.lua ================================================ --[[ Construct unicode serialization format from string serialization format Copyright 2015-2016 Xiang Zhang Usage: th construct_code.lua [input] [output] [limit] [replace] --]] local bit32 = require('bit32') local ffi = require('ffi') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_string.t7b' local output = arg[2] or '../data/dianping/train_code.t7b' local limit = arg[3] and tonumber(arg[3]) or 65536 local replace = arg[4] and tonumber(arg[4]) or 33 print('Loading data from '..input) local data = torch.load(input) print('Counting UTF-8 code') local count = joe.countCode(data) print('Total number of codes: '..count) print('Constructing UTF-8 code data') local code = joe.constructCode(data, count, limit, replace) print('Saving to '..output) torch.save(output, code) end function joe.countCode(data) local index, content = data.index, data.content local count = 0 -- Iterate through the classes for i = 1, #index do print('Processing for class '..i) -- Iterate through the samples for j = 1, index[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing text: ', j, '/', index[i]:size(1)) io.flush() end -- Iterate through the fields for k = 1, index[i][j]:size(1) do local text = ffi.string( torch.data(content:narrow(1, index[i][j][k][1], 1))) local sequence = joe.utf8to32(text) count = count + #sequence end end print('\rProcessed texts: '..index[i]:size(1)..'/'..index[i]:size(1)) end 
return count end function joe.constructCode(data, count, limit, replace) local index, content = data.index, data.content local code = {} local code_value = torch.LongTensor(count) local p = 1 -- Iterate through the classes for i = 1, #index do print('Processing for class '..i) code[i] = index[i]:clone():zero() -- Iterate through the samples for j = 1, index[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing text: ', j, '/', index[i]:size(1)) io.flush() end -- Iterate through the fields for k = 1, index[i][j]:size(1) do local text = ffi.string( torch.data(content:narrow(1, index[i][j][k][1], 1))) local sequence = joe.utf8to32(text) code[i][j][k][1] = p code[i][j][k][2] = #sequence for l = 1, #sequence do code_value[p + l - 1] = sequence[l] + 1 if limit and code_value[p + l - 1] > limit then code_value[p + l - 1] = replace end end p = p + #sequence end end print('\rProcessed texts: '..index[i]:size(1)..'/'..index[i]:size(1)) end return {code = code, code_value = code_value} end -- UTF-8 decoding function -- Ref: http://lua-users.org/wiki/LuaUnicode function joe.utf8to32(utf8str) assert(type(utf8str) == 'string') local res, seq, val = {}, 0, nil for i = 1, #utf8str do local c = string.byte(utf8str, i) if seq == 0 then table.insert(res, val) seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or error('Invalid UTF-8 character sequence') val = bit32.band(c, 2^(8-seq) - 1) else val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F)) end seq = seq - 1 end table.insert(res, val) table.insert(res, 0) return res end joe.main() return joe ================================================ FILE: data/dianping/construct_pinyin.py ================================================ #!/usr/bin/python3 ''' Convert Chinese datasets to Pinyin format Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -o [output] ''' #Input file INPUT = '../data/dianping/train.csv' #Output file OUTPUT = 
'../data/dianping/train_pinyin.csv' import argparse import csv import pypinyin import unidecode # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output convertPinyin() # Convert the text in Chinese to pintin def convertPinyin(): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(' '.join(map( str.strip, map(lambda s: s.replace('\n', '\\n'), map(unidecode.unidecode, pypinyin.lazy_pinyin( row[i], style = pypinyin.TONE2)))))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/dianping/construct_reviews.lua ================================================ --[[ Create reviews in csv format from original txt file Copyright 2015-2016 Xiang Zhang Usage: th construct_reviews [input] [output] --]] local cjson = require('cjson') local io = require('io') local math = require('math') local joe = {} function joe.main() local input = arg[1] or '../data/dianping/reviews.txt' local output = arg[2] or '../data/dianping/reviews.csv' local ifd = io.open(input) local ofd = io.open(output, "w") local n = 0 local valid = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n, ', valid: ', valid) io.flush() end -- Skip the first line if n > 1 then -- Break content to url and 
json local point = line:find('%^') local data = line:sub(point + 2):gsub("^%s*(.-)%s*$", "%1") -- Parse the data local parsed = cjson.decode(data) local content = parsed.content:gsub("^%s*(.-)%s*$", "%1") local rate = tonumber(parsed.rate) -- Record to csv if rate and rate >= 0 and #content > 0 then valid = valid + 1 content = content:gsub("\n", "\\n"):gsub("\"", "\"\"") ofd:write('"'..rate..'","'..content..'"\n') end end end ifd:close() ofd:close() print('\rProcessed lines: '..n..', valid: '..valid) end joe.main() return joe ================================================ FILE: data/dianping/construct_string.lua ================================================ --[[ Create string serialization format from csv files Copyright 2015-2016 Xiang Zhang Usage: th construct_string.lua [input] [output] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train.csv' local output = arg[2] or '../data/dianping/train_string.t7b' print('Counting samples') local count, bytes, fields = joe.countSamples(input) for i, v in ipairs(count) do print('Number of samples in class '..i..': '..v) end print('Total number of bytes: '..bytes) print('Number of text fields: '..fields) print('Constructing data') local data = joe.constructData(input, count, bytes, fields) print('Saving to '..output) torch.save(output, data) end function joe.countSamples(input) local count = {} local bytes = 0 local fields = nil local n = 0 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) count[class] = count[class] and count[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") bytes = bytes + content[i]:len() + 1 end fields = fields 
or #content - 1 if fields ~= #content - 1 then error('Number of fields is not '..fields..' at line '..n) end end print('\rProcessed lines: '..n) fd:close() return count, bytes, fields end function joe.constructData(input, count, bytes, fields) local data = torch.ByteTensor(bytes) local index = {} for i, v in ipairs(count) do index[i] = torch.LongTensor(v, fields, 2) end local progress = {} local n = 0 local p = 1 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1") index[class][progress[class]][i - 1][1] = p index[class][progress[class]][i - 1][2] = content[i]:len() ffi.copy(torch.data(data:narrow(1, p, content[i]:len() + 1)), content[i]) p = p + content[i]:len() + 1 end end print('\rProcessed lines: '..n) fd:close() return {content = data, index = index} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_tfidf.lua ================================================ --[[ Construct tfidf format from bag format Copyright 2016 Xiang Zhang Usage: th construct_tfidf.lua [input] [output] [list] [limit] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_charbag.t7b' local output = arg[2] or '../data/dianping/train_charbagtfidf.t7b' local list = arg[3] or '../data/dianping/train_charbag_list.csv' local limit = arg[4] and tonumber(arg[4]) or 200000 print('Loading data from '..input) local data = torch.load(input) print('Loading frequency list from '..list) local freq = joe.readList(list) print('Frequency list length '..freq:size(1)) print('Constructing bag-of-elements TFIDF data') local tfidf = joe.constructTfidf(data, freq, limit) print('Saving to '..output) torch.save(output, tfidf) end function joe.readList(list) local freq = {} local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) end return torch.Tensor(freq) end function joe.constructTfidf(data, freq, limit) local bag, bag_index, bag_value = data.bag, data.bag_index, data.bag_value local tfidf_value = bag_value:clone() local freq = freq if freq:size(1) > limit then freq:narrow(1, limit + 1, freq:size(1) - limit):zero() elseif freq:size(1) < limit + 1 then local new_freq = 
freq.new(limit + 1):zero() new_freq:narrow(1, 1, freq:size(1)):copy(freq) freq = new_freq end freq:apply(function (x) return x > 0 and math.log(1/x) or 0 end) local indexed = freq:index(1, bag_index) tfidf_value:cmul(indexed) -- Iterate through the classes for i = 1, #bag do print('Processing for class '..i) -- Iterate through the samples for j = 1, bag[i]:size(1) do if math.fmod(j, 10000) == 0 then io.write('\rProcessing sample: ', j, '/', bag[i]:size(1)) io.flush() end if bag[i][j][2] > 0 and tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( tfidf_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed samples: '..bag[i]:size(1)..'/'..bag[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = tfidf_value} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_word.lua ================================================ --[[ Create word serialization format from csv files Copyright 2015-2016 Xiang Zhang Usage: th construct_word.lua [input] [output] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.csv' local output = arg[2] or '../data/dianping/train_word.t7b' print('Counting samples') local count, length, fields = joe.countSamples(input) for i, v in ipairs(count) do print('Number of samples in class '..i..': '..v) end print('Total number of words: '..length) print('Number of text fields: '..fields) print('Constructing data') local data = joe.constructData(input, count, length, fields) print('Saving to '..output) torch.save(output, data) end function joe.countSamples(input) local count = {} local length = 0 local fields = nil local n = 0 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) count[class] = count[class] and count[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') local _, current_length = content[i]:gsub('(%d+)', '%1') length = length + current_length end fields = fields or #content - 1 if fields ~= #content - 1 then 
error('Number of fields is not '..fields..' at line '..n) end end print('\rProcessed lines: '..n) fd:close() return count, length, fields end function joe.constructData(input, count, length, fields) local data = torch.LongTensor(length) local index = {} for i, v in ipairs(count) do index[i] = torch.LongTensor(v, fields, 2) end local progress = {} local n = 0 local p = 1 local fd = io.open(input) for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') index[class][progress[class]][i - 1][1] = p local current_length = 0 for word in content[i]:gmatch('%d+') do data[p] = tonumber(word) p = p + 1 end index[class][progress[class]][i - 1][2] = p - index[class][progress[class]][i - 1][1] end end print('\rProcessed lines: '..n) fd:close() return {code = index, code_value = data} end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_wordbag.lua ================================================ --[[ Construct word bag-of-element format Copyright 2016 Xiang Zhang Usage: th construct_wordbag.lua [input] [output] [limit] [replace] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_wordbag.t7b' local limit = arg[3] and tonumber(arg[3]) or 200000 local replace = arg[4] and tonumber(arg[4]) or 200001 print('Loading data from '..input) local data = torch.load(input) print('Counting words') local count = joe.countBag(data, limit, replace) print('Total number of values: '..count) print('Constructing word bag data') local bag = joe.constructBag(data, count, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.countBag(data, limit, replace) local code, code_value = data.code, data.code_value local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local word = code_value[code[i][j][k][1] + l - 1] if word > limit then word = replace end if not index[word] then count = 
count + 1 index[word] = 1 else index[word] = index[word] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, count, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do for l = 1, code[i][j][k][2] do local word = code_value[code[i][j][k][1] + l - 1] if word > limit then word = replace end if not index[word] then count = count + 1 index[word] = 1 pointer[#pointer + 1] = word else index[word] = index[word] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.main() return joe ================================================ FILE: data/dianping/construct_wordgram.lua ================================================ --[[ Constructngrams format from serialization Copyright 2016 Xiang Zhang Usage: th construct_wordgram.lua [input] [output] [list] [gram] [limit] [replace] --]] local io = require('io') 
local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_wordgram.t7b' local list = arg[3] or '../data/dianping/train_wordgram_list.csv' local gram = arg[4] and tonumber(arg[4]) or 5 local limit = arg[5] and tonumber(arg[5]) or 1000000 local replace = arg[6] and tonumber(arg[6]) or 1000001 print('Loading data from '..input) local data = torch.load(input) print('Reading frequency from '..list) local freq, dict = joe.readList(list) print('Counting character ngrams data') local count = joe.countBag(data, dict, gram, limit, replace) print('Total number of ngrams in data is '..count) print('Constructing character bag data') local bag = joe.constructBag(data, dict, count, gram, limit, replace) print('Saving to '..output) torch.save(output, bag) end function joe.readList(list) local freq_table = tds.Vec() local dict = tds.Hash() local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq_table[#freq_table + 1] = tonumber(content[3]) dict[content[1]] = #freq_table end local freq = torch.Tensor(#freq_table) for i, v in ipairs(freq_table) do freq[i] = v end return freq, dict end function joe.countBag(data, dict, gram, limit, replace) local count = 0 local code, code_value = data.code, data.code_value -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + 
l - 1]) for m = 2, n do ngram = ngram..' '..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then index[ngram_index] = 0 count = count + 1 end index[ngram_index] = index[ngram_index] + 1 end end end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return count end function joe.constructBag(data, dict, count, gram, limit, replace) local code, code_value = data.code, data.code_value local bag = {} local bag_index = torch.LongTensor(count) local bag_value = torch.DoubleTensor(count) local count = 0 -- Iterate through the classes for i = 1, #code do print('Processing for class '..i) bag[i] = torch.LongTensor(code[i]:size(1), 2) -- Iterate through the samples for j = 1, code[i]:size(1) do if math.fmod(j, 1000) == 0 then io.write('\rProcessing text: ', j, '/', code[i]:size(1)) io.flush() collectgarbage() end local index = {} local pointer = {} bag[i][j][1] = count + 1 -- Iterate through the fields for k = 1, code[i][j]:size(1) do -- Iterate through the grams for n = 1, gram do -- Iterate through the positions for l = 1, code[i][j][k][2] - n + 1 do local ngram = tostring(code_value[code[i][j][k][1] + l - 1]) for m = 2, n do ngram = ngram..' 
'..tostring( code_value[code[i][j][k][1] + l - 1 + m - 1]) end local ngram_index = dict[ngram] if ngram_index == nil or ngram_index > limit then ngram_index = replace end if not index[ngram_index] then count = count + 1 index[ngram_index] = 0 pointer[#pointer + 1] = ngram_index end index[ngram_index] = index[ngram_index] + 1 end end end table.sort(pointer) bag[i][j][2] = #pointer for m = 1, #pointer do bag_index[bag[i][j][1] + m - 1] = pointer[m] if pointer[m] > limit then bag_value[bag[i][j][1] + m - 1] = 0 else bag_value[bag[i][j][1] + m - 1] = index[pointer[m]] end end if #pointer > 0 and bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum() ~= 0 then bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):div( bag_value:narrow(1, bag[i][j][1], bag[i][j][2]):sum()) end end print('\rProcessed texts: '..code[i]:size(1)..'/'..code[i]:size(1)) end return {bag = bag, bag_index = bag_index, bag_value = bag_value} end joe.bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} function joe.utf8str(decimal) local bytemarkers = joe.bytemarkers if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append 
it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/construct_wordtoken.lua ================================================ --[[ Construct word token format from csv files Copyright 2017 Xiang Zhang Usage: th construct_wordtoken [input] [list] [output] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.csv' local list = arg[2] or '../data/dianping/train_word_list.csv' local output = arg[3] or '../data/dianping/train_wordtoken.txt' print('Reading list from '..list) local word_list = joe.readList(list) print('Constructing word token') joe.constructToken(input, output, word_list) end function joe.readList(list) local word_list = tds.Vec() local fd = io.open(list) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) word_list[#word_list + 1] = content[1]:gsub('\\n', '\n'):gsub('[%z\001-\032\127]', ' '):gsub( '^%s*(.-)%s*$', '%1') end print('\rProcessed lines: '..n) fd:close() return word_list end function joe.constructToken(input, output, word_list) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = 
tonumber(content[1]) ofd:write('__label__', class) for i = 2, #content do content[i] = content[i]:gsub('\\n', '\n'):gsub('^%s*(.-)%s*$', '%1') for word in content[i]:gmatch('%d+') do local word_string = word_list[tonumber(word)] or '' ofd:write(' ', word_string) end end ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/convert_string_code.lua ================================================ --[[ Convert string serialization to code Copyright 2016 Xiang Zhang Usage: th convert_string_code.lua [input] [output] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_string.t7b' local output = arg[2] or '../data/dianping/train_string_code.t7b' print('Reading from '..input) local input_data = torch.load(input) print('Converting to code 
format') local output_data = joe.convert(input_data) print('Saving to '..output) torch.save(output, output_data) end function joe.convert(input_data) local output_data = {} output_data.code = input_data.index output_data.code_value = input_data.content return output_data end joe.main() return joe ================================================ FILE: data/dianping/count_chargram.lua ================================================ --[[ Parallelized chargram counting program Copyright Xiang Zhang 2016 Usage: th count_chargram.lua [input] [output_prefix] [grams] [chunks] [threads] [batch] [buffer] Comment: This program is a map-reduce like process. During map, each sample is separated into character-ngrams. During reduce, these character-ngrams are aggregated per-batch samples and output to file chunks. Which files chunk to put the gram is determined by a hash value of the gram string, therefore instances of the same gram always end up in the same file. This program is necessary because a linear aggregation program can easily overflow memory for several millions of samples. 
--]] local hash = require('hash') local io = require('io') local math = require('math') local tds = require('tds') local threads = require('threads') local torch = require('torch') local Queue = require('queue') -- Library configurations threads.serialization('threads.sharedserialize') -- A Logic Named Joe local joe = {} -- Constant values joe.SEED = 0 -- Main program entry function joe.main() local input = arg[1] or '../data/dianping/train_code.t7b' local output_prefix = arg[2] or '../data/dianping/train_chargram_count/' local num_grams = arg[3] and tonumber(arg[3]) or 5 local chunks = arg[4] and tonumber(arg[4]) or 100 local num_threads = arg[5] and tonumber(arg[5]) or 10 local batch = arg[6] and tonumber(arg[6]) or 100000 local buffer = arg[7] and tonumber(arg[7]) or 1000 print('Loading data from '..input) local data = torch.load(input) print('Opening output files with prefix '..output_prefix) local fds = {} for i = 1, chunks do fds[i] = io.open(output_prefix..tostring(i)..'.csv', 'w') end joe.fds = fds print('Setting finished threads to 0') joe.finished = 0 print('Creating record') joe.record = tds.Hash() print('Setting item counter to 0') joe.count = 0 print('Storing options') joe.batch = batch print('Creating queues') local queue = Queue(buffer) print('Creating mutex') local mutex = threads.Mutex() print('Creating '..num_threads..' 
threads') local init_thread = joe.initThread() local block = threads.Threads(num_threads, init_thread) block:specific(true) print('Deploying thread jobs') joe.deployThreads(data, num_grams, queue, mutex, block, num_threads) print('Entering main thread loop') while joe.finished < num_threads do local rpc = queue:pop() joe[rpc.func](unpack(rpc.arg)) end if math.fmod(joe.count, batch) ~= 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end print('Destroying mutex') mutex:free() print('Closing files') for _, fd in ipairs(fds) do fd:close() end print('Synchronizing and terminating the threads') block:synchronize() block:terminate() end -- Thread initialization callback function joe.initThread() return function () local torch = require('torch') local Queue = require('queue') end end -- Thread job deploying threads function joe.deployThreads(data, num_grams, queue, mutex, block, num_threads) local progress = torch.LongTensor(2) progress[1] = 1 progress[2] = 0 for i = 1, num_threads do print('Deploying job for thread '..i) local thread_job = joe.threadJob( data, num_grams, queue, mutex:id(), progress, i) block:addjob(i, thread_job) local rpc = queue:pop() while rpc.func ~= 'notifyDeploy' do joe[rpc.func](unpack(rpc.arg)) rpc = queue:pop() end print('rpc = notifyDeploy, thread = '..rpc.arg[1]) end end -- Write records to file function joe.writeRecord() for code, item in pairs(joe.record) do local chunk = hash.hash(code, joe.SEED, #joe.fds) + 1 joe.fds[chunk]:write( '"', code, '","', item[1]:gsub('\n', '\\n'):gsub('"', '""'), '","', item[2], '","', item[3], '"\n') end joe.record = tds.Hash() collectgarbage() end -- Thread job function joe.threadJob(data, num_grams, queue, mutex_id, progress, thread_id) local utf8str = joe.utf8str() return function() local math = require('math') local string = require('string') local threads = require('threads') local mutex = threads.Mutex(mutex_id) -- Notify the deployment queue:push{func = 'notifyDeploy', arg = 
{__threadid}} local code, code_value = data.code, data.code_value local class, item -- Obtain next sample local function nextSample() mutex:lock() if code[progress[1]] == nil then class = progress[1] item = progress[2] elseif code[progress[1]]:size(1) < progress[2] + 1 then progress[1] = progress[1] + 1 progress[2] = 1 class = progress[1] item = progress[2] else progress[2] = progress[2] + 1 class = progress[1] item = progress[2] end mutex:unlock() end local n = 0 nextSample() while code[class] ~= nil do n = n + 1 if math.fmod(n, 100) == 0 then queue:push{ func = 'print', arg = {__threadid, 'Processing class '..class..', item '..item.. ', total '..n}} collectgarbage() end local term_count, doc_count = {}, {} -- Iterate through the fields for i = 1, code[class][item]:size(1) do -- Iterate through the grams for j = 1, num_grams do -- Iterate through the positions for k = 1, code[class][item][i][2] - j + 1 do local code_string = tostring( code_value[code[class][item][i][1] + k - 1]) for l = 2, j do code_string = code_string..' '..tostring( code_value[code[class][item][i][1] + k - 1 + l - 1]) end if not term_count[code_string] then term_count[code_string] = 1 doc_count[code_string] = 1 else term_count[code_string] = term_count[code_string] + 1 end end end end -- Compress record to data local items = {} for code_string, _ in pairs(term_count) do local gram_string = '' for value in code_string:gmatch('[%S]+') do local value = tonumber(value) gram_string = gram_string.. 
((value <= 65536 and (value > 32 or value == 11)) and utf8str(value - 1) or ' ') end items[#items + 1] = { code_string, gram_string, term_count[code_string], doc_count[code_string]} end -- Send data to record queue:push{func = 'recordItem', arg = {__threadid, items}} nextSample() end -- Notify main thread that this thread has ended queue:push{func = 'notifyExit', arg = {__threadid}} end end -- Record item function joe.recordItem(thread_id, items) for _, item in pairs(items) do if joe.record[item[1]] then joe.record[item[1]][2] = joe.record[item[1]][2] + item[3] joe.record[item[1]][3] = joe.record[item[1]][3] + item[4] else joe.record[item[1]] = tds.Vec{item[2], item[3], item[4]} end end joe.count = joe.count + 1 -- Check write if math.fmod(joe.count, joe.batch) == 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end end -- Print information function joe.print(thread_id, message) print('rpc = print, thread = '..thread_id..', message = '..message) end -- Notify exit function joe.notifyExit(thread_id) joe.finished = joe.finished + 1 print('rpc = notifyExit, thread = '..thread_id.. 
', finished = '..joe.finished) end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str() local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} return function (decimal) local string = require('string') if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end end joe.main() return joe ================================================ FILE: data/dianping/count_wordgram.lua ================================================ --[[ Parallelized wordgram counting program Copyright Xiang Zhang 2016 Usage: th count_wordgram.lua [input] [output_prefix] [list] [grams] [chunks] [threads] [batch] [buffer] Comment: This program is a map-reduce like process. During map, each sample is separated into character-ngrams. During reduce, these character-ngrams are aggregated per-batch samples and output to file chunks. Which files chunk to put the gram is determined by a hash value of the gram string, therefore instances of the same gram always end up in the same file. This program is necessary because a linear aggregation program can easily overflow memory for several millions of samples. 
--]] local hash = require('hash') local io = require('io') local math = require('math') local tds = require('tds') local threads = require('threads') local torch = require('torch') local Queue = require('queue') -- Library configurations threads.serialization('threads.sharedserialize') -- A Logic Named Joe local joe = {} -- Constant values joe.SEED = 0 -- Main program entry function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output_prefix = arg[2] or '../data/dianping/train_wordgram_count/' local list = arg[3] or '../data/dianping/train_word_list.csv' local num_grams = arg[4] and tonumber(arg[4]) or 5 local chunks = arg[5] and tonumber(arg[5]) or 100 local num_threads = arg[6] and tonumber(arg[6]) or 10 local batch = arg[7] and tonumber(arg[7]) or 100000 local buffer = arg[8] and tonumber(arg[8]) or 1000 print('Loading data from '..input) local data = torch.load(input) print('Loading list from '..list) local freq, word_list = joe.readList(list) print('Opening output files with prefix '..output_prefix) local fds = {} for i = 1, chunks do fds[i] = io.open(output_prefix..tostring(i)..'.csv', 'w') end joe.fds = fds print('Setting finished threads to 0') joe.finished = 0 print('Creating record') joe.record = tds.Hash() print('Setting item counter to 0') joe.count = 0 print('Storing options') joe.batch = batch print('Creating queues') local queue = Queue(buffer) print('Creating mutex') local mutex = threads.Mutex() print('Creating '..num_threads..' 
threads') local init_thread = joe.initThread() local block = threads.Threads(num_threads, init_thread) block:specific(true) print('Deploying thread jobs') joe.deployThreads( data, word_list, num_grams, queue, mutex, block, num_threads) print('Entering main thread loop') while joe.finished < num_threads do local rpc = queue:pop() joe[rpc.func](unpack(rpc.arg)) end if math.fmod(joe.count, batch) ~= 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end print('Destroying mutex') mutex:free() print('Closing files') for _, fd in ipairs(fds) do fd:close() end print('Synchronizing and terminating the threads') block:synchronize() block:terminate() end -- Thread initialization callback function joe.initThread() return function () local torch = require('torch') local Queue = require('queue') end end -- Thread job deploying threads function joe.deployThreads( data, word_list, num_grams, queue, mutex, block, num_threads) local progress = torch.LongTensor(2) progress[1] = 1 progress[2] = 0 for i = 1, num_threads do print('Deploying job for thread '..i) local thread_job = joe.threadJob( data, word_list, num_grams, queue, mutex:id(), progress, i) block:addjob(i, thread_job) local rpc = queue:pop() while rpc.func ~= 'notifyDeploy' do joe[rpc.func](unpack(rpc.arg)) rpc = queue:pop() end print('rpc = notifyDeploy, thread = '..rpc.arg[1]) end end -- Write records to file function joe.writeRecord() for code, item in pairs(joe.record) do local chunk = hash.hash(code, joe.SEED, #joe.fds) + 1 joe.fds[chunk]:write( '"', code, '","', item[1]:gsub('\n', '\\n'):gsub('"', '""'), '","', item[2], '","', item[3], '"\n') end joe.record = tds.Hash() collectgarbage() end -- Thread job function joe.threadJob( data, word_list, num_grams, queue, mutex_id, progress, thread_id) local utf8str = joe.utf8str() return function() local math = require('math') local string = require('string') local threads = require('threads') local mutex = threads.Mutex(mutex_id) -- Notify the 
deployment queue:push{func = 'notifyDeploy', arg = {__threadid}} local code, code_value = data.code, data.code_value local class, item -- Obtain next sample local function nextSample() mutex:lock() if code[progress[1]] == nil then class = progress[1] item = progress[2] elseif code[progress[1]]:size(1) < progress[2] + 1 then progress[1] = progress[1] + 1 progress[2] = 1 class = progress[1] item = progress[2] else progress[2] = progress[2] + 1 class = progress[1] item = progress[2] end mutex:unlock() end local n = 0 nextSample() while code[class] ~= nil do n = n + 1 if math.fmod(n, 100) == 0 then queue:push{ func = 'print', arg = {__threadid, 'Processing class '..class..', item '..item.. ', total '..n}} collectgarbage() end local term_count, doc_count = {}, {} -- Iterate through the fields for i = 1, code[class][item]:size(1) do -- Iterate through the grams for j = 1, num_grams do -- Iterate through the positions for k = 1, code[class][item][i][2] - j + 1 do local code_string = tostring( code_value[code[class][item][i][1] + k - 1]) for l = 2, j do code_string = code_string..' '..tostring( code_value[code[class][item][i][1] + k - 1 + l - 1]) end if not term_count[code_string] then term_count[code_string] = 1 doc_count[code_string] = 1 else term_count[code_string] = term_count[code_string] + 1 end end end end -- Compress record to data local items = {} for code_string, _ in pairs(term_count) do local gram_string = '' for value in code_string:gmatch('[%S]+') do local value = tonumber(value) gram_string = gram_string..' 
'..(word_list[value] or '') end items[#items + 1] = { code_string, gram_string, term_count[code_string], doc_count[code_string]} end -- Send data to record queue:push{func = 'recordItem', arg = {__threadid, items}} nextSample() end -- Notify main thread that this thread has ended queue:push{func = 'notifyExit', arg = {__threadid}} end end -- Record item function joe.recordItem(thread_id, items) for _, item in pairs(items) do if joe.record[item[1]] then joe.record[item[1]][2] = joe.record[item[1]][2] + item[3] joe.record[item[1]][3] = joe.record[item[1]][3] + item[4] else joe.record[item[1]] = tds.Vec{item[2], item[3], item[4]} end end joe.count = joe.count + 1 -- Check write if math.fmod(joe.count, joe.batch) == 0 then print('Writing records to files at '..joe.count) joe.writeRecord() end end -- Print information function joe.print(thread_id, message) print('rpc = print, thread = '..thread_id..', message = '..message) end -- Notify exit function joe.notifyExit(thread_id) joe.finished = joe.finished + 1 print('rpc = notifyExit, thread = '..thread_id.. 
', finished = '..joe.finished) end -- UTF-8 encoding function -- Ref: http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol -- -in-lua function joe.utf8str() local bytemarkers = {{0x7FF, 192}, {0xFFFF, 224}, {0x1FFFFF, 240}} return function (decimal) local string = require('string') if decimal < 128 then return string.char(decimal) end local charbytes = {} for bytes,vals in ipairs(bytemarkers) do if decimal <= vals[1] then for b = bytes + 1, 2, -1 do local mod = decimal % 64 decimal = (decimal - mod) / 64 charbytes[b] = string.char(128+mod) end charbytes[1] = string.char(vals[2] + decimal) break end end return table.concat(charbytes) end end function joe.readList(list) local freq = {} local word_list = tds.Hash() local fd = io.open(list) for line in fd:lines() do local content = joe.parseCSVLine(line) content[2] = content[2]:gsub('\\n', '\n') freq[#freq + 1] = tonumber(content[3]) word_list[#freq] = content[1]:gsub('\\n', '\n') end return torch.Tensor(freq), word_list end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/limit_code.lua ================================================ --[[ Limit the maximum code value Copyright 2016 Xiang Zhang Usage: th limit_code.lua [input] [output] [limit] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_word.t7b' local output = arg[2] or '../data/dianping/train_word_limit.t7b' local limit = arg[3] and tonumber(arg[3]) or 200000 print('Loading data from '..input) local data = torch.load(input) print('Limiting code to '..limit) local code = joe.limitCode(data, limit) print('Saving to '..output) torch.save(output, code) end function joe.limitCode(data, limit) local code, code_value = data.code, data.code_value local preserve = code_value:le(limit):long() local replace = code_value:gt(limit):long() code_value:cmul(preserve):add(replace:mul(limit + 1)) return {code = code, code_value = code_value} end joe.main() return joe ================================================ FILE: data/dianping/limit_csvlines.sh ================================================ #!/bin/bash # Limit csv files to designated number of lines # Copyright 2015 Xiang Zhang # # Usage: bash limit_csvlines.sh [input] [output] [limit] set -x; set -e; head -n ${3:-1000001} $1 > $2; ================================================ FILE: data/dianping/queue.lua ================================================ --[[ Multithreaded queue based on tds Copyright 2015 Xiang Zhang --]] local class = require('pl.class') 
local ffi = require('ffi')
local serialize = require('threads.sharedserialize')
local tds = require('tds')
local threads = require('threads')
local torch = require('torch')

-- Append an underscore to distinguish between metatable and class name
local Queue_ = torch.class('Queue')

-- Constructor
-- n: buffer size
-- State layout: self.data maps slot index -> serialized item string;
-- self.pointer is a LongTensor where [1] is the next write slot, [2] is
-- the next read slot, and [3] is the current number of buffered items.
-- A mutex plus two condition variables make this a bounded blocking ring
-- buffer usable across threads.
function Queue_:__init(size)
   self.data = tds.hash()
   self.pointer = torch.LongTensor(3):fill(1)
   self.pointer[3] = 0
   self.size = size or 10
   self.mutex = threads.Mutex()
   self.added_condition = threads.Condition()
   self.removed_condition = threads.Condition()
end

-- Blocking push: serialize the item, wait until the buffer has a free
-- slot, store it, then signal a waiting consumer.
function Queue_:push(item)
   -- Serialize before taking the lock to keep the critical section short.
   local storage = serialize.save(item)
   self.mutex:lock()
   while self.pointer[3] == self.size do
      self.removed_condition:wait(self.mutex)
   end
   self.data[self.pointer[1]] = storage:string()
   -- Advance the write slot circularly (1-based ring arithmetic).
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
end

-- Blocking pop: wait until an item is available, take it, signal a
-- waiting producer, and return the deserialized item.
function Queue_:pop()
   self.mutex:lock()
   while self.pointer[3] == 0 do
      self.added_condition:wait(self.mutex)
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   -- Advance the read slot circularly.
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   -- Deserialize after releasing the lock.
   local item = serialize.load(storage)
   return item
end

-- Non-blocking push: return nil immediately when the buffer appears full
-- (checked once without the lock, then re-checked under it); otherwise
-- behaves like push and returns the item to indicate success.
function Queue_:push_async(item)
   if self.pointer[3] == self.size then
      return
   end
   local storage = serialize.save(item)
   self.mutex:lock()
   -- Re-check under the lock: another producer may have filled the
   -- buffer since the unlocked test above.
   if self.pointer[3] == self.size then
      self.mutex:unlock()
      return
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
   return item
end

-- Non-blocking pop: return nil immediately when the buffer appears empty
-- (same double-check pattern as push_async); otherwise behaves like pop.
function Queue_:pop_async()
   if self.pointer[3] == 0 then
      return
   end
   self.mutex:lock()
   -- Re-check under the lock.
   if self.pointer[3] == 0 then
      self.mutex:unlock()
      return
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2],
self.size) + 1 self.pointer[3] = self.pointer[3] - 1 self.mutex:unlock() self.removed_condition:signal() local item = serialize.load(storage) return item end function Queue_:free() self.mutex:free() self.added_condition:free() self.removed_condition:free() end function Queue_:__write(f) local data = self.data f:writeLong(torch.pointer(data)) tds.C.tds_hash_retain(data) local pointer = self.pointer f:writeLong(torch.pointer(pointer)) pointer:retain() f:writeObject(self.size) f:writeObject(self.mutex:id()) f:writeObject(self.added_condition:id()) f:writeObject(self.removed_condition:id()) end function Queue_:__read(f) local data = f:readLong() data = ffi.cast('tds_hash&', data) ffi.gc(data, tds.C.tds_hash_free) self.data = data local pointer = f:readLong() pointer = torch.pushudata(pointer, 'torch.LongTensor') self.pointer = pointer self.size = f:readObject() self.mutex = threads.Mutex(f:readObject()) self.added_condition = threads.Condition(f:readObject()) self.removed_condition = threads.Condition(f:readObject()) end -- Return class name, not the underscored metatable return Queue ================================================ FILE: data/dianping/remove_duplication.py ================================================ #!/usr/bin/python3 ''' Remove duplication from csv format file Copyright 2015 Xiang Zhang Usage: python3 remove_duplication.py -i [input] -o [output] ''' # Python 3 compatibility from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals # Input file INPUT = '../data/dianping/reviews_nonull.csv' # Output file OUTPUT = '../data/dianping/reviews_nodup.csv' import argparse import csv # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = 
args.input OUTPUT = args.output removeDuplicate() # Deduplicate the text using python set def removeDuplicate(): # Open the files ifd = open(INPUT, newline = '', encoding = 'utf-8') ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 valid = 0 s = set() for row in reader: line = ' '.join(row[1:]) n = n + 1 if line not in s: valid = valid + 1 s.add(line) writer.writerow(row) if n % 10000 == 0: print('\rProcessing line: {}, valid: {}'.format(n, valid), end = '') print('\rProcessed lines: {}, valid: {}'.format(n, valid)) if __name__ == '__main__': main() ================================================ FILE: data/dianping/remove_null.sh ================================================ #!/bin/bash # Remove NULL character from file # Copyright 2015 Xiang Zhang # # Usage: bash remove_null.sh [input] [output] set -x; set -e; tr -d '\000' < $1 > $2; ================================================ FILE: data/dianping/segment_roman_word.lua ================================================ --[[ Create romananized word data from romanized data in csv Copyright 2016 Xiang Zhang Usage: th segment_roman_word.lua [input] [output] [list] [read] --]] local ffi = require('ffi') local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/dianping/train_pinyin.csv' local output = arg[2] or '../data/dianping/train_pinyin_word.csv' local list = arg[3] or '../data/dianping/train_pinyin_word_list.csv' local read = (arg[4] == 'true') local word_index, word_total if read then print('Reading word index') word_index, word_total = joe.readWords(list) else print('Counting words') local word_count, word_freq = joe.splitWords(input) print('Sorting words by count') word_index, word_total = 
joe.sortWords(list, word_count, word_freq)
   end
   print('Constructing word index output')
   joe.constructWords(input, output, word_index, word_total)
end

-- Read a previously saved word list from a csv file.
-- Each row's first column is an escaped word; the word's line number
-- becomes its index (rows are presumably in rank order -- confirm against
-- joe.sortWords' output format).
-- Returns the word -> index hash and the total number of words read.
function joe.readWords(list)
   local word_index = tds.Hash()
   local fd = io.open(list)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: '..n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      -- Unescape newlines stored as the two-character sequence '\n'.
      content[1] = content[1]:gsub('\\n', '\n')
      word_index[content[1]] = n
   end
   print('\rProcessed lines: '..n)
   fd:close()
   return word_index, n
end

-- Scan the input csv and tally words.
-- Returns two hashes: word -> total occurrence count, and word -> document
-- frequency (fraction of input lines containing the word at least once).
-- Punctuation characters are padded with spaces so they tokenize as
-- standalone words.
function joe.splitWords(input)
   local word_count, word_freq = tds.Hash(), tds.Hash()
   local fd = io.open(input)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      -- Words already seen on this line, for document-frequency counting.
      -- Declared local: the original omitted 'local' and leaked a global.
      local field_set = {}
      for i = 2, #content do
         content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1")
         -- Surround punctuation with spaces so it splits into its own token.
         content[i] = content[i]:gsub('(%p)', ' %1 ')
         for word in content[i]:gmatch('[%S]+') do
            word_count[word] = (word_count[word] or 0) + 1
            if not field_set[word] then
               field_set[word] = true
               word_freq[word] = (word_freq[word] or 0) + 1
            end
         end
      end
   end
   print('\rProcessed lines: '..n)
   fd:close()
   -- Normalizing word frequencies
   for key, value in pairs(word_freq) do
      word_freq[key] = value / n
   end
   return word_count, word_freq
end

-- Sort words by descending count, write (word, count, frequency) rows to
-- the list csv, and return the word -> rank hash plus the vocabulary size.
function joe.sortWords(list, word_count, word_freq)
   -- Sort the list of words
   local word_list = tds.Vec()
   for word, _ in pairs(word_count) do
      word_list[#word_list + 1] = word
   end
   word_list:sort(function (w, v) return word_count[w] > word_count[v] end)
   -- Create the word index
   local word_index = tds.Hash()
   for index, word in ipairs(word_list) do
      word_index[word] = index
   end
   -- Write it to file
   local fd = io.open(list, 'w')
   for index, word in ipairs(word_list) do
      fd:write('"', word:gsub("\n", "\\n"):gsub("\"", "\"\""), '","',
               word_count[word], '","', word_freq[word], '"\n')
   end
   fd:close()
   return word_index, #word_list
end

function
joe.constructWords(input, output, word_index, word_total)
   -- Rewrite each csv row: keep the class field, replace every text field
   -- with the space-joined word indices (unknown words map to word_total + 1).
   local ifd = io.open(input)
   local ofd = io.open(output, 'w')
   local n = 0
   for line in ifd:lines() do
      n = n + 1
      if math.fmod(n, 10000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      ofd:write('"', content[1], '"')
      for i = 2, #content do
         content[i] = content[i]:gsub('\\n', '\n'):gsub("^%s*(.-)%s*$", "%1")
         content[i] = content[i]:gsub('(%p)', ' %1 ')
         local first_write = true
         ofd:write(',"')
         for word in content[i]:gmatch('[%S]+') do
            local index = word_index[word] or word_total + 1
            if first_write then
               first_write = false
               ofd:write(index)
            else
               ofd:write(' ', index)
            end
         end
         ofd:write('"')
      end
      ofd:write('\n')
   end
   print('\rProcessed lines: '..n)
   ifd:close()
   ofd:close()
end

-- Parsing csv line
-- Ref: http://lua-users.org/wiki/LuaCsv
function joe.parseCSVLine(line,sep)
   local res = {}
   local pos = 1
   sep = sep or ','
   while true do
      local c = string.sub(line,pos,pos)
      if (c == "") then break end
      if (c == '"') then
         -- quoted value (ignore separator within)
         local txt = ""
         repeat
            local startp,endp = string.find(line,'^%b""',pos)
            txt = txt..string.sub(line,startp+1,endp-1)
            pos = endp + 1
            c = string.sub(line,pos,pos)
            if (c == '"') then txt = txt..'"' end
            -- check first char AFTER quoted string, if it is another
            -- quoted string without separator, then append it
            -- this is the way to "escape" the quote char in a quote.
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Chinese datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 segment_word.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/dianping/train.csv' #Output file OUTPUT = '../data/dianping/train_word.csv' # List file LIST = '../data/dianping/train_word_list.csv' # Read already defined word list READ = False import argparse import csv import jieba # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in 
reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = jieba.cut(field) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, 
quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = jieba.cut(field) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/dianping/select_data.lua ================================================ --[[ Select data from non-duplicate datasets Copyright 2015 Xiang Zhang Usage: th select_data.lua [count] [input] [output] --]] local io = require('io') local math = require('math') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local count = arg[1] or '../data/dianping/reviews_count.csv' local input = arg[2] or '../data/dianping/reviews_nodup.csv' local output = arg[3] or '../data/dianping/data.csv' local map = {} local index = {} local cfd = io.open(count) for line in cfd:lines() do local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local target = tonumber(content[2]) local total = tonumber(content[3]) local choose = tonumber(content[4]) print('Constructing index '..class..'>'..target..': '..choose..'/'..total) map[class] = target index[class] = torch.ByteTensor(total):fill(1) local perm = torch.randperm(total) for i = 1, total - choose do index[class][perm[i]] = 0 end end cfd:close() local n = 0 local progress = {} local ifd = io.open(input) local ofd = io.open(output, 'w') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local target = map[class] progress[class] = progress[class] and 
progress[class] + 1 or 1 if index[class] and index[class][progress[class]] == 1 then ofd:write( '"', target, '"', (line:sub(content[1]:len() + 3) or ''), '\n') end end print('\rProcessed lines: '..n) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/dianping/shuffle_lines.sh ================================================ #!/bin/bash # Shuffle lines in a text file # Copyright 2017 Xiang Zhang # # Usage: bash shuffle_lines.sh [input] [output] set -x; set -e; shuf $1 > $2; ================================================ FILE: data/dianping/sort_gram_count.sh ================================================ #!/bin/bash # Sort distributed grams file # Copyright 2016 Xiang Zhang # # Usage: bash sort_gram_count.sh [input_directory] [output_directory] [temporary] [memory] set -x; set -e; for file in $1/*.csv; do sort -S ${4:-50%} -t ',' -k1,1 -T ${3:-/scratch} $file > $2/`basename 
$file` done;

================================================
FILE: data/dianping/sort_gram_list.sh
================================================
#!/bin/bash

# Sort list of grams and cut the count
# Copyright 2016 Xiang Zhang
#
# Usage: bash sort_gram_list.sh [input] [output] [temporary] [memory]

set -x;
set -e;

# Fix: quote the file arguments so paths containing spaces work
sort -S ${4:-50%} -t ',' -k1,1nr -T ${3:-/scratch} "$1" | cut -f 2- -d ',' > "$2";

================================================
FILE: data/dianping/split_lines.sh
================================================
#!/bin/bash

# Split lines in a text file
# Copyright 2017 Xiang Zhang
#
# Usage: bash split_lines.sh [lines] [input] [output_prefix]
#
# Note: .txt postfix will be automatically added.

set -x;
set -e;

# Fix: quote the arguments so paths containing spaces work
split -d -a 1 --additional-suffix=.txt -l "$1" "$2" "$3";

================================================
FILE: data/dianping/split_train.lua
================================================
--[[
Split data into training and testing subsets
Copyright 2015 Xiang Zhang

Usage: th split_train [count] [input] [train] [test]
--]]

local io = require('io')
local math = require('math')
local torch = require('torch')

-- A Logic Named Joe
local joe = {}

-- Read per-class totals and test counts, build a random test mask per class,
-- then route each input row to the train or test file.
function joe.main()
   local count = arg[1] or '../data/dianping/data_count.csv'
   local input = arg[2] or '../data/dianping/data.csv'
   local train = arg[3] or '../data/dianping/train.csv'
   local test = arg[4] or '../data/dianping/test.csv'

   local index = {}
   local cfd = io.open(count)
   for line in cfd:lines() do
      local content = joe.parseCSVLine(line)
      local class = tonumber(content[1])
      local total = tonumber(content[2])
      local train_count = tonumber(content[3])
      local test_count = tonumber(content[4])
      print('Constructing index '..class..': '..train_count..
','..test_count..','..total) index[class] = torch.ByteTensor(total):zero() local perm = torch.randperm(total) for i = 1, test_count do index[class][perm[i]] = 1 end end cfd:close() local n = 0 local progress = {} local ifd = io.open(input) local trfd = io.open(train, 'w') local tefd = io.open(test, 'w') for line in ifd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) progress[class] = progress[class] and progress[class] + 1 or 1 if index[class] and index[class][progress[class]] == 0 then trfd:write(line, '\n') else tefd:write(line, '\n') end end print('\rProcessed lines: '..n) ifd:close() trfd:close() tefd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/ifeng/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/ifeng/article' OUTPUT = '../data/ifeng/topic/news.csv' # Classes # 1: Mainlaind China Politics # 2: International # 3: Taiwan, Hong Kong and Macau Politics # 4: Military # 5: Society CLASSES = {'11528': 1, '11574': 2, '11490': 3, '7609': 3, '4550': 4, '7837': 5} def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files for prefix in CLASSES: files = glob.glob(INPUT + '/' + prefix + '_*.json.xz') index = CLASSES[prefix] n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] n = n + 1 writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/jd/count_data.lua ================================================ --[[ Count data for each class and length Copyright 2016 Xiang Zhang Usage: th count_data.lua [input] [output] --]] local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/jd/sentiment/comment_sorted_nonull.csv' local output = arg[2] or '../data/jd/sentiment/comment_sorted_count.t7b' print('Counting data') local count = joe.count(input) joe.count = count print('Saving to '..output) torch.save(output, count) print('Plotting result') joe.plot(count) end function joe.count(input) local count = {} local max_class = 0 local max_length = 0 local fd = io.open(input) local n = 0 for line in fd:lines() do n = n + 1 if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n) io.flush() end local content = joe.parseCSVLine(line) local class = tonumber(content[1]) local length = 0 for i = 2, #content do length = length + content[i]:gsub("^%s*(.-)%s*$", "%1"):len() end count[class] = count[class] or {} count[class][length] = (count[class][length] or 0) + 1 if class > max_class then max_class = class end if length > max_length then max_length = length end end print('\rProcessed lines: '..n) print('total classes = '..max_class..', maximum length = '..max_length) fd:close() local result = torch.Tensor(max_class, max_length):zero() for class, class_count in pairs(count) do if class > 0 
then for length, length_count in pairs(class_count) do if length > 0 then result[class][length] = length_count end end end end return result end function joe.plot(count) require('gnuplot') local cumulated = count:cumsum(2) local plots = {} for class = 1, cumulated:size(1) do plots[class] = {tostring(class), cumulated[class], '-'} end local figure = gnuplot.figure() gnuplot.plot(unpack(plots)) end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/jd/create_comment.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of comments Copyright 2016 Xiang Zhang Usage: python3 create_data.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/jd/comment/*.json.xz' OUTPUT = '../data/jd/sentiment/comment.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) score = int(review['content'].get('score', -1)) title = review['content'].get('title', '') content = review['content'].get('content', '') if score != -1: n = n + 1 writer.writerow([score, title.replace('\n', '\\n'), content.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/jd/limit_length.lua ================================================ --[[ Limit length for data Copyright 2016 Xiang Zhang Usage: th limit_length.lua [input] [output] [min] [max] --]] -- A Logic Named Joe local joe = {} function joe.main() local input = arg[1] or '../data/jd/sentiment/comment_sorted_nonull.csv' local output = arg[2] or '../data/jd/sentiment/comment_sorted_limited.csv' local min = tonumber(arg[3] or 0) local max = tonumber(arg[4] or math.huge) print('Limiting data') joe.limit(input, output, min, max) end function joe.limit(input, output, min, max) local ifd = io.open(input) local ofd = io.open(output, 'w') local n = 0 local m = 0 for line in ifd:lines() do n = n + 1 local content = joe.parseCSVLine(line) local length = 0 for i = 2, #content do length = length + content[i]:gsub("^%s*(.-)%s*$", "%1"):len() end if length >= min and length <= max then m = m + 1 ofd:write(line, '\n') end if math.fmod(n, 100000) == 0 then io.write('\rProcessing line: ', n, ', Saved lines: ', m) io.flush() end end print('\rProcessed lines: '..n..', Saved lines: '..m) ifd:close() ofd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine (line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" 
repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/jd/sort_data.sh ================================================ #!/bin/bash # Sort comma-separated file starting from the second field # Copyright 2016 Xiang Zhang # # Usage: bash sort_data.sh [input_file] [output_file] [temporary] [memory] set -x; set -e; sort -S ${4:-50%} -t ',' -k2 -u -T ${3:-/scratch} $1 > $2; ================================================ FILE: data/joint/combine_word.lua ================================================ --[[ Combine two word data together Copyright 2016 Xiang Zhang Usage: th combine_word_list.lua [input_1] [list_1] [input_2] [list_2] ... 
[output] [list] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input = {} local input_list = {} for i = 1, math.floor(#arg / 2) - 1 do input[i] = arg[2 * i - 1] input_list[i] = arg[2 * i] end local output = arg[math.floor(#arg / 2) * 2 - 1] or '../data/joint/binary_train_word.csv' local output_list = arg[math.floor(#arg / 2) * 2] or '../data/joint/binary_train_word_list.csv' print('Loading output list from '..output_list) local list, count, freq, dict = joe.readList(output_list) print('Opening output file '..output) local ofd = io.open(output, 'w') for i = 1, #input do print('Loading input list from '..input_list[i]) local local_list, local_count, local_freq, local_dict = joe.readList(input_list[i]) print('Building input to output map') local map = joe.buildMap(local_list, dict) print('Processing data from '..input[i]) joe.processInput(input[i], map, ofd, list) end print('Closing output file '..output) ofd:close() end function joe.readList(file) local list = tds.Vec() local count = tds.Vec() local freq = tds.Vec() local dict = tds.Hash() local fd = io.open(file) for line in fd:lines() do local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') list:insert(content[1]) count:insert(tonumber(content[2])) freq:insert(tonumber(content[3])) dict[content[1]] = #list end fd:close() return list, count, freq, dict end function joe.buildMap(input_list, dict) local map = tds.Vec() for i = 1, #input_list do map[i] = dict[input_list[i]] end return map end function joe.processInput(input, map, ofd, list) local ifd = io.open(input) local n = 0 for line in ifd:lines() do n = n + 1 if math.fmod(n, 10000) == 0 then io.write('\rProcessing line: ', n) io.flush() end -- Write class local content = joe.parseCSVLine(line) ofd:write('"', content[1], '"') -- Write title and comment for i = 2, #content do ofd:write(',"') for word in 
content[i]:gmatch('%d+') do ofd:write(map[tonumber(word)] or #list + 1, ' ') end ofd:write('"') end -- Write end of line ofd:write('\n') end print('\rProcessed lines: '..n) ifd:close() end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/joint/combine_word_list.lua ================================================ --[[ Combine two word data together Copyright 2016 Xiang Zhang Usage: th combine_word_list.lua [list_1] [size_1] [list_2] [size_2] ... 
[output] --]] local io = require('io') local math = require('math') local tds = require('tds') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() local input_list = {} local input_size = {} for i = 1, math.floor(#arg / 2) do input_list[i] = arg[2 * i - 1] input_size[i] = arg[2 * i] end local output_list = arg[math.floor(#arg / 2) * 2 + 1] or '../data/joint/binary_train_word_list.csv' local word = {} for i = 1, #input_list do print('Loading list from '..input_list[i]) local list, count, freq, dict = joe.readInputList(input_list[i]) word[i] = {list = list, count = count, freq = freq, dict = dict} end print('Merging word lists') local list, count_table, freq_table, dict = joe.mergeWords(word, input_size) print('Writing merged word list to '..output_list) joe.writeOutputList(output_list, list, count_table, freq_table, dict) end function joe.readInputList(file) local list = tds.Vec() local count = tds.Vec() local freq = tds.Vec() local dict = tds.Hash() local fd = io.open(file) for line in fd:lines() do local content = joe.parseCSVLine(line) content[1] = content[1]:gsub('\\n', '\n') list:insert(content[1]) count:insert(tonumber(content[2])) freq:insert(tonumber(content[3])) dict[content[1]] = #list end fd:close() return list, count, freq, dict end function joe.writeOutputList(file, list, count_table, freq_table, dict) local fd = io.open(file, 'w') for index, word in ipairs(list) do fd:write('"', word:gsub('\n', '\\n'):gsub('"', '""'), '","', count_table[word], '","', freq_table[word], '"\n') end fd:close() end function joe.mergeWords(word, size) local total_size = 0 for i, s in ipairs(size) do total_size = total_size + s end local list = tds.Vec() local count_table = tds.Hash() local freq_table = tds.Hash() for i, w in ipairs(word) do for j, v in ipairs(w.list) do if count_table[v] == nil then list:insert(v) count_table[v] = w.count[j] freq_table[v] = w.freq[j] * size[i] / total_size else count_table[v] = count_table[v] + w.count[j] 
freq_table[v] = freq_table[v] + w.freq[j] * size[i] / total_size end if math.fmod(j, 100000) == 0 then io.write('\rProcessing list ', i, ': ', j, '/', #w.list) io.flush() end end print('\rProcessed list '..i..': '..(#w.list)..'/'..(#w.list)) end print('Sorting merged word list') list:sort(function(a, b) return count_table[a] > count_table[b] end) print('Constructing merged word dictionary') local dict = tds.Hash() for i, w in ipairs(list) do dict[w] = i end return list, count_table, freq_table, dict end -- Parsing csv line -- Ref: http://lua-users.org/wiki/LuaCsv function joe.parseCSVLine(line,sep) local res = {} local pos = 1 sep = sep or ',' while true do local c = string.sub(line,pos,pos) if (c == "") then break end if (c == '"') then -- quoted value (ignore separator within) local txt = "" repeat local startp,endp = string.find(line,'^%b""',pos) txt = txt..string.sub(line,startp+1,endp-1) pos = endp + 1 c = string.sub(line,pos,pos) if (c == '"') then txt = txt..'"' end -- check first char AFTER quoted string, if it is another -- quoted string without separator, then append it -- this is the way to "escape" the quote char in a quote. 
until (c ~= '"') table.insert(res,txt) assert(c == sep or c == "") pos = pos + 1 else -- no quotes used, just look for the first separator local startp,endp = string.find(line,sep,pos) if (startp) then table.insert(res,string.sub(line,pos,startp-1)) pos = endp + 1 else -- no separator found -> use rest of string and terminate table.insert(res,string.sub(line,pos)) break end end end return res end joe.main() return joe ================================================ FILE: data/nytimes/construct_topic.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of news articles Copyright 2016 Xiang Zhang Usage: python3 construct_topic.py -i [input directory] -o [output file] ''' import argparse import csv import glob import json import lzma import re import urllib.parse INPUT = '../data/nytimes/article' OUTPUT = '../data/nytimes/topic/news.csv' CLASS = '../data/nytimes/topic/class.csv' def main(): global INPUT global OUTPUT global CLASS parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file directory', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument( '-c', '--classes', help = 'Class file', default = CLASS) args = parser.parse_args() INPUT = args.input OUTPUT = args.output CLASS = args.classes createData() def createData(): # Open the category file classes = dict() count = 0 # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT + '/*.json.xz') n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. 
Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: news = json.loads(line) title = news.get('title', '') content = news.get('content', list()) abstract = '' if len(content) > 0: abstract = content[0] url = news.get('url', '') if url != '': path = urllib.parse.urlparse(url).path start_match = re.match(r'/\d\d\d\d/\d\d/\d\d/', path) end_match = re.match(r'/\d\d\d\d/\d\d/\d\d/[^/]+', path) if start_match != None and end_match != None: classname = path[start_match.end():end_match.end()] if classes.get(classname, None) == None: classes[classname] = count + 1 count = count + 1 index = classes[classname] writer.writerow([index, title.replace('\n', '\\n'), abstract.replace('\n', '\\n')]) n = n + 1 ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() # Open the class file cfd = open(CLASS, 'w', newline = '', encoding = 'utf-8') class_writer = csv.writer( cfd, quoting = csv.QUOTE_ALL, lineterminator = '\n') for key in classes: class_writer.writerow([classes[key], key]) cfd.close() if __name__ == '__main__': main()
================================================ FILE: data/nytimes/count_class.lua ================================================
--[[
Count the number of samples for each class in a CSV dataset

Copyright 2016 Xiang Zhang

Usage: th count_class.lua [input] [output]
--]]

local torch = require('torch')

-- A Logic Named Joe
local joe = {}

-- Entry point: count samples per class in the input CSV and serialize the
-- resulting table to the output file.
function joe.main()
   local input = arg[1] or '../data/nytimes/topic/news_sorted.csv'
   local output = arg[2] or '../data/nytimes/topic/news_sorted_class.t7b'

   print('Counting data')
   local count = joe.count(input)

   -- NOTE(review): the original assigned the result table to joe.count here,
   -- clobbering the joe.count function after use. The assignment served no
   -- purpose and has been removed.
   print('Saving to '..output)
   torch.save(output, count)
end

-- Count the number of lines per class. The class index is read from the
-- first CSV field of each line. Returns a table mapping class -> count.
function joe.count(input)
   local count = {}
   local fd = io.open(input)
   local n = 0
   for line in fd:lines() do
      n = n + 1
      if math.fmod(n, 100000) == 0 then
         io.write('\rProcessing line: ', n)
         io.flush()
      end
      local content = joe.parseCSVLine(line)
      local class = tonumber(content[1])
      -- The original also accumulated the total text length of each line into
      -- an unused local; that dead code has been dropped.
      count[class] = (count[class] or 0) + 1
   end
   print('\rProcessed lines: '..n)
   fd:close()
   return count
end

-- Parsing csv line
-- Ref: http://lua-users.org/wiki/LuaCsv
function joe.parseCSVLine(line, sep)
   local res = {}
   local pos = 1
   sep = sep or ','
   while true do
      local c = string.sub(line, pos, pos)
      if (c == "") then break end
      if (c == '"') then
         -- quoted value (ignore separator within)
         local txt = ""
         repeat
            local startp, endp = string.find(line, '^%b""', pos)
            txt = txt..string.sub(line, startp + 1, endp - 1)
            pos = endp + 1
            c = string.sub(line, pos, pos)
            if (c == '"') then txt = txt..'"' end
            -- check first char AFTER quoted string, if it is another
            -- quoted string without separator, then append it
            -- this is the way to "escape" the quote char in a quote.
         until (c ~= '"')
         table.insert(res, txt)
         assert(c == sep or c == "")
         pos = pos + 1
      else
         -- no quotes used, just look for the first separator
         local startp, endp = string.find(line, sep, pos)
         if (startp) then
            table.insert(res, string.sub(line, pos, startp - 1))
            pos = endp + 1
         else
            -- no separator found -> use rest of string and terminate
            table.insert(res, string.sub(line, pos))
            break
         end
      end
   end
   return res
end

joe.main()
return joe
================================================ FILE: data/rakuten/construct_hepburn.py ================================================ #!/usr/bin/python3 ''' Convert Japanese datasets to Hepburn Romanization Copyright 2016 Xiang Zhang Usage: python3 construct_hepburn.py -i [input] -o [output] ''' # Input file INPUT = '../data/rakuten/sentiment/full_train.csv' # Output file OUTPUT = '../data/rakuten/sentiment/full_train_hepburn.csv' import argparse import csv import MeCab import romkan import unidecode # Main program def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument(
'-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() INPUT = args.input OUTPUT = args.output mecab = MeCab.Tagger() convertRoman(mecab) def romanizeText(mecab, text): parsed = mecab.parse(text) result = list() for token in parsed.split('\n'): splitted = token.split('\t') if len(splitted) == 2: word = splitted[0] features = splitted[1].split(',') if len(features) > 7 and features[7] != '*': result.append(romkan.to_hepburn(features[7])) else: result.append(word) return result # Convert the text in Chinese to pintin def convertRoman(mecab): # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): new_row.append(' '.join(map( str.strip, map(lambda s: s.replace('\n', '\\n'), map(unidecode.unidecode, romanizeText(mecab, row[i])))))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) if __name__ == '__main__': main() ================================================ FILE: data/rakuten/create_review.py ================================================ #!/usr/bin/python3 ''' Create data from list of LZMA compressed archives of reviews Copyright 2016 Xiang Zhang Usage: python3 create_data.py -i [input file pattern] -o [output file] ''' import argparse import csv import glob import json import lzma INPUT = '../data/rakuten/review/*.json.xz' OUTPUT = '../data/rakuten/sentiment/review.csv' def main(): global INPUT global OUTPUT parser = argparse.ArgumentParser() parser.add_argument( '-i', '--input', help = 'Input file pattern', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) args = parser.parse_args() 
INPUT = args.input OUTPUT = args.output createData() def createData(): # Open the output file ofd = open(OUTPUT, 'w', newline = '', encoding = 'utf-8') writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Grab the files files = glob.glob(INPUT) n = 0 filecount = 0 for filename in files: filecount = filecount + 1 print('Processing file {}/{}: {}. Processed items {}.'.format( filecount, len(files), filename, n)) try: ifd = lzma.open(filename, 'rt', encoding = 'utf-8') for line in ifd: review = json.loads(line) rate = review.get('rate', '') title = review.get('title', '') comment = review.get('comment', '') if rate != '': n = n + 1 writer.writerow([rate, title.replace('\n', '\\n'), comment.replace('\n', '\\n')]) ifd.close() except Exception as e: print('Exception (ignored): {}'.format(e)) ofd.close() if __name__ == '__main__': main() ================================================ FILE: data/rakuten/segment_word.py ================================================ #!/usr/bin/python3 ''' Convert Japanese datasets to Index of Words Copyright 2016 Xiang Zhang Usage: python3 construct_pinyin.py -i [input] -l [list] -o [output] [-r] ''' #Input file INPUT = '../data/rakuten/sentiment/full_train.csv' #Output file OUTPUT = '../data/rakuten/sentiment/full_train_word.csv' # List file LIST = '../data/rakuten/sentiment/full_train_word_list.csv' # Read already defined word list READ = False import argparse import csv import MeCab # Main program def main(): global INPUT global OUTPUT global LIST parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', help = 'Input file', default = INPUT) parser.add_argument( '-o', '--output', help = 'Output file', default = OUTPUT) parser.add_argument('-l', '--list', help = 'Word list file', default = LIST) parser.add_argument( '-r', '--read', help = 'Read from list file', action = 'store_true') args = parser.parse_args() INPUT = args.input OUTPUT = args.output LIST = args.list READ = args.read if READ: 
print('Reading word index') word_index = readWords() else: print('Counting words') word_count, word_freq = segmentWords() print('Sorting words by count') word_index = sortWords(word_count, word_freq) print('Constructing word index output') convertWords(word_index) # Read from pre-existing word list def readWords(): # Open the files ifd = open(LIST, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_index = dict() n = 0 for row in reader: word = row[0].replace('\\n', '\n') word_index[word] = n + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) return word_index # Segment the text in Chinese def segmentWords(): mecab = MeCab.Tagger() # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) # Loop over the csv rows word_count = dict() word_freq = dict() n = 0 for row in reader: field_set = set() for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = list() parsed_result = mecab.parse(field) for token in parsed_result.split('\n'): splitted_token = token.split('\t') if len(splitted_token) == 2: word = splitted_token[0] field_list.append(word) for word in field_list: word_count[word] = word_count.get(word, 0) + 1 if word not in field_set: field_set.add(word) word_freq[word] = word_freq.get(word, 0) + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() # Normalizing word frequency for word in word_freq: word_freq[word] = float(word_freq[word]) / float(n) return word_count, word_freq # Sort words for a given count dictionary object def sortWords(word_count, word_freq): # Sort the words word_list = sorted( word_count, key = lambda word: word_count[word], reverse = True) # Open the files ofd = open(LIST, 'w', encoding = 'utf-8', newline = '') writer = csv.writer(ofd, quoting = 
csv.QUOTE_ALL, lineterminator = '\n') # Loop over all the words word_index = dict() n = 0 for i in range(len(word_list)): word = word_list[i] row = [word.replace('\n', '\\n'), str(word_count[word]), str(word_freq[word])] writer.writerow(row) word_index[word] = i + 1 n = n + 1 if n % 1000 == 0: print('\rProcessing word: {}'.format(n), end = '') print('\rProcessed words: {}'.format(n)) ofd.close() return word_index # Convert the text in Chinese to word list def convertWords(word_index): mecab = MeCab.Tagger() # Open the files ifd = open(INPUT, encoding = 'utf-8', newline = '') ofd = open(OUTPUT, 'w', encoding = 'utf-8', newline = '') reader = csv.reader(ifd, quoting = csv.QUOTE_ALL) writer = csv.writer(ofd, quoting = csv.QUOTE_ALL, lineterminator = '\n') # Loop over the csv rows n = 0 for row in reader: new_row = list() new_row.append(row[0]) for i in range(1, len(row)): field = row[i].replace('\\n', '\n') field_list = list() parsed_result = mecab.parse(field) for token in parsed_result.split('\n'): splitted_token = token.split('\t') if len(splitted_token) == 2: word = splitted_token[0] field_list.append(word) new_row.append(' '.join(map( str, map(lambda word: word_index.get(word, len(word_index) + 1), field_list)))) writer.writerow(new_row) n = n + 1 if n % 1000 == 0: print('\rProcessing line: {}'.format(n), end = '') print('\rProcessed lines: {}'.format(n)) ifd.close() ofd.close() if __name__ == '__main__': main() ================================================ FILE: doc/dianping.md ================================================ # Dianping This documentation contains information on how to reproduce all the results for the `Dianping` datasets in the paper. The root directory `/` in this documentation indicates the root directory of this repository. ## Download the dataset Original text data for training and testing are available via these two links: [`train.csv.xz`](https://goo.gl/uKPxyo) [`test.csv.xz`](https://goo.gl/2QZpLx). 
When you download them, make sure to put them in the `/data/data/dianping` directory and unxz so that you have `train.csv` and `test.csv` available. ## GlyphNet This section introduces how to prepare and run GlyphNet experiments. ### Prepare GNU Unifont Running the glyphnet training script requires the GNU Unifont character images. We have built these images into a Torch 7 binary serialization file and it can be downloaded via this link: [`unifont-8.0.1.t7b.xz`](https://goo.gl/aFxYHq). After downloading, put it in the `/unifont/unifont` directory and unxz so that you have `unifont-8.0.1.t7b` available. ### Build Byte Serialization Files The next step is to build the serialized code files. The first step is to build the string serialization files. Switch to the `/data/dianping` directory, then execute the following commands ```bash th construct_string.lua ../data/dianping/train.csv ../data/dianping/train_string.t7b th construct_string.lua ../data/dianping/test.csv ../data/dianping/test_string.t7b ``` These 2 commands will build byte serialization files for the samples in their original language. It assumes the texts are contained in a comma-separated-value format in which the first field is treated as the class index (starting from 1), and the remaining fields are all texts. The output files contain a lua table that has the following members * `index`: a table that contains index tensors for each class. For example `index[i]` is an n x m x 2 `LongTensor` that contains the starting position and length of byte string representing each sample in class i. We assume that class i contains n samples, and there are m text fields in the CSV file. * `content`: a `ByteTensor` that contains the serialization of the strings of all samples. Each string is ended with a 0 byte, which is not included in the length count in `index`.
### Build Unicode Serialization Files From this byte-level serialization, we will be able to construct serialization files that contain unicode values to be used in the `glyphnet` training scripts. To do this, execute the following 2 commands ```bash th construct_code.lua ../data/dianping/train_string.t7b ../data/dianping/train_code.t7b th construct_code.lua ../data/dianping/test_string.t7b ../data/dianping/test_code.t7b ``` Each of these code files contains a lua table that has 2 `LongTensor` members: `code` and `code_value`. They have a similar structure as the `index` and `content` members of the byte serialization files, but in this case they are for unicode values. ### Execute the Experiments Then, you can switch to `/glyphnet`, and execute the following scripts to run the training program for the large GlyphNet ```bash mkdir -p models/dianping/spatial8temporal12length512feature256 ./archive/dianping_spatial8temporal12length512feature256.sh ``` The first command simply creates a directory where checkpointing files will be written into during training. Note that the shell scripts also accept command-line parameters and pass them directly to the training program. The most useful ones are probably `-driver_visualize false` and `-driver_plot false`, that disable visualization and plotting so that you can run the training programs on a headless server. You can also use `-driver_resume true` to resume from checkpointed experiments. These parameters are available for all Torch 7 training programs. Similarly, the following commands execute the experiment for the small GlyphNet ```bash mkdir -p models/dianping/spatial6temporal8length486feature256 ./archive/dianping_spatial6temporal8length486feature256.sh ``` ## OnehotNet This section details how to execute OnehotNet experiments. Note that OnehotNet in this article is operating at byte-level for either the original text or the romanized text. In the case of romanized text, it is the same as character-level.
### Byte-Level OnehotNet for Original Text To train OnehotNet for the original text, we only need the previously built byte serialization files. If you do not have them, see previous sections for using `construct_string.lua` data processing scripts. #### Execute the Experiments Assuming your current working directory is `/onehotnet`, the following commands execute experiments for large OnehotNet on the original text samples. ```bash mkdir -p models/dianping/onehot4temporal12length2048feature256 ./archive/dianping_onehot4temporal12length2048feature256.sh ``` Similarly, the small OnehotNet experiments can be done using the following commands ```bash mkdir -p models/dianping/onehot4temporal8length1944feature256 ./archive/dianping_onehot4temporal8length1944feature256.sh ``` ### Character-Level OnehotNet for Romanized Text This section details how to execute OnehotNet for romanized text. But before that, we need to build the romanized data first. #### Build Romanized Text Serialization Files The first step is to convert the original text into a romanization format. This is done in this project automatically using the [`pypinyin`](https://github.com/mozillazg/python-pinyin) package (version 0.12 for the results in the paper). You also want to install [`jieba`](https://github.com/fxsjy/jieba) (version 0.38 for the results in the paper) so that `pypinyin` can use it for word segmentation. All these packages were installed in a Python 3 environment. Switch the working directory to `/data/dianping`, the following commands convert the original text to a romanization format for the Dianping dataset. ```bash python3 construct_pinyin.py -i ../data/dianping/train.csv -o ../data/dianping/train_pinyin.csv python3 construct_pinyin.py -i ../data/dianping/test.csv -o ../data/dianping/test_pinyin.csv ``` Then, we can use `construct_string.lua` again for constructing the byte serialization of romanized texts.
```bash th construct_string.lua ../data/dianping/train_pinyin.csv ../data/dianping/train_pinyin_string.t7b th construct_string.lua ../data/dianping/test_pinyin.csv ../data/dianping/test_pinyin_string.t7b ``` #### Execute the Experiments Assuming your current working directory is `/onehotnet`, the following commands execute experiments for large OnehotNet on the romanized text samples. ```bash mkdir -p models/dianping/onehot4temporal12length2048feature256roman ./archive/dianping_onehot4temporal12length2048feature256roman.sh ``` Similarly, the small OnehotNet experiments can be done using the following commands ```bash mkdir -p models/dianping/onehot4temporal8length1944feature256roman ./archive/dianping_onehot4temporal8length1944feature256roman.sh ``` ## EmbedNet This section introduces how to build the data files and execute experiments for EmbedNet. ### Character-Level EmbedNet for Original Text Since we already built the serialization data files for unicode characters for GlyphNet, we can directly use them. The only step required is to run the commands for training the models. Assuming the current working directory is `/embednet`, the following commands will start the training process for large character-level EmbedNet. ```bash mkdir -p models/dianping/temporal12length512feature256 ./archive/dianping_temporal12length512feature256.sh ``` And for small character-level EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256 ./archive/dianping_temporal8length486feature256.sh ``` ### Byte-Level EmbedNet for Original Text This section details how to train byte-level EmbedNet for the original text. #### Convert Byte Serialization Files Since the EmbedNet training program assumes the data files contain a table of 2 members `code` and `code_value`, we need to change the variable names in the string serialization files to match this.
This can be done in `/data/dianping` by executing the following commands ```bash th convert_string_code.lua ../data/dianping/train_string.t7b ../data/dianping/train_string_code.t7b th convert_string_code.lua ../data/dianping/test_string.t7b ../data/dianping/test_string_code.t7b ``` #### Execute the Experiments Assuming the current working director is `/embednet`, the following commands start the training process for the large byte-level EmbedNet ```bash mkdir -p models/dianping/temporal12length512feature256byte ./archive/dianping_temporal12length512feature256byte.sh ``` And for small byte-level EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256byte ./archive/dianping_temporal8length486feature256byte.sh ``` ### Character-Level EmbedNet for Romanized Text Note that characters for romanized text is the same as bytes. Therefore, the steps are exactly the same as the byte-level EmbedNet, except for romanized text instead of original text. #### Convert Byte Serialization Files In `/data/dianping`, execute the following commands ```bash th convert_string_code.lua ../data/dianping/train_pinyin_string.t7b ../data/dianping/train_pinyin_string_code.t7b th convert_string_code.lua ../data/dianping/test_pinyin_string.t7b ../data/dianping/test_pinyin_string_code.t7b ``` #### Execute the Experiments Assuming the current working director is `/embednet`, the following commands start the training process for the large character-level EmbedNet for romanized text ```bash mkdir -p models/dianping/temporal12length512feature256roman ./archive/dianping_temporal12length512feature256roman.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256roman ./archive/dianping_temporal8length486feature256roman.sh ``` ### Word-Level Embednet for Original Text This section introduces how to segment word from the text, build the word serialization files, and execute the commands. 
#### Build Word Serialization Files for Original Text The first step for building the word serialization files is to segment the words. This is done by executing a Python 3 script as follows, assuming you have the [`jieba`](https://github.com/fxsjy/jieba) package installed (version 0.38 for the results in the paper) and the working directory is `/data/dianping`. ```bash python3 segment_word.py -i ../data/dianping/train.csv -o ../data/dianping/train_word.csv -l ../data/dianping/train_word_list.csv python3 segment_word.py -i ../data/dianping/test.csv -o ../data/dianping/test_word.csv -l ../data/dianping/train_word_list.csv -r ``` The first command generate 2 data files. `train_word.csv` is a file containing sequences of indices of segmented words from the original text fields, whereas `train_word_list.csv` contains the list of words. The second command read the same list of words generated from the training data (therefore the `-r` option) and use that list to build sequences for the testing data. This is done deliberately so that new words not in the training data are not considered for classification results. The second step is to build the word serialization files from the segmentation results. ```bash th construct_word.lua ../data/dianping/train_word.csv ../data/dianping/train_word.t7b th construct_word.lua ../data/dianping/test_word.csv ../data/dianping/test_word.t7b ``` #### Execute the Experiments When we have `train_word.t7b` and `test_word.t7b`, we can start executing the experiments for word-level EmbedNet models. 
Assume that the current directory is `/embednet`, the following commands start the training process for the large word-level EmbedNet for original text ```bash mkdir -p models/dianping/temporal12length512feature256word ./archive/dianping_temporal12length512feature256word.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256word ./archive/dianping_temporal8length486feature256word.sh ``` ### Word-Level EmbedNet for Romanized Text Similar to the original text, romanized text also requires word segmentation before being able to pass through the EmbedNet training program. #### Build Word Serialization Files for Romanized Text Word segmentation for romanized text is pretty simple. Assume you are in `/data/dianping`, the following commands do the job ```bash th segment_roman_word.lua ../data/dianping/train_pinyin.csv ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv th segment_roman_word.lua ../data/dianping/test_pinyin.csv ../data/dianping/test_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv true ``` Note the additional `true` argument in the second command-line to inform the script to use the training word list for constructing the indices for the testing data. Then, word serialization files can be built from the segmentation results using the following commands. ```bash th construct_word.lua ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word.t7b th construct_word.lua ../data/dianping/test_pinyin_word.csv ../data/dianping/test_pinyin_word.t7b ``` #### Execute the Experiments When we have `train_pinyin_word.t7b` and `test_pinyin_word.t7b`, we can start executing the experiments for word-level EmbedNet models.
Assume that the current directory is `/embednet`, the following commands start the training process for the large word-level EmbedNet for romanized text ```bash mkdir -p models/dianping/temporal12length512feature256romanword ./archive/dianping_temporal12length512feature256romanword.sh ``` And for small EmbedNet ```bash mkdir -p models/dianping/temporal8length486feature256romanword ./archive/dianping_temporal8length486feature256romanword.sh ``` ## Linear Model This section details how to reproduce the results for linear models. ### Character-Level 1-Gram Linear Model for Original Text To run the linear model using bag-of-character features, we need to build the feature serialization files first. #### Build Character-Level 1-Gram Feature Serialization Files To build the character-level 1-gram feature serialization files, execute the following commands from `/data/dianping`. ```bash th construct_charbag.lua ../data/dianping/train_code.t7b ../data/dianping/train_charbag.t7b ../data/dianping/train_charbag_list.csv th construct_charbag.lua ../data/dianping/test_code.t7b ../data/dianping/test_charbag.t7b ../data/dianping/train_charbag_list.csv true ``` The first command creates a file `train_charbag.t7b`, which contains a table that has the following members * `bag`: a table where `bag[i]` contains a n-by-2 `LongTensor`. It contains the beginning index and length of values in `bag_index` and `bag_value` for each sample. * `bag_index`: a 1-D `LongTensor` that contains the character indices of all samples. * `bag_value`: a 1-D `DoubleTensor` that contains the frequency of the corresponding character indices. The second command creates the feature serialization file for testing data, but using the same character index that was created from training data. The additional `true` parameter means to read from list rather than create a new one. All of the feature serialization files for linear models have the same data structure design.
To prepare feature serialization files for the TFIDF variant of bag-of-character linear model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_charbag.t7b ../data/dianping/train_charbagtfidf.t7b ../data/dianping/train_charbag_list.csv th construct_tfidf.lua ../data/dianping/test_charbag.t7b ../data/dianping/test_charbagtfidf.t7b ../data/dianping/train_charbag_list.csv ``` Note that constructing serialization files for testing data still uses the character frequency list from training data. #### Execute the Experiments To execute the experiment for character-level 1-gram linear model, execute the following commands from `/linearnet` ```bash mkdir -p models/dianping/charbag ./archive/dianping_charbag.sh ``` To execute the experiment for the TFIDF version, execute the following command from `/linearnet` ```bash mkdir -p models/dianping/charbagtfidf ./archive/dianping_charbagtfidf.sh ``` ### Character-Level 5-Gram Linear Model for Original Text Before being able to execute the 5-gram experiments, we have to build the feature serialization files first. #### Build Character-Level 5-Gram Feature Serialization Files In this work, 5-gram features actually mean features of grams from 1 to 5. It is usually infeasible to store all of these features in memory, and building the features could take a significant amount of time. Therefore, we build a list of grams ranked by their frequency via a multi-threaded program first, and then build the 5-gram feature serialization files using it.
To build the list of character grams, execute the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_chargram_count th count_chargram.lua ../data/dianping/train_code.t7b ../data/dianping/train_chargram_count/ mkdir -p ../data/dianping/train_chargram_count_sort ./sort_gram_count.sh ../data/dianping/train_chargram_count ../data/dianping/train_chargram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_chargram_count_sort/ ../data/dianping/train_chargram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_chargram_count_combine.csv ../data/dianping/train_chargram_list.csv ./limit_csvlines.sh ../data/dianping/train_chargram_list.csv ../data/dianping/train_chargram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of character grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the character-level 5-gram feature serialization files using the following commands from `/data/dianping` ```bash th construct_chargram.lua ../data/dianping/train_code.t7b ../data/dianping/train_chargram.t7b ../data/dianping/train_chargram_list_limit.csv th construct_chargram.lua ../data/dianping/test_code.t7b ../data/dianping/test_chargram.t7b ../data/dianping/train_chargram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data.
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_chargram.t7b ../data/dianping/train_chargramtfidf.t7b ../data/dianping/train_chargram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_chargram.t7b ../data/dianping/test_chargramtfidf.t7b ../data/dianping/train_chargram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for character-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/chargram ./archive/dianping_chargram.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/chargramtfidf ./archive/dianping_chargramtfidf.sh ``` ### Word-Level 1-Gram Linear Model for Original Text This section first introduces how to build bag-of-word features, and then details how to execute the experiments. #### Build Word-Level 1-Gram Feature Serialization Files The following commands from `/data/dianping` can create the word-level 1-gram features for linear model ```bash th construct_wordbag.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordbag.t7b 200000 200001 th construct_wordbag.lua ../data/dianping/test_word.t7b ../data/dianping/test_wordbag.t7b 200000 200001 ``` This is possible because the word segmentation process previously done for word-level EmbedNet already sorts the words by its frequency from the training data. The program also automatically limit the number of features to 200000 and replace all other features to the 200001-th one. 
To construct the TFIDF feature, simply execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_wordbag.t7b ../data/dianping/train_wordbagtfidf.t7b ../data/dianping/train_word_list.csv 200000 th construct_tfidf.lua ../data/dianping/test_wordbag.t7b ../data/dianping/test_wordbagtfidf.t7b ../data/dianping/train_word_list.csv 200000 ``` #### Execute the Experiments From `/linearnet`, the following commands execute the experiment for bag-of-word model ```bash mkdir -p models/dianping/wordbag ./archive/dianping_wordbag.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordbagtfidf ./archive/dianping_wordbagtfidf.sh ``` ### Word-Level 5-Gram Linear Model for Original Text This section introduces how to build word-level 5-gram feature serialization files and how to execute the experiments. #### Build Word-Level 5-Gram Feature Serialization Files Similar to the character-level 5-gram features, we need a multi-threaded program to build the list of grams first before being able to build the feature serialization files. 
The list can be built by executing the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_wordgram_count th count_wordgram.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordgram_count/ ../data/dianping/train_word_list.csv mkdir -p ../data/dianping/train_wordgram_count_sort ./sort_gram_count.sh ../data/dianping/train_wordgram_count ../data/dianping/train_wordgram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_wordgram_count_sort/ ../data/dianping/train_wordgram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_wordgram_count_combine.csv ../data/dianping/train_wordgram_list.csv ./limit_csvlines.sh ../data/dianping/train_wordgram_list.csv ../data/dianping/train_wordgram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of word grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the word-level 5-gram feature serialization files using the following commands from `/data/dianping` ```bash th construct_wordgram.lua ../data/dianping/train_word.t7b ../data/dianping/train_wordgram.t7b ../data/dianping/train_wordgram_list_limit.csv th construct_wordgram.lua ../data/dianping/test_word.t7b ../data/dianping/test_wordgram.t7b ../data/dianping/train_wordgram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data.
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_wordgram.t7b ../data/dianping/train_wordgramtfidf.t7b ../data/dianping/train_wordgram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_wordgram.t7b ../data/dianping/test_wordgramtfidf.t7b ../data/dianping/train_wordgram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for word-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/wordgram ./archive/dianping_wordgram.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordgramtfidf ./archive/dianping_wordgramtfidf.sh ``` ### Word-Level 1-Gram Linear Model for Romanized Text This section first introduces how to build bag-of-word features for romanized text, and then details how to execute the experiments. #### Build Word-Level 1-Gram Feature Serialization Files The following commands from `/data/dianping` can create the word-level 1-gram features for romanized text ```bash th construct_wordbag.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordbag.t7b 200000 200001 th construct_wordbag.lua ../data/dianping/test_pinyin_word.t7b ../data/dianping/test_pinyin_wordbag.t7b 200000 200001 ``` This is possible because the word segmentation process previously done for romanized word-level EmbedNet already sorts the words by their frequency from the training data. The program also automatically limits the number of features to 200000 and replaces all other features with the 200001-th one. 
To construct the TFIDF feature, simply execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_pinyin_wordbag.t7b ../data/dianping/train_pinyin_wordbagtfidf.t7b ../data/dianping/train_pinyin_word_list.csv 200000 th construct_tfidf.lua ../data/dianping/test_pinyin_wordbag.t7b ../data/dianping/test_pinyin_wordbagtfidf.t7b ../data/dianping/train_pinyin_word_list.csv 200000 ``` #### Execute the Experiments From `/linearnet`, the following commands execute the experiment for bag-of-word model for romanized text ```bash mkdir -p models/dianping/wordbagroman ./archive/dianping_wordbagroman.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordbagtfidfroman ./archive/dianping_wordbagtfidfroman.sh ``` ### Word-Level 5-Gram Linear Model for Romanized Text This section introduces how to build word-level 5-gram feature serialization files for romanized text and how to execute the experiments. #### Build Word-Level 5-Gram Feature Serialization Files Similar to the character-level 5-gram features, we need a multi-threaded program to build the list of grams first before being able to build the feature serialization files. 
The list can be built by executing the following commands from `/data/dianping` ```bash mkdir -p ../data/dianping/train_pinyin_wordgram_count th count_wordgram.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordgram_count/ ../data/dianping/train_pinyin_word_list.csv mkdir -p ../data/dianping/train_pinyin_wordgram_count_sort ./sort_gram_count.sh ../data/dianping/train_pinyin_wordgram_count ../data/dianping/train_pinyin_wordgram_count_sort /tmp th combine_gram_count.lua ../data/dianping/train_pinyin_wordgram_count_sort/ ../data/dianping/train_pinyin_wordgram_count_combine.csv ./sort_gram_list.sh ../data/dianping/train_pinyin_wordgram_count_combine.csv ../data/dianping/train_pinyin_wordgram_list.csv ./limit_csvlines.sh ../data/dianping/train_pinyin_wordgram_list.csv ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000001 ``` The commands proceed by first using 10 threads to construct chunks of counts of word grams, and then sort and combine them to form the combined list. It is then sorted to list grams by their frequency, and finally we choose the 1,000,001 most frequent ones. This should be enough because we are limiting the number of features in 5-gram models to 1,000,000. Then, you can build the word-level 5-gram feature serialization files for romanized text using the following commands from `/data/dianping` ```bash th construct_wordgram.lua ../data/dianping/train_pinyin_word.t7b ../data/dianping/train_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv th construct_wordgram.lua ../data/dianping/test_pinyin_word.t7b ../data/dianping/test_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv ``` Note that the features for testing data are built using the gram list from the training data. 
To build the feature serialization files for TFIDF version of the model, execute the following commands from `/data/dianping` ```bash th construct_tfidf.lua ../data/dianping/train_pinyin_wordgram.t7b ../data/dianping/train_pinyin_wordgramtfidf.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000000 th construct_tfidf.lua ../data/dianping/test_pinyin_wordgram.t7b ../data/dianping/test_pinyin_wordgramtfidf.t7b ../data/dianping/train_pinyin_wordgram_list_limit.csv 1000000 ``` #### Execute the Experiments To execute the experiment for word-level 5-gram linear model, run the following commands from `/linearnet` ```bash mkdir -p models/dianping/wordgramroman ./archive/dianping_wordgramroman.sh ``` And for the TFIDF version ```bash mkdir -p models/dianping/wordgramtfidfroman ./archive/dianping_wordgramtfidfroman.sh ``` ## fastText This section introduces how to build the token files and run experiments for the fastText models. Note that before being able to execute the experiments in this section, you must make sure that you have [fastText](https://github.com/facebookresearch/fastText) installed and there is `fasttext` command in your `PATH`. ### Character-Level fastText for Original Text We first build the token files for character-level fastText, and then detail how to execute the experiments. #### Build Character-Level Token Files To build the character token files from the original text files, execute the following commands from `/data/dianping` ```bash th construct_chartoken.lua ../data/dianping/train.csv ../data/dianping/train_chartoken.txt th construct_chartoken.lua ../data/dianping/test.csv ../data/dianping/test_chartoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. 
```bash ./shuffle_lines.sh ../data/dianping/train_chartoken.txt ../data/dianping/train_chartoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_chartoken_shuffle.txt ../data/dianping/train_chartoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_chartoken_shuffle_split_0.txt` and `train_chartoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the character-level 1-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charunigram_evaluation ./archive/dianping_charunigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the character-level 1-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charunigram_tuned ./archive/dianping_charunigram_tuned.sh ``` To execute the character-level 2-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charbigram_evaluation ./archive/dianping_charbigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the character-level 2-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charbigram_tuned ./archive/dianping_charbigram_tuned.sh ``` To execute the character-level 5-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/charpentagram_evaluation ./archive/dianping_charpentagram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. 
To execute the character-level 5-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/charpentagram_tuned ./archive/dianping_charpentagram_tuned.sh ``` ### Word-Level fastText for Original Text We first build the token files for word-level fastText, and then detail how to execute the experiments. #### Build Word-Level Token Files To build the word token files from the original text files, execute the following commands from `/data/dianping` ```bash th construct_wordtoken.lua ../data/dianping/train_word.csv ../data/dianping/train_word_list.csv ../data/dianping/train_wordtoken.txt th construct_wordtoken.lua ../data/dianping/test_word.csv ../data/dianping/train_word_list.csv ../data/dianping/test_wordtoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. ```bash ./shuffle_lines.sh ../data/dianping/train_wordtoken.txt ../data/dianping/train_wordtoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_wordtoken_shuffle.txt ../data/dianping/train_wordtoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_wordtoken_shuffle_split_0.txt` and `train_wordtoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the word-level 1-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigram_evaluation ./archive/dianping_wordunigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. 
To execute the word-level 1-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigram_tuned ./archive/dianping_wordunigram_tuned.sh ``` To execute the word-level 2-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigram_evaluation ./archive/dianping_wordbigram_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 2-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigram_tuned ./archive/dianping_wordbigram_tuned.sh ``` To execute the word-level 5-gram evaluation experiment, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagram_evaluation ./archive/dianping_wordpentagram_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 5-gram experiment, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagram_tuned ./archive/dianping_wordpentagram_tuned.sh ``` ### Word-Level fastText for Romanized Text We first build the token files for word-level fastText on romanized text, and then detail how to execute the experiments. 
#### Build Word-Level Token Files To build the word token files from the romanized text files, execute the following commands from `/data/dianping` ```bash th construct_wordtoken.lua ../data/dianping/train_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv ../data/dianping/train_pinyin_wordtoken.txt th construct_wordtoken.lua ../data/dianping/test_pinyin_word.csv ../data/dianping/train_pinyin_word_list.csv ../data/dianping/test_pinyin_wordtoken.txt ``` Optionally, you can also build the evaluation token files by separating the training dataset to a 1:9 ratio. ```bash ./shuffle_lines.sh ../data/dianping/train_pinyin_wordtoken.txt ../data/dianping/train_pinyin_wordtoken_shuffle.txt ./split_lines.sh 1800000 ../data/dianping/train_pinyin_wordtoken_shuffle.txt ../data/dianping/train_pinyin_wordtoken_shuffle_split_ ``` Note that the second command above will produce 2 files `train_pinyin_wordtoken_shuffle_split_0.txt` and `train_pinyin_wordtoken_shuffle_split_1.txt`. #### Execute the Experiments To execute the word-level 1-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigramroman_evaluation ./archive/dianping_wordunigramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. You can check whether the evaluated hyperparameter conforms to that in the paper. To execute the word-level 1-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordunigramroman_tuned ./archive/dianping_wordunigramroman_tuned.sh ``` To execute the word-level 2-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigramroman_evaluation ./archive/dianping_wordbigramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epochs for the best option on the evaluation data. 
You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the word-level 2-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordbigramroman_tuned ./archive/dianping_wordbigramroman_tuned.sh ``` To execute the word-level 5-gram evaluation experiment on romanized text, do the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagramroman_evaluation ./archive/dianping_wordpentagramroman_evaluation.sh ``` This will iterate through 2, 5 and 10 epoches for the best option on the evaluation data. You can check whether the evaluated hyperparameter confirms with that in the paper. To execute the word-level 5-gram experiment on romanized text, use the following commands from `/fasttext` ```bash mkdir -p models/dianping/wordpentagramroman_tuned ./archive/dianping_wordpentagramroman_tuned.sh ``` ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256 -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file 
data/11st/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; 
================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256 -driver_variation small -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/binary_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/binary_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256romanword.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set 
-e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_rr_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal12length512feature256word -driver_dimension 200002 
-train_data_file data/11st/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256 -driver_variation small -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/11st/sentiment/full_train_rr_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/11st/sentiment/full_test_rr_byte.t7b -test_data_replace 257 
-test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_rr_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_rr_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/11stfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/11st/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/11st/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal12length512feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/amazon/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal8length486feature256 -driver_variation small -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/amazon/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal12length512feature256 -train_data_file 
data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/amazon/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal8length486feature256 -driver_variation small -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/amazonfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/amazon/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/amazon/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b -driver_location models/chinanews/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/chinanews/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/chinanews/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256romanword 
-driver_dimension 200002 -train_data_file data/chinanews/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/chinanews/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b -driver_location models/chinanews/temporal8length486feature256 -driver_variation small "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/chinanews/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; 
================================================ FILE: embednet/archive/chinanews_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/chinanews/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/chinanews/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/chinanews/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/chinanews_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/chinanews/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/chinanews/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: 
embednet/archive/dianping_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/dianping/train_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/dianping/train_pinyin_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_pinyin_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file 
data/dianping/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/dianping/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256byte -driver_dimension 257 -train_data_file data/dianping/train_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived 
program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256roman -driver_dimension 257 -train_data_file data/dianping/train_pinyin_string_code.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/dianping/test_pinyin_string_code.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256romanword -driver_dimension 200002 -train_data_file data/dianping/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/dianping_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/temporal8length486feature256word -driver_dimension 200002 -train_data_file data/dianping/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/dianping/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b -driver_location models/ifeng/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/ifeng/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/ifeng/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/ifeng/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/ifeng/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/ifeng/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b -driver_location models/ifeng/temporal8length486feature256 -driver_variation small "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/ifeng/topic/train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/ifeng/topic/train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/ifeng/topic/test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/ifeng/topic/train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/ifeng_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/ifeng/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/ifeng/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; 
set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b -driver_location models/jdbinary/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_pinyin_word_limit.t7b -train_data_replace 
200002 -test_data_file data/jd/sentiment/binary_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256 -driver_variation small -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: 
embednet/archive/jdbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/binary_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/binary_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: 
embednet/archive/jdfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b -driver_location models/jdfull/temporal12length512feature256 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256 -driver_variation small -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_byte.t7b -train_data_replace 257 
-train_data_shift 1 -test_data_file data/jd/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/jd/sentiment/full_train_pinyin_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/jd/sentiment/full_test_pinyin_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_pinyin_word_limit.t7b -train_data_replace 200002 -test_data_file data/jd/sentiment/full_test_pinyin_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jdfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/jd/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/jd/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_location models/jointbinary/temporal12length512feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/joint/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/joint/binary_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: 
embednet/archive/jointbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256romanword -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/binary_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/joint/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_location models/jointbinary/temporal8length486feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 
2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/temporal8length486feature256byte -driver_dimension 257 -train_data_file data/joint/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/joint/binary_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/binary_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/temporal8length486feature256romanword -driver_variation small -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/binary_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/temporal8length486feature256word -driver_dimension 200002 -train_data_file data/joint/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/binary_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_location models/jointfull/temporal12length512feature256 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/joint/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256roman -driver_dimension 257 
-train_data_file data/joint/full_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256romanword -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/full_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/joint/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_location models/jointfull/temporal8length486feature256 -driver_steps 
400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/joint/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/joint/full_train_roman_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/joint/full_test_roman_byte.t7b -test_data_replace 257 -test_data_shift 1 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/jointfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256romanword -driver_variation small -driver_steps 400000 -driver_dimension 200002 -train_data_file data/joint/full_train_roman_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_roman_word_limit.t7b -test_data_replace 200002 "$@"; 
================================================ FILE: embednet/archive/jointfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/joint/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/joint/full_test_word_limit.t7b -test_data_replace 200002 -driver_steps 400000 "$@"; ================================================ FILE: embednet/archive/nytimes_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal12length512feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: embednet/archive/nytimes_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/nytimes/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/nytimes/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/nytimes_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal8length486feature256 -driver_variation small -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: embednet/archive/nytimes_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/nytimes/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/nytimes/topic/train_word_limit.t7b -train_data_replace 200002 -test_data_file data/nytimes/topic/test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file 
data/rakuten/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal12length512feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file 
data/rakuten/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256 -driver_variation small -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/binary_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/binary_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; 
================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenbinary_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/binary_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/binary_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256byte.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256byte -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256roman -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256romanword -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal12length512feature256word.sh ================================================ 
#!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal12length512feature256word -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256 -driver_variation small -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256byte.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256byte -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-driver_location models/rakutenfull/temporal8length486feature256roman -driver_variation small -driver_dimension 257 -train_data_file data/rakuten/sentiment/full_train_hepburn_byte.t7b -train_data_replace 257 -train_data_shift 1 -test_data_file data/rakuten/sentiment/full_test_hepburn_byte.t7b -test_data_replace 257 -test_data_shift 1 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256romanword.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256romanword -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_hepburn_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_hepburn_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/archive/rakutenfull_temporal8length486feature256word.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/temporal8length486feature256word -driver_variation small -driver_dimension 200002 -train_data_file data/rakuten/sentiment/full_train_word_limit.t7b -train_data_replace 200002 -test_data_file data/rakuten/sentiment/full_test_word_limit.t7b -test_data_replace 200002 "$@"; ================================================ FILE: embednet/config.lua ================================================ --[[ Configuration for EmbedNet Copyright Xiang Zhang 2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_code.t7b' 
config.train_data.batch = 16 config.train_data.replace = 65537 config.train_data.shift = 0 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_code.t7b' config.test_data.batch = 16 config.test_data.replace = 65537 config.test_data.shift = 0 -- Model configurations config.model = {} config.model.cudnn = true -- Model variations configuration config.variation = {} -- Large model configuration local embedding = {} embedding[1] = {name = 'nn.LookupTable', nIndex = 65537, nOutput = 256, paddingValue = config.train_data.replace} embedding[2] = {name = 'nn.Transpose', permutations = {{2, 3}}} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[16] 
= {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[18] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true} temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024} temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[31] = {name = 'nn.LogSoftMax'} config.variation['large'] = {embedding = embedding, temporal = temporal, length = 512} -- Small model configuration local embedding = {} embedding[1] = {name = 'nn.LookupTable', nIndex = 65537, nOutput = 256, paddingValue = config.train_data.replace} embedding[2] = {name = 'nn.Transpose', permutations = {{2, 3}}} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = 
{name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true} temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024} temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[21] = {name = 'nn.LogSoftMax'} config.variation['small'] = {embedding = embedding, temporal = temporal, length = 486} -- Trainer settings config.train = {} config.train.momentum = 0.9 config.train.decay = 1e-5 -- These are just multipliers to config.driver.rate -- For every config.driver.schedule * config.driver.steps config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024} -- Tester settings config.test = {} -- Visualizer settings config.visualizer = {} config.visualizer.width = 1200 config.visualizer.scale = 4 config.visualizer.height = 64 -- Driver configurations config.driver = {} config.driver.type = 'torch.CudaTensor' config.driver.device = 1 
config.driver.loss = 'nn.ClassNLLCriterion' config.driver.variation = 'large' config.driver.dimension = 65537 config.driver.steps = 100000 config.driver.epoches = 100 config.driver.schedule = 8 config.driver.rate = 1e-5 config.driver.interval = 5 config.driver.location = 'models/dianping/temporal12length512feature256' config.driver.plot = true config.driver.visualize = true config.driver.debug = false config.driver.resume = false -- Main configuration config.joe = {} return config ================================================ FILE: embednet/data.lua ================================================ --[[ Data class for Embedding Net Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local parent = require('glyphnet/data') local Data = class(parent) -- Constructor for Data -- config: configuration table -- .file: file for data -- .batch: batch of data -- .replace: the code to for replacing padding space function Data:_init(config) self.data = torch.load(config.file) self.length = config.length or 512 self.batch = config.batch or 16 self.replace = config.replace or 65537 self.shift = config.shift or 0 end function Data:initSample(sample, label) local sample = sample or torch.Tensor(self.batch, self.length) local label = label or torch.Tensor(self.batch) sample:fill(self.replace) return sample, label end function Data:index(sample, class, item) local code, code_value = self.data.code, self.data.code_value local position = 1 for field = 1, code[class][item]:size(1) do -- Break if current position is larger than sample length if position > sample:size(1) then break end -- Determine the actual length local length = code[class][item][field][2] if position + length - 1 > sample:size(1) then length = sample:size(1) - position + 1 end -- Copy the data over if length > 0 then sample:narrow(1, position, length):copy( code_value:narrow(1, code[class][item][field][1], length)):add( self.shift) end -- Increment the position value 
position = position + length end return sample end return Data ================================================ FILE: embednet/driver.lua ================================================ --[[ Driver for EmbedNet training Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local parent = require('glyphnet/driver') local Driver = class(parent) -- Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.embedding = self.options.variation[self.variation].embedding self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length self.dimension = self.options.driver.dimension print('Driver adjusting data index dimension to '..self.dimension) self.options.model.embedding[1].nIndex = self.dimension self.options.model.embedding[1].paddingValue = self.options.train_data.replace end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Embedding model' self.embedding_visualizer = self.embedding_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.embedding_visualizer:drawSequential(self.model.embedding) self.temporal_visualizer:drawSequential(self.model.temporal) end return Driver ================================================ FILE: embednet/model.lua ================================================ --[[ Model for EmbedNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local nn = require('nn') local parent = require('glyphnet/model') local Model = class(parent) -- 
Model constructor -- config: configuration table -- .embedding: configuration table of the embedding model -- .temporal: configuration table of the temporal model -- .file: the model file to load -- .pretrain: whether the keep the embedding pretrained -- .embedding_file: the file for pretrained embedding model -- .cudnn: whether to use NVidia CUDNN function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.embedding = self:makeCleanSequential(model.embedding) self.temporal = self:makeCleanSequential(model.temporal) else if config.embedding_file then self.embedding = self:makeCleanSequential( torch.load(config.embedding_file)) else self.embedding = self:createCleanSequential(config.embedding) self:initSequential(self.embedding) end self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.temporal) end -- Saving configurations self.pretrain = config.pretrain self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() end function Model:forward(input) self.feature = self.embedding:forward(input) self.output = self.temporal:forward(self.feature) return self.output end function Model:backward(input, grad_output) self.grad_feature = self.temporal:backward(self.feature, grad_output) if self.pretrain then return self.grad_feature else self.grad_input = self.embedding:backward(input, self.grad_feature) return self.grad_input end end function Model:getParameters() return nn.Module.getParameters(self) end function Model:parameters() local parameters, gradients = {}, {} if not self.pretrain then local embedding_parameters, embedding_gradients = self.embedding:parameters() for i = 1, #embedding_parameters do parameters[#parameters + 1] = embedding_parameters[i] gradients[#gradients + 1] = embedding_gradients[i] end end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = 
temporal_parameters[i]
      gradients[#gradients + 1] = temporal_gradients[i]
   end
   return parameters, gradients
end

-- Set or query the tensor type. Converting to 'torch.CudaTensor' rebuilds
-- both sub-models as CUDA sequentials (loading cunn on demand); converting
-- away rebuilds them as clean sequentials. Returns the current tensor type.
function Model:type(tensortype)
   if tensortype ~= nil and tensortype ~= self.tensortype then
      if tensortype == 'torch.CudaTensor' then
         require('cunn')
         self.embedding = self:makeCudaSequential(self.embedding)
         self.temporal = self:makeCudaSequential(self.temporal)
      else
         self.embedding = self:makeCleanSequential(self.embedding)
         self.temporal = self:makeCleanSequential(self.temporal)
      end
      self.embedding:type(tensortype)
      self.temporal:type(tensortype)
      self.tensortype = tensortype
   end
   return self.tensortype
end

-- Propagate train/test mode to both sub-models.
function Model:setMode(mode)
   self:setModeSequential(self.embedding, mode)
   self:setModeSequential(self.temporal, mode)
end

-- Save clean (state-cleared, non-CUDA) copies of both sub-models to file as
-- {embedding = ..., temporal = ...}.
function Model:save(file)
   local embedding = self:clearSequential(
      self:makeCleanSequential(self.embedding))
   local temporal = self:clearSequential(
      self:makeCleanSequential(self.temporal))
   torch.save(file, {embedding = embedding, temporal = temporal})
end

-- Initialize a lookup table: normal weights with std scaled by the inverse
-- square root of the output dimension; the padding row (if any) is zeroed.
Model.initModule['nn.LookupTable'] = function (self, m)
   m.weight:normal(0, math.sqrt(1 / m.weight:size(2)))
   if m.paddingValue > 0 then
      m.weight[m.paddingValue]:zero()
   end
end

-- nn.Transpose has no parameters; nothing to initialize.
Model.initModule['nn.Transpose'] = function (self, m)
end

-- Neither module type distinguishes between train and test modes.
Model.setModeModule['train']['nn.LookupTable'] = function (self, m)
end
Model.setModeModule['train']['nn.Transpose'] = function (self, m)
end
Model.setModeModule['test']['nn.LookupTable'] = function(self, m)
end
Model.setModeModule['test']['nn.Transpose'] = function(self, m)
end

-- Create fresh modules from configuration tables.
Model.createCleanModule['nn.LookupTable'] = function (self, m)
   return nn.LookupTable(m.nIndex, m.nOutput, m.paddingValue)
end
Model.createCleanModule['nn.Transpose'] = function (self, m)
   return nn.Transpose(unpack(m.permutations))
end

-- Clone modules into clean equivalents, copying weights where present.
Model.makeCleanModule['nn.LookupTable'] = function(self, m)
   local new = nn.LookupTable(
      m.weight:size(1), m.weight:size(2), m.paddingValue)
   new.weight:copy(m.weight)
   return new
end
Model.makeCleanModule['nn.Transpose'] = function (self, m)
   return 
nn.Transpose(unpack(m.permutations))
end

-- Clone modules into CUDA-ready equivalents, copying weights where present.
Model.makeCudaModule['nn.LookupTable'] = function (self, m)
   local new = nn.LookupTable(
      m.weight:size(1), m.weight:size(2), m.paddingValue)
   new.weight:copy(m.weight)
   return new
end
Model.makeCudaModule['nn.Transpose'] = function (self, m)
   return nn.Transpose(unpack(m.permutations))
end

return Model

================================================ FILE: embednet/unittest/data.lua ================================================

--[[
Unit test for EmbedNet data component
Copyright 2016 Xiang Zhang
--]]

local Data = require('data')

-- A Logic Named Joe
local joe = {}

-- Test harness entry: runs joe:init() if defined, then every method whose
-- name matches '*Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build a testing data object from config.lua with length forced to 512.
function joe:init()
   local config = dofile('config.lua')
   config.train_data.length = 512
   config.test_data.length = 512
   print('Creating testing data object')
   local data = Data(config.test_data)
   self.config = config
   self.data = data
end

-- Print up to count rows of a batch, one 'label: token token ...' per line.
function joe:printSample(sample, label, count)
   local count = count or sample:size(1)
   for i = 1, count do
      io.write(label[i], ':')
      for j = 1, sample:size(2) do
         io.write(' ', sample[i][j])
      end
      io.write('\n')
   end
   io.flush()
end

-- Fetch two batches, reusing the first batch's tensors for the second call.
function joe:getBatchTest()
   local data = self.data
   print('Getting a batch')
   local sample, label = data:getBatch()
   self:printSample(sample, label)
   print('Getting a second batch')
   sample, label = data:getBatch(sample, label)
   self:printSample(sample, label)
end

-- Iterate over the whole dataset, printing each batch's count and labels.
function joe:iteratorTest()
   local data = self.data
   for sample, label, count in data:iterator() do
      io.write(count, ':')
      for i = 1, count do
         io.write(' ', label[i])
      end
      io.write('\n')
      io.flush()
   end
end

joe.main()
return joe

================================================ FILE: embednet/unittest/driver.lua ================================================

--[[ Unit test for EmbedNet driver component 
Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_code.t7b' config.test_data.file = 'data/dianping/unittest_code.t7b' config.driver.debug = true config.driver.device = 3 config.driver.steps = 10 config.driver.epoches = 5 local driver = Driver(config, config.driver) self.config = config self.driver = driver end function joe:driverTest() local driver = self.driver print('Testing driver') driver:run() end joe.main() return joe ================================================ FILE: embednet/unittest/model.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. 
params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(2, 512):mul(65537):ceil() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()) print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/model_cudnn.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local cutorch = require('cutorch') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() 
if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal config.model.cudnn = true local model = Model(config.model) model:cuda() print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(16, 512):mul(65537):ceil():cuda() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) cutorch.synchronize() sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()):cuda() print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) cutorch.synchronize() sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in 
ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/model_cunn.lua ================================================ --[[ Unit Test for EmbedNet model Copyright 2016 Xiang Zhang --]] local Model = require('model') local cutorch = require('cutorch') local sys = require('sys') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal config.model.cudnn = nil local model = Model(config.model) model:cuda() print('Embedding model:') print(model.embedding) print('Temporal model:') print(model.temporal) self.config = config self.model = model end function joe:modelTest() local model = self.model local params, grads = model:getParameters() grads:zero() print('Number of elements in parameters and gradients: '.. 
params:nElement()..', '..grads:nElement()) print('Creating input') local input = torch.rand(16, 512):mul(65537):ceil():cuda() print(input:size()) print('Forward propagating') sys.tic() local output = model:forward(input) cutorch.synchronize() sys.toc(true) print(output:size()) print('Creating output gradients') local grad_output = torch.rand(output:size()):cuda() print(grad_output:size()) print('Backward propagating') sys.tic() local grad_input = model:backward(input, grad_output) cutorch.synchronize() sys.toc(true) print(grad_input:size()) end function joe:modeTest() local model = self.model print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to train') model:setModeTrain() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end print('Setting model to test') model:setModeTest() for i, m in ipairs(model.temporal.modules) do if torch.type(m) == 'nn.Dropout' then print(i, torch.type(m), m.train) end end end function joe:saveTest() local model = self.model print('Saving to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Loading from /tmp/model.t7b') local config = self.config config.model.file = '/tmp/model.t7b' local loaded = Model(config.model) print('Embedding model') print(loaded.embedding) print('Temporal model') print(loaded.temporal) config.model.file = nil end joe.main() return joe ================================================ FILE: embednet/unittest/test.lua ================================================ --[[ Unit test for EmbedNet test component Copyright 2015-2016 Xiang Zhang --]] local Test = require('test') local nn = require('nn') local os = require('os') 
local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.test_data.batch = 2 print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Create loss') local loss = nn[config.driver.loss:sub(4)]() print('Create tester') local test = Test(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.test = test self.config = config end function joe:testTest() local test = self.test local callback = self:callback() print('Running tests') test:run(callback) end function joe:callback() return function (test, i) print('cnt: '..test.total_count..', err: '..test.total_error.. ', lss: '..test.total_objective..', obj: '..test.objective.. ', crr: '..test.error..', dat: '..test.time.data.. 
', fwd: '..test.time.forward..', upd: '..test.time.update) end end joe.main() return joe ================================================ FILE: embednet/unittest/test_cuda.lua ================================================ --[[ Unit test for EmbedNet test component Copyright 2016 Xiang Zhang --]] local Test = require('test') local cutorch = require('cutorch') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Setting device to '..config.driver.device) cutorch.setDevice(config.driver.device) print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) model:cuda() print('Create loss') local loss = nn[config.driver.loss:sub(4)]() loss:cuda() print('Create tester') local test = Test(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.test = test self.config = config end function joe:testTest() local test = self.test local callback = self:callback() print('Running tests') test:run(callback) end function joe:callback() return function (test, i) print('cnt: '..test.total_count..', err: '..test.total_error.. ', lss: '..test.total_objective..', obj: '..test.objective.. ', crr: '..test.error..', dat: '..test.time.data.. 
', fwd: '..test.time.forward..', upd: '..test.time.update) end end joe.main() return joe ================================================ FILE: embednet/unittest/train.lua ================================================ --[[ Unit test for EmbedNet train component Copyright 2015-2016 Xiang Zhang --]] local Train = require('train') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.test_data.batch = 2 print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) print('Create loss') local loss = nn[config.driver.loss:sub(4)]() print('Create trainer') for i, v in pairs(config.train.rates) do config.train.rates[i] = v * config.driver.rate end local train = Train(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.train = train self.config = config end function joe:trainTest() local train = self.train local callback = self:callback() print('Running for 10 steps') train:run(100, callback) end function joe:callback() self.time = os.time() return function (train, i) if os.difftime(os.time(), self.time) >= 5 then print('stp: '..train.step..', rat: '..train.rate.. ', err: '..train.error..', obj: '..train.objective.. ', dat: '..train.time.data..', fwd: '..train.time.forward.. 
', bwd: '..train.time.backward..', upd: '..train.time.update) self.time = os.time() end end end joe.main() return joe ================================================ FILE: embednet/unittest/train_cuda.lua ================================================ --[[ Unit test for EmbedNet train component Copyright 2015-2016 Xiang Zhang --]] local Train = require('train') local cutorch = require('cutorch') local nn = require('nn') local os = require('os') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Setting device to '..config.driver.device) cutorch.setDevice(config.driver.device) print('Creating data') config.test_data.length = config.variation['large'].length local data = Data(config.test_data) print('Create model') config.model.embedding = config.variation['large'].embedding config.model.temporal = config.variation['large'].temporal local model = Model(config.model) model:cuda() print('Create loss') local loss = nn[config.driver.loss:sub(4)]() loss:cuda() print('Create trainer') for i, v in pairs(config.train.rates) do config.train.rates[i] = v * config.driver.rate end local train = Train(data, model, loss, config.train) self.data = data self.model = model self.loss = loss self.train = train self.config = config end function joe:trainTest() local train = self.train local callback = self:callback() print('Running for 100000 steps') train:run(100000, callback) end function joe:callback() self.time = os.time() return function (train, i) if os.difftime(os.time(), self.time) >= 5 then print('stp: '..train.step..', rat: '..train.rate.. ', err: '..train.error..', obj: '..train.objective.. 
', dat: '..train.time.data..', fwd: '..train.time.forward..
            ', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()
return joe

================================================ FILE: embednet/visualizer.lua ================================================

--[[
Visualization module for EmbedNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')

local parent = require('glyphnet/visualizer')

-- EmbedNet visualizer: identical to the GlyphNet visualizer except that
-- nn.LookupTable is drawn the same way the parent draws nn.Linear (both are
-- weight matrices).
local Visualizer = class(parent)

Visualizer.drawModule['nn.LookupTable'] = Visualizer.drawModule['nn.Linear']

return Visualizer

================================================ FILE: fasttext/archive/11stbinary_charbigram.sh ================================================

#!/bin/bash

# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

# Echo commands and abort on first failure.
set -x;
set -e;

LOCATION=models/11stbinary/charbigram;
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt;

# Train a supervised fastText character-bigram model, then report accuracy on
# both the training and the test sets.
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================ FILE: fasttext/archive/11stbinary_charbigram_evaluation.sh ================================================

#!/bin/bash

# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

# Echo commands and abort on first failure.
set -x;
set -e;

LOCATION=models/11stbinary/charbigram_evaluation;
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt;

# Epoch-count evaluation: train on split 0 and evaluate on held-out split 1.
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charbigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charpentagram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram; 
TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/charunigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/11stbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordbigramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin 
$TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman.sh ================================================ #!/bin/bash 
# Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/11stbinary/wordpentagramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test 
$LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigram_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigramroman; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/11stbinary/wordunigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stbinary/wordunigramroman_tuned; TRAIN_DATA=data/11st/sentiment/binary_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/binary_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charbigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test 
$LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charpentagram_tuned.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charpentagram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/charunigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # 
Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; 
TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordbigramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 
-bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/11stfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 
-minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordpentagramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/11stfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigram_tuned; TRAIN_DATA=data/11st/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman_evaluation; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_0.txt; TEST_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/11stfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/11stfull/wordunigramroman_tuned; TRAIN_DATA=data/11st/sentiment/full_train_rr_wordtoken_shuffle.txt; 
TEST_DATA=data/11st/sentiment/full_test_rr_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 
10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charbigram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charpentagram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/amazonbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram_evaluation; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/charunigram_tuned; TRAIN_DATA=data/amazon/binary_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordbigram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; 
TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 
10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordpentagram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram_evaluation; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/binary_train_wordtoken_shuffle_split_1.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonbinary/wordunigram_tuned; TRAIN_DATA=data/amazon/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/amazonfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charbigram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 
Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charpentagram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; 
TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram_evaluation; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 
-thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/charunigram_tuned; TRAIN_DATA=data/amazon/full_train_chartoken_shuffle.txt; TEST_DATA=data/amazon/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordbigram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/amazonfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordpentagram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram_evaluation; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/amazon/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/amazonfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/amazonfull/wordunigram_tuned; TRAIN_DATA=data/amazon/full_train_wordtoken_shuffle.txt; TEST_DATA=data/amazon/full_test_wordtoken_shuffle.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/chinanews_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/chinanews/charbigram; TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt; TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/chinanews_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/chinanews/charbigram_evaluation; TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; 
================================================
FILE: fasttext/archive/chinanews_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charbigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charpentagram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/charunigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordbigramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordpentagramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram_evaluation;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigram_tuned;
TRAIN_DATA=data/chinanews/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman_evaluation;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/chinanews_wordunigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/chinanews/wordunigramroman_tuned;
TRAIN_DATA=data/chinanews/topic/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/chinanews/topic/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charbigram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charpentagram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram_evaluation;
TRAIN_DATA=data/dianping/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/charunigram_tuned;
TRAIN_DATA=data/dianping/train_chartoken_shuffle.txt;
TEST_DATA=data/dianping/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigram_tuned;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman_evaluation;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordbigramroman_tuned;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagram_tuned;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman_evaluation;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordpentagramroman_tuned;
TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordunigram;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt;
TEST_DATA=data/dianping/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;

================================================
FILE: fasttext/archive/dianping_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/dianping/wordunigram_evaluation;
TRAIN_DATA=data/dianping/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/dianping/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;
================================================ FILE: fasttext/archive/dianping_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigram_tuned; TRAIN_DATA=data/dianping/train_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman_evaluation; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/dianping/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 
-minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/dianping_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/dianping/wordunigramroman_tuned; TRAIN_DATA=data/dianping/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/dianping/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram_evaluation.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charbigram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram; 
TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charpentagram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 
-epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_charunigram_tuned.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/charunigram_tuned; TRAIN_DATA=data/ifeng/topic/train_chartoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordbigramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 2 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext 
supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/ifeng_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line 
for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordpentagramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 2 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram_evaluation; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 
-epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigram_tuned; TRAIN_DATA=data/ifeng/topic/train_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigramroman; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/ifeng/wordunigramroman_evaluation; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/ifeng_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/ifeng/wordunigramroman_tuned; TRAIN_DATA=data/ifeng/topic/train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/ifeng/topic/test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charbigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jdbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for 
experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charpentagram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 
-wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/charunigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; 
TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordbigramroman_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 
-wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ 
FILE: fasttext/archive/jdbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 
-lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordpentagramroman_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ 
FILE: fasttext/archive/jdbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigram_tuned; TRAIN_DATA=data/jd/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: 
bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdbinary/wordunigramroman_tuned; 
TRAIN_DATA=data/jd/sentiment/binary_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/binary_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charbigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; 
TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charpentagram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin 
$TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/charunigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigram_tuned; 
TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised 
-input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordbigramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram_evaluation; 
TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 
-wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordpentagramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jdfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 
Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigram_tuned; TRAIN_DATA=data/jd/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman_evaluation; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_0.txt; TEST_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 
-wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jdfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jdfull/wordunigramroman_tuned; TRAIN_DATA=data/jd/sentiment/full_train_pinyin_wordtoken_shuffle.txt; TEST_DATA=data/jd/sentiment/full_test_pinyin_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charbigram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised 
-input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charpentagram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin 
$TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram_evaluation; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # 
Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/charunigram_tuned; TRAIN_DATA=data/joint/binary_train_chartoken_shuffle.txt; TEST_DATA=data/joint/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 
-thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/jointbinary/wordbigramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordbigramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input 
$TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordpentagramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram_evaluation; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output 
$LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigram_tuned; TRAIN_DATA=data/joint/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 
2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman_evaluation; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_0.txt; TEST_DATA=data/joint/binary_train_roman_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointbinary/wordunigramroman_tuned; TRAIN_DATA=data/joint/binary_train_roman_wordtoken_shuffle.txt; TEST_DATA=data/joint/binary_test_roman_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/jointfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/jointfull/charbigram; TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt; 
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charbigram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charbigram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charpentagram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram_evaluation;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/charunigram_tuned;
TRAIN_DATA=data/joint/full_train_chartoken_shuffle.txt;
TEST_DATA=data/joint/full_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordbigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordbigramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;
================================================
FILE: fasttext/archive/jointfull_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordpentagramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordpentagramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram_evaluation;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigram_tuned;
TRAIN_DATA=data/joint/full_train_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman_evaluation;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/joint/full_train_roman_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/jointfull_wordunigramroman_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/jointfull/wordunigramroman_tuned;
TRAIN_DATA=data/joint/full_train_roman_wordtoken_shuffle.txt;
TEST_DATA=data/joint/full_test_roman_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;
================================================
FILE: fasttext/archive/nytimes_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charbigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charpentagram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_charunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/charunigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_chartoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordbigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordpentagram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram_evaluation;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_0.txt;
TEST_DATA=data/nytimes/topic/train_wordtoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/nytimes_wordunigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/nytimes/wordunigram_tuned;
TRAIN_DATA=data/nytimes/topic/train_wordtoken_shuffle.txt;
TEST_DATA=data/nytimes/topic/test_wordtoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram_evaluation;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charbigram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charbigram_tuned;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram_evaluation.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram_evaluation;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt;
TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10;
fasttext test $LOCATION/model_2.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model_5.bin $TEST_DATA;
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10;
fasttext test $LOCATION/model_10.bin $TEST_DATA;


================================================
FILE: fasttext/archive/rakutenbinary_charpentagram_tuned.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2017 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

LOCATION=models/rakutenbinary/charpentagram_tuned;
TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt;
TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt;

fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10;
fasttext test $LOCATION/model.bin $TRAIN_DATA;
fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_charunigram_tuned.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/charunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 
-thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: 
fasttext/archive/rakutenbinary_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordbigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/rakutenbinary/wordpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 
0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordpentagramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line 
for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman; 
TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenbinary_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenbinary/wordunigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/binary_train_hepburn_wordtoken_shuffle.txt; 
TEST_DATA=data/rakuten/sentiment/binary_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 
-lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charbigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram_evaluation; 
TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model 
-dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_charunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/charunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_chartoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_chartoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; 
================================================ FILE: fasttext/archive/rakutenfull_wordbigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigram_tuned.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin 
$TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordbigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordbigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram_evaluation.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordpentagramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordpentagramroman_tuned; 
TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 5 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; 
fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigram_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigram_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigramroman; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman_evaluation.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
LOCATION=models/rakutenfull/wordunigramroman_evaluation; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_0.txt; TEST_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle_split_1.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_2 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 2 -thread 10; fasttext test $LOCATION/model_2.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_5 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 5 -thread 10; fasttext test $LOCATION/model_5.bin $TEST_DATA; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model_10 -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model_10.bin $TEST_DATA; ================================================ FILE: fasttext/archive/rakutenfull_wordunigramroman_tuned.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2017 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; LOCATION=models/rakutenfull/wordunigramroman_tuned; TRAIN_DATA=data/rakuten/sentiment/full_train_hepburn_wordtoken_shuffle.txt; TEST_DATA=data/rakuten/sentiment/full_test_hepburn_wordtoken_shuffle.txt; fasttext supervised -input $TRAIN_DATA -output $LOCATION/model -dim 10 -lr 0.1 -wordNgrams 1 -minCount 1 -bucket 10000000 -epoch 10 -thread 10; fasttext test $LOCATION/model.bin $TRAIN_DATA; fasttext test $LOCATION/model.bin $TEST_DATA; ================================================ FILE: glyphnet/archive/11stbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/11stbinary/spatial6temporal8length486feature256 
-train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stbinary/spatial8temporal12length512feature256 -train_data_file data/11st/sentiment/binary_train_code.t7b -test_data_file data/11st/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/11stfull/spatial6temporal8length486feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/11stfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/11stfull/spatial8temporal12length512feature256 -train_data_file data/11st/sentiment/full_train_code.t7b -test_data_file data/11st/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # 
Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/amazonbinary/spatial6temporal8length486feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonbinary/spatial8temporal12length512feature256 -train_data_file data/amazon/binary_train_code.t7b -test_data_file data/amazon/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/amazonfull/spatial6temporal8length486feature256 -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/amazonfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/amazonfull/spatial8temporal12length512feature256 -train_data_file data/amazon/full_train_code.t7b -test_data_file data/amazon/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/chinanews_spatial6temporal8length486feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/chinanews/spatial6temporal8length486feature256 -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/chinanews_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/chinanews/spatial8temporal12length512feature256 -train_data_file data/chinanews/topic/train_code.t7b -test_data_file data/chinanews/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/dianping_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/spatial6temporal8length486feature256 "$@"; ================================================ FILE: glyphnet/archive/dianping_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: glyphnet/archive/ifeng_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash 
{this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/ifeng/spatial6temporal8length486feature256 -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/ifeng_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/ifeng/spatial8temporal12length512feature256 -train_data_file data/ifeng/topic/train_code.t7b -test_data_file data/ifeng/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jdbinary/spatial6temporal8length486feature256 -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdbinary/spatial8temporal12length512feature256 -train_data_file data/jd/sentiment/binary_train_code.t7b -test_data_file data/jd/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdfull_spatial6temporal8length486feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jdfull/spatial6temporal8length486feature256 -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jdfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jdfull/spatial8temporal12length512feature256 -train_data_file data/jd/sentiment/full_train_code.t7b -test_data_file data/jd/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/jointbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointbinary/spatial6temporal8length486feature256 -train_data_file data/joint/binary_train_code.t7b -test_data_file data/joint/binary_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointbinary/spatial8temporal12length512feature256 -train_data_file data/joint/binary_train_code.t7b -test_data_file 
data/joint/binary_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/jointfull/spatial6temporal8length486feature256 -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/jointfull_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/jointfull/spatial8temporal12length512feature256 -train_data_file data/joint/full_train_code.t7b -test_data_file data/joint/full_test_code.t7b -driver_steps 400000 "$@"; ================================================ FILE: glyphnet/archive/nytimes_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/nytimes/spatial6temporal8length486feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/nytimes_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set 
-e; qlua main.lua -driver_location models/nytimes/spatial8temporal12length512feature256 -train_data_file data/nytimes/topic/train_code.t7b -test_data_file data/nytimes/topic/test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenbinary_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/rakutenbinary/spatial6temporal8length486feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenbinary_spatial8temporal12length512feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenbinary/spatial8temporal12length512feature256 -train_data_file data/rakuten/sentiment/binary_train_code.t7b -test_data_file data/rakuten/sentiment/binary_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenfull_spatial6temporal8length486feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/rakutenfull/spatial6temporal8length486feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/archive/rakutenfull_spatial8temporal12length512feature256.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/rakutenfull/spatial8temporal12length512feature256 -train_data_file data/rakuten/sentiment/full_train_code.t7b -test_data_file data/rakuten/sentiment/full_test_code.t7b "$@"; ================================================ FILE: glyphnet/config.lua ================================================ --[[ Configuration for GlyphNet Copyright Xiang Zhang 2015-2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_code.t7b' config.train_data.unifont = 'unifont/unifont-8.0.01.t7b' config.train_data.batch = 16 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_code.t7b' config.test_data.unifont = 'unifont/unifont-8.0.01.t7b' config.test_data.batch = 16 -- Model configurations config.model = {} config.model.cudnn = true config.model.group = 16 -- Model variations configuration config.variation = {} -- Large network configuration local spatial = {} spatial[1] = {name = 'nn.SpatialConvolution', nInputPlane = 1, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[3] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[5] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0} spatial[6] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1} spatial[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} spatial[8] = {name = 'nn.SpatialConvolution', nInputPlane = 128, 
-- NOTE(review): this span continues spatial[8] of the large GlyphNet variation;
-- the opening half of the constructor ('nn.SpatialConvolution',
-- nInputPlane = 128,) sits immediately before this block in the file.
nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[10] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0}
spatial[11] = {name = 'nn.SpatialConvolution', nInputPlane = 128, nOutputPlane = 256, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[13] = {name = 'nn.SpatialConvolution', nInputPlane = 256, nOutputPlane = 256, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[15] = {name = 'nn.SpatialMaxPooling', kW = 2, kH = 2, dW = 2, dH = 2, padW = 0, padH = 0}
-- BUG FIX: was 'bachMode'; nn.Reshape's option is spelled 'batchMode'.
-- The temporal stack below (temporal[26]) already uses the correct spelling,
-- so with the typo the spatial reshape silently fell back to auto-detection.
spatial[16] = {name = 'nn.Reshape', size = 1024, batchMode = true}
spatial[17] = {name = 'nn.Linear', inputSize = 1024, outputSize = 1024}
spatial[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[19] = {name = 'nn.Linear', inputSize = 1024, outputSize = 256}
spatial[20] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}

-- Temporal (sequence-level) stack of the large variation: six
-- conv/conv/pool groups at frame size 256, then reshape + classifier.
local temporal = {}
temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[16] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[18] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2}
temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true}
temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024}
temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true}
temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2}
temporal[31] = {name = 'nn.LogSoftMax'}
config.variation['large'] = {spatial = spatial, temporal = temporal, length = 512}

-- Small network configuration
local spatial = {}
spatial[1] = {name = 'nn.SpatialConvolution', nInputPlane = 1, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 2, padH = 2}
spatial[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[3] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 64, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[5] = {name = 'nn.SpatialMaxPooling', kW = 3, kH = 3, dW = 3, dH = 3, padW = 0, padH = 0}
spatial[6] = {name = 'nn.SpatialConvolution', nInputPlane = 64, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[8] = {name = 'nn.SpatialConvolution', nInputPlane = 128, nOutputPlane = 128, kW = 3, kH = 3, dW = 1, dH = 1, padW = 1, padH = 1}
spatial[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[10] = {name = 'nn.SpatialMaxPooling', kW = 3, kH = 3, dW = 3, dH = 3, padW = 0, padH = 0}
-- BUG FIX: was 'bachMode' (same nn.Reshape option typo as in the large
-- variation above; 'batchMode' is the spelling nn.Reshape accepts).
spatial[11] = {name = 'nn.Reshape', size = 512, batchMode = true}
spatial[12] = {name = 'nn.Linear', inputSize = 512, outputSize = 256}
spatial[13] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
spatial[14] = {name = 'nn.Linear', inputSize = 256, outputSize = 256}
spatial[15] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}

-- Temporal stack of the small variation: three conv/conv/pool groups at
-- frame size 256 (pooling by 3), then reshape + classifier.
local temporal = {}
temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1}
temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3}
temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true}
temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024}
temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true}
temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true}
temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2}
temporal[21] = {name = 'nn.LogSoftMax'}
config.variation['small'] = {spatial = spatial, temporal = temporal, length = 486}

-- Trainer settings
config.train = {}
config.train.momentum = 0.9
config.train.decay = 1e-5
-- These are just multipliers to config.driver.rate
-- For every config.driver.schedule * config.driver.steps
config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024}

-- Tester settings
config.test = {}

-- Visualizer settings
config.visualizer = {}
config.visualizer.width = 1200
config.visualizer.scale = 4
config.visualizer.height = 64

-- Driver configurations
config.driver = {}
config.driver.type = 'torch.CudaTensor'
config.driver.device = 1
config.driver.loss = 'nn.ClassNLLCriterion'
config.driver.variation = 'large'
config.driver.steps = 100000
-- sic: key is spelled 'epoches' and is read by that name elsewhere; keep it.
config.driver.epoches = 100
config.driver.schedule = 8
config.driver.rate = 1e-5
config.driver.interval = 5
config.driver.location = 'models/dianping/spatial8temporal12length512feature256'
config.driver.plot = true
config.driver.visualize = true
config.driver.debug = false
config.driver.resume = false

-- Main configuration
config.joe = {}

return config
================================================
FILE: glyphnet/data.lua
================================================
--[[
Data program for GlyphNet
Copyright 2015-2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')

local Data = class()

-- Constructor for Data
-- config: configuration table
--   .file: the data file location
--   .unifont: the unifont data location
--   .length: the text length in the data
--   .batch: the batch size
function Data:_init(config)
   self.data = torch.load(config.file)
   self.unifont = torch.load(config.unifont or 'unifont/unifont-8.0.01.t7b')
   self.length = config.length or 512
   self.batch = config.batch or 16
end

-- Number of classes, taken from the length of the code table.
function Data:getClasses()
   return #self.data.code
end

-- Fill one randomly-sampled batch. Class and item are drawn uniformly;
-- the optional sample/label tensors are reused when given.
function Data:getBatch(sample, label)
   local code, code_value = self.data.code, self.data.code_value
   local sample, label = self:initSample(sample, label)
   -- Loop over batch dimension
   for i = 1, sample:size(1) do
      local class = torch.random(#code)
      local item = torch.random(code[class]:size(1))
      -- Assign sample
      self:index(sample[i], class, item)
      -- Assign label
      label[i] = class
   end
   return sample, label
end

-- Sequential iterator over the whole dataset, class by class. Each call of
-- the returned closure yields (sample, label, count) where count <= batch is
-- the number of valid rows in the batch; yields nil when exhausted.
function Data:iterator(sample, label)
   local code, code_value = self.data.code, self.data.code_value
   local sample, label = self:initSample(sample, label)
   local class = 1
   local item = 1
   local count = 0
   return function ()
      if code[class] == nil then return end
      sample, label = self:initSample(sample, label)
      count = 0
      for i = 1, sample:size(1) do
         if item > code[class]:size(1) then
            -- Current class exhausted; move to the next one.
            class = class + 1
            item = 1
            if code[class] == nil then
               -- No more classes: return the partial batch if non-empty.
               if count > 0 then break else return end
            end
         end
         self:index(sample[i], class, item)
         label[i] = class
         count = count + 1
         item = item + 1
      end
      return sample, label, count
   end
end

-- Allocate (or reuse) a zeroed sample tensor of shape
-- (batch, length, glyph height, glyph width) and a label tensor of size batch.
function Data:initSample(sample, label)
   local height, width = self.unifont:size(3), self.unifont:size(2)
   local sample = sample or torch.Tensor(self.batch, self.length, height, width)
   local label = label or torch.Tensor(self.batch)
   sample:zero()
   return sample, label
end

-- Render one text item into `sample` by copying unifont glyph rows.
-- Each field of code[class][item] is an (offset, length) pair into code_value;
-- copying stops when the sample length is filled.
function Data:index(sample, class, item)
   local code, code_value = self.data.code, self.data.code_value
   local position = 1
   for field = 1, code[class][item]:size(1) do
      -- Break if current position is larger than sample length
      if position > sample:size(1) then break end
      -- Determine the actual length
      local length = code[class][item][field][2]
      if position + length - 1 > sample:size(1) then
         length = sample:size(1) - position + 1
      end
      -- Copy the data over
      sample:narrow(1, position, length):index(
         self.unifont, 1,
         code_value:narrow(1, code[class][item][field][1], length))
      position = position + length
   end
   return sample
end

return Data
================================================
FILE: glyphnet/driver.lua
================================================
--[[
Driver for GlyphNet training
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local nn = require('nn')
local os = require('os')
local paths = require('paths')
local torch = require('torch')

local Data = require('data')
local Model = require('model')
local Train = require('train')
local Test = require('test')

local Driver = class()

-- Constructor for driver
-- options: configuration table for other classes
-- config: configuration table for driver
--   .type: tensor type to do computation
--   .device: device id for CUDA. Only valid for .type = 'torch.CudaTensor'
--   .loss: the loss class to be used
--   .variation: the variation of the model
--   .steps: number of steps for each epoch
--   .epoches: number of epoches
--   .rate: initial learning rate
--   .schedule: rate change schedule
--   .interval: print time interval
--   .location: save location
--   .plot: whether to plot the result
--   .visualize: whether to visualize the models
--   .debug: whether to do debugging
--   .resume: whether to do resumption
function Driver:_init(options, config)
   local config = config or {}
   self.type = config.type or 'torch.DoubleTensor'
   self.device = config.device or 1
   self.loss = config.loss or 'nn.ClassNLLCriterion'
   self.variation = config.variation or 'large'
   self.steps = config.steps or 100000
   self.epoches = config.epoches or 100
   self.rate = config.rate or 1e-3
   self.schedule = config.schedule or 8
   self.interval = config.interval or 5
   self.location = config.location or '.'
   self.plot = config.plot
   self.visualize = config.visualize
   self.debug = config.debug
   self.resume = config.resume
   self.options = options

   -- Convert the rate multipliers into a step-indexed rate table:
   -- multiplier i takes effect at step (i-1) * steps * schedule + 1.
   local rates = {}
   for i, v in pairs(self.options.train.rates) do
      rates[(i - 1) * self.steps * self.schedule + 1] = v * self.rate
      self.options.train.rates = rates
   end

   -- CUDA settings
   if self.type == 'torch.CudaTensor' then
      local cutorch = require('cutorch')
      print('Driver setting device to '..self.device)
      cutorch.setDevice(self.device)
   end

   -- Initialize random seed
   math.randomseed(os.time())
   torch.manualSeed(os.time())

   -- Handle model variation
   self:initVariation()

   -- Load data
   print('Driver loading training data')
   self.train_data = Data(self.options.train_data)
   print('Driver loading testing data')
   self.test_data = Data(self.options.test_data)

   -- Handle final output number of classes. Assuming last module is nn.Linear.
local num_class = self.train_data:getClasses() for i = #self.options.model.temporal, 1, -1 do if self.options.model.temporal[i].name == 'nn.Linear' then print('Driver adjusting number of classes in model to '..num_class) self.options.model.temporal[i].outputSize = num_class break end end -- Handle resumption if self.resume then local record_file = paths.concat(self.location, 'record.t7b') print('Driver loading resumption from '..record_file) self.record = torch.load(record_file) local model_file = paths.concat( self.location, 'model_'..#self.record..'.t7b') print('Driver loading model from '..model_file) self.options.model.file = model_file self.model = Model(self.options.model) local state_file = paths.concat( self.location, 'state_'..#self.record..'.t7b') print('Driver loading training state from '..state_file) self.options.train.state = torch.load(state_file) print('Driver setting train step to '..(#self.record * self.steps)) self.options.train.step = #self.record * self.steps for i = 1, #self.record do self:printResult(i) end if self.plot then self:plotRecord() end else self.record = {} print('Driver loading model') self.model = Model(self.options.model) end print('Driver setting model type to '..self.type) self.model:type(self.type) print('Driver loading trainer') self.trainer_loss = nn[self.loss:sub(4)]() self.trainer_loss:type(self.type) self.trainer = Train( self.train_data, self.model, self.trainer_loss, self.options.train) print('Driver loading tester for training data') self.train_tester_loss = nn[self.loss:sub(4)]() self.train_tester_loss:type(self.type) self.train_tester = Test( self.train_data, self.model, self.train_tester_loss, self.options.test) print('Driver loading tester for testing data') self.test_tester_loss = nn[self.loss:sub(4)]() self.test_tester_loss:type(self.type) self.test_tester = Test( self.test_data, self.model, self.test_tester_loss, self.options.test) if self.visualize then self:visualizeModel() end self.time = os.time() end -- 
Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.spatial = self.options.variation[self.variation].spatial self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length end -- Run the training process function Driver:run() local begin_epoch = #self.record + 1 local end_epoch = #self.record + self.epoches for i = begin_epoch, end_epoch do print('Driver setting model to training mode') self.model:setModeTrain() print('Driver training for epoch '..i) self.trainer:run( self.steps, function(train, step) self:logTrain(train, step) end) if self.visualize then self:visualizeModel() end print('Driver setting model to testing mode') self.model:setModeTest() print('Driver testing on training data for epoch '..i) self.train_tester:run(function(test, step) self:logTest(test, step) end) print('Driver testing on testing data for epoch '..i) self.test_tester:run(function(test, step) self:logTest(test, step) end) print('Driver saving for epoch '..i) self:save() self:printResult() if self.plot then self:plotRecord() end end end -- Save the record and the model function Driver:save() local epoch = epoch or #self.record + 1 -- Make a backup for the record print('Driver backing up record.t7b') local record_file = paths.concat(self.location, 'record.t7b') os.rename(record_file, record_file..'.backup') -- Save the new record print('Driver saving new records to '..record_file) self.record[epoch] = { train_loss = self.train_tester.total_objective, test_loss = self.test_tester.total_objective, train_error = self.train_tester.total_error, test_error = self.test_tester.total_error } torch.save(record_file, self.record) -- Save the model local model_file = 
paths.concat(self.location, 'model_'..epoch..'.t7b') print('Driver saving model to '..model_file) self.model:save(model_file) -- Save the training state local state_file = paths.concat(self.location, 'state_'..epoch..'.t7b') print('Driver saving training state to '..state_file) torch.save(state_file, self.trainer.state:type(torch.getdefaulttensortype())) end -- Print current result function Driver:printResult(epoch) local epoch = epoch or #self.record print('Driver epoch = '..epoch.. ', train_error = '..self.record[epoch].train_error.. ', test_error = '..self.record[epoch].test_error.. ', train_loss = '..self.record[epoch].train_loss.. ', test_loss = '..self.record[epoch].test_loss) end -- Plot the record function Driver:plotRecord() require('gnuplot') self.error_figure = self.error_figure or gnuplot.figure() self.loss_figure = self.loss_figure or gnuplot.figure() local epoch = torch.linspace(1, #self.record, #self.record) local train_error = torch.Tensor(epoch:size()) local test_error = torch.Tensor(epoch:size()) local train_loss = torch.Tensor(epoch:size()) local test_loss = torch.Tensor(epoch:size()) for i = 1, #self.record do train_error[i] = self.record[i].train_error test_error[i] = self.record[i].test_error train_loss[i] = self.record[i].train_loss test_loss[i] = self.record[i].test_loss end gnuplot.figure(self.error_figure) gnuplot.plot({'Training error', epoch, train_error}, {'Testing error', epoch, test_error}) gnuplot.title('Training and testing error') gnuplot.figure(self.loss_figure) gnuplot.plot({'Training loss', epoch, train_loss}, {'Testing loss', epoch, test_loss}) gnuplot.title('Training and testing loss') end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Spatial model' self.spatial_visualizer = self.spatial_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or 
Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.spatial_visualizer:drawSequential(self.model.spatial) self.temporal_visualizer:drawSequential(self.model.temporal) end -- Log training function Driver:logTrain(train, step) -- If it is not time to log, return if os.difftime(os.time(), self.time) < self.interval then return end local message = 'Train step = '..train.step.. ', rate = '..string.format('%.2e', train.rate).. ', error = '..string.format('%.2e', train.error).. ', loss = '..string.format('%.2e', train.objective).. ', data = '..string.format('%.2e', train.time.data).. ', forward = '..string.format('%.2e', train.time.forward).. ', backward = '..string.format('%.2e', train.time.backward).. ', update = '..string.format('%.2e', train.time.update) if self.debug then message = message.. ', input = ['..string.format("%.2e",train.input:min()).. ' '..string.format("%.2e",train.input:max()).. ' '..string.format("%.2e",train.input:mean()).. ' '..string.format("%.2e",train.input:std())..']'.. ', params = ['..string.format("%.2e",train.params:min()).. ' '..string.format("%.2e",train.params:max()).. ' '..string.format("%.2e",train.params:mean()).. ' '..string.format("%.2e",train.params:std())..']'.. ', grads = ['..string.format("%.2e",train.grads:min()).. ' '..string.format("%.2e",train.grads:max()).. ' '..string.format("%.2e",train.grads:mean()).. ' '..string.format("%.2e",train.grads:std())..']'.. ', state = ['..string.format("%.2e",train.state:min()).. ' '..string.format("%.2e",train.state:max()).. ' '..string.format("%.2e",train.state:mean()).. ' '..string.format("%.2e",train.state:std())..']' if self.visualize then self:visualizeModel() end end print(message) self.time = os.time() end -- Log testing function Driver:logTest(test) -- If it not time to log, return if os.difftime(os.time(), self.time) < self.interval then return end local message = 'Test count = '..test.total_count.. ', error = '..string.format('%.2e', test.error).. 
', loss = '..string.format('%.2e', test.objective).. ', total_error = '..string.format('%.2e', test.total_error).. ', total_loss = '..string.format('%.2e', test.total_objective).. ', data = '..string.format('%.2e', test.time.data).. ', forward = '..string.format('%.2e', test.time.forward).. ', update = '..string.format('%.2e', test.time.update) if self.debug then message = message.. ', input = ['..string.format("%.2e",test.input:min()).. ' '..string.format("%.2e",test.input:max()).. ' '..string.format("%.2e",test.input:mean()).. ' '..string.format("%.2e",test.input:std())..']' end print(message) self.time = os.time() end return Driver ================================================ FILE: glyphnet/main.lua ================================================ --[[ Main program for GlyphNet training Copyright 2015 Xiang Zhang --]] local torch = require('torch') local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main(arg) -- Load the configuration local config = dofile('config.lua') -- Build parameter table based on configuration local params = joe.buildArgumentTable(config) -- Parse arguments based on configuration config = joe.parseArguments(arg, params, config) -- Build the driver local driver = Driver(config, config.driver) -- Start the driver driver:run() end function joe.buildArgumentTable(config, params, prefix) local params = params or {} local prefix = prefix or '' for key, val in pairs(config) do if type(key) == 'string' then local val_type = type(val) if val_type == 'string' or val_type == 'number' then params[prefix..key] = val elseif val_type == 'boolean' then params[prefix..key] = tostring(val) elseif val_type == 'table' then params = joe.buildArgumentTable(val, params, prefix..key..'_') else print('Joe argument '..prefix..key..' type unsupported') end else print('Joe argument key '..prefix..tostring(key)..' 
not a string') end end return params end function joe.parseArguments(arg, params, config) local cmd = torch.CmdLine() for key, val in pairs(params) do cmd:option('-'..key, val) end local parsed = cmd:parse(arg) return joe.parseArgumentTable(config, parsed) end function joe.parseArgumentTable(config, params, prefix) local prefix = prefix or '' for key, val in pairs(config) do if type(key) == 'string' then local val_type = type(val) if val_type == 'string' or val_type == 'number' then config[key] = params[prefix..key] or val elseif val_type == 'boolean' then if params[prefix..key] == 'true' then config[key] = true elseif params[prefix..key] == 'false' then config[key] = false else error('Argument '..prefix..key..' must be true or false') end elseif val_type == 'table' then config[key] = joe.parseArgumentTable(val, params, prefix..key..'_') end end end return config end -- Call the main program joe.main(arg) ================================================ FILE: glyphnet/model.lua ================================================ --[[ Model for GlyphNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local cudnn local nn = require('nn') local torch = require('torch') local Modules = require('modules') local Model = class() -- Model constructor -- config: configuration table -- .spatial: configuration table of the spatial network -- .temporal: configuration table of the temporal network -- .file: (optional) the model file -- .cudnn: (optional) whether to use NVidia cudnn -- .group: (optional) number of spatial network groups function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.spatial = self:makeCleanSequential(model.spatial) self.temporal = self:makeCleanSequential(model.temporal) else self.spatial = self:createCleanSequential(config.spatial) self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.spatial) self:initSequential(self.temporal) end -- Saving 
configurations self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() -- Initialize intermediate values self.feature = torch.Tensor() self.feature_cache = torch.Tensor() self.grad_feature = torch.Tensor() self.grad_input = torch.Tensor() -- Initialize groups self:initGroup(config.group) end function Model:initGroup(group) local group = group or 1 -- Clean current network group if self.group then self.group = nil collectgarbage() end -- Create new group self.group = {} for i = 1, group do self.group[i] = self.spatial:clone( 'weight', 'bias', 'gradWeight', 'gradBias') end end function Model:forward(input) -- Do forward propagation for spatial model group local input_group = input:view( #self.group, -1, 1, input:size(3), input:size(4)) local feature = self.group[1]:forward(input_group[1]) self.feature_cache:resize(#self.group, feature:size(1), feature:size(2)) self.feature_cache[1]:copy(feature) for i = 2, #self.group do local feature = self.group[i]:forward(input_group[i]) self.feature_cache[i]:copy(feature) end -- Do forward propagation for temporal model self.feature:resize( input:size(1), self.feature_cache:size(3), input:size(2)):copy( self.feature_cache:view( input:size(1), input:size(2), self.feature_cache:size(3)):transpose( 2, 3)) self.output = self.temporal:forward(self.feature) return self.output end function Model:backward(input, grad_output) -- Do backward propagation for temporal model local grad_feature = self.temporal:backward(self.feature, grad_output) self.grad_feature:resizeAs(self.feature_cache):view( input:size(1), input:size(2), self.feature_cache:size(3)):copy( grad_feature:transpose(2, 3)):div(input:size(2)) -- Do backward propagation for spatial model group local input_group = input:view( #self.group, -1, 1, input:size(3), input:size(4)) self.grad_input:resizeAs(input) local grad_input_group = self.grad_input:view( #self.group, -1, 1, input:size(3), input:size(4)) for i = 1, #self.group do local 
grad_input = self.group[i]:backward( input_group[i], self.grad_feature[i]) grad_input_group[i]:copy(grad_input) end return self.grad_input end function Model:getParameters() local parameters, gradients = nn.Module.getParameters(self) self:initGroup(#self.group) return parameters, gradients end function Model:parameters() local parameters, gradients = {}, {} local spatial_parameters, spatial_gradients = self.spatial:parameters() for i = 1, #spatial_parameters do parameters[#parameters + 1] = spatial_parameters[i] gradients[#gradients + 1] = spatial_gradients[i] end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = temporal_parameters[i] gradients[#gradients + 1] = temporal_gradients[i] end return parameters, gradients end function Model:type(tensortype) if tensortype ~= nil and tensortype ~= self.tensortype then if tensortype == 'torch.CudaTensor' then require('cunn') self.spatial = self:makeCudaSequential(self.spatial) self.temporal = self:makeCudaSequential(self.temporal) else self.spatial = self:makeCleanSequential(self.spatial) self.temporal = self:makeCleanSequential(self.temporal) end self.spatial:type(tensortype) self.temporal:type(tensortype) self.feature = self.feature:type(tensortype) self.feature_cache = self.feature_cache:type(tensortype) self.grad_feature = self.grad_feature:type(tensortype) self.grad_input = self.grad_input:type(tensortype) self.tensortype = tensortype self:initGroup(#self.group) end return self.tensortype end function Model:cuda() return self:type('torch.CudaTensor') end function Model:double() return self:type('torch.DoubleTensor') end function Model:float() return self:type('torch.FloatTensor') end function Model:setMode(mode) self:setModeSequential(self.temporal, mode) self:setModeSequential(self.spatial, mode) for i = 1, #self.group do self:setModeSequential(self.group[i], mode) end end function Model:setModeTrain() self:setMode('train') end 
function Model:setModeTest()
   self:setMode('test')
end

-- Save a cleaned (intermediate buffers stripped) copy of both sub-networks.
function Model:save(file)
   local spatial = self:clearSequential(
      self:makeCleanSequential(self.spatial))
   local temporal = self:clearSequential(
      self:makeCleanSequential(self.temporal))
   torch.save(file, {spatial = spatial, temporal = temporal})
end

-- Clear sequential model: recursively replace every tensor field except
-- 'weight' and 'bias' with an empty tensor so the saved file stays small.
function Model:clearSequential(sequential)
   local function recursiveClear(key, param)
      local param = param
      if torch.type(param) == 'table' then
         for k, v in pairs(param) do
            param[k] = recursiveClear(k, v)
         end
      elseif torch.isTensor(param) and key ~= 'weight' and key ~= 'bias' then
         param = param.new()
      end
      return param
   end
   for _, m in ipairs(sequential.modules) do
      for k, v in pairs(m) do
         m[k] = recursiveClear(k, v)
      end
   end
   return sequential
end

-- Initialize sequential using microsoft (He et al.) initialization,
-- dispatched per module type through Model.initModule.
function Model:initSequential(sequential)
   for _, m in ipairs(sequential.modules) do
      self.initModule[torch.type(m)](self, m)
   end
end

-- Setting the mode of sequential modules via the setModeModule dispatch table.
function Model:setModeSequential(sequential, mode)
   for _, m in ipairs(sequential.modules) do
      self.setModeModule[mode][torch.type(m)](self, m)
   end
end

-- Create a clean sequential from a configuration table of module specs.
function Model:createCleanSequential(config)
   local new = nn.Sequential()
   for _, m in ipairs(config) do
      new:add(self.createCleanModule[m.name](self, m))
   end
   return new
end

-- Make a clean sequential from an existing one (e.g. a cudnn-backed model).
function Model:makeCleanSequential(sequential)
   local new = nn.Sequential()
   for _, m in ipairs(sequential.modules) do
      new:add(self.makeCleanModule[torch.type(m)](self, m))
   end
   return new
end

-- Make a CUDA sequential, loading cudnn lazily when requested.
function Model:makeCudaSequential(sequential)
   if self.cudnn then cudnn = require('cudnn') end
   local new = nn.Sequential()
   for _, m in ipairs(sequential.modules) do
      new:add(self.makeCudaModule[torch.type(m)](self, m))
   end
   return new
end

-- Initialize modules (no-op for parameterless modules)
Model.initModule = {}
Model.initModule['nn.LogSoftMax'] = function (self, m) end
Model.initModule['nn.Threshold'] = function (self, m) end
Model.initModule['nn.Reshape'] = function (self, m) end
Model.initModule['nn.Dropout'] = function (self, m) end
-- NOTE(review): these Gaussian fans use weight:size(1) (the output dimension
-- for nn.Linear / nn.SpatialConvolution) -- i.e. the fan-out variant of He
-- initialization; confirm this matches the intended "microsoft" scheme.
Model.initModule['nn.Linear'] = function (self, m)
   m.bias:zero()
   m.weight:normal(0, math.sqrt(2 / m.weight:size(1)))
end
Model.initModule['nn.SpatialConvolution'] = function (self, m)
   m.bias:zero()
   m.weight:normal(
      0, math.sqrt(2 / m.weight:size(1) / m.weight:size(3) / m.weight:size(4)))
end
Model.initModule['nn.SpatialMaxPooling'] = function (self, m) end
Model.initModule['nn.TemporalConvolutionMM'] = function (self, m)
   m.bias:zero()
   m.weight:normal(0, math.sqrt(2 / m.weight:size(1) / m.weight:size(3)))
end
Model.initModule['nn.TemporalMaxPoolingMM'] = function (self, m) end

-- Set module mode to train; only nn.Dropout actually changes behavior.
Model.setModeModule = {}
Model.setModeModule['train'] = {}
Model.setModeModule['train']['nn.LogSoftMax'] = function (self, m) end
Model.setModeModule['train']['cudnn.LogSoftMax'] =
   Model.setModeModule['train']['nn.LogSoftMax']
Model.setModeModule['train']['nn.Threshold'] = function (self, m) end
Model.setModeModule['train']['nn.Reshape'] = function (self, m) end
Model.setModeModule['train']['nn.Dropout'] = function (self, m)
   m.train = true
end
Model.setModeModule['train']['nn.Linear'] = function (self, m) end
Model.setModeModule['train']['nn.SpatialConvolution'] = function (self, m) end
Model.setModeModule['train']['cudnn.SpatialConvolution'] =
   Model.setModeModule['train']['nn.SpatialConvolution']
Model.setModeModule['train']['nn.SpatialMaxPooling'] = function (self, m) end
Model.setModeModule['train']['cudnn.SpatialMaxPooling'] =
   Model.setModeModule['train']['nn.SpatialMaxPooling']
Model.setModeModule['train']['nn.TemporalConvolutionMM'] =
   function (self, m) end
Model.setModeModule['train']['cudnn.TemporalConvolutionCudnn'] =
   function (self, m) end
Model.setModeModule['train']['nn.TemporalMaxPoolingMM'] =
   function (self, m) end
Model.setModeModule['train']['cudnn.TemporalMaxPoolingCudnn'] =
   Model.setModeModule['train']['nn.TemporalMaxPoolingMM']

-- Set module mode to test
Model.setModeModule['test'] = {}
Model.setModeModule['test']['nn.LogSoftMax'] = function (self, m) end
Model.setModeModule['test']['cudnn.LogSoftMax'] =
   Model.setModeModule['test']['nn.LogSoftMax']
Model.setModeModule['test']['nn.Threshold'] = function (self, m) end
Model.setModeModule['test']['nn.Reshape'] = function (self, m) end
Model.setModeModule['test']['nn.Dropout'] = function (self, m)
   m.train = false
end
Model.setModeModule['test']['nn.Linear'] = function (self, m) end
Model.setModeModule['test']['nn.SpatialConvolution'] = function (self, m) end
Model.setModeModule['test']['cudnn.SpatialConvolution'] =
   Model.setModeModule['test']['nn.SpatialConvolution']
Model.setModeModule['test']['nn.SpatialMaxPooling'] = function (self, m) end
Model.setModeModule['test']['cudnn.SpatialMaxPooling'] =
   Model.setModeModule['test']['nn.SpatialMaxPooling']
Model.setModeModule['test']['nn.TemporalConvolutionMM'] =
   function (self, m) end
Model.setModeModule['test']['cudnn.TemporalConvolutionCudnn'] =
   function (self, m) end
Model.setModeModule['test']['nn.TemporalMaxPoolingMM'] =
   function (self, m) end
Model.setModeModule['test']['cudnn.TemporalMaxPoolingCudnn'] =
   Model.setModeModule['test']['nn.TemporalMaxPoolingMM']

-- Create clean modules from configuration specs (fresh parameters)
Model.createCleanModule = {}
Model.createCleanModule['nn.LogSoftMax'] = function (self, m)
   return nn.LogSoftMax()
end
Model.createCleanModule['nn.Threshold'] = function (self, m)
   return nn.Threshold(m.th, m.v, m.ip)
end
Model.createCleanModule['nn.Reshape'] = function (self, m)
   return nn.Reshape(m.size, m.batchMode)
end
Model.createCleanModule['nn.Dropout'] = function (self, m)
   return nn.Dropout(m.p, not m.v2, m.inplace)
end
Model.createCleanModule['nn.Linear'] = function (self, m)
   return nn.Linear(m.inputSize, m.outputSize, m.bias)
end
Model.createCleanModule['nn.SpatialConvolution'] = function (self, m)
   return nn.SpatialConvolution(
      m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.createCleanModule['nn.SpatialMaxPooling'] = function (self, m)
   return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.createCleanModule['nn.TemporalConvolutionMM'] = function (self, m)
   return nn.TemporalConvolutionMM(
      m.inputFrameSize, m.outputFrameSize, m.kW, m.dW, m.padW)
end
Model.createCleanModule['nn.TemporalMaxPoolingMM'] = function (self, m)
   return nn.TemporalMaxPoolingMM(m.kW, m.dW)
end

-- Make clean modules: unlike createCleanModule, these receive a LIVE module
-- and return a plain-nn replica, copying 'weight' and 'bias' where the
-- module has them. cudnn types alias the corresponding nn constructor.
Model.makeCleanModule = {}
Model.makeCleanModule['nn.LogSoftMax'] = function (self, m)
   return nn.LogSoftMax()
end
Model.makeCleanModule['cudnn.LogSoftMax'] =
   Model.makeCleanModule['nn.LogSoftMax']
Model.makeCleanModule['nn.Threshold'] = function (self, m)
   -- Reads the live module's fields (threshold/val/inplace), not the
   -- config-style keys used by createCleanModule.
   return nn.Threshold(m.threshold, m.val, m.inplace)
end
Model.makeCleanModule['nn.Reshape'] = function (self, m)
   return nn.Reshape(m.size, m.batchMode)
end
Model.makeCleanModule['nn.Dropout'] = function (self, m)
   return nn.Dropout(m.p, not m.v2, m.inplace)
end
Model.makeCleanModule['nn.Linear'] = function (self, m)
   -- weight is (outputSize x inputSize); reconstruct sizes from it.
   local new = nn.Linear(m.weight:size(2), m.weight:size(1), m.bias)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['nn.SpatialConvolution'] = function (self, m)
   local new = nn.SpatialConvolution(
      m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['cudnn.SpatialConvolution'] =
   Model.makeCleanModule['nn.SpatialConvolution']
Model.makeCleanModule['nn.SpatialMaxPooling'] = function (self, m)
   return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH)
end
Model.makeCleanModule['cudnn.SpatialMaxPooling'] =
   Model.makeCleanModule['nn.SpatialMaxPooling']
Model.makeCleanModule['nn.TemporalConvolutionMM'] = function (self, m)
   -- TemporalConvolutionMM stores its geometry as input_feature /
   -- output_feature / kernel / stride / pad (see its __init).
   local new = nn.TemporalConvolutionMM(
      m.input_feature, m.output_feature, m.kernel, m.stride, m.pad)
   new.weight:copy(m.weight)
   new.bias:copy(m.bias)
   return new
end
Model.makeCleanModule['cudnn.TemporalConvolutionCudnn'] = function (self, m) local new = nn.TemporalConvolutionMM( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCleanModule['nn.TemporalMaxPoolingMM'] = function (self, m) return nn.TemporalMaxPoolingMM(m.kW, m.dW) end Model.makeCleanModule['cudnn.TemporalMaxPoolingCudnn'] = Model.makeCleanModule['nn.TemporalMaxPoolingMM'] -- Make CUDA modules Model.makeCudaModule = {} Model.makeCudaModule['nn.LogSoftMax'] = function (self, m) if self.cudnn and cudnn.LogSoftMax then return cudnn.LogSoftMax() else return nn.LogSoftMax() end end Model.makeCudaModule['cudnn.LogSoftMax'] = Model.makeCudaModule['nn.LogSoftMax'] Model.makeCudaModule['nn.Threshold'] = function (self, m) return nn.Threshold(m.threshold, m.val, m.inplace) end Model.makeCudaModule['nn.Reshape'] = function (self, m) return nn.Reshape(m.size, m.batchMode) end Model.makeCudaModule['nn.Dropout'] = function (self, m) return nn.Dropout(m.p, not m.v2, m.inplace) end Model.makeCudaModule['nn.Linear'] = function (self, m) local new = nn.Linear(m.weight:size(2), m.weight:size(1), m.bias) new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['nn.SpatialConvolution'] = function (self, m) local new if self.cudnn then new = cudnn.SpatialConvolution( m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) else new = nn.SpatialConvolution( m.nInputPlane, m.nOutputPlane, m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['cudnn.SpatialConvolution'] = Model.makeCudaModule['nn.SpatialConvolution'] Model.makeCudaModule['nn.SpatialMaxPooling'] = function (self, m) if self.cudnn then return cudnn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) else return nn.SpatialMaxPooling(m.kW, m.kH, m.dW, m.dH, m.padW, m.padH) end end Model.makeCudaModule['cudnn.SpatialMaxPooling'] = 
Model.makeCudaModule['nn.SpatialMaxPooling'] Model.makeCudaModule['nn.TemporalConvolutionMM'] = function (self, m) local new if self.cudnn then new = cudnn.TemporalConvolutionCudnn( m.input_feature, m.output_feature, m.kernel, m.stride, m.pad) else new = nn.TemporalConvolutionMM( m.input_feature, m.output_feature, m.kernel, m.stride, m.pad) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['cudnn.TemporalConvolutionCudnn'] = function (self, m) local new if self.cudnn then new = cudnn.TemporalConvolutionCudnn( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) else new = nn.TemporalConvolutionMM( m.nInputPlane, m.nOutputPlane, m.kW, m.dW, m.padW) end new.weight:copy(m.weight) new.bias:copy(m.bias) return new end Model.makeCudaModule['nn.TemporalMaxPoolingMM'] = function (self, m) if self.cudnn then return cudnn.TemporalMaxPoolingCudnn(m.kW, m.dW) else return nn.TemporalMaxPoolingMM(m.kW, m.dW) end end Model.makeCudaModule['cudnn.TemporalMaxPoolingCudnn'] = Model.makeCudaModule['nn.TemporalMaxPoolingMM'] return Model ================================================ FILE: glyphnet/modules/TemporalConvolutionCudnn.lua ================================================ --[[ Temporal max pooling module .with data order consistent with MM Copyright 2016 Xiang Zhang --]] local TemporalConvolutionCudnn, parent = torch.class('cudnn.TemporalConvolutionCudnn', 'cudnn.SpatialConvolution') function TemporalConvolutionCudnn:__init( input_feature, output_feature, kW, dW, padW) parent.__init(self, input_feature, output_feature, kW, 1, dW, 1, padW, 0) end function TemporalConvolutionCudnn:updateOutput(input) local input_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) end local output = parent.updateOutput(self, input_view) if input:dim() ~= output:dim() then if input:dim() == 2 then self.output = output:view(output:size(1), 
output:size(3)) else self.output = output:view(output:size(1), output:size(2), output:size(4)) end end return self.output end function TemporalConvolutionCudnn:updateGradInput(input, grad_output) local input_view local grad_output_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) grad_output_view = grad_output:view( grad_output:size(1), 1, grad_output:size(2)) self.output = self.output:view( self.output:size(1), 1, self.output:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) grad_output_view = grad_output:view( grad_output:size(1), grad_output:size(2), 1, grad_output:size(3)) self.output = self.output:view( self.output:size(1), self.output:size(2), 1, self.output:size(3)) end local grad_input = parent.updateGradInput(self, input_view, grad_output_view) if self.gradInput:dim() ~= input:dim() then if input:dim() == 2 then self.output = self.output:view( self.output:size(1), self.output:size(3)) self.gradInput = grad_input:view(grad_input:size(1), grad_input:size(3)) else self.output = self.output:view( self.output:size(1), self.output:size(2), self.output:size(4)) self.gradInput = grad_input:view( grad_input:size(1), grad_input:size(2), grad_input:size(4)) end end return self.gradInput end function TemporalConvolutionCudnn:accGradParameters(input, grad_output, scale) local input_view local grad_output_view if input:dim() == 2 then input_view = input:view(input:size(1), 1, input:size(2)) grad_output_view = grad_output:view( grad_output:size(1), 1, grad_output:size(2)) else input_view = input:view(input:size(1), input:size(2), 1, input:size(3)) grad_output_view = grad_output:view( grad_output:size(1), grad_output:size(2), 1, grad_output:size(3)) end parent.accGradParameters(self, input_view, grad_output_view, scale) end function TemporalConvolutionCudnn:__tostring__() return string.format( '%s(%d -> %d, %d, %d, %d)', torch.type(self), self.nInputPlane, self.nOutputPlane, self.kW, self.dW, 
self.padW) end ================================================ FILE: glyphnet/modules/TemporalConvolutionMM.lua ================================================ --[[ Temporal convolution module that supports padding Copyright 2016 Xiang Zhang --]] local TemporalConvolutionMM, parent = torch.class('nn.TemporalConvolutionMM', 'nn.Module') function TemporalConvolutionMM:__init( input_feature, output_feature, kernel, stride, pad) parent.__init(self) self.input_feature = input_feature self.output_feature = output_feature self.kernel = kernel self.stride = stride or 1 self.pad = pad or 0 self.weight = torch.Tensor(output_feature, input_feature, kernel) self.bias = torch.Tensor(output_feature) self.gradWeight = torch.Tensor(output_feature, input_feature, kernel) self.gradBias = torch.Tensor(output_feature) self.pad_cache = torch.Tensor() self.unfold_cache = torch.Tensor() self.interlace_cache = torch.Tensor() self.weight_cache = torch.Tensor( self.weight:size(2), self.weight:size(1), self.weight:size(3)) self.reverse_index = torch.LongTensor(self.kernel) for i = 1, self.kernel do self.reverse_index[i] = self.kernel - i + 1 end self:reset() end function TemporalConvolutionMM:reset(stdv) if stdv then stdv = stdv * math.sqrt(3) else stdv = 1/math.sqrt(self.kernel * self.input_feature) end self.weight:uniform(-stdv, stdv) self.bias:uniform(-stdv, stdv) end function TemporalConvolutionMM:updateOutput(input) if input:dim() ~= 2 and input:dim() ~= 3 then error('Input dimension must be 2 or 3') end -- Create temporary input cache that is to be unfolded if input:dim() == 2 then self.pad_cache:resize( input:size(1), input:size(2) + 2 * self.pad):zero():narrow( 2, self.pad + 1, input:size(2)):copy(input) else self.pad_cache:resize( input:size(1), input:size(2), input:size(3) + 2 * self.pad):zero():narrow( 3, self.pad + 1, input:size(3)):copy(input) end -- Unfold the input cache local unfolded = self.pad_cache:unfold( self.pad_cache:dim(), self.kernel, self.stride):transpose( 
self.pad_cache:dim(), self.pad_cache:dim() + 1) self.unfold_cache:resizeAs(unfolded):copy(unfolded) -- Do matrix multiplication if input:dim() == 2 then self.output:resize( self.output_feature, self.unfold_cache:size(3)):copy( self.bias:view(-1, 1):expandAs(self.output)) self.output:addmm( 1, self.output, 1, self.weight:view(self.weight:size(1), -1), self.unfold_cache:view(-1, self.unfold_cache:size(3))) else self.output:resize( self.unfold_cache:size(1), self.output_feature, self.unfold_cache:size(4)):copy( self.bias:view(1, -1, 1):expandAs(self.output)) local weight = self.weight:view( 1, self.weight:size(1), self.weight:size(2) * self.weight:size(3)):expand( self.unfold_cache:size(1), self.weight:size(1), self.weight:size(2) * self.weight:size(3)) self.output:baddbmm( 1, self.output, 1, weight, self.unfold_cache:view( self.unfold_cache:size(1), -1, self.unfold_cache:size(4))) end return self.output end function TemporalConvolutionMM:updateGradInput(input, grad_output) -- Reverse the weight on the kernel dimension self.weight_cache:indexCopy( 3, self.reverse_index, self.weight:transpose(1, 2)) -- Resize the initialize the interlace cache if input:dim() == 2 then self.interlace_cache:resize( grad_output:size(1), self.stride * (grad_output:size(2) - 1) + 1):zero() self.interlace_cache:narrow( 2, 1, self.interlace_cache:size(2) - 1):unfold( 2, self.stride, self.stride):select(3, 1):copy( grad_output:narrow(2, 1, grad_output:size(2) - 1)) self.interlace_cache:select(2, self.interlace_cache:size(2)):copy( grad_output:select(2, grad_output:size(2))) else self.interlace_cache:resize( grad_output:size(1), grad_output:size(2), self.stride * (grad_output:size(3) - 1) + 1):zero() self.interlace_cache:narrow( 3, 1, self.interlace_cache:size(3) - 1):unfold( 3, self.stride, self.stride):select(4, 1):copy( grad_output:narrow(3, 1, grad_output:size(3) - 1)) self.interlace_cache:select(3, self.interlace_cache:size(3)):copy( grad_output:select(3, grad_output:size(3))) end -- 
Resize and initialize the padded cache if input:dim() == 2 then self.pad_cache:resize( grad_output:size(1), input:size(2) + self.kernel - 1) local length = math.min( self.pad_cache:size(2), self.interlace_cache:size(2)) self.pad_cache:zero():narrow( 2, (self.pad_cache:size(2) - length) / 2 + 1, length):copy( self.interlace_cache:narrow( 2, (self.interlace_cache:size(2) - length) / 2 + 1, length)) else self.pad_cache:resize( grad_output:size(1), grad_output:size(2), input:size(3) + self.kernel - 1) local length = math.min( self.pad_cache:size(3), self.interlace_cache:size(3)) self.pad_cache:zero():narrow( 3, (self.pad_cache:size(3) - length) / 2 + 1, length):copy( self.interlace_cache:narrow( 3, (self.interlace_cache:size(3) - length) / 2 + 1, length)) end -- Unfold the output cache local unfolded = self.pad_cache:unfold( self.pad_cache:dim(), self.kernel, 1):transpose( self.pad_cache:dim(), self.pad_cache:dim() + 1) self.unfold_cache:resizeAs(unfolded):copy(unfolded) -- Do matrix multiplication self.gradInput:resizeAs(input):zero() if input:dim() == 2 then self.gradInput:addmm( 1, self.gradInput, 1, self.weight_cache:view(self.weight:size(2), -1), self.unfold_cache:view(-1, self.unfold_cache:size(3))) else local weight = self.weight_cache:view( 1, self.weight:size(2), self.weight:size(1) * self.weight:size(3)):expand( unfolded:size(1), self.weight:size(2), self.weight:size(1) * self.weight:size(3)) self.gradInput:baddbmm( 1, self.gradInput, 1, weight, self.unfold_cache:view( self.unfold_cache:size(1), -1, self.unfold_cache:size(4))) end return self.gradInput end function TemporalConvolutionMM:accGradParameters(input, grad_output, scale) local scale = scale or 1 -- Create temporary input cache that is to be unfolded if input:dim() == 2 then self.pad_cache:resize( input:size(1), input:size(2) + 2 * self.pad):zero():narrow( 2, self.pad + 1, input:size(2)):copy(input) else self.pad_cache:resize( input:size(1), input:size(2), input:size(3) + 2 * self.pad):zero():narrow( 
3, self.pad + 1, input:size(3)):copy(input)
   end
   -- Unfold the padded input cache. Note the transpose swaps the last two
   -- dimensions (frame vs. kernel) -- a different layout than updateOutput,
   -- as required by the gradient accumulation matrix product below.
   local unfolded = self.pad_cache:unfold(
      self.pad_cache:dim(), self.kernel, self.stride):transpose(
      self.pad_cache:dim() - 1, self.pad_cache:dim())
   self.unfold_cache:resizeAs(unfolded):copy(unfolded)
   -- Accumulate weight/bias gradients via matrix multiplication.
   local grad_weight = self.gradWeight:view(self.weight:size(1), -1)
   if input:dim() == 2 then
      grad_weight:addmm(
         1, grad_weight, scale, grad_output,
         self.unfold_cache:view(unfolded:size(1), -1))
      self.gradBias:add(scale, grad_output:sum(2))
   else
      -- Batched case: prefer the fused addbmm when the tensor type
      -- provides it; otherwise fall back to a per-sample addmm loop.
      if grad_weight.addbmm then
         grad_weight:addbmm(
            1, grad_weight, scale, grad_output,
            self.unfold_cache:view(
               self.unfold_cache:size(1), self.unfold_cache:size(2), -1))
      else
         for i = 1, grad_output:size(1) do
            grad_weight:addmm(
               1, grad_weight, scale, grad_output:select(1, i),
               self.unfold_cache:select(1, i):view(
                  self.unfold_cache:size(2), -1))
         end
      end
      self.gradBias:add(scale, grad_output:sum(3):sum(1))
   end
end

TemporalConvolutionMM.sharedAccUpdateGradParameters =
   TemporalConvolutionMM.accUpdateGradParameters

function TemporalConvolutionMM:__tostring__()
   return string.format(
      '%s(%d -> %d, %d, %d, %d)', torch.type(self), self.input_feature,
      self.output_feature, self.kernel, self.stride, self.pad)
end

================================================ FILE: glyphnet/modules/TemporalMaxPoolingCudnn.lua ================================================

--[[
Temporal max pooling module with data order consistent with MM
Copyright 2016 Xiang Zhang
--]]

local TemporalMaxPoolingCudnn, parent =
   torch.class('cudnn.TemporalMaxPoolingCudnn', 'cudnn.SpatialMaxPooling')

-- Temporal pooling implemented as spatial pooling with a singleton height.
function TemporalMaxPoolingCudnn:__init(kW, dW, padW)
   parent.__init(self, kW, 1, dW, 1, padW, 0)
end

-- Forward: insert a singleton dimension, run the spatial parent, then view
-- the result back to the temporal layout when the parent changed the rank.
function TemporalMaxPoolingCudnn:updateOutput(input)
   local input_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
   end
   local output = parent.updateOutput(self, input_view)
   if self.output:dim() ~= input:dim() then
      if input:dim() == 2 then
         self.output = output:view(output:size(1), output:size(3))
      else
         self.output = output:view(
            output:size(1), output:size(2), output:size(4))
      end
   end
   return self.output
end

-- Backward: temporarily re-expand self.output (and the gradients) to the
-- spatial layout the parent expects, then view everything back.
function TemporalMaxPoolingCudnn:updateGradInput(input, grad_output)
   local input_view
   local grad_output_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
      grad_output_view = grad_output:view(
         grad_output:size(1), 1, grad_output:size(2))
      self.output = self.output:view(
         self.output:size(1), 1, self.output:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
      grad_output_view = grad_output:view(
         grad_output:size(1), grad_output:size(2), 1, grad_output:size(3))
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), 1, self.output:size(3))
   end
   local grad_input = parent.updateGradInput(self, input_view, grad_output_view)
   if self.gradInput:dim() ~= input:dim() then
      if input:dim() == 2 then
         -- BUG FIX: original read 'self.utput:size(1)' (undefined field),
         -- which raised an error in the 2-dim backward path.
         self.output = self.output:view(
            self.output:size(1), self.output:size(3))
         self.gradInput = grad_input:view(
            grad_input:size(1), grad_input:size(3))
      else
         self.output = self.output:view(
            self.output:size(1), self.output:size(2), self.output:size(4))
         self.gradInput = grad_input:view(
            grad_input:size(1), grad_input:size(2), grad_input:size(4))
      end
   end
   return self.gradInput
end

function TemporalMaxPoolingCudnn:__tostring__()
   return string.format('%s(%d, %d)', torch.type(self), self.kW, self.dW)
end

================================================ FILE: glyphnet/modules/TemporalMaxPoolingMM.lua ================================================

--[[
Temporal max pooling module with data order consistent with MM
Copyright 2016 Xiang Zhang
--]]

local TemporalMaxPoolingMM, parent =
   torch.class('nn.TemporalMaxPoolingMM', 'nn.SpatialMaxPooling')

-- Temporal pooling implemented as spatial pooling with a singleton height.
function TemporalMaxPoolingMM:__init(kW, dW)
   parent.__init(self, kW, 1, dW, 1)
end

-- Forward: same singleton-dimension trick as the cudnn variant; here the
-- output is unconditionally viewed back to the temporal layout.
function TemporalMaxPoolingMM:updateOutput(input)
   local input_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
   end
   local output = parent.updateOutput(self, input_view)
   if input:dim() == 2 then
      self.output = output:view(output:size(1), output:size(3))
   else
      self.output = output:view(output:size(1), output:size(2), output:size(4))
   end
   return self.output
end

-- Backward: re-expand output/gradients for the spatial parent, then view back.
function TemporalMaxPoolingMM:updateGradInput(input, grad_output)
   local input_view
   local grad_output_view
   if input:dim() == 2 then
      input_view = input:view(input:size(1), 1, input:size(2))
      grad_output_view = grad_output:view(
         grad_output:size(1), 1, grad_output:size(2))
      self.output = self.output:view(
         self.output:size(1), 1, self.output:size(2))
   else
      input_view = input:view(input:size(1), input:size(2), 1, input:size(3))
      grad_output_view = grad_output:view(
         grad_output:size(1), grad_output:size(2), 1, grad_output:size(3))
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), 1, self.output:size(3))
   end
   local grad_input = parent.updateGradInput(self, input_view, grad_output_view)
   if input:dim() == 2 then
      -- BUG FIX: original read 'self.utput:size(1)' (undefined field),
      -- which raised an error in the 2-dim backward path.
      self.output = self.output:view(
         self.output:size(1), self.output:size(3))
      self.gradInput = grad_input:view(grad_input:size(1), grad_input:size(3))
   else
      self.output = self.output:view(
         self.output:size(1), self.output:size(2), self.output:size(4))
      self.gradInput = grad_input:view(
         grad_input:size(1), grad_input:size(2), grad_input:size(4))
   end
   return self.gradInput
end

function TemporalMaxPoolingMM:__tostring__()
   return string.format('%s(%d, %d)', torch.type(self), self.kW, self.dW)
end

================================================ FILE: glyphnet/modules.lua ================================================

--[[
Additional modules for GlyphNet
Copyright 2016 Xiang Zhang
--]]

-- cudnn is optional; 'status' records whether it loaded.
local status, cudnn = pcall(require, 'cudnn')
local nn = require('nn')

-- NOTE(review): dofile paths are relative to the working directory --
-- presumably scripts are run from the glyphnet/ directory; confirm.

-- nn.TemporalConvolutionMM
if not nn.TemporalConvolutionMM then
   dofile('modules/TemporalConvolutionMM.lua')
end

-- nn.TemporalMaxPoolingMM
if not nn.TemporalMaxPoolingMM then
   dofile('modules/TemporalMaxPoolingMM.lua')
end

-- cudnn.TemporalConvolutionCudnn
-- BUG FIX: the original guard tested 'not cudnn.TemporalMaxPoolingCudnn',
-- so the convolution module was never loaded once the pooling module
-- existed; it must test for the convolution class itself.
if status == true and not cudnn.TemporalConvolutionCudnn then
   dofile('modules/TemporalConvolutionCudnn.lua')
end

-- cudnn.TemporalMaxPoolingCudnn
if status == true and not cudnn.TemporalMaxPoolingCudnn then
   dofile('modules/TemporalMaxPoolingCudnn.lua')
end

return nn

================================================ FILE: glyphnet/scroll.lua ================================================

--[[
The scrollable UI
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')

local Scroll = class()

-- Initialize a scroll interface
-- width: (optional) the pixel width of the scollable area. Default is 600.
-- title: (optional) title for the window
function Scroll:_init(width,title)
   require('qtuiloader')
   require('qtwidget')
   require('qttorch')
   self.file = 'scroll.ui'
   self.win = qtuiloader.load(self.file)
   self.frame = self.win.frame
   self.painter = qt.QtLuaPainter(self.frame)
   self.width = width or 600
   self.height = 0
   self.fontSize = 15
   self.x = 0
   self.y = 0
   self.border = 1
   self:resize(self.width, self.height)
   self:setFontSize(self.fontSize)
   if title then self:setTitle(title) end
   self:show()
end

-- Resize the window to designated width and height
function Scroll:resize(width, height)
   self.width = width or self.width
   self.height = height or self.height
   self.frame.size = qt.QSize{width = self.width,height = self.height}
end

-- Set the font size used for subsequent text drawing.
-- NOTE(review): when size is nil the painter falls back to 15 but
-- self.fontSize is still set to nil -- confirm callers always pass a size.
function Scroll:setFontSize(size)
   self.painter:setfontsize(size or 15)
   self.fontSize = size
end

-- Set border width
function Scroll:setBorder(width)
   self.border = width
end

-- Draw text
function Scroll:drawText(text)
   -- Drawing text must happen on a new line
   if self.x ~= 0 then
      self.x = 0
      self.y = self.height
   end
   -- Determine height and resize if necessary
   if self.height < self.y+self.fontSize+1 then
self:resize(self.width,self.y+self.fontSize+1+self.border) end -- Draw the yellow main text self.painter:gbegin() self.painter:moveto(self.x,self.y+self.fontSize-1) self.painter:setcolor(1,1,0,1) self.painter:show(text) self.painter:stroke() self.painter:gend() -- Draw the black shadow text self.painter:gbegin() self.painter:moveto(self.x,self.y+self.fontSize+1-1) self.painter:setcolor(0,0,0,1) self.painter:show(text) self.painter:stroke() self.painter:gend() -- Move the cursor to next line self.x = 0 if self.height < self.y+self.fontSize+1+self.border then self:resize(self.width,self.y+self.fontSize+1+self.border) end self.y = self.height end -- Draw image function Scroll:drawImage(im, scale) -- Get the image height and width local scale = scale or 1 local height, width if im:dim() == 2 then height = im:size(1) * scale width = im:size(2) * scale elseif im:dim() == 3 then height = im:size(2) * scale width = im:size(3) * scale else error("Image must be 2-dim or 3-dim data") end -- Determine whether a new line is needed if self.x ~= 0 and self.x + width > self.width then self.x = 0 self.y = self.height end -- Determine whether need to resize the document area if self.y + height > self.height then self:resize(self.width, self.y + height + self.border) end -- Draw the image self.painter:gbegin() self.painter:image(self.x, self.y, width, height, qt.QImage.fromTensor(im)) self.painter:stroke() self.painter:gend() -- Move the cursor self.x = self.x + width + self.border end -- Draw a new line function Scroll:drawEndOfLine() self.x = 0 self.y = self.height end -- Hint for heights function Scroll:hintImageHeight(im, scale) -- Get the image height and width local scale = scale or 1 local height, width if im:dim() == 2 then height = im:size(1) * scale width = im:size(2) * scale elseif im:dim() == 3 then height = im:size(2) * scale width = im:size(3) * scale else error("Image must be 2-dim or 3-dim data") end -- Determine whether a new line is needed if self.x ~= 0 and self.x 
+ width > self.width then return self.height else return self.y end end -- Show the window function Scroll:show() self.win:show() end -- Hide the window function Scroll:hide() self.win:hide() end -- Save to file function Scroll:save(file) self.painter:write(file) end -- Set window title function Scroll:setTitle(title) self.win:setWindowTitle(title) end -- Reset the drawing area function Scroll:clear() self:resize(self.width,0) self.x = 0 self.y = 0 end return Scroll ================================================ FILE: glyphnet/scroll.ui ================================================ window 0 0 640 480 Scrollable Window 0 0 0 false 0 0 600 440 0 0 ================================================ FILE: glyphnet/test.lua ================================================ --[[ Tester for GlyphNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local torch = require('torch') local sys = require('sys') local Test = class() -- Constructor for Test -- data: the data object -- model: the model object -- loss: the loss object -- config: configuration table function Test:_init(data, model, loss, config) self.data = data self.model = model self.loss = loss self.time = {} end -- Run for all the data -- callback: (optional) a callback function to execute after each step function Test:run(callback) self.total_error = 0 self.total_objective = 0 self.total_count = 0 self.clock = sys.clock() for input, label, count in self.data:iterator() do self:runStep(input, label, count) if callback then callback(self) end self.clock = sys.clock() end end -- Run for one minibatch step function Test:runStep(input, label, count) -- Get a batch of data self.input_untyped, self.label_untyped = input, label self.input = self.input or self.input_untyped:type(self.model:type()) self.input:copy(self.input_untyped) self.label = self.label or self.label_untyped:type(self.model:type()) self.label:copy(self.label_untyped) self.count = count if 
self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.data = sys.clock() - self.clock
   -- Forward propagation
   self.clock = sys.clock()
   self.output = self.model:forward(self.input)
   self.objective = self.loss:forward(self.output, self.label)
   -- Some losses return a 1-element tensor instead of a number; unwrap it.
   -- BUG FIX: original indexed 'self.objectve' (typo) which is always nil,
   -- so this branch raised an error whenever it was taken.
   if type(self.objective) ~= 'number' then
      self.objective = self.objective[1]
   end
   self.max, self.decision = self.output:type(
      torch.getdefaulttensortype()):max(2)
   self.max = self.max:squeeze()
   -- Only the first 'count' samples of the batch are real data.
   self.decision = self.decision:squeeze():narrow(1, 1, count):type(
      torch.getdefaulttensortype())
   self.error = torch.ne(
      self.decision, self.label_untyped:narrow(1, 1, count)):type(
      torch.getdefaulttensortype()):sum() / count
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.forward = sys.clock() - self.clock
   -- Update the results as running (count-weighted) averages.
   self.clock = sys.clock()
   self.total_objective = (self.total_objective * self.total_count +
                              self.objective * count) /
      (self.total_count + count)
   self.total_error = (self.total_error * self.total_count +
                          self.error * count) /
      (self.total_count + count)
   self.total_count = self.total_count + count
   self.time.update = sys.clock() - self.clock
end

return Test

================================================ FILE: glyphnet/train.lua ================================================

--[[
Trainer for GlyphNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')
local sys = require('sys')

local Train = class()

-- Constructor for Train
-- data: the data object
-- model: the model object
-- loss: the loss object
-- config: configuration table with optional keys rates (step -> learning
--   rate schedule), step, momentum, decay, recapture, state (momentum
--   state tensor to resume from)
function Train:_init(data, model, loss, config)
   self.data = data
   self.model = model
   self.loss = loss
   self.rates = config.rates or {1e-3}
   self.step = config.step or 0
   self.momentum = config.momentum or 0
   self.decay = config.decay or 0
   self.recapture = config.recapture
   self.params, self.grads = self.model:getParameters()
   if config.state then
      self.state = config.state:type(self.model:type())
   else
      self.state = self.grads:clone():zero()
   end
   -- Find current learning rate: the rate whose schedule step is the
   -- largest one not exceeding the current step.
   local max_step = 1
   self.rate = self.rates[1]
   for step, rate in pairs(self.rates) do
      if step <= self.step and step > max_step then
         max_step = step
         self.rate = rate
      end
   end
   self.time = {}
end

-- Run for a number of steps
-- steps: number of steps
-- callback: (optional) a callback function to execute after each step
function Train:run(steps, callback)
   -- Re-flatten parameters in case the model was rebuilt externally.
   if self.recapture then
      self.params, self.grads = self.model:getParameters()
   end
   for i = 1, steps do
      self.step = self.step + 1
      self:runStep()
      if callback then callback(self, i) end
   end
end

-- Run for one minibatch step
function Train:runStep()
   -- Get a batch of data
   self.clock = sys.clock()
   self.input_untyped, self.label_untyped = self.data:getBatch(
      self.input_untyped, self.label_untyped)
   self.input = self.input or self.input_untyped:type(self.model:type())
   self.input:copy(self.input_untyped)
   self.label = self.label or self.label_untyped:type(self.model:type())
   self.label:copy(self.label_untyped)
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.data = sys.clock() - self.clock
   -- Forward propagation
   self.clock = sys.clock()
   self.output = self.model:forward(self.input)
   self.objective = self.loss:forward(self.output, self.label)
   -- BUG FIX: same 'self.objectve' typo as in test.lua -- the unwrap of a
   -- tensor-valued loss always indexed nil and errored.
   if type(self.objective) ~= 'number' then
      self.objective = self.objective[1]
   end
   self.max, self.decision = self.output:type(
      torch.getdefaulttensortype()):max(2)
   self.max = self.max:squeeze()
   self.decision = self.decision:squeeze():type(torch.getdefaulttensortype())
   self.error = torch.ne(self.decision, self.label_untyped):type(
      torch.getdefaulttensortype()):sum() / self.label:size(1)
   if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end
   self.time.forward = sys.clock() - self.clock
   -- Backward propagation
   self.clock = sys.clock()
   self.grads:zero()
   self.grad_output = self.loss:backward(self.output, self.label)
   self.grad_input =
self.model:backward(self.input, self.grad_output) if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end self.time.backward = sys.clock() - self.clock -- Update the step self.clock = sys.clock() self:sgd() if self.model:type() == 'torch.CudaTensor' then cutorch.synchronize() end self.time.update = sys.clock() - self.clock end function Train:sgd() self.rate = self.rates[self.step] or self.rate if self.momentum and self.momentum > 0 then self.state:mul(self.momentum):add(self.grads:mul(-self.rate)) self.params:mul(1 - self.rate * self.decay):add(self.state) else self.params:mul(1 - self.rate * self.decay):add( self.grads:mul(-self.rate)) end end return Train ================================================ FILE: glyphnet/unittest/data.lua ================================================ --[[ Unit test for GlyphNet data program Copyright 2015-2016 Xiang Zhang --]] local Data = require('data') local image = require('image') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe.init() local config = {} config.file = 'data/dianping/test_code.t7b' config.unifont = 'unifont/unifont-8.0.01.t7b' config.length = 512 config.batch = 16 joe.config = config joe.data = Data(config) end function joe.getBatchTest() local data = joe.data local sample, label = data:getBatch() print('Size of sample: ') print(sample:size()) print('Size of label: ') print(label:size()) io.write('Labels:') for i = 1, label:size(1) do io.write(' ', label[i]) end io.write('\n') image.display{image = sample[1]:narrow(1, 1, 100), nrow = 10, zoom = 4} joe.sample = sample joe.label = label end function joe.iteratorTest() local data = joe.data local window local total = 0 for sample, label, count in data:iterator() do total = total + count 
io.write(total, ',', count, ':')
      -- NOTE(review): sample[1][1] is redisplayed on every iteration of i;
      -- presumably this just refreshes the window -- confirm whether
      -- sample[i] was intended.
      for i = 1, count do
         window = image.display{
            image = sample[1][1], nrow = 10, zoom = 4, win = window}
         io.write(' ', label[i])
      end
      io.write('\n')
      io.flush()
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/driver.lua
================================================
--[[
Unit test for GlyphNet driver component
Copyright 2016 Xiang Zhang
--]]

local Driver = require('driver')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Load config.lua, point it at the unit-test data, and construct a Driver
-- with a short debug schedule.
function joe:init()
   local config = dofile('config.lua')
   print('Creating driver')
   config.train_data.file = 'data/dianping/unittest_code.t7b'
   config.test_data.file = 'data/dianping/unittest_code.t7b'
   config.driver.debug = true
   config.driver.device = 3
   config.driver.steps = 10
   config.driver.epoches = 30
   config.driver.schedule = 4
   config.driver.variation = 'small'
   config.driver.location = '/tmp'
   local driver = Driver(config, config.driver)
   self.config = config
   self.driver = driver
end

-- Print the configured learning-rate schedule, then run the driver.
function joe:driverTest()
   local driver = self.driver
   print('Training schedule')
   for step, rate in pairs(driver.options.train.rates) do
      print(step, rate)
   end
   print('Testing driver')
   driver:run()
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local os = require('os')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Construct the model from config.lua and flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   local model = Model(config.model)
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the spatial and temporal submodels, plus the storage pointers of the
-- weight-shared spatial group so sharing can be eyeballed.
function joe:printModel(model)
   local model = model or self.model
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Forward and backward one random batch, recording all intermediates.
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(4, 512, 16, 16)
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   local output = model:forward(input)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size())
   print('Running backward propagation')
   local grad_input = model:backward(input, grad_output)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

function
joe:modeTest()
   -- Toggle testing/training mode and report each submodule's train flag.
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Setting to training mode')
   model:setModeTrain()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model_cuda.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local os = require('os')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the model with cudnn disabled, move it to the configured GPU, and
-- flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   config.model.cudnn = nil
   print('Changing device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   local model = Model(config.model)
   model:cuda()
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the model structure and shared-storage pointers of the spatial group.
function joe:printModel(model)
   local model = model or self.model
   print('Type of model: '..model:type())
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Timed forward/backward on one random CUDA batch (synchronized timings).
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(16, 512, 16, 16):type(model:type())
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   cutorch.synchronize()
   sys.tic()
   local output = model:forward(input)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size()):type(model:type())
   print('Running backward propagation')
   cutorch.synchronize()
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

-- Toggle testing/training mode and report each submodule's train flag.
function joe:modeTest()
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in
ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Setting to training mode')
   model:setModeTrain()
   print('Temporal mode:')
   for i, m in ipairs(model.temporal.modules) do
      print(i, torch.type(m), m.train)
   end
   print('Spatial mode:')
   for i, m in ipairs(model.spatial.modules) do
      print(i, torch.type(m), m.train)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/model_cudnn.lua
================================================
--[[
Unit test for GlyphNet model component
Copyright 2015-2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local os = require('os')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Entry point: initialize once, then invoke every member whose name matches
-- the pattern [%g]+Test.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the model with cudnn enabled, move it to the configured GPU, and
-- flatten its parameters.
function joe:init()
   local config = dofile('config.lua')
   config.model.cudnn = true
   print('Changing device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   local model = Model(config.model)
   model:cuda()
   local parameters, gradients = model:getParameters()
   print('Parameter pointers: '..torch.pointer(parameters:storage())..' '..
            torch.pointer(gradients:storage()))
   print('Parameter sizes: '..parameters:nElement()..' '..gradients:nElement())
   self.config = config
   self.model = model
   self.parameters = parameters
   self.gradients = gradients
   self:printModel()
end

-- Dump the model structure and shared-storage pointers of the spatial group.
function joe:printModel(model)
   local model = model or self.model
   print('Type of model: '..model:type())
   print('Created spatial model: ')
   print(model.spatial)
   print('Created temporal model: ')
   print(model.temporal)
   print('Spatial group pointers:')
   print(0, torch.pointer(model.spatial.modules[1].weight:storage()),
         torch.pointer(model.spatial.modules[1].gradWeight:storage()))
   for i, m in ipairs(model.group) do
      print(i, torch.pointer(m.modules[1].weight:storage()),
            torch.pointer(m.modules[1].gradWeight:storage()))
   end
end

-- Timed forward/backward on one random CUDA batch (synchronized timings).
function joe:forwardBackwardTest()
   local model = self.model
   print('Initializing input')
   local input = torch.rand(16, 512, 16, 16):type(model:type())
   print('Input size:')
   print(input:size())
   print('Running forward propagation')
   cutorch.synchronize()
   sys.tic()
   local output = model:forward(input)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature size:')
   print(model.feature:size())
   print('Output size:')
   print(output:size())
   print('Initializing output gradients')
   local grad_output = torch.rand(output:size()):type(model:type())
   print('Running backward propagation')
   cutorch.synchronize()
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   cutorch.synchronize()
   sys.toc(true)
   print('Feature gradient size:')
   print(model.grad_feature:size())
   self.input = input
   self.grad_input = grad_input
   self.output = output
   self.grad_output = grad_output
end

-- Save the model to /tmp, then reload it from file and print it again.
function joe:saveTest()
   local model = self.model
   local file = '/tmp/model.t7b'
   print('Saving to '..file)
   model:save(file)
   print('Model saved')
   local config = {}
   config.file = file
   config.cudnn = joe.config.model.cudnn
   config.group = joe.config.model.group
   print('Loading from '..file)
   model = Model(config)
   self:printModel(model)
end

-- Toggle testing/training mode and report each submodule's train flag.
function joe:modeTest()
   local model = self.model
   print('Setting to testing mode')
   model:setModeTest()
   print('Temporal mode:')
   for i, m in
ipairs(model.temporal.modules) do print(i, torch.type(m), m.train) end print('Spatial mode:') for i, m in ipairs(model.spatial.modules) do print(i, torch.type(m), m.train) end print('Setting to training mode') model:setModeTrain() print('Temporal mode:') for i, m in ipairs(model.temporal.modules) do print(i, torch.type(m), m.train) end print('Spatial mode:') for i, m in ipairs(model.spatial.modules) do print(i, torch.type(m), m.train) end end joe.main() return joe ================================================ FILE: glyphnet/unittest/modules_temporal.lua ================================================ --[[ Unit test for modules Copyright 2016 Xiang Zhang --]] local nn = require('modules') local cunn = require('cunn') local cutorch = require('cutorch') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init(joe) end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local device = 1 cutorch.setDevice(device) print('Device set to '..device) self.jacobian = nn.Jacobian end function joe:noBatchCPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad) print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length) print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad) input_pad:narrow(2, pad + 1, 
input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( temporal.weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()) print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()) local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) local jacobian = self.jacobian local err = jacobian.testJacobian(temporal, input) print('Error of jacobian test: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.weight, temporal.gradWeight) print('Error of jacobian test for weight: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.bias, temporal.gradBias) print('Error of jacobian test for bias: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.weight) print('Error of jacobian test for weight update: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.bias) print('Error of jacobian test for bias update: '..err) for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'weight', 'gradWeight')) do print('Error of jacobian test for '..t..' all update: '..err) end for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'bias', 'gradBias')) do print('Error of jacobian test for '..t..' 
all update: '..err) end end function joe:noBatchCPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchCPU(kernel, stride, pad) end end end end function joe:batchCPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad) print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length) print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(batch, input_feature, input_length + 2 * pad) input_pad:narrow(3, pad + 1, input_length):copy(input) local weight = temporal.weight:view( 1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) for i = 1, output:size(3) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 3, input_begin, kernel):contiguous():view( batch, 1, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, input_chunk):sum(4):sum(3):squeeze() output_slice:add( 1, temporal.bias:view(1, output_feature):expandAs(output_slice)) print('Error of output slice '..i..': '.. 
output_slice:add(-1, output:select(3, i)):abs():mean()) end local grad_output = torch.rand(output:size()) print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( batch, output_feature, input_length + kernel - 1):zero() local interlace_length = stride * (grad_output:size(3) - 1) + 1 local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2 for i = 1, grad_output:size(3) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(3) then grad_output_pad:select(3, grad_output_pad_begin):copy( grad_output:select(3, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()) local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(3) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 3, grad_output_pad_begin, kernel):contiguous():view( batch, output_feature, 1, kernel):expand( batch, output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse:view(1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel), grad_output_pad_chunk):sum(4):sum(2):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean()) end local input_unfold = input_pad:unfold(3, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.bmm( grad_output, input_unfold:select(4, i):transpose(2, 3)):sum( 1):squeeze() print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(3):sum(1) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) local jacobian = self.jacobian local err = jacobian.testJacobian(temporal, input) print('Error of jacobian test: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.weight, temporal.gradWeight) print('Error of jacobian test for weight: '..err) local err = jacobian.testJacobianParameters( temporal, input, temporal.bias, temporal.gradBias) print('Error of jacobian test for bias: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.weight) print('Error of jacobian test for weight update: '..err) local err = jacobian.testJacobianUpdateParameters( temporal, input, temporal.bias) print('Error of jacobian test for bias update: '..err) for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'weight', 'gradWeight')) do print('Error of jacobian test for '..t..' all update: '..err) end for t,err in pairs( jacobian.testAllUpdate(temporal, input, 'bias', 'gradBias')) do print('Error of jacobian test for '..t..' 
all update: '..err) end end function joe:batchCPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:batchCPU(kernel, stride, pad) end end end end function joe:noBatchGPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad):cuda() input_pad:narrow(2, pad + 1, input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( temporal.weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. 
output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:noBatchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchGPU(kernel, stride, pad) end end end end function joe:batchGPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros( batch, input_feature, input_length + 2 * pad):cuda() input_pad:narrow(3, pad + 1, input_length):copy(input) local weight = temporal.weight:view( 1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) for i = 1, output:size(3) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 3, input_begin, kernel):contiguous():view( batch, 1, input_feature, kernel):expand( batch, output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, 
input_chunk):sum(4):sum(3):squeeze() output_slice:add( 1, temporal.bias:view(1, output_feature):expandAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(3, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( batch, output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(3) - 1) + 1 local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2 for i = 1, grad_output:size(3) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(3) then grad_output_pad:select(3, grad_output_pad_begin):copy( grad_output:select(3, i)) end end local weight_reverse = torch.Tensor(temporal.weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, temporal.weight) for i = 1, grad_input:size(3) do local grad_output_pad_begin = i local grad_output_pad_chunk = grad_output_pad:narrow( 3, grad_output_pad_begin, kernel):contiguous():view( batch, output_feature, 1, kernel):expand( batch, output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse:view(1, output_feature, input_feature, kernel):expand( batch, output_feature, input_feature, kernel), grad_output_pad_chunk):sum(4):sum(2):squeeze() print('Error of input gradient slice '..i..': '.. 
grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean()) end local input_unfold = input_pad:unfold(3, kernel, stride) for i = 1, temporal.weight:size(3) do local grad_weight_slice = torch.bmm( grad_output, input_unfold:select(4, i):transpose(2, 3)):sum( 1):squeeze() print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, temporal.gradWeight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(3):sum(1) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:batchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:batchGPU(kernel, stride, pad) end end end end joe.main() return joe ================================================ FILE: glyphnet/unittest/modules_temporal_cudnn.lua ================================================ --[[ Unit test for modules Copyright 2016 Xiang Zhang --]] local nn = require('modules') local cudnn = require('cudnn') local cunn = require('cunn') local cutorch = require('cutorch') local torch = require('torch') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe.init(joe) end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local device = 2 cutorch.setDevice(device) print('Device set to '..device) self.jacobian = nn.Jacobian end function joe:noBatchGPU(kernel, stride, pad) local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = cudnn.TemporalConvolutionCudnn( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local weight = temporal.weight:view(output_feature, input_feature, kernel) 
local grad_weight = temporal.gradWeight:view( output_feature, input_feature, kernel) local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros(input_feature, input_length + 2 * pad):cuda() input_pad:narrow(2, pad + 1, input_length):copy(input) for i = 1, output:size(2) do local input_begin = (i - 1) * stride + 1 local input_chunk = input_pad:narrow( 2, input_begin, kernel):contiguous():view( 1, input_feature, kernel):expand(output_feature, input_feature, kernel) local output_slice = torch.cmul( weight, input_chunk):sum(3):sum(2):squeeze() output_slice:add(1, temporal.bias:viewAs(output_slice)) print('Error of output slice '..i..': '.. output_slice:add(-1, output:select(2, i)):abs():mean()) end local grad_output = torch.rand(output:size()):cuda() print('Executing backward propagation') local grad_input = temporal:backward(input, grad_output) print('Input gradient size: ') print(grad_input:size()) local grad_output_pad = torch.Tensor( output_feature, input_length + kernel - 1):zero():cuda() local interlace_length = stride * (grad_output:size(2) - 1) + 1 local interlace_shift = (grad_output_pad:size(2) - interlace_length) / 2 for i = 1, grad_output:size(2) do local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift if grad_output_pad_begin >= 1 and grad_output_pad_begin <= grad_output_pad:size(2) then grad_output_pad:select(2, grad_output_pad_begin):copy( grad_output:select(2, i)) end end local weight_reverse = torch.Tensor(weight:size()):cuda() local weight_index = torch.LongTensor(kernel) for i = 1, weight_index:size(1) do weight_index[i] = kernel - i + 1 end weight_reverse:indexCopy(3, weight_index, weight) for i = 1, grad_input:size(2) do local grad_output_pad_begin = 
i local grad_output_pad_chunk = grad_output_pad:narrow( 2, grad_output_pad_begin, kernel):contiguous():view( output_feature, 1, kernel):expand( output_feature, input_feature, kernel) local grad_input_slice = torch.cmul( weight_reverse, grad_output_pad_chunk):sum(3):sum(1):squeeze() print('Error of input gradient slice '..i..': '.. grad_input_slice:add(-1, grad_input:select(2, i)):abs():mean()) end local input_unfold = input_pad:unfold(2, kernel, stride) for i = 1, weight:size(3) do local grad_weight_slice = torch.mm( grad_output, input_unfold:select(3, i):transpose(1, 2)) print('Error of weight gradient slice '..i..': '..grad_weight_slice:add( -1, grad_weight:select(3, i)):abs():mean()) end local grad_bias = grad_output:sum(2) print('Error of bias gradient: '..grad_bias:add( -1, temporal.gradBias):abs():mean()) end function joe:noBatchGPUTest() for _, kernel in ipairs({3, 5}) do for _, stride in ipairs({1, 2, 3, 5}) do for _, pad in ipairs({0, 1, 2, 3, 5}) do self:noBatchGPU(kernel, stride, pad) end end end end function joe:batchGPU(kernel, stride, pad) local batch = 4 local input_feature = 2 local output_feature = 4 local kernel = kernel or 3 local stride = stride or 1 local pad = pad or 0 local temporal = nn.TemporalConvolutionMM( input_feature, output_feature, kernel, stride, pad):cuda() print('Created module: '..tostring(temporal)) temporal.gradWeight:zero() temporal.gradBias:zero() local temporal_weight = temporal.weight:view(input_feature, output_feature, kernel) local temporal_grad_weight = temporal.gradWeight:view( input_feature, output_feature, kernel) local output_length = 16 local input_length = (output_length - 1) * stride + kernel - 2 * pad local input = torch.rand(batch, input_feature, input_length):cuda() print('Input size:') print(input:size()) print('Executing forward propagation') local output = temporal:forward(input) print('Output size: ') print(output:size()) local input_pad = torch.zeros( batch, input_feature, input_length + 2 * pad):cuda() 
input_pad:narrow(3, pad + 1, input_length):copy(input)
-- Broadcast the weight across the batch dimension for the naive forward
-- reference below.
local weight = temporal_weight:view(
   1, output_feature, input_feature, kernel):expand(
   batch, output_feature, input_feature, kernel)
-- Forward reference: for every output frame, multiply the matching padded
-- input window by the weight, reduce over kernel and input feature
-- dimensions, add the bias, and compare with the module's output.
for i = 1, output:size(3) do
   local input_begin = (i - 1) * stride + 1
   local input_chunk = input_pad:narrow(
      3, input_begin, kernel):contiguous():view(
      batch, 1, input_feature, kernel):expand(
      batch, output_feature, input_feature, kernel)
   local output_slice = torch.cmul(
      weight, input_chunk):sum(4):sum(3):squeeze()
   output_slice:add(
      1, temporal.bias:view(1, output_feature):expandAs(output_slice))
   print('Error of output slice '..i..': '..
            output_slice:add(-1, output:select(3, i)):abs():mean())
end
local grad_output = torch.rand(output:size()):cuda()
print('Executing backward propagation')
local grad_input = temporal:backward(input, grad_output)
print('Input gradient size: ')
print(grad_input:size())
-- Build an interlaced, zero-padded copy of grad_output so the input
-- gradient can be checked as a correlation with the reversed kernel.
local grad_output_pad = torch.Tensor(
   batch, output_feature, input_length + kernel - 1):zero():cuda()
local interlace_length = stride * (grad_output:size(3) - 1) + 1
local interlace_shift = (grad_output_pad:size(3) - interlace_length) / 2
for i = 1, grad_output:size(3) do
   local grad_output_pad_begin = (i - 1) * stride + 1 + interlace_shift
   if grad_output_pad_begin >= 1 and
   grad_output_pad_begin <= grad_output_pad:size(3) then
      grad_output_pad:select(3, grad_output_pad_begin):copy(
         grad_output:select(3, i))
   end
end
-- Reverse the kernel taps: backward propagation correlates the output
-- gradient with the flipped filter.
local weight_reverse = torch.Tensor(temporal_weight:size()):cuda()
local weight_index = torch.LongTensor(kernel)
for i = 1, weight_index:size(1) do
   weight_index[i] = kernel - i + 1
end
weight_reverse:indexCopy(3, weight_index, temporal_weight)
for i = 1, grad_input:size(3) do
   local grad_output_pad_begin = i
   local grad_output_pad_chunk = grad_output_pad:narrow(
      3, grad_output_pad_begin, kernel):contiguous():view(
      batch, output_feature, 1, kernel):expand(
      batch, output_feature, input_feature, kernel)
   local grad_input_slice = torch.cmul(
      weight_reverse:view(1, output_feature,
         input_feature, kernel):expand(
         batch, output_feature, input_feature, kernel),
      grad_output_pad_chunk):sum(4):sum(2):squeeze()
   print('Error of input gradient slice '..i..': '..
            grad_input_slice:add(-1, grad_input:select(3, i)):abs():mean())
end
-- Weight gradient reference: unfold the padded input so kernel tap i lines
-- up with the output frames it produced, then accumulate the per-sample
-- products over the batch.
local input_unfold = input_pad:unfold(3, kernel, stride)
for i = 1, temporal_weight:size(3) do
   local grad_weight_slice = torch.bmm(
      grad_output, input_unfold:select(4, i):transpose(2, 3)):sum(
      1):squeeze()
   print('Error of weight gradient slice '..i..': '..grad_weight_slice:add(
            -1, temporal_grad_weight:select(3, i)):abs():mean())
end
-- Bias gradient is the output gradient summed over time and batch.
local grad_bias = grad_output:sum(3):sum(1)
print('Error of bias gradient: '..grad_bias:add(
         -1, temporal.gradBias):abs():mean())
end

-- Run the batched GPU test across a grid of kernel/stride/pad settings.
function joe:batchGPUTest()
   for _, kernel in ipairs({3, 5}) do
      for _, stride in ipairs({1, 2, 3, 5}) do
         for _, pad in ipairs({0, 1, 2, 3, 5}) do
            self:batchGPU(kernel, stride, pad)
         end
      end
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/test.lua
================================================
--[[
Unit test for GlyphNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')

local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model, loss and tester from config.lua and store them on joe.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with the printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a progress-printing callback for the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
               ', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/test_cuda.lua
================================================
--[[
Unit test for GlyphNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')

local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model, CUDA loss and tester from config.lua.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with the printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a progress-printing callback for the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
               ', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/train.lua
================================================
--[[
Unit test for GlyphNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')

local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model, loss and trainer from config.lua and store them on joe.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create trainer')
   -- NOTE(review): config.train.rates appears to be a step-indexed
   -- learning-rate schedule -- confirm against train.lua.
   config.train.rates[4] = 1e-5
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 10 steps with the printing callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 10 steps')
   train:run(10, callback)
end

-- Return a progress-printing callback for the trainer.
function joe:callback()
   return function (train, i)
      print('stp: '..train.step..', rat: '..train.rate..
               ', obj: '..train.objective..', dat: '..train.time.data..
               ', fwd: '..train.time.forward..', bwd: '..train.time.backward..
               ', upd: '..train.time.update)
   end
end

joe.main()
return joe

================================================
FILE: glyphnet/unittest/train_cuda.lua
================================================
--[[
Unit test for GlyphNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')

local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')

local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Entry point: run joe:init() if present, then every method whose name
-- matches the pattern '[%g]+Test'.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
      and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model, CUDA loss and trainer from config.lua, then print
-- initial parameter/gradient/state statistics.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   local data = Data(config.test_data)
   print('Create model')
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create trainer')
   -- NOTE(review): config.train.rates appears to be a step-indexed
   -- learning-rate schedule -- confirm against train.lua.
   config.train.rates[79] = 1e-5
   config.train.rates[85] = config.train.rates[1]
   local train = Train(data, model, loss, config.train)
   print('pmn: '..train.params:mean()..', psd: '..train.params:std()..
            ', gmn: '..train.grads:mean()..', gsd: '..train.grads:std()..
            ', smn: '..train.state:mean()..', ssd: '..train.state:std())
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100 steps with the printing callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 100 steps')
   train:run(100, callback)
end

-- Return a progress-printing callback for the trainer.
function joe:callback()
   return function (train, i)
      print('stp: '..train.step..', rat: '..train.rate..', err: '..train.error..
               ', obj: '..train.objective..', dat: '..train.time.data..
               ', fwd: '..train.time.forward..', bwd: '..train.time.backward..
', upd: '..train.time.update..', pmn: '..train.params:mean().. ', psd: '..train.params:std()..', gmn: '..train.grads:mean().. ', gsd: '..train.grads:std()..', smn: '..train.state:mean().. ', ssd: '..train.state:std()) end end joe.main() return joe ================================================ FILE: glyphnet/visualizer.lua ================================================ --[[ Visualization module for glyphnet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local Scroll = require('scroll') local Visualizer = class() -- Constructor -- config: configuration table -- .width: (optional) width of scrollable window -- .scale: (optional) scale of visualizing weights -- .title: (optional) title of the scrollable window -- .height: (optional) maximum height of visualization for a module function Visualizer:_init(config) local config = config or {} local config = config or {} self.width = config.width or 800 self.scale = config.scale or 4 self.title = config.title or "Visualizer" self.height = config.height or 64 self.win = Scroll(self.width, self.title) end -- Save wrapper function Visualizer:save(...) return self.win:save(...) 
end

-- Visualize the weights of a sequential model
-- model: the sequential model
function Visualizer:drawSequential(model)
   self.win:clear()
   for i, m in ipairs(model.modules) do
      self.win:drawText(tostring(i)..": "..tostring(m))
      if self.drawModule[torch.type(m)] then
         self.drawModule[torch.type(m)](self, m)
      end
   end
end

-- Draw an image with height hints
-- im: the image tensor to draw
-- y_zero: vertical position where the current module's drawing started
-- max: (optional) normalization maximum, defaults to im:max()
-- min: (optional) normalization minimum, defaults to im:min()
-- Returns false (and draws nothing) once the module's drawing would exceed
-- self.height below y_zero; true otherwise.
function Visualizer:drawImage(im, y_zero, max, min)
   local win = self.win
   local y = win:hintImageHeight(im, self.scale)
   if y - y_zero > self.height then
      return false
   end
   local max = max or im:max()
   local min = min or im:min()
   -- Normalize to [0, 1] for display; guard against a constant image.
   local normalized = torch.Tensor(im:size()):copy(im):add(-min)
   if max - min > 0 then
      normalized:div(max - min)
   end
   win:drawImage(normalized, self.scale)
   return true
end

-- A table for reading modules
Visualizer.drawModule = {}

-- Draw each output row of a linear layer, followed by its bias.
Visualizer.drawModule['nn.Linear'] = function (self, m)
   local weight = m.weight
   local y_zero = self.win.y
   for i = 1, m.weight:size(1) do
      local w = weight[i]:view(1, weight:size(2))
      if not self:drawImage(w, y_zero) then
         return
      end
   end
   self:drawImage(m.bias:view(1, m.bias:size(1)), y_zero)
end

-- Draw spatial convolution kernels: one image per output plane for RGB
-- input, otherwise one image per input/output plane pair; then the bias.
-- (Removed the unused locals 'height' and 'width'.)
Visualizer.drawModule['nn.SpatialConvolution'] = function (self, m)
   local weight = m.weight:view(m.nOutputPlane, m.nInputPlane, m.kH, m.kW)
   local y_zero = self.win.y
   -- Share one normalization range across all kernels of the module.
   local max = weight:max()
   local min = weight:min()
   if m.nInputPlane == 3 then
      for i = 1, m.nOutputPlane do
         local w = weight[i]
         if not self:drawImage(w, y_zero, max, min) then
            return
         end
      end
   else
      for i = 1, m.nOutputPlane do
         for j = 1, m.nInputPlane do
            local w = weight[i][j]
            if not self:drawImage(w, y_zero, max, min) then
               return
            end
         end
      end
   end
   self:drawImage(m.bias:view(1, m.nOutputPlane), y_zero)
end
Visualizer.drawModule['nn.SpatialConvolutionMM'] =
   Visualizer.drawModule['nn.SpatialConvolution']
Visualizer.drawModule['cudnn.SpatialConvolution'] =
   Visualizer.drawModule['nn.SpatialConvolution']

-- Draw temporal convolution kernels, one image per output feature.
Visualizer.drawModule['nn.TemporalConvolutionMM'] = function (self, m)
   local weight =
      m.weight:view(m.output_feature, m.input_feature, m.kernel)
   local y_zero = self.win.y
   local max = weight:max()
   local min = weight:min()
   for i = 1, m.output_feature do
      -- Transpose so the kernel (time) axis runs horizontally.
      local w = weight[i]:transpose(2, 1)
      if not self:drawImage(w, y_zero, max, min) then
         return
      end
   end
end

-- Draw cudnn temporal convolution kernels, one image per output plane.
Visualizer.drawModule['cudnn.TemporalConvolutionCudnn'] = function (self, m)
   local weight = m.weight:view(m.nOutputPlane, m.nInputPlane, m.kW)
   local y_zero = self.win.y
   local max = weight:max()
   local min = weight:min()
   for i = 1, m.nOutputPlane do
      local w = weight[i]:transpose(2, 1)
      if not self:drawImage(w, y_zero, max, min) then
         return
      end
   end
end

return Visualizer

================================================
FILE: linearnet/archive/11stbinary_charbag.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/charbag -train_data_file data/11st/sentiment/binary_train_charbag.t7b -test_data_file data/11st/sentiment/binary_test_charbag.t7b "$@";

================================================
FILE: linearnet/archive/11stbinary_charbagtfidf.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/charbagtfidf -train_data_file data/11st/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_charbagtfidf.t7b "$@";

================================================
FILE: linearnet/archive/11stbinary_chargram.sh
================================================
#!/bin/bash
# Archived program command-line for experiment
# Copyright 2016 Xiang Zhang
#
# Usage: bash {this_file} [additional_options]

set -x;
set -e;

th main.lua -driver_location models/11stbinary/chargram -train_data_file
data/11st/sentiment/binary_train_chargram.t7b -test_data_file data/11st/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/chargramtfidf -train_data_file data/11st/sentiment/binary_train_chargramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbag -train_data_file data/11st/sentiment/binary_train_wordbag.t7b -test_data_file data/11st/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagroman -train_data_file data/11st/sentiment/binary_train_rr_wordbag.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagtfidf 
-train_data_file data/11st/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordbagtfidfroman -train_data_file data/11st/sentiment/binary_train_rr_wordbagtfidf.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgram -train_data_file data/11st/sentiment/binary_train_wordgram.t7b -test_data_file data/11st/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgramroman -train_data_file data/11st/sentiment/binary_train_rr_wordgram.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/11stbinary/wordgramtfidf -train_data_file data/11st/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stbinary/wordgramtfidfroman -train_data_file data/11st/sentiment/binary_train_rr_wordgramtfidf.t7b -test_data_file data/11st/sentiment/binary_test_rr_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/charbag -train_data_file data/11st/sentiment/full_train_charbag.t7b -test_data_file data/11st/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/charbagtfidf -train_data_file data/11st/sentiment/full_train_charbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set 
-x; set -e; th main.lua -driver_location models/11stfull/chargram -train_data_file data/11st/sentiment/full_train_chargram.t7b -test_data_file data/11st/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/chargramtfidf -train_data_file data/11st/sentiment/full_train_chargramtfidf.t7b -test_data_file data/11st/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbag -train_data_file data/11st/sentiment/full_train_wordbag.t7b -test_data_file data/11st/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbagroman -train_data_file data/11st/sentiment/full_train_rr_wordbag.t7b -test_data_file data/11st/sentiment/full_test_rr_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/11stfull/wordbagtfidf -train_data_file data/11st/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordbagtfidfroman -train_data_file data/11st/sentiment/full_train_rr_wordbagtfidf.t7b -test_data_file data/11st/sentiment/full_test_rr_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgram -train_data_file data/11st/sentiment/full_train_wordgram.t7b -test_data_file data/11st/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgramroman -train_data_file data/11st/sentiment/full_train_rr_wordgram.t7b -test_data_file data/11st/sentiment/full_test_rr_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th 
main.lua -driver_location models/11stfull/wordgramtfidf -train_data_file data/11st/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/11st/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/11stfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/11stfull/wordgramtfidfroman -train_data_file data/11st/sentiment/full_train_rr_wordgramtfidf.t7b -test_data_file data/11st/sentiment/full_test_rr_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/charbag -train_data_file data/amazon/binary_train_charbag.t7b -test_data_file data/amazon/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/charbagtfidf -train_data_file data/amazon/binary_train_charbagtfidf.t7b -test_data_file data/amazon/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; 
th main.lua -driver_location models/amazonbinary/chargram -train_data_file data/amazon/binary_train_chargram.t7b -test_data_file data/amazon/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/chargramtfidf -train_data_file data/amazon/binary_train_chargramtfidf.t7b -test_data_file data/amazon/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordbag -train_data_file data/amazon/binary_train_wordbag.t7b -test_data_file data/amazon/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordbagtfidf -train_data_file data/amazon/binary_train_wordbagtfidf.t7b -test_data_file data/amazon/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/amazonbinary/wordgram -train_data_file data/amazon/binary_train_wordgram.t7b -test_data_file data/amazon/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonbinary/wordgramtfidf -train_data_file data/amazon/binary_train_wordgramtfidf.t7b -test_data_file data/amazon/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/charbag -train_data_file data/amazon/full_train_charbag.t7b -test_data_file data/amazon/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/charbagtfidf -train_data_file data/amazon/full_train_charbagtfidf.t7b -test_data_file data/amazon/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/chargram -train_data_file 
data/amazon/full_train_chargram.t7b -test_data_file data/amazon/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/chargramtfidf -train_data_file data/amazon/full_train_chargramtfidf.t7b -test_data_file data/amazon/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordbag -train_data_file data/amazon/full_train_wordbag.t7b -test_data_file data/amazon/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordbagtfidf -train_data_file data/amazon/full_train_wordbagtfidf.t7b -test_data_file data/amazon/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordgram -train_data_file data/amazon/full_train_wordgram.t7b -test_data_file 
data/amazon/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/amazonfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/amazonfull/wordgramtfidf -train_data_file data/amazon/full_train_wordgramtfidf.t7b -test_data_file data/amazon/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/charbag -train_data_file data/chinanews/topic/train_charbag.t7b -test_data_file data/chinanews/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/charbagtfidf -train_data_file data/chinanews/topic/train_charbagtfidf.t7b -test_data_file data/chinanews/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/chargram -train_data_file data/chinanews/topic/train_chargram.t7b -test_data_file data/chinanews/topic/test_chargram.t7b -model_size 
1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/chargramtfidf -train_data_file data/chinanews/topic/train_chargramtfidf.t7b -test_data_file data/chinanews/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbag -train_data_file data/chinanews/topic/train_wordbag.t7b -test_data_file data/chinanews/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagroman -train_data_file data/chinanews/topic/train_pinyin_wordbag.t7b -test_data_file data/chinanews/topic/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagtfidf -train_data_file data/chinanews/topic/train_wordbagtfidf.t7b -test_data_file data/chinanews/topic/test_wordbagtfidf.t7b "$@"; 
================================================ FILE: linearnet/archive/chinanews_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordbagtfidfroman -train_data_file data/chinanews/topic/train_pinyin_wordbagtfidf.t7b -test_data_file data/chinanews/topic/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgram -train_data_file data/chinanews/topic/train_wordgram.t7b -test_data_file data/chinanews/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramroman -train_data_file data/chinanews/topic/train_pinyin_wordgram.t7b -test_data_file data/chinanews/topic/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramtfidf -train_data_file data/chinanews/topic/train_wordgramtfidf.t7b -test_data_file data/chinanews/topic/test_wordgramtfidf.t7b 
-model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/chinanews_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/chinanews/wordgramtfidfroman -train_data_file data/chinanews/topic/train_pinyin_wordgramtfidf.t7b -test_data_file data/chinanews/topic/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] # NOTE(review): siblings pass explicit options; made explicit here for consistency (assumes main.lua defaults were dianping charbag -- confirm) set -x; set -e; th main.lua -driver_location models/dianping/charbag -train_data_file data/dianping/train_charbag.t7b -test_data_file data/dianping/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/charbagtfidf -train_data_file data/dianping/train_charbagtfidf.t7b -test_data_file data/dianping/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/chargram -train_data_file data/dianping/train_chargram.t7b -test_data_file data/dianping/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_chargramtfidf.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/chargramtfidf -train_data_file data/dianping/train_chargramtfidf.t7b -test_data_file data/dianping/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbag -train_data_file data/dianping/train_wordbag.t7b -test_data_file data/dianping/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbagroman -train_data_file data/dianping/train_pinyin_wordbag.t7b -test_data_file data/dianping/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordbagtfidf -train_data_file data/dianping/train_wordbagtfidf.t7b -test_data_file data/dianping/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] 
set -x; set -e; th main.lua -driver_location models/dianping/wordbagtfidfroman -train_data_file data/dianping/train_pinyin_wordbagtfidf.t7b -test_data_file data/dianping/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/dianping_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgram -train_data_file data/dianping/train_wordgram.t7b -test_data_file data/dianping/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgramroman -train_data_file data/dianping/train_pinyin_wordgram.t7b -test_data_file data/dianping/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/dianping/wordgramtfidf -train_data_file data/dianping/train_wordgramtfidf.t7b -test_data_file data/dianping/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/dianping_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua 
-driver_location models/dianping/wordgramtfidfroman -train_data_file data/dianping/train_pinyin_wordgramtfidf.t7b -test_data_file data/dianping/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/charbag -train_data_file data/ifeng/topic/train_charbag.t7b -test_data_file data/ifeng/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/charbagtfidf -train_data_file data/ifeng/topic/train_charbagtfidf.t7b -test_data_file data/ifeng/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/chargram -train_data_file data/ifeng/topic/train_chargram.t7b -test_data_file data/ifeng/topic/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/chargramtfidf -train_data_file 
data/ifeng/topic/train_chargramtfidf.t7b -test_data_file data/ifeng/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbag -train_data_file data/ifeng/topic/train_wordbag.t7b -test_data_file data/ifeng/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagroman -train_data_file data/ifeng/topic/train_pinyin_wordbag.t7b -test_data_file data/ifeng/topic/test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagtfidf -train_data_file data/ifeng/topic/train_wordbagtfidf.t7b -test_data_file data/ifeng/topic/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordbagtfidfroman -train_data_file data/ifeng/topic/train_pinyin_wordbagtfidf.t7b -test_data_file 
data/ifeng/topic/test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgram -train_data_file data/ifeng/topic/train_wordgram.t7b -test_data_file data/ifeng/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramroman -train_data_file data/ifeng/topic/train_pinyin_wordgram.t7b -test_data_file data/ifeng/topic/test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramtfidf -train_data_file data/ifeng/topic/train_wordgramtfidf.t7b -test_data_file data/ifeng/topic/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/ifeng_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/ifeng/wordgramtfidfroman -train_data_file data/ifeng/topic/train_pinyin_wordgramtfidf.t7b -test_data_file 
data/ifeng/topic/test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/charbag -train_data_file data/jd/sentiment/binary_train_charbag.t7b -test_data_file data/jd/sentiment/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/charbagtfidf -train_data_file data/jd/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/jd/sentiment/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/chargram -train_data_file data/jd/sentiment/binary_train_chargram.t7b -test_data_file data/jd/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/chargramtfidf -train_data_file data/jd/sentiment/binary_train_chargramtfidf.t7b -test_data_file 
data/jd/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbag -train_data_file data/jd/sentiment/binary_train_wordbag.t7b -test_data_file data/jd/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordbag.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagtfidf -train_data_file data/jd/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/jd/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordbagtfidfroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordbagtfidf.t7b -test_data_file 
data/jd/sentiment/binary_test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgram -train_data_file data/jd/sentiment/binary_train_wordgram.t7b -test_data_file data/jd/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramroman -train_data_file data/jd/sentiment/binary_train_pinyin_wordgram.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramtfidf -train_data_file data/jd/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/jd/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdbinary/wordgramtfidfroman -train_data_file 
data/jd/sentiment/binary_train_pinyin_wordgramtfidf.t7b -test_data_file data/jd/sentiment/binary_test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/charbag -train_data_file data/jd/sentiment/full_train_charbag.t7b -test_data_file data/jd/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/charbagtfidf -train_data_file data/jd/sentiment/full_train_charbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/chargram -train_data_file data/jd/sentiment/full_train_chargram.t7b -test_data_file data/jd/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/chargramtfidf -train_data_file 
data/jd/sentiment/full_train_chargramtfidf.t7b -test_data_file data/jd/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbag -train_data_file data/jd/sentiment/full_train_wordbag.t7b -test_data_file data/jd/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagroman -train_data_file data/jd/sentiment/full_train_pinyin_wordbag.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagtfidf -train_data_file data/jd/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordbagtfidfroman -train_data_file 
data/jd/sentiment/full_train_pinyin_wordbagtfidf.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgram -train_data_file data/jd/sentiment/full_train_wordgram.t7b -test_data_file data/jd/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgramroman -train_data_file data/jd/sentiment/full_train_pinyin_wordgram.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jdfull/wordgramtfidf -train_data_file data/jd/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/jd/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jdfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/jdfull/wordgramtfidfroman -train_data_file data/jd/sentiment/full_train_pinyin_wordgramtfidf.t7b -test_data_file data/jd/sentiment/full_test_pinyin_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/charbag -train_data_file data/joint/binary_train_charbag.t7b -test_data_file data/joint/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/charbagtfidf -train_data_file data/joint/binary_train_charbagtfidf.t7b -test_data_file data/joint/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/chargram -train_data_file data/joint/binary_train_chargram.t7b -test_data_file data/joint/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/jointbinary/chargramtfidf -train_data_file data/joint/binary_train_chargramtfidf.t7b -test_data_file data/joint/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbag -train_data_file data/joint/binary_train_wordbag.t7b -test_data_file data/joint/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagroman -train_data_file data/joint/binary_train_roman_wordbag.t7b -test_data_file data/joint/binary_test_roman_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagtfidf -train_data_file data/joint/binary_train_wordbagtfidf.t7b -test_data_file data/joint/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordbagtfidfroman -train_data_file 
data/joint/binary_train_roman_wordbagtfidf.t7b -test_data_file data/joint/binary_test_roman_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgram -train_data_file data/joint/binary_train_wordgram.t7b -test_data_file data/joint/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramroman -train_data_file data/joint/binary_train_roman_wordgram.t7b -test_data_file data/joint/binary_test_roman_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramtfidf -train_data_file data/joint/binary_train_wordgramtfidf.t7b -test_data_file data/joint/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointbinary/wordgramtfidfroman 
-train_data_file data/joint/binary_train_roman_wordgramtfidf.t7b -test_data_file data/joint/binary_test_roman_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/charbag -train_data_file data/joint/full_train_charbag.t7b -test_data_file data/joint/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/charbagtfidf -train_data_file data/joint/full_train_charbagtfidf.t7b -test_data_file data/joint/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/chargram -train_data_file data/joint/full_train_chargram.t7b -test_data_file data/joint/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/chargramtfidf -train_data_file data/joint/full_train_chargramtfidf.t7b 
-test_data_file data/joint/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbag -train_data_file data/joint/full_train_wordbag.t7b -test_data_file data/joint/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagroman -train_data_file data/joint/full_train_roman_wordbag.t7b -test_data_file data/joint/full_test_roman_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagtfidf -train_data_file data/joint/full_train_wordbagtfidf.t7b -test_data_file data/joint/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/jointfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordbagtfidfroman -train_data_file data/joint/full_train_roman_wordbagtfidf.t7b -test_data_file data/joint/full_test_roman_wordbagtfidf.t7b "$@"; 
================================================ FILE: linearnet/archive/jointfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgram -train_data_file data/joint/full_train_wordgram.t7b -test_data_file data/joint/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramroman -train_data_file data/joint/full_train_roman_wordgram.t7b -test_data_file data/joint/full_test_roman_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramtfidf -train_data_file data/joint/full_train_wordgramtfidf.t7b -test_data_file data/joint/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/jointfull_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/jointfull/wordgramtfidfroman -train_data_file data/joint/full_train_roman_wordgramtfidf.t7b -test_data_file data/joint/full_test_roman_wordgramtfidf.t7b -model_size 1000001
"$@"; ================================================ FILE: linearnet/archive/nytimes_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/charbag -train_data_file data/nytimes/topic/train_charbag.t7b -test_data_file data/nytimes/topic/test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/charbagtfidf -train_data_file data/nytimes/topic/train_charbagtfidf.t7b -test_data_file data/nytimes/topic/test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/chargram -train_data_file data/nytimes/topic/train_chargram.t7b -test_data_file data/nytimes/topic/test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/nytimes_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/chargramtfidf -train_data_file data/nytimes/topic/train_chargramtfidf.t7b -test_data_file data/nytimes/topic/test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: 
linearnet/archive/nytimes_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordbag -train_data_file data/nytimes/topic/train_wordbag.t7b -test_data_file data/nytimes/topic/test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordbagtfidf -train_data_file data/nytimes/topic/train_wordbagtfidf.t7b -test_data_file data/nytimes/topic/test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/nytimes_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordgram -train_data_file data/nytimes/topic/train_wordgram.t7b -test_data_file data/nytimes/topic/test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/nytimes_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/nytimes/wordgramtfidf -train_data_file data/nytimes/topic/train_wordgramtfidf.t7b -test_data_file data/nytimes/topic/test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_charbag.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/charbag -train_data_file data/rakuten/sentiment/binary_train_charbag.t7b -test_data_file data/rakuten/sentiment/binary_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/charbagtfidf -train_data_file data/rakuten/sentiment/binary_train_charbagtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/chargram -train_data_file data/rakuten/sentiment/binary_train_chargram.t7b -test_data_file data/rakuten/sentiment/binary_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/chargramtfidf -train_data_file data/rakuten/sentiment/binary_train_chargramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_chargramtfidf.t7b -model_size 1000001 "$@"; 
================================================ FILE: linearnet/archive/rakutenbinary_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbag -train_data_file data/rakuten/sentiment/binary_train_wordbag.t7b -test_data_file data/rakuten/sentiment/binary_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordbag.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagtfidf -train_data_file data/rakuten/sentiment/binary_train_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordbagtfidfroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordbagtfidf.t7b -test_data_file 
data/rakuten/sentiment/binary_test_hepburn_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgram -train_data_file data/rakuten/sentiment/binary_train_wordgram.t7b -test_data_file data/rakuten/sentiment/binary_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgramroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordgram.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenbinary/wordgramtfidf -train_data_file data/rakuten/sentiment/binary_train_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenbinary_wordgramtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location 
models/rakutenbinary/wordgramtfidfroman -train_data_file data/rakuten/sentiment/binary_train_hepburn_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_charbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/charbag -train_data_file data/rakuten/sentiment/full_train_charbag.t7b -test_data_file data/rakuten/sentiment/full_test_charbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_charbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/charbagtfidf -train_data_file data/rakuten/sentiment/full_train_charbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_charbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_chargram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/chargram -train_data_file data/rakuten/sentiment/full_train_chargram.t7b -test_data_file data/rakuten/sentiment/full_test_chargram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_chargramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} 
[additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/chargramtfidf -train_data_file data/rakuten/sentiment/full_train_chargramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_chargramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbag.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbag -train_data_file data/rakuten/sentiment/full_train_wordbag.t7b -test_data_file data/rakuten/sentiment/full_test_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordbag.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordbag.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagtfidf -train_data_file data/rakuten/sentiment/full_train_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordbagtfidfroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang 
Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordbagtfidfroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordbagtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordbagtfidf.t7b "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgram.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgram -train_data_file data/rakuten/sentiment/full_train_wordgram.t7b -test_data_file data/rakuten/sentiment/full_test_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramroman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordgram.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordgram.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramtfidf.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramtfidf -train_data_file data/rakuten/sentiment/full_train_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/archive/rakutenfull_wordgramtfidfroman.sh 
================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; th main.lua -driver_location models/rakutenfull/wordgramtfidfroman -train_data_file data/rakuten/sentiment/full_train_hepburn_wordgramtfidf.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn_wordgramtfidf.t7b -model_size 1000001 "$@"; ================================================ FILE: linearnet/config.lua ================================================ --[[ Configuration for LinearNet Copyright 2016 Xiang Zhang --]] -- Name space local config = {} -- Training data configuration config.train_data = {} config.train_data.file = 'data/dianping/train_charbag.t7b' -- Testing data configuration config.test_data = {} config.test_data.file = 'data/dianping/test_charbag.t7b' -- Model configuration config.model = {} config.model.size = 200001 config.model.dimension = 2 config.model.decay = 1e-5 -- Trainer configuration config.train = {} config.train.rate = 1e-3 -- Tester configuration config.test = {} -- Driver configuration config.driver = {} config.driver.loss = 'nn.ClassNLLCriterion' config.driver.threads = 10 config.driver.buffer = 100 config.driver.steps = 100000 config.driver.epoches = 1000 config.driver.interval = 5 config.driver.location = 'models/dianping/charbag' config.driver.initialization = 1e-2 config.driver.plot = true config.driver.debug = false config.driver.resume = false return config ================================================ FILE: linearnet/data.lua ================================================ --[[ Data class for LinearNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local torch = require('torch') local Data = class() -- Constructor for Data -- config: configuration table -- .file: the data file location -- data_table: if present, will use the data_table instead of load from file 
function Data:_init(config, data_table) self.data = data_table or torch.load(config.file) end function Data:getClasses() return #self.data.bag end function Data:getSample(sample, label) local bag, bag_index, bag_value = self.data.bag, self.data.bag_index, self.data.bag_value -- Sample a non-empty example local class = torch.random(#bag) local item = torch.random(bag[class]:size(1)) while bag[class][item][2] == 0 do class = torch.random(#bag) item = torch.random(bag[class]:size(1)) end local start = bag[class][item][1] local length = bag[class][item][2] local sample = sample or torch.Tensor(bag[class][item][2] ,2) sample:resize(bag[class][item][2], 2) sample:select(2, 1):copy(bag_index:narrow(1, start, length)) sample:select(2, 2):copy(bag_value:narrow(1, start, length)) local label = label or torch.Tensor(1) label[1] = class return sample, label end -- Iterator function Data:iterator(sample, label) local bag, bag_index, bag_value = self.data.bag, self.data.bag_index, self.data.bag_value local sample = sample or torch.Tensor(1, 2) local label = label or torch.Tensor(1) local class = 1 local item = 0 local count = 0 return function() item = item + 1 if item > bag[class]:size(1) then class = class + 1 item = 1 if bag[class] == nil then return end end while bag[class][item][2] == 0 do item = item + 1 if item > bag[class]:size(1) then class = class + 1 item = 1 if bag[class] == nil then return end end end local start = bag[class][item][1] local length = bag[class][item][2] sample:resize(length, 2) sample:select(2, 1):copy(bag_index:narrow(1, start, length)) sample:select(2, 2):copy(bag_value:narrow(1, start, length)) label[1] = class return sample, label end end -- Get data table for share function Data:getTable() return self.data end return Data ================================================ FILE: linearnet/driver.lua ================================================ --[[ Driver for LinearNet using HogWILD! 
Copyright 2015 Xiang Zhang --]] local class = require('pl.class') local math = require('math') local nn = require('nn') local os = require('os') local paths = require('paths') local threads = require('threads') local torch = require('torch') local Data = require('data') local Model = require('model') local Queue = require('queue') local Train = require('train') local Test = require('test') -- Library configurations threads.serialization('threads.sharedserialize') local Driver = class() -- Constructor for driver -- options: configuration table for others -- config: configuration table -- .loss: the loss used for classification task -- .threads: number of threads -- .buffer: buffer size for RPC queues -- .steps: steps for each training run -- .epoches: number of testing epoches before stopping -- .interval: print time interval -- .location: save location -- .initialization: initialization parameter for model -- .plot: whether to plot the output -- .debug: whether to debug -- .resume: whether to resume function Driver:_init(options, config) local config = config or {} self.loss = config.loss or 'nn.ClassNLLCriterion' self.threads = config.threads or 10 self.buffer = config.buffer or 100 self.steps = config.steps or 100000 self.epoches = config.epoches or 1000 self.interval = config.interval or 5 self.location = config.location or '.' self.initialization = config.initialization or 1e-2 self.plot = config.plot self.debug = config.debug self.resume = config.resume self.options = options or {} self.config = config math.randomseed(os.time()) torch.manualSeed(os.time()) print('Driver loading training data') self.train_data = Data(self.options.train_data) print('Driver loading testing data') self.test_data = Data(self.options.test_data) self.options.model.dimension = self.train_data:getClasses() print('Driver changed model output dimension to '..
self.options.model.dimension) if self.resume then local record_file = paths.concat(self.location, 'record.t7b') print('Driver loading resumption record from '..record_file) self.record = torch.load(record_file) local model_file = paths.concat( self.location, 'model_'..#self.record..'.t7b') print('Driver loading model from '..model_file) self.model = Model(self.options.model) self.model:load(model_file) if self.record[#self.record].progress then if self.record[#self.record].progress:size(1) == self.threads then self.progress = self.record[#self.record].progress:clone() else print('Driver resumption number of threads change.') self.progress = torch.LongTensor(self.threads):zero() local total = self.record[#self.record].progress:sum() while self.progress:sum() < total do local thread = math.random(self.threads) self.progress[thread] = self.progress[thread] + self.steps end end else print('Driver resumption progress vector not found') self.progress = torch.LongTensor(self.threads):zero() end print('Driver progress = '..self.progress:sum()) for i = 1, #self.record do self:printResult(i) end if self.plot then self:plotRecord() end else self.record = {} print('Driver loading model') self.model = Model(self.options.model) print('Driver initializing model') self.model:reset(self.initialization) self.progress = torch.LongTensor(self.threads):zero() if self.plot then require('gnuplot') end end print('Driver loading tester for training data') self.train_test = Test( self.train_data, self.model, nn[self.loss:sub(4)](), self.options.test) print('Driver loading tester for testing data') self.test_test = Test( self.test_data, self.model, nn[self.loss:sub(4)](), self.options.test) print('Driver building RPC queues') self.master_queue = Queue(self.buffer) self.slave_queues = {} for i = 1, self.threads do self.slave_queues[i] = Queue(self.buffer) end print('Driver creating thread block') local init_thread = self:initThread() self.block = threads.Threads(self.threads, init_thread) 
-- Closing statements of Driver:_init (the constructor header is above this
-- chunk): use per-thread ("specific") job dispatch, stamp the wall-clock time
-- used for log throttling, and derive the global step count from progress.
self.block:specific(true)
self.time = os.time()
self.step = self.progress:sum()
end

-- Run the training process
-- Alternates epochs of background training (worker threads deployed below)
-- with full testing passes on training and testing data, checkpointing and
-- printing/plotting results after each epoch, then shuts the workers down.
function Driver:run()
   self:deployThreads()
   local begin_epoch = #self.record + 1
   local end_epoch = #self.record + self.epoches
   for i = begin_epoch, end_epoch do
      print('Driver testing on training data for epoch '..i)
      self.train_test:run(function (test, step) self:logTest(test, step) end)
      print('Driver testing on testing data for epoch '..i)
      self.test_test:run(function (test, step) self:logTest(test, step) end)
      self:save()
      self:printResult()
      if self.plot then self:plotRecord() end
   end
   -- Ask every worker to exit, then join and tear down the thread block
   for i = 1, self.threads do
      print('Driver sending RPC to exit thread '..i)
      self.slave_queues[i]:push{func = 'exit', arg = {}}
   end
   self.block:synchronize()
   self.block:terminate()
end

-- Deploy threads in sequential order to prevent io and memory jam
-- Blocks until each thread reports 'notifyDeploy'; any other RPC arriving in
-- the meantime is dispatched by name to the matching Driver method.
function Driver:deployThreads()
   for i = 1, self.threads do
      print('Driver deploying job for threads '..i)
      local thread_job = self:threadJob(i)
      self.block:addjob(i, thread_job)
      local rpc = self.master_queue:pop()
      while rpc.func ~= 'notifyDeploy' do
         self[rpc.func](self, unpack(rpc.arg))
         rpc = self.master_queue:pop()
      end
      print('Driver rpc = notifyDeploy, thread = '..rpc.arg[1])
   end
end

-- Thread initialization callback
-- Returns a closure run once inside each worker thread: requires libraries
-- locally and seeds both Lua and Torch RNGs per thread (__threadid is
-- provided by the threads library inside worker threads).
function Driver:initThread()
   return function ()
      local math = require('math')
      local nn = require('nn')
      local os = require('os')
      local torch = require('torch')
      local Queue = require('queue')
      math.randomseed(os.time() + __threadid)
      torch.manualSeed(os.time() + __threadid)
   end
end

-- Thread job callback
-- id: worker thread index.
-- Captures only shareable state (options, raw data table, shared modules,
-- RPC queues, resumed step count) as upvalues for the returned closure, which
-- trains in chunks of `steps` and reports progress to the main thread.
function Driver:threadJob(id)
   local options = self.options
   local steps = self.steps
   local data_table = self.train_data:getTable()
   local modules = self.model:getModules()
   local loss = self.loss
   local master_queue = self.master_queue
   local slave_queue = self.slave_queues[id]
   local progress = self.progress[id]
   return function()
      local os = require('os')
      local nn = require('nn')
      local torch = require('torch')
      local Data = require('data')
      local Model = require('model')
      local Train = require('train')
      local train_data = Data(options.train_data, data_table)
      local model = Model(options.model, modules)
      options.train.step = progress
      -- loss:sub(4) strips a leading 'nn.' prefix from the loss class name
      -- (assumes self.loss looks like 'nn.SomeCriterion' — see config)
      local train = Train(train_data, model, nn[loss:sub(4)](), options.train)
      master_queue:push{func = 'notifyDeploy', arg = {__threadid}}
      local exit = false
      while not exit do
         train:run(steps)
         -- Tell main thread to update progress
         master_queue:push{
            func = 'updateProgress',
            arg = {__threadid, train.step, train.objective}}
         -- Handle RPC requests from main thread
         local rpc = slave_queue:pop_async()
         while rpc do
            if rpc.func == 'exit' then exit = true end
            rpc = slave_queue:pop_async()
         end
      end
   end
end

-- Update progress
-- RPC target executed on the main thread: records the reporting worker's
-- current step and logs the aggregate progress across all workers.
function Driver:updateProgress(thread, step, objective)
   self.progress[thread] = step
   print('Driver rpc = updateProgress, thread = '..thread..', objective = '..
      objective..', progress = '..self.progress[thread]..', total = '..
      self.progress:sum())
end

-- Log for testing
-- test: the running Test instance; step: current test step.
-- Throttled to at most one message per self.interval seconds. Also drains any
-- pending worker RPCs so progress updates keep flowing during long tests.
function Driver:logTest(test, step)
   if os.difftime(os.time(), self.time) >= self.interval then
      local message = 'Test step = '..step..
         ', total_error = '..test.total_error..
         ', total_objective = '..test.total_objective..
         ', label = '..test.label[1]..
         ', decision = '..test.decision[1]
      if self.debug then
         local weight = {
            weight = test.model.linear.weight, bias = test.model.linear.bias}
         for key, w in pairs(weight) do
            message = message..', '..key..':mean() = '..w:mean()..', '..
               key..':std() = '..w:std()
         end
      end
      print(message)
      -- Handle rpc
      local rpc = self.master_queue:pop_async()
      while rpc do
         self[rpc.func](self, unpack(rpc.arg))
         rpc = self.master_queue:pop_async()
      end
      self.time = os.time()
   end
end

-- Save for model
-- epoch: record index to write; defaults to appending a new entry.
-- Backs up the previous record file, then saves the updated record table and
-- a per-epoch snapshot of the model.
function Driver:save(epoch)
   local epoch = epoch or #self.record + 1
   -- Make a backup for the record
   print('Driver backing up record.t7b')
   local record_file = paths.concat(self.location, 'record.t7b')
   os.rename(record_file, record_file..'.backup')
   -- Save the new record
   print('Driver saving new records to '..record_file)
   self.record[epoch] = {
      train_loss = self.train_test.total_objective,
      test_loss = self.test_test.total_objective,
      train_error = self.train_test.total_error,
      test_error = self.test_test.total_error,
      progress = self.progress:clone()
   }
   torch.save(record_file, self.record)
   -- Save the model
   local model_file = paths.concat(self.location, 'model_'..epoch..'.t7b')
   print('Driver saving model to '..model_file)
   self.model:save(model_file)
end

-- Print current result
-- epoch: record index to print; defaults to the latest recorded epoch.
function Driver:printResult(epoch)
   local epoch = epoch or #self.record
   print('Driver epoch = '..epoch..
      ', train_error = '..self.record[epoch].train_error..
      ', test_error = '..self.record[epoch].test_error..
      ', train_loss = '..self.record[epoch].train_loss..
      ', test_loss = '..self.record[epoch].test_loss)
end

-- Plot the record
-- Draws error and loss curves over all recorded epochs in two gnuplot
-- figures, which are created lazily and reused across calls.
function Driver:plotRecord()
   require('gnuplot')
   self.error_figure = self.error_figure or gnuplot.figure()
   self.loss_figure = self.loss_figure or gnuplot.figure()
   local epoch = torch.linspace(1, #self.record, #self.record)
   local train_error = torch.Tensor(epoch:size())
   local test_error = torch.Tensor(epoch:size())
   local train_loss = torch.Tensor(epoch:size())
   local test_loss = torch.Tensor(epoch:size())
   for i = 1, #self.record do
      train_error[i] = self.record[i].train_error
      test_error[i] = self.record[i].test_error
      train_loss[i] = self.record[i].train_loss
      test_loss[i] = self.record[i].test_loss
   end
   gnuplot.figure(self.error_figure)
   gnuplot.plot({'Training error', epoch, train_error},
      {'Testing error', epoch, test_error})
   gnuplot.title('Training and testing error')
   gnuplot.figure(self.loss_figure)
   gnuplot.plot({'Training loss', epoch, train_loss},
      {'Testing loss', epoch, test_loss})
   gnuplot.title('Training and testing loss')
end

return Driver

================================================ FILE: linearnet/model.lua ================================================

--[[
Model class for LinearNet, using SparseLinear
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local nn = require('nn')
local torch = require('torch')

local Model = class()

-- Constructor for model
-- config: configuration table
-- .size: size of input index
-- .dimension: dimension of output
-- .decay: weight decay. Optional.
-- modules: share weights with the given modules. Optional.
function Model:_init(config, modules)
   self.size = config.size
   self.dimension = config.dimension
   self.decay = config.decay or 0
   if modules then
      -- Share parameters with the provided linear module
      self.linear = modules.linear:clone('weight', 'bias')
   else
      self.linear = nn.SparseLinear(self.size, self.dimension)
   end
   self.sequential = nn.Sequential()
   self.sequential:add(self.linear)
   self.sequential:add(nn.LogSoftMax())
end

-- Forward propagation
-- input: a sparse sample accepted by nn.SparseLinear
-- Returns the log-softmax output of the linear layer.
function Model:forward(input)
   return self.sequential:forward(input)
end

-- Backward propagation
-- input: the same sample passed to forward
-- grad_output: gradient with respect to the model output
-- Applies L2 weight decay only to the weight columns referenced by the
-- sparse input indices (column 1 of input), plus the whole bias.
function Model:backward(input, grad_output)
   local grad_input = self.sequential:backward(input, grad_output)
   -- Apply weight decay to linear module
   if self.decay > 0 then
      self.linear_index = self.linear_index or torch.LongTensor(input:size(1))
      self.linear_index:resize(input:size(1)):copy(input:select(2, 1))
      self.linear_decay = self.linear_decay or self.linear.gradWeight:new()
      self.linear_decay:index(self.linear.weight, 2, self.linear_index)
      self.linear.gradWeight:indexAdd(
         2, self.linear_index, self.linear_decay:mul(self.decay))
      self.linear.gradBias:add(self.decay, self.linear.bias)
   end
   return grad_input
end

-- Update parameters
function Model:updateParameters(rate)
   return self.linear:updateParameters(rate)
end

-- Zero grad parameters
function Model:zeroGradParameters()
   return self.linear:zeroGradParameters()
end

-- Set the type
-- tensortype: target tensor type string; defaults to the current weight type.
-- Returns the effective tensor type.
function Model:type(tensortype)
   local tensortype = tensortype or self.linear.weight:type()
   -- Fixed: the guard previously compared against the undefined global
   -- `tensor_type` (always nil), so the conversion ran unconditionally.
   if tensortype ~= self.linear.weight:type() then
      self.linear:type(tensortype)
   end
   return tensortype
end

-- Reset the weights
-- sigma: standard deviation for normal initialization of the weights;
-- the bias is zeroed.
function Model:reset(sigma)
   self.linear.weight:normal(0, sigma)
   self.linear.bias:zero()
end

-- Get the modules
function Model:getModules()
   return {linear = self.linear}
end

-- Share given modules
function Model:shareModules(modules)
   self.linear:share(modules.linear, 'weight', 'bias')
end

-- Save to file
function Model:save(file)
   torch.save(file, self.linear)
end

-- Load from file
-- Copies parameters from the serialized module instead of replacing
-- self.linear, so existing shared references stay valid.
function Model:load(file)
   local linear = torch.load(file)
   self.linear.weight:copy(linear.weight)
   self.linear.bias:copy(linear.bias)
end

return Model

================================================ FILE: linearnet/queue.lua ================================================

--[[
Multithreaded queue based on tds
Copyright 2015 Xiang Zhang
--]]

local class = require('pl.class')
local ffi = require('ffi')
local serialize = require('threads.sharedserialize')
local tds = require('tds')
local threads = require('threads')
local torch = require('torch')

-- Append an underscore to distinguish between metatable and class name
local Queue_ = torch.class('Queue')

-- Constructor
-- size: buffer size (number of slots), defaults to 10
-- pointer[1] = next write slot, pointer[2] = next read slot,
-- pointer[3] = current number of stored items.
function Queue_:__init(size)
   self.data = tds.hash()
   self.pointer = torch.LongTensor(3):fill(1)
   self.pointer[3] = 0
   self.size = size or 10
   self.mutex = threads.Mutex()
   self.added_condition = threads.Condition()
   self.removed_condition = threads.Condition()
end

-- Blocking push: waits on removed_condition while the buffer is full.
-- Items are serialized to a string so they can cross thread boundaries.
function Queue_:push(item)
   local storage = serialize.save(item)
   self.mutex:lock()
   while self.pointer[3] == self.size do
      self.removed_condition:wait(self.mutex)
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
end

-- Blocking pop: waits on added_condition while the buffer is empty.
function Queue_:pop()
   self.mutex:lock()
   while self.pointer[3] == 0 do
      self.added_condition:wait(self.mutex)
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   local item = serialize.load(storage)
   return item
end

-- Non-blocking push: returns nil if the buffer is full, the item on success.
function Queue_:push_async(item)
   if self.pointer[3] == self.size then return end
   local storage = serialize.save(item)
   self.mutex:lock()
   -- Re-check under the lock; the unlocked check above is only a fast path
   if self.pointer[3] == self.size then
      self.mutex:unlock()
      return
   end
   self.data[self.pointer[1]] = storage:string()
   self.pointer[1] = math.fmod(self.pointer[1], self.size) + 1
   self.pointer[3] = self.pointer[3] + 1
   self.mutex:unlock()
   self.added_condition:signal()
   return item
end

-- Non-blocking pop: returns nil if the buffer is empty, the item on success.
function Queue_:pop_async()
   if self.pointer[3] == 0 then return end
   self.mutex:lock()
   -- Re-check under the lock; the unlocked check above is only a fast path
   if self.pointer[3] == 0 then
      self.mutex:unlock()
      return
   end
   local storage = torch.CharStorage():string(self.data[self.pointer[2]])
   self.pointer[2] = math.fmod(self.pointer[2], self.size) + 1
   self.pointer[3] = self.pointer[3] - 1
   self.mutex:unlock()
   self.removed_condition:signal()
   local item = serialize.load(storage)
   return item
end

-- Free the underlying mutex and condition variables.
function Queue_:free()
   self.mutex:free()
   self.added_condition:free()
   self.removed_condition:free()
end

-- Custom serializer so a Queue can be shared with worker threads: the tds
-- hash and pointer tensor are shared by raw pointer (with refcounts bumped),
-- and the synchronization primitives are passed by id.
function Queue_:__write(f)
   local data = self.data
   f:writeLong(torch.pointer(data))
   tds.C.tds_hash_retain(data)
   local pointer = self.pointer
   f:writeLong(torch.pointer(pointer))
   pointer:retain()
   f:writeObject(self.size)
   f:writeObject(self.mutex:id())
   f:writeObject(self.added_condition:id())
   f:writeObject(self.removed_condition:id())
end

-- Custom deserializer matching __write: reattaches the shared hash and
-- pointer tensor and reconstructs the synchronization primitives from ids.
function Queue_:__read(f)
   local data = f:readLong()
   data = ffi.cast('tds_hash&', data)
   ffi.gc(data, tds.C.tds_hash_free)
   self.data = data
   local pointer = f:readLong()
   pointer = torch.pushudata(pointer, 'torch.LongTensor')
   self.pointer = pointer
   self.size = f:readObject()
   self.mutex = threads.Mutex(f:readObject())
   self.added_condition = threads.Condition(f:readObject())
   self.removed_condition = threads.Condition(f:readObject())
end

-- Return class name, not the underscored metatable
return Queue

================================================ FILE: linearnet/test.lua ================================================

--[[
Tester for LinearNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local torch = require('torch')

local Test = class()

-- Constructor
-- data: the data instance
-- model: the model instance
-- loss: the loss instance
-- config: configuration table
function Test:_init(data, model, loss, config)
   self.data = data
   self.model = model
   self.loss = loss
   self.type = model:type()
end

-- Run the tester
-- callback: a function to execute after each step
-- Iterates once over the whole dataset, maintaining running means of the
-- loss objective and the classification error.
function Test:run(callback)
   self.total_objective = 0
   -- NOTE(review): initialized to 1 (worst-case error) but overwritten on the
   -- first step, since self.step is 0 in the running-mean update below.
   self.total_error = 1
   self.step = 0
   for sample, label in self.data:iterator() do
      self:runStep(sample, label)
      self.step = self.step + 1
      if callback then callback(self, self.step) end
   end
end

-- Run for one step
-- sample, label: one example produced by the data iterator.
function Test:runStep(sample, label)
   -- Get sample
   self.sample, self.label = sample, label
   -- Forward propagation
   self.output = self.model:forward(self.sample)
   self.objective = self.loss:forward(self.output, self.label)
   -- Compute decision: predicted class is the argmax of the output
   self.max, self.decision = self.output:max(1)
   self.error = (self.decision[1] == self.label[1]) and 0 or 1
   -- Accumulate errors as running means over steps seen so far
   self.total_objective = (self.total_objective * self.step + self.objective)
      / (self.step + 1)
   self.total_error = (self.total_error * self.step + self.error)
      / (self.step + 1)
end

return Test

================================================ FILE: linearnet/train.lua ================================================

--[[
Training class for LinearNet
Copyright 2016 Xiang Zhang
--]]

local class = require('pl.class')
local math = require('math')
local nn = require('nn')

local Train = class()

-- Constructor
-- data: the data instance
-- model: the model instance
-- loss: the loss instance
-- config: the configuration table
-- .rate: learning rate
-- .step: current finished steps.
Starting from 0 function Train:_init(data, model, loss, config) self.data = data self.model = model self.loss = loss local config = config or {} self.rate = config.rate or 1e-3 self.step = config.step or 0 self.type = model:type() end -- Run for a number of steps -- steps: number of steps to run -- callback: a function to execute after each step function Train:run(steps, callback) for i = 1, steps do self:runStep() self.step = self.step + 1 if callback then callback(self, i) end end end -- Run for one step function Train:runStep() -- Get sample self.sample, self.label = self.data:getSample(self.sample, self.label) -- Forward propagation self.output = self.model:forward(self.sample) self.objective = self.loss:forward(self.output, self.label) -- Backward propagation self.grad_output = self.loss:backward(self.output, self.label) self.grad_input = self.model:backward(self.sample, self.grad_output) -- Update parameters self.model:updateParameters(self.rate) self.model:zeroGradParameters() end return Train ================================================ FILE: linearnet/unittest/data.lua ================================================ --[[ Unit test for LinearNet data program Copyright 2016 Xiang Zhang --]] local Data = require('data') local math = require('math') local string = require('string') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' self.config = config print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) end function joe:getSampleTest() local data = self.data print('Getting 10 samples') for i = 1, 10 do local sample, label = data:getSample(sample, label) 
io.write(label[1], ' ', sample:size(1)) for j = 1, sample:size(1) do io.write(' ', sample[j][1], ':', string.format('%.2g', sample[j][2])) end io.write('\n') io.flush() end end function joe:iteratorTest() local data = self.data print('Iterating through data') local count = 0 for sample, label in data:iterator() do io.write(label[1], ' ', sample:size(1)) count = count + 1 if math.fmod(count, 16) == 0 then io.write('\n') io.flush() else io.write(', ') end end if math.fmod(count, 16) ~= 0 then io.write('\n') io.flush() end end joe.main() return joe ================================================ FILE: linearnet/unittest/driver.lua ================================================ --[[ Unit test for driver Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_charbag.t7b' config.test_data.file = 'data/dianping/unittest_charbag.t7b' config.driver.steps = 10000 config.driver.epoches = 30 config.driver.interval = 1 config.driver.location = '/tmp' config.driver.debug = true local driver = Driver(config, config.driver) self.config = config self.driver = driver end function joe:driverTest() local driver = self.driver print('Testing driver') driver:run() end joe.main() return joe ================================================ FILE: linearnet/unittest/model.lua ================================================ --[[ Unit test for LinearNet model program Copyright 2016 Xiang Zhang --]] local Model = require('model') local math = require('math') local string = require('string') local sys = require('sys') local Data = require('data') -- A Logic 
Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-3) print(self.model.linear.weight:std()) end function joe:propagationTest() local data = self.data local model = self.model local weight = self.model.linear.weight local bias = self.model.linear.bias print('Testing forward and backward propagation for 10 times') for i = 1, 10 do print('Zero gradient of parameters') sys.tic() model:zeroGradParameters() sys.toc(true) local sample, label = data:getSample() print(tostring(i)..', sample '..sample:size(1)..', label '..label[1]) print('Forward propagating') sys.tic() local output = model:forward(sample) sys.toc(true) print('output '..output:dim()..', '..output:size(1)) print('Backward propagating') local grad_output = torch.rand(output:size()) sys.tic() local grad_input = model:backward(sample, grad_output) sys.toc(true) print('grad_input '..tostring(grad_input)) print('Update parameters') sys.tic() model:updateParameters(1e-3) sys.toc(true) print('weight mean '..weight:mean()..', std '..weight:std().. 
', bias mean '..bias:mean()..', std '..bias:std()) end end function joe:shareModuleTest() local model = self.model local linear = model.linear:clone() print(torch.pointer(model.linear.weight:storage()), torch.pointer(linear.weight:storage()), torch.pointer(model.linear.bias:storage()), torch.pointer(linear.bias:storage())) model:shareModules({linear = linear}) print(torch.pointer(model.linear.weight:storage()), torch.pointer(linear.weight:storage()), torch.pointer(model.linear.bias:storage()), torch.pointer(linear.bias:storage())) end function joe:saveTest() local model = self.model local weight, bias = model.linear.weight, model.linear.bias print('weight mean '..weight:mean()..', std '..weight:std().. ', bias mean '..bias:mean()..', std '..bias:std()) print('Saving model to /tmp/model.t7b') model:save('/tmp/model.t7b') print('Resetting model with sigma 1e-2') model:reset(1e-2) print('weight mean '..weight:mean()..', std '..weight:std().. ', bias mean '..bias:mean()..', std '..bias:std()) print('Loading model from /tmp/model.t7b') model:load('/tmp/model.t7b') print('weight mean '..weight:mean()..', std '..weight:std().. 
', bias mean '..bias:mean()..', std '..bias:std()) end joe.main() return joe ================================================ FILE: linearnet/unittest/test.lua ================================================ --[[ Unit test for LinearNet tester Copyright 2016 Xiang Zhang --]] local Test = require('test') local math = require('math') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-2) print('Loading the loss') self.loss = nn[config.driver.loss:sub(4)]() print(self.loss) print('Loading the tester') self.test = Test(self.data, self.model, self.loss) end function joe:runTest() local callback = function(test, step) print('stp = '..step.. ', lss = '..test.total_objective.. ', err = '..test.total_error.. ', obj = '..test.objective.. ', lbl = '..test.label[1].. 
', dcs = '..test.decision[1]) end print('Starting test') self.test:run(callback) end joe.main() return joe ================================================ FILE: linearnet/unittest/train.lua ================================================ --[[ Unit test for LinearNet trainer Copyright 2016 Xiang Zhang --]] local Train = require('train') local math = require('math') local Data = require('data') local Model = require('model') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.file = 'data/dianping/unittest_charbag.t7b' print('Loading data from '..config.train_data.file) self.data = Data(config.train_data) print('Loading the model') self.model = Model(config.model) print(self.model.linear) print('Resetting model') self.model:reset(1e-2) print('Loading the loss') self.loss = nn[config.driver.loss:sub(4)]() print(self.loss) print('Loading the trainer') self.train = Train(self.data, self.model, self.loss) end function joe:runTest() local callback = function(train, step) local model = train.model if math.fmod(step, 1000) == 0 then local max, decision = train.output:max(1) print('stp = '..step.. ', lbl = '..train.label[1].. ', dcs = '..decision[1].. ', obj = '..train.objective.. ', wmn = '..model.linear.weight:mean().. ', wsd = '..model.linear.weight:std().. ', bmn = '..model.linear.bias:mean().. ', bsd = '..model.linear.bias:std()) end end local steps = 1000000 local train = self.train print('Training for '..steps..' 
steps') train:run(steps, callback) end joe.main() return joe ================================================ FILE: models/README.txt ================================================ This directory should contain trained models and checkpoints. ================================================ FILE: models/embednet/README.txt ================================================ This directory should contain trained models and checkpoints for embednet. ================================================ FILE: models/fasttext/README.txt ================================================ This directory should contain trained models and checkpoints for fasttext. ================================================ FILE: models/glyphnet/README.txt ================================================ This directory should contain trained models and checkpoints for glyphnet. ================================================ FILE: models/linearnet/README.txt ================================================ This directory should contain trained models and checkpoints for linearnet. ================================================ FILE: models/onehotnet/README.txt ================================================ This directory should contain trained models and checkpoints for onehotnet. 
================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/binary_train.t7b -test_data_file data/11st/sentiment/binary_test.t7b -driver_location models/11stbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/binary_train_rr.t7b -test_data_file data/11st/sentiment/binary_test_rr.t7b -driver_location models/11stbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/binary_train.t7b -test_data_file data/11st/sentiment/binary_test.t7b -driver_location models/11stbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/11stbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small 
-train_data_file data/11st/sentiment/binary_train_rr.t7b -test_data_file data/11st/sentiment/binary_test_rr.t7b -driver_location models/11stbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/full_train.t7b -test_data_file data/11st/sentiment/full_test.t7b -driver_location models/11stfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/11st/sentiment/full_train_rr.t7b -test_data_file data/11st/sentiment/full_test_rr.t7b -driver_location models/11stfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/full_train.t7b -test_data_file data/11st/sentiment/full_test.t7b -driver_location models/11stfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/11stfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program 
command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/11st/sentiment/full_train_rr.t7b -test_data_file data/11st/sentiment/full_test_rr.t7b -driver_location models/11stfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/amazonbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/amazon/binary_train.t7b -test_data_file data/amazon/binary_test.t7b -driver_location models/amazonbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/amazonbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/amazon/binary_train.t7b -test_data_file data/amazon/binary_test.t7b -driver_location models/amazonbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/amazonfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/amazon/full_train.t7b -test_data_file data/amazon/full_test.t7b -driver_location models/amazonfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: 
onehotnet/archive/amazonfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/amazon/full_train.t7b -test_data_file data/amazon/full_test.t7b -driver_location models/amazonfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train.t7b -test_data_file data/chinanews/topic/test.t7b -driver_location models/chinanews/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/chinanews/topic/train_pinyin.t7b -test_data_file data/chinanews/topic/test_pinyin.t7b -driver_location models/chinanews/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/chinanews/topic/train.t7b -test_data_file data/chinanews/topic/test.t7b -driver_location 
models/chinanews/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/chinanews_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/chinanews/topic/train_pinyin.t7b -test_data_file data/chinanews/topic/test_pinyin.t7b -driver_location models/chinanews/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_location models/dianping/onehot4temporal12length2048feature256roman -train_data_file data/dianping/train_pinyin_string.t7b -test_data_file data/dianping/test_pinyin_string.t7b "$@"; ================================================ FILE: onehotnet/archive/dianping_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: 
onehotnet/archive/dianping_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -driver_location models/dianping/onehot4temporal8length1944feature256roman -train_data_file data/dianping/train_pinyin_string.t7b -test_data_file data/dianping/test_pinyin_string.t7b "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train.t7b -test_data_file data/ifeng/topic/test.t7b -driver_location models/ifeng/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/ifeng/topic/train_pinyin.t7b -test_data_file data/ifeng/topic/test_pinyin.t7b -driver_location models/ifeng/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/ifeng/topic/train.t7b -test_data_file data/ifeng/topic/test.t7b -driver_location 
models/ifeng/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/ifeng_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/ifeng/topic/train_pinyin.t7b -test_data_file data/ifeng/topic/test_pinyin.t7b -driver_location models/ifeng/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train.t7b -test_data_file data/jd/sentiment/binary_test.t7b -driver_location models/jdbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/binary_train_pinyin.t7b -test_data_file data/jd/sentiment/binary_test_pinyin.t7b -driver_location models/jdbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-driver_variation small -train_data_file data/jd/sentiment/binary_train.t7b -test_data_file data/jd/sentiment/binary_test.t7b -driver_location models/jdbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jdbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/binary_train_pinyin.t7b -test_data_file data/jd/sentiment/binary_test_pinyin.t7b -driver_location models/jdbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train.t7b -test_data_file data/jd/sentiment/full_test.t7b -driver_location models/jdfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/jd/sentiment/full_train_pinyin.t7b -test_data_file data/jd/sentiment/full_test_pinyin.t7b -driver_location models/jdfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/full_train.t7b -test_data_file data/jd/sentiment/full_test.t7b -driver_location models/jdfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jdfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/jd/sentiment/full_train_pinyin.t7b -test_data_file data/jd/sentiment/full_test_pinyin.t7b -driver_location models/jdfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train.t7b -test_data_file data/joint/binary_test.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/binary_train_roman.t7b -test_data_file data/joint/binary_test_roman.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal12length2048feature256roman "$@"; 
================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train.t7b -test_data_file data/joint/binary_test.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jointbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/binary_train_roman.t7b -test_data_file data/joint/binary_test_roman.t7b -driver_steps 400000 -driver_location models/jointbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/joint/full_train.t7b -test_data_file data/joint/full_test.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua 
-train_data_file data/joint/full_train_roman.t7b -test_data_file data/joint/full_test_roman.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train.t7b -test_data_file data/joint/full_test.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/jointfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/joint/full_train_roman.t7b -test_data_file data/joint/full_test_roman.t7b -driver_steps 400000 -driver_location models/jointfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/nytimes_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/nytimes/topic/train.t7b -test_data_file data/nytimes/topic/test.t7b -driver_location models/nytimes/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/nytimes_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # 
Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/nytimes/topic/train.t7b -test_data_file data/nytimes/topic/test.t7b -driver_location models/nytimes/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/binary_train.t7b -test_data_file data/rakuten/sentiment/binary_test.t7b -driver_location models/rakutenbinary/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/binary_train_hepburn.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn.t7b -driver_location models/rakutenbinary/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/binary_train.t7b -test_data_file data/rakuten/sentiment/binary_test.t7b -driver_location models/rakutenbinary/onehot4temporal8length1944feature256 
"$@"; ================================================ FILE: onehotnet/archive/rakutenbinary_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/binary_train_hepburn.t7b -test_data_file data/rakuten/sentiment/binary_test_hepburn.t7b -driver_location models/rakutenbinary/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal12length2048feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/full_train.t7b -test_data_file data/rakuten/sentiment/full_test.t7b -driver_location models/rakutenfull/onehot4temporal12length2048feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal12length2048feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -train_data_file data/rakuten/sentiment/full_train_hepburn.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn.t7b -driver_location models/rakutenfull/onehot4temporal12length2048feature256roman "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal8length1944feature256.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; 
set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/full_train.t7b -test_data_file data/rakuten/sentiment/full_test.t7b -driver_location models/rakutenfull/onehot4temporal8length1944feature256 "$@"; ================================================ FILE: onehotnet/archive/rakutenfull_onehot4temporal8length1944feature256roman.sh ================================================ #!/bin/bash # Archived program command-line for experiment # Copyright 2016 Xiang Zhang # # Usage: bash {this_file} [additional_options] set -x; set -e; qlua main.lua -driver_variation small -train_data_file data/rakuten/sentiment/full_train_hepburn.t7b -test_data_file data/rakuten/sentiment/full_test_hepburn.t7b -driver_location models/rakutenfull/onehot4temporal8length1944feature256roman "$@"; ================================================ FILE: onehotnet/config.lua ================================================ --[[ Configuration for EmbedNet Copyright Xiang Zhang 2016 --]] -- Name space local config = {} -- Training data configurations config.train_data = {} config.train_data.file = 'data/dianping/train_string.t7b' config.train_data.batch = 16 config.train_data.size = 256 -- Testing data configurations config.test_data = {} config.test_data.file = 'data/dianping/test_string.t7b' config.test_data.batch = 16 config.test_data.size = 256 -- Model configurations config.model = {} config.model.cudnn = true -- Model variations configuration config.variation = {} -- Large model configuration local onehot = {} onehot[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} onehot[6] = {name = 
'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[16] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[17] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[18] = {name = 
'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[19] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[20] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[21] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[22] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[23] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[24] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[25] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} temporal[26] = {name = 'nn.Reshape', size = 4096, batchMode = true} temporal[27] = {name = 'nn.Linear', inputSize = 4096, outputSize = 1024} temporal[28] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[29] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[30] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[31] = {name = 'nn.LogSoftMax'} config.variation['large'] = {onehot = onehot, temporal = temporal, length = 2048} -- Small model configuration local onehot = {} onehot[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} onehot[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} onehot[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} onehot[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} 
onehot[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 2, dW = 2} local temporal = {} temporal[1] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[2] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[3] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[4] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[5] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[6] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[7] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[8] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[9] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[10] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[11] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[12] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[13] = {name = 'nn.TemporalConvolutionMM', inputFrameSize = 256, outputFrameSize = 256, kW = 3, dW = 1, padW = 1} temporal[14] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[15] = {name = 'nn.TemporalMaxPoolingMM', kW = 3, dW = 3} temporal[16] = {name = 'nn.Reshape', size = 4608, batchMode = true} temporal[17] = {name = 'nn.Linear', inputSize = 4608, outputSize = 1024} temporal[18] = {name = 'nn.Threshold', th = 1e-6, v = 0, ip = true} temporal[19] = {name = 'nn.Dropout', p = 0.5, v2 = true, inplace = true} temporal[20] = {name = 'nn.Linear', inputSize = 1024, outputSize = 2} temporal[21] = {name = 'nn.LogSoftMax'} config.variation['small'] = {onehot = onehot, temporal = temporal, length = 1944} -- Trainer settings config.train = {} config.train.momentum = 0.9 config.train.decay = 1e-5 -- These are just 
multipliers to config.driver.rate -- For every config.driver.schedule * config.driver.steps config.train.rates = {1/1, 1/2, 1/4, 1/8, 1/16, 1/32, 1/64, 1/128, 1/256, 1/512, 1/1024} -- Tester settings config.test = {} -- Visualizer settings config.visualizer = {} config.visualizer.width = 1200 config.visualizer.scale = 4 config.visualizer.height = 64 -- Driver configurations config.driver = {} config.driver.type = 'torch.CudaTensor' config.driver.device = 1 config.driver.loss = 'nn.ClassNLLCriterion' config.driver.variation = 'large' config.driver.steps = 100000 config.driver.epoches = 100 config.driver.schedule = 8 config.driver.rate = 1e-5 config.driver.interval = 5 config.driver.location = 'models/dianping/onehot4temporal12length2048feature256' config.driver.plot = true config.driver.visualize = true config.driver.debug = false config.driver.resume = false -- Main configuration config.joe = {} return config ================================================ FILE: onehotnet/data.lua ================================================ --[[ Data class for OnehotNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local torch = require('torch') local parent = require('glyphnet/data') local Data = class(parent) -- Constructor for Data -- config: configuration table -- .file: file for data -- .batch: batch of data -- .size: size of the quantization function Data:_init(config) local data = torch.load(config.file) self.data = {code = data.index, code_value = data.content} self.length = config.length or 2048 self.size = config.size or 256 self.batch = config.batch or 16 end function Data:initSample(sample, label) local sample = sample or torch.Tensor(self.batch, self.size, self.length) local label = label or torch.Tensor(self.batch) sample:zero() return sample, label end function Data:index(sample, class, item) local code, code_value = self.data.code, self.data.code_value local position = 1 for field = 1, code[class][item]:size(1) do -- Break if current 
position is larger than sample length if position > sample:size(2) then break end for char = 1, code[class][item][field][2] + 1 do -- Break if current position is larger than sample length if position > sample:size(2) then break end local char_index = code[class][item][field][1] + char - 1 sample[code_value[char_index] + 1][position] = 1 position = position + 1 end end return sample end return Data ================================================ FILE: onehotnet/driver.lua ================================================ --[[ Driver for OnehotNet training Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local parent = require('glyphnet/driver') local Driver = class(parent) -- Initialize variation function Driver:initVariation() print('Driver using model variation '..self.variation) self.options.model.onehot = self.options.variation[self.variation].onehot self.options.model.temporal = self.options.variation[self.variation].temporal print('Driver adjusting data length to '.. 
self.options.variation[self.variation].length) self.options.train_data.length = self.options.variation[self.variation].length self.options.test_data.length = self.options.variation[self.variation].length end -- Visualize the model function Driver:visualizeModel() local Visualizer = require('visualizer') self.options.visualizer.title = 'Onehot model' self.onehot_visualizer = self.onehot_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = 'Temporal model' self.temporal_visualizer = self.temporal_visualizer or Visualizer(self.options.visualizer) self.options.visualizer.title = nil self.onehot_visualizer:drawSequential(self.model.onehot) self.temporal_visualizer:drawSequential(self.model.temporal) end return Driver ================================================ FILE: onehotnet/model.lua ================================================ --[[ Model for OnehotNet Copyright 2016 Xiang Zhang --]] local class = require('pl.class') local nn = require('nn') local parent = require('glyphnet/model') local Model = class(parent) -- Model constructor -- config: configuration table -- .onehot: configuration table of the onehot model -- .temporal: configuration table of the temporal model -- .file: the model file to load -- .cudnn: whether to use NVidia CUDNN function Model:_init(config) -- Read or create model if config.file then local model = torch.load(config.file) self.onehot = self:makeCleanSequential(model.onehot) self.temporal = self:makeCleanSequential(model.temporal) else self.onehot = self:createCleanSequential(config.onehot) self:initSequential(self.onehot) self.temporal = self:createCleanSequential(config.temporal) self:initSequential(self.temporal) end -- Saving configurations self.cudnn = config.cudnn self.config = config self.tensortype = torch.getdefaulttensortype() end function Model:forward(input) self.feature = self.onehot:forward(input) self.output = self.temporal:forward(self.feature) return self.output end function 
Model:backward(input, grad_output) self.grad_feature = self.temporal:backward(self.feature, grad_output) self.grad_input = self.onehot:backward(input, self.grad_feature) return self.grad_input end function Model:getParameters() return nn.Module.getParameters(self) end function Model:parameters() local parameters, gradients = {}, {} if not self.pretrain then local onehot_parameters, onehot_gradients = self.onehot:parameters() for i = 1, #onehot_parameters do parameters[#parameters + 1] = onehot_parameters[i] gradients[#gradients + 1] = onehot_gradients[i] end end local temporal_parameters, temporal_gradients = self.temporal:parameters() for i = 1, #temporal_parameters do parameters[#parameters + 1] = temporal_parameters[i] gradients[#gradients + 1] = temporal_gradients[i] end return parameters, gradients end function Model:type(tensortype) if tensortype ~= nil and tensortype ~= self.tensortype then if tensortype == 'torch.CudaTensor' then require('cunn') self.onehot = self:makeCudaSequential(self.onehot) self.temporal = self:makeCudaSequential(self.temporal) else self.onehot = self:makeCleanSequential(self.onehot) self.temporal = self:makeCleanSequential(self.temporal) end self.onehot:type(tensortype) self.temporal:type(tensortype) self.tensortype = tensortype end return self.tensortype end function Model:setMode(mode) self:setModeSequential(self.onehot, mode) self:setModeSequential(self.temporal, mode) end function Model:save(file) local onehot = self:clearSequential( self:makeCleanSequential(self.onehot)) local temporal = self:clearSequential( self:makeCleanSequential(self.temporal)) torch.save(file, {onehot = onehot, temporal = temporal}) end return Model ================================================ FILE: onehotnet/unittest/data.lua ================================================ --[[ Unit test for OnehotNet data component Copyright 2016 Xiang Zhang --]] local Data = require('data') local image = require('image') -- A Logic Named Joe local joe = {} function 
joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') config.train_data.length = 2048 config.test_data.length = 2048 print('Creating testing data object') local data = Data(config.test_data) self.config = config self.data = data end function joe:getBatchTest() local data = self.data print('Getting a batch') local sample, label = data:getBatch() local win = image.display{image = sample[1]:narrow(2, 1, 512), zoom = 3} print('Getting a second batch') sample, label = data:getBatch(sample, label) win = image.display{ win = win, image = sample[1]:narrow(2, 1, 512), zoom = 3} end function joe:iteratorTest() local data = self.data local win for sample, label, count in data:iterator() do win = image.display{ win = win, image = sample[1]:narrow(2, 1, 512), zoom = 3} io.write(count, ':') for i = 1, count do io.write(' ', label[i]) end io.write('\n') io.flush() end end joe.main() return joe ================================================ FILE: onehotnet/unittest/driver.lua ================================================ --[[ Unit test for OnehotNet driver component Copyright 2016 Xiang Zhang --]] local Driver = require('driver') -- A Logic Named Joe local joe = {} function joe.main() if joe.init then print('Initializing testing environment') joe:init() end for name, func in pairs(joe) do if type(name) == 'string' and type(func) == 'function' and name:match('[%g]+Test') then print('\nExecuting '..name) func(joe) end end end function joe:init() local config = dofile('config.lua') print('Creating driver') config.train_data.file = 'data/dianping/unittest_string.t7b' config.test_data.file = 'data/dianping/unittest_string.t7b' config.driver.debug = true config.driver.device = 3 config.driver.steps = 10 config.driver.epoches = 5 
local driver = Driver(config, config.driver)
   self.config = config
   self.driver = driver
end

-- Exercise the driver end-to-end via driver:run().
function joe:driverTest()
   local driver = self.driver
   print('Testing driver')
   driver:run()
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' variation of the model and print its two submodels.
function joe:init()
   local config = dofile('config.lua')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048)
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size())
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model_cuda.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' model variation on GPU with cudnn disabled.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   config.model.cudnn = false
   local model = Model(config.model)
   model:cuda()
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random CUDA input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
            params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048):cuda()
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size()):cuda()
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in
ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/model_cudnn.lua
================================================

--[[
Unit Test for OnehotNet model
Copyright 2016 Xiang Zhang
--]]

local Model = require('model')
local cutorch = require('cutorch')
local sys = require('sys')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build the 'large' model variation on GPU with cudnn enabled.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   config.model.cudnn = true
   local model = Model(config.model)
   model:cuda()
   print('Onehot model:')
   print(model.onehot)
   print('Temporal model:')
   print(model.temporal)
   self.config = config
   self.model = model
end

-- Time one forward and one backward pass on random CUDA input.
function joe:modelTest()
   local model = self.model
   local params, grads = model:getParameters()
   grads:zero()
   print('Number of elements in parameters and gradients: '..
params:nElement()..', '..grads:nElement())
   print('Creating input')
   local input = torch.rand(2, 256, 2048):cuda()
   print(input:size())
   print('Forward propagating')
   sys.tic()
   local output = model:forward(input)
   sys.toc(true)
   print(output:size())
   print('Creating output gradients')
   local grad_output = torch.rand(output:size()):cuda()
   print(grad_output:size())
   print('Backward propagating')
   sys.tic()
   local grad_input = model:backward(input, grad_output)
   sys.toc(true)
   print(grad_input:size())
end

-- Toggle train/test mode twice, printing the train flag of every
-- nn.Dropout module in the temporal model after each switch.
function joe:modeTest()
   local model = self.model
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to train')
   model:setModeTrain()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
   print('Setting model to test')
   model:setModeTest()
   for i, m in ipairs(model.temporal.modules) do
      if torch.type(m) == 'nn.Dropout' then
         print(i, torch.type(m), m.train)
      end
   end
end

-- Save the model to /tmp and reload it through the Model constructor.
function joe:saveTest()
   local model = self.model
   print('Saving to /tmp/model.t7b')
   model:save('/tmp/model.t7b')
   print('Loading from /tmp/model.t7b')
   local config = self.config
   config.model.file = '/tmp/model.t7b'
   local loaded = Model(config.model)
   print('Onehot model')
   print(loaded.onehot)
   print('Temporal model')
   print(loaded.temporal)
   config.model.file = nil
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/test.lua
================================================

--[[
Unit test for OnehotNet test component
Copyright 2015-2016 Xiang Zhang
--]]

local Test = require('test')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A
-- Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model and loss, then a Test object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with a progress-printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a callback that prints accumulated counts, errors, objectives
-- and timings of the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/test_cuda.lua
================================================

--[[
Unit test for OnehotNet test component
Copyright 2016 Xiang Zhang
--]]

local Test = require('test')
local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model and CUDA loss, then a Test object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create tester')
   local test = Test(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.test = test
   self.config = config
end

-- Run the tester with a progress-printing callback.
function joe:testTest()
   local test = self.test
   local callback = self:callback()
   print('Running tests')
   test:run(callback)
end

-- Return a callback that prints accumulated counts, errors, objectives
-- and timings of the tester.
function joe:callback()
   return function (test, i)
      print('cnt: '..test.total_count..', err: '..test.total_error..
               ', lss: '..test.total_objective..', obj: '..test.objective..
               ', crr: '..test.error..', dat: '..test.time.data..
', fwd: '..test.time.forward..', upd: '..test.time.update)
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/train.lua
================================================

--[[
Unit test for OnehotNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, model and loss, scale the configured learning rates by
-- config.driver.rate, then construct a Train object wrapping them.
function joe:init()
   local config = dofile('config.lua')
   config.test_data.batch = 2
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   print('Create trainer')
   for i, v in pairs(config.train.rates) do
      config.train.rates[i] = v * config.driver.rate
   end
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100 steps with a throttled progress callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   -- Fix: the message used to say 'Running for 10 steps' although the call
   -- below has always run 100 steps (cf. train_cuda.lua, whose message
   -- matches its 100000-step run). Make the log match the code.
   print('Running for 100 steps')
   train:run(100, callback)
end

-- Return a callback that prints training progress at most once every
-- 5 seconds (throttled via os.time/os.difftime).
function joe:callback()
   self.time = os.time()
   return function (train, i)
      if os.difftime(os.time(), self.time) >= 5 then
         print('stp: '..train.step..', rat: '..train.rate..
                  ', err: '..train.error..', obj: '..train.objective..
                  ', dat: '..train.time.data..', fwd: '..train.time.forward..
', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()

return joe

================================================
FILE: onehotnet/unittest/train_cuda.lua
================================================

--[[
Unit test for OnehotNet train component
Copyright 2015-2016 Xiang Zhang
--]]

local Train = require('train')
local cutorch = require('cutorch')
local nn = require('nn')
local os = require('os')
local Data = require('data')
local Model = require('model')

-- A Logic Named Joe
local joe = {}

-- Harness: run joe:init() when defined, then call every '*Test' member.
function joe.main()
   if joe.init then
      print('Initializing testing environment')
      joe:init()
   end
   for name, func in pairs(joe) do
      if type(name) == 'string' and type(func) == 'function'
            and name:match('[%g]+Test') then
         print('\nExecuting '..name)
         func(joe)
      end
   end
end

-- Build data, CUDA model and CUDA loss, scale the configured learning
-- rates by config.driver.rate, then construct a Train object.
function joe:init()
   local config = dofile('config.lua')
   print('Setting device to '..config.driver.device)
   cutorch.setDevice(config.driver.device)
   print('Creating data')
   config.test_data.length = config.variation['large'].length
   local data = Data(config.test_data)
   print('Create model')
   config.model.onehot = config.variation['large'].onehot
   config.model.temporal = config.variation['large'].temporal
   local model = Model(config.model)
   model:cuda()
   print('Create loss')
   -- loss:sub(4) drops the first 3 characters of the configured loss name
   -- (presumably an 'nn.' prefix) -- TODO confirm against config.lua.
   local loss = nn[config.driver.loss:sub(4)]()
   loss:cuda()
   print('Create trainer')
   for i, v in pairs(config.train.rates) do
      config.train.rates[i] = v * config.driver.rate
   end
   local train = Train(data, model, loss, config.train)
   self.data = data
   self.model = model
   self.loss = loss
   self.train = train
   self.config = config
end

-- Run the trainer for 100000 steps with a throttled progress callback.
function joe:trainTest()
   local train = self.train
   local callback = self:callback()
   print('Running for 100000 steps')
   train:run(100000, callback)
end

-- Return a callback that prints training progress at most once every
-- 5 seconds (throttled via os.time/os.difftime).
function joe:callback()
   self.time = os.time()
   return function (train, i)
      if os.difftime(os.time(), self.time) >= 5 then
         print('stp: '..train.step..', rat: '..train.rate..
                  ', err: '..train.error..', obj: '..train.objective..
', dat: '..train.time.data..', fwd: '..train.time.forward..
                  ', bwd: '..train.time.backward..', upd: '..train.time.update)
         self.time = os.time()
      end
   end
end

joe.main()

return joe

================================================
FILE: unifont/createunifont.lua
================================================

--[[
Create unifont database from png file
Copyright 2015 Xiang Zhang

Usage: qlua createunifont.lua [input] [output]
--]]

local image = require('image')
local io = require('io')
local math = require("math")
local torch = require("torch")

-- A Logic Named Joe
local joe = {}

-- Slice the unifont sprite-sheet png into a num x height x width tensor of
-- glyph bitmaps and save it with torch.save.
-- Optional positional arguments: input png, output t7b, glyphs per sheet
-- row, grid origin (startx, starty), glyph width/height, glyph count.
function joe.main()
   local input = arg[1] or 'unifont/unifont-8.0.01.png'
   local output = arg[2] or 'unifont/unifont-8.0.01.t7b'
   local row = arg[3] and tonumber(arg[3]) or 256
   local startx = arg[4] and tonumber(arg[4]) or 33
   local starty = arg[5] and tonumber(arg[5]) or 65
   local width = arg[6] and tonumber(arg[6]) or 16
   local height = arg[7] and tonumber(arg[7]) or width
   local num = arg[8] and tonumber(arg[8]) or 65536
   print('Loading data from '..input)
   local im = image.load(input)
   -- Invert pixel values (x -> 1 - x) on channel 1; presumably so glyph
   -- ink becomes 1 -- verify against the source png.
   im = im[1]:double():mul(-1):add(1)
   local data = torch.Tensor(num, height, width)
   for i = 1, num do
      -- Glyph i occupies a width x height cell in row-major grid order.
      local x = startx + math.fmod(i - 1, row) * width
      local y = starty + math.floor((i - 1)/row) * height
      data[i]:copy(im[{{y, y + height - 1},{x, x + width - 1}}])
      if math.fmod(i, 1000) == 0 then
         io.write('\rProcessing character: '..i..'/'..num)
         joe.win = image.display({image = data[i], win = joe.win, zoom = 8})
      end
   end
   joe.win = image.display({image = data[num], win = joe.win, zoom = 8})
   print('\rProcessed characters: '..num..'/'..num)
   print('Saving to '..output)
   torch.save(output, data)
end

joe.main()

================================================
FILE: unifont/unifont/README.txt
================================================

This directory contains GNU Unifont data

================================================
FILE: unifont/visualize.lua
================================================

--[[ Visualizing argument
string using GNU Unifont
Copyright 2015 Xiang Zhang
--]]

local bit32 = require('bit32')
local image = require('image')
local torch = require('torch')

local joe = {}

-- Decode the first command-line argument as UTF-8 and display the
-- corresponding unifont glyphs side by side.
function joe.main()
   local input = arg[1]
   local unifont = arg[2] or 'unifont/unifont-8.0.01.t7b'
   print('Loading unifont from '..unifont)
   local data = torch.load(unifont)
   local sequence = joe.utf8to32(input)
   local im = torch.Tensor(data:size(2), data:size(3) * #sequence)
   for i, c in ipairs(sequence) do
      -- Glyph tensor is indexed by code point + 1 (Lua is 1-based).
      im:narrow(2, 1 + (i-1)*data:size(3), data:size(3)):copy(data[c + 1])
   end
   print('Visualizing')
   image.display({image = im, zoom = 4})
end

-- Decode a UTF-8 string into a table of code points (Lua 5.2 bit32 ops).
-- Ref: http://lua-users.org/wiki/LuaUnicode
-- The insert at the top of the loop flushes the previously completed code
-- point (a no-op on the first character, since val is nil); the inserts
-- after the loop flush the final code point and append a trailing 0.
function joe.utf8to32(utf8str)
   assert(type(utf8str) == "string")
   local res, seq, val = {}, 0, nil
   for i = 1, #utf8str do
      local c = string.byte(utf8str, i)
      if seq == 0 then
         table.insert(res, val)
         -- Determine sequence length from the lead byte; 5- and 6-byte
         -- forms are deliberately commented out below.
         seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or
            c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or
            error("invalid UTF-8 character sequence")
         -- Keep the payload bits of the lead byte.
         val = bit32.band(c, 2^(8-seq) - 1)
      else
         -- Continuation byte: shift in its low 6 bits.
         val = bit32.bor(bit32.lshift(val, 6), bit32.band(c, 0x3F))
      end
      seq = seq - 1
   end
   table.insert(res, val)
   table.insert(res, 0)
   return res
end

joe.main()