Copy disabled (too large)
Download .txt
Showing preview only (61,621K chars total). Download the full file to get everything.
Repository: yilifzf/BDCI_Car_2018
Branch: master
Commit: cc1b0193213e
Files: 261
Total size: 206.2 MB
Directory structure:
gitextract_z313sz7r/
├── ReadMe.md
├── attribute_level/
│ ├── attribute.py
│ ├── best_test.sh
│ ├── cp_AttA3_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_Bert/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ └── networks2.py
├── bert/
│ ├── CONTRIBUTING.md
│ ├── MSRP/
│ │ ├── LICENSE
│ │ ├── MSR Paraphrase Corpus.lnk
│ │ ├── Microsoft Shared Source License.htm
│ │ ├── Microsoft Shared Source License.rtf
│ │ ├── msr_paraphrase_README.htm
│ │ ├── msr_paraphrase_README.rtf
│ │ ├── msr_paraphrase_data.txt
│ │ ├── msr_paraphrase_test.txt
│ │ └── msr_paraphrase_train.txt
│ ├── README.md
│ ├── __init__.py
│ ├── convert_tf_checkpoint_to_pytorch.py
│ ├── convert_tf_checkpoint_to_pytorch_raw.py
│ ├── download_MSRP.py
│ ├── extract_features.py
│ ├── glue_data/
│ │ ├── aspect_ensemble_online/
│ │ │ ├── 1/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 2/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 3/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 4/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 5/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── generate_npy.py
│ │ ├── generate_npy_for_polarity.py
│ │ ├── polarity_ensemble_online/
│ │ │ ├── 1/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 2/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 3/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 4/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 5/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ └── test.tsv
│ │ └── test.tsv
│ ├── modeling.py
│ ├── notebooks/
│ │ ├── Comparing TF and PT models SQuAD predictions.ipynb
│ │ └── Comparing TF and PT models.ipynb
│ ├── optimization.py
│ ├── requirements.txt
│ ├── run_classifier.py
│ ├── run_classifier_2.py
│ ├── run_classifier_ensemble.py
│ ├── run_classifier_ensemble_polarity.py
│ ├── run_squad.py
│ ├── samples/
│ │ ├── input.txt
│ │ └── sample_text.txt
│ ├── tests/
│ │ ├── modeling_test.py
│ │ ├── optimization_test.py
│ │ └── tokenization_test.py
│ └── tokenization.py
├── data/
│ ├── aspect_ensemble_online/
│ │ ├── 1/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 2/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 3/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 4/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ └── 5/
│ │ ├── dev.ind
│ │ ├── dev.tsv
│ │ ├── test.tsv
│ │ └── train.tsv
│ ├── backup/
│ │ ├── test_predict_aspect_ensemble_['Mon', 'Nov', '19', '20_30_28', '2018'].txt
│ │ ├── test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_48_57', '2018'].txt
│ │ ├── test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_49_23', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_25_00', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_26_03', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_30_35', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_42_02', '2018'].txt
│ │ └── test_predict_polarity_ensemble_['Tue', 'Nov', '13', '21_49_37', '2018'].txt
│ ├── build_test_for_predict.py
│ ├── generate_dev.py
│ ├── generate_dev_polarity.py
│ ├── generate_test.py
│ ├── polarity_ensemble_online/
│ │ ├── 1/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 2/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 3/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 4/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 5/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ └── test.tsv
│ ├── submit2.csv
│ ├── submit2.py
│ ├── submit2_stacking_all_bert.csv
│ ├── submit_example_2.csv
│ ├── test.txt
│ ├── test_predict_aspect_ensemble.txt
│ ├── test_predict_polarity_ensemble.txt
│ ├── test_public_2.csv
│ ├── train.txt
│ └── vocabulary.pkl
├── dataset/
│ ├── attribute.json
│ ├── clean_data.py
│ ├── clean_test.py
│ ├── polarity.json
│ ├── submit_example_2.csv
│ ├── test_public_2.csv
│ └── train_2.csv
├── embedding/
│ ├── embedding_all_fasttext2_300.txt
│ ├── embedding_all_merge_300.txt
│ └── embedding_all_tencent_200.txt
├── polarity_level_aspect/
│ ├── ab_polarity.py
│ ├── backup/
│ │ ├── cp_AT_LSTM_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_ft2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_tc/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_Bert/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_ft2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_tc/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_HEAT_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_HEAT_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ └── cp_HEAT_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── best_test.sh
│ ├── cp_AT_LSTM_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_Bert/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ └── networks.py
└── utils/
├── Data.py
├── data_helper.py
├── evaluate.py
├── prepare_w2v.py
├── prepare_w2v_with_UNK.py
├── train.py
├── train2.py
├── train_single.py
├── utils.py
└── vocabulary2.pkl
================================================
FILE CONTENTS
================================================
================================================
FILE: ReadMe.md
================================================
# 汽车行业用户观点主题及情感识别 (Just a test 团队决赛一等奖方案)
## 注意:
* 目前开源的代码按照下面的说明应该是可以跑通的,但是因为整个框架比较复杂所以可能有文档没有说清楚的地方,遇到问题可以给我们提issue,或者email
* 我们的实验表明,其实只用BERT就能达到一个非常好的结果,和全部模型比差距比较小,所以如果不是太关心完美复现,可以只跑bert的代码,这样会省去很多的时间。
* 我们的代码目前还没有进行优化,所以里面会有很多不完美的地方,比如我们很多网络没有用batch,请大家见谅。以后有时间的话,我们会考虑更新,优化一下代码结构。
* 如果有其他的问题也可以给我们反馈。
## 关于将tf-checkpoint转为pytorch_model.bin的问题。
* 由于huggingface的pytorch版的BERT已经更改了转换的代码以及load的方式,主要就是从最初版本的存储BertModel改成了BertForPreTraining,所以如果你用huggingface最新的脚本转换得到的pytorch_model.bin会和我们基于最初版本转换脚本的代码不兼容,因此提醒一下,请使用我们提供的脚本或者huggingface最早的转换脚本。
* 不过huggingface更改之后的脚本可能解决一些潜在的bug,所以后续计划中我们会将整个BERT模块和最新版本兼容。
## 代码运行环境:
* 基于Anaconda的python3 (最好是python3.5)
* pytorch 0.4.*
* skmulti-learn
* tqdm
* hanlp (分词需要,不过我们已经提供了预处理过的文件,可以不装)
## 方案概述:
* 我们采用pipeline的方式,将这个任务拆为两个子任务,先预测主题,根据主题预测情感极性(ABSA),这两个任务我们都使用深度学习的方式来解决
* 主题分类是一个多标签分类问题,我们使用BCE来解决多标签问题,我们使用不同的模型不同的词向量(2*4)训练了8个模型,再加上微调的中文BERT,一共九个模型,我们使用stacking的方式在第二层利用LR进行模型融合,得到预测概率,并使用threshold得到最终预测的标签。
* 基于角度的情感分类是一个有两个输入的多分类问题,我们使用了三种比较新的网络设计和四种词向量再加上微调的BERT一共13个模型,同样我们也用LR来做stacking。
## pretrained models:
* 我们将预训练好的模型放在了下面的链接中,可以直接拿来测试,免去长时间的训练。
* 链接: [BaiduYun](https://pan.baidu.com/s/1UDzqKeRIzc01chaj3Ew7AA) 提取码: 47e7
* 其中:
* backup_polarity.zip:保存了用来预测情感极性的三个模型四种embedding五折共60个checkpoint。请将其backup_polarity里面的各个文件夹放在polarity_level_aspect/目录下。
* backup_aspect.zip保存了用来预测主题的两个模型四种embedding五折共40个checkpoint。请将backup里面的各个文件夹放在attribute_level/目录下。
* backup_bert.zip保存了分别用来预测主题和情感极性五折共十个checkpoint。请将其里面的两个文件夹放在bert/glue_data/目录下。并且要将polarity_ensemble_online_submit重命名为polarity_ensemble_online
* backup_chinese_bert.zip 保存了我们将谷歌开源的中文BERT转为pytorch版本的预训练模型,可以用来做fine tune。请将chinese_L-12_H-768_A-12文件夹放在bert/目录下。
* backup_embedding.zip 保存了我们使用的embedding, 主要是一个由elmo得到的句子表示。请将backup_embedding下的词向量放在embedding/目录下。
## 代码框架:
* dataset/: 存放原始的数据集,以及预处理脚本
* data/: 存放预处理过的数据集,最终主题的预测以及情感的预测也会存储在这里。
* train.txt: 预处理之后的训练集
* test.txt: 预处理之后的测试集
* vocabulary.pkl:词表
* test_predict_aspect_ensemble.txt: 预测主题的结果文件
* test_predict_polarity_ensemble.txt: 预测情感极性的结果文件
* submit2.py:生成最终的提交结果
* submit2.csv: 最终的提交结果。
* embedding/: 存储我们处理过的词向量文件以及elmo
* embedding_all_merge_300.txt, 来自于[Chinese-Word-Vectors](https://github.com/Embedding/Chinese-Word-Vectors)的mixed-large的Word feature.
* embedding_all_fasttext2_300.txt, 来自于[fasttext](https://fasttext.cc/docs/en/crawl-vectors.html)
* embedding_all_tencent_200.txt, 来自于[Tencent AI Lab Embedding Corpus](https://ai.tencent.com/ailab/nlp/embedding.html)
* embeddings_elmo_ly-1.hdf5, 使用中文elmo得到的句子表示,elmo来自于[ELMoForManyLangs](https://github.com/HIT-SCIR/ELMoForManyLangs) 。因为太大,所以我们没有放在代码这里,你可以在百度云链接中的backup_embedding.zip中找到它
* attribute_level/:运行主题分类的模块, 里面有:
* attribute_level.py: 主要运行文件,主要接受以下命令行参数:
* --mode: 运行模式,
* 0: 代表leave out 训练,
* 1: 代表五折交叉训练, 用于后面的stacking
* 2: stacking, 利用五折交叉训练好的模型进行预测并stacking。
* --model: 训练使用的模型:
* CNN
* AttA3: 一种使用label attention的RNN模型
* --w2v: 指定使用的词向量:
* merge: embedding_all_merge_300.txt
* fasttext2: fasttext词向量
* tencent: 腾讯词向量
* --use_elmo: 是否使用elmo
* 0 : 不使用elmo
* 2 : 只使用elmo,读取embedding/中的elmo的hdf5文件,最终表示和词向量无关。
* --EPOCHS: 训练轮数
* --saved: stacking测试时是从头测试,还是直接读取存储好的预测结果
* 0 : 读取checkpoint对dev集和测试集进行预测
* 1 :直接读取存储好的dev集和测试集预测结果
* --check_dir:训练时指定checkpoint的存储位置
* --test_dir: 指定测试时读取checkpoint或者预测结果的文件夹位置, 因为做stacking同时读取多个模型,所以可以用指定多个文件夹,用‘#’做分隔
* networks2.py: 我们实现的模型网络代码文件
* 保存各个模型的checkpoint的文件夹:命名格式为cp_ModelName_w2vName,
* w2vName中,0代表merge 词向量, 2 代表使用了elmo(没有用词向量),ft2 代表fasttext词向量, tc代表腾讯词向量。
* polarity_level_aspect: 给定主题的情感分类模块:
* ab_polarity.py :主要运行文件, 命令行参数类似于attribute_level.py
* networks.py :模型实现文件
* utils: 一些代码工具,比如封装数据集类,训练骨架,评测函数等。
## One Step:
* 因为训练模型比较久而且模型比较大,所以我们提供了所有checkpoint对OOF和测试集的预测结果,只需要简单的做一下stacking就可以得到我们提交的最好结果:
```
cd attribute_level
python attribute.py --mode 2 --test_dir cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_AttA3_0#cp_AttA3_ft2#cp_AttA3_2#cp_AttA3_tc#cp_Bert --saved 1
cd ../polarity_level_aspect
python ab_polarity.py --mode 2 --test_dir cp_HEAT_0#cp_AT_LSTM_0#cp_HEAT_ft2#cp_AT_LSTM_ft2#cp_HEAT_2#cp_AT_LSTM_2#cp_HEAT_tc#cp_AT_LSTM_tc#cp_GCAE_0#cp_GCAE_2#cp_GCAE_ft2#cp_GCAE_tc#cp_Bert --saved 1
cd ../data
python submit2.py
```
最后生成的submit2.csv即可用于提交。
* 当然如果想要从头复现,可以看下面的说明:
## 预处理模块:
* 主要就是分词,分别运行clean_data.py, 和clean_test.py文件在data文件夹中生成预处理好的train.txt和test.txt
* 不过我们已经提供了预处理好的文件,所以不需要运行。
* 需要注意的是,如果你重新运行了分词程序,那么你生成的数据集的词表可能和我们提供的词向量的词表不一致,所以你必须重新运行prepare_w2v.py里面的prepare_w2v函数构建新的词表和词向量。
## 运行主题分类模块:
1. 训练阶段:(由于训练时间比较长,你可以直接跳到第三步加载我们预训练好的模型)
首先进入attribute_level文件夹:
```
cd attribute_level
```
以五折交叉训练基于fasttext词向量的CNN模型为例:只需运行:
```
python attribute.py --mode 1 --model CNN --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_CNN_ft2
```
这样就会在cp_CNN_ft2文件夹中生成五个checkpoint,名称为如下格式:checkpoint_Model_score_fold.pt
类似地所有模型和embedding执行命令如下:
```
# CNN + merge:
python attribute.py --mode 1 --model CNN --use_elmo 0 --w2v merge --EPOCHS 5 --check_dir cp_CNN_0
# CNN + fasttext:
python attribute.py --mode 1 --model CNN --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_CNN_ft2
# CNN + tencent:
python attribute.py --mode 1 --model CNN --use_elmo 0 --w2v tencent --EPOCHS 5 --check_dir cp_CNN_tc
# CNN + elmo:
python attribute.py --mode 1 --model CNN --use_elmo 2 --EPOCHS 5 --check_dir cp_CNN_2
```
训练AttA3模型如下:
```
# AttA3 + merge:
python attribute.py --mode 1 --model AttA3 --use_elmo 0 --w2v merge --EPOCHS 5 --check_dir cp_AttA3_0
# AttA3 + fasttext:
python attribute.py --mode 1 --model AttA3 --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_AttA3_ft2
# AttA3 + tencent:
python attribute.py --mode 1 --model AttA3 --use_elmo 0 --w2v tencent --EPOCHS 5 --check_dir cp_AttA3_tc
# AttA3 + elmo:
python attribute.py --mode 1 --model AttA3 --use_elmo 2 --EPOCHS 5 --check_dir cp_AttA3_2
```
至此我们在各对应文件夹中共得到了40个checkpoint。
2. 微调Bert阶段:
* 我们修改了一个开源的pytorch版本的[BERT](https://github.com/huggingface/pytorch-pretrained-BERT), 并在本数据集上fine tune了谷歌放出来的[中文BERT](https://github.com/google-research/bert/blob/master/multilingual.md)
* 首先我们我们将数据集按五折处理成tsv格式,放在bert/glue_data下,(我们已经帮你处理过了)
* 下载预训练的BERT模型,运行以下命令行完成转换:
```
export BERT_BASE_DIR=chinese_L-12_H-768_A-12
python convert_tf_checkpoint_to_pytorch.py --tf_checkpoint_path $BERT_BASE_DIR/bert_model.ckpt --bert_config_file $BERT_BASE_DIR/bert_config.json --pytorch_dump_path $BERT_BASE_DIR/pytorch_model.bin
```
* 注意如果你使用huggingface最新的转换脚本会出现state_dict不匹配的问题。所以你最好使用我们提供的转换脚本,或者是我们提供的huggingface最早的转换脚本convert_tf_checkpoint_to_pytorch_raw.py。
* 或者将百度云中转换好的chinese_L-12_H-768_A-12文件夹放在bert/目录下
* 设置环境变量:
```
export GLUE_DIR=glue_data
export BERT_BASE_DIR=chinese_L-12_H-768_A-12
```
* 在bert/文件夹下运行下面的命令进行fine-tune (5cv): (需要一块8GB显存的GPU)
```
python run_classifier_ensemble.py --task_name Aspect --do_train --do_eval --do_lower_case --data_dir $GLUE_DIR/aspect_ensemble_online --vocab_file $BERT_BASE_DIR/vocab.txt --bert_config_file $BERT_BASE_DIR/bert_config.json --init_checkpoint $BERT_BASE_DIR/pytorch_model.bin --max_seq_length 128 --train_batch_size 24 --learning_rate 2e-5 --num_train_epochs 5 --output_dir $GLUE_DIR/aspect_ensemble_online --seed 42
```
* fine-tune之后会在各自的fold的文件夹下得到对应的预测结果oof_test.npy
3. 使用预训练好的模型:
* 以上两步的所有checkpoint我们都放在了百度云链接里,下载解压之后,放入对应的文件目录下即可,这样可以免去长时间的训练。
* 注意文件夹的对应关系
* 很遗憾我们没有保存Aspect的BERT checkpoint, 我们只保存了它的预测结果,因为在训练过程中,我们已经预测过了。
* load 模型时, 我们都是在GPU上读取和保存的,我们没有在CPU上进行过测试,所以如果load有问题可以自行修改load处语法,或者联系我们。
4. 预测和stacking阶段:
* 不管是从头训练还是直接下载,我们现在已经有了训练好的模型,我们可以进行预测。
* 我们首先用BERT模型进行预测,事实上我们在每个fold训练时已经将预测结果保存为npy,我们现在只需要将五折结合起来。
在 bert\glue_data\文件夹下运行下面命令:
```
python generate_npy.py aspect_ensemble_online
```
这样我们在aspect_ensemble_online路径下得到一个npy文件夹,将它拷贝到aspect level 下的cp_Bert目录即可。
``` under aspect_level directory
cp -r ../bert/glue_data/aspect_ensemble_online/npy cp_Bert/
```
然后我们用之前的40个checkpoint对测试集进行预测:
```
python attribute.py --mode 2 --saved 0 --use_elmo 2 --test_dir cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_AttA3_0#cp_AttA3_ft2#cp_AttA3_2#cp_AttA3_tc
```
这样会在对应checkpoint的目录下生成一个npy文件夹,里面存放了oof的预测,oof的label,以及test的预测结果。
最后我们将这9个模型的npy进行stacking:
```
python attribute.py --mode 2 --saved 1 --test_dir cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_AttA3_0#cp_AttA3_ft2#cp_AttA3_2#cp_AttA3_tc#cp_Bert
```
最终预测的主题结果, 存放在data/test_predict_aspect_ensemble.txt中。
## 运行情感分类模块:
1. 训练阶段:(由于训练时间比较长,你可以直接跳到第三步加载我们预训练好的模型)
* 和主题分类类似:
```
# AT_LSTM + merge:
python ab_polarity.py --mode 1 --model AT_LSTM --use_elmo 0 --w2v merge --EPOCHS 5 --check_dir cp_AT_LSTM_0
# AT_LSTM + fasttext:
python ab_polarity.py --mode 1 --model AT_LSTM --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_AT_LSTM_ft2
# AT_LSTM + tencent:
python ab_polarity.py --mode 1 --model AT_LSTM --use_elmo 0 --w2v tencent --EPOCHS 5 --check_dir cp_AT_LSTM_tc
# AT_LSTM + elmo:
python ab_polarity.py --mode 1 --model AT_LSTM --use_elmo 2 --EPOCHS 5 --check_dir cp_AT_LSTM_2
# HEAT + merge:
python ab_polarity.py --mode 1 --model HEAT --use_elmo 0 --w2v merge --EPOCHS 5 --check_dir cp_HEAT_0
# HEAT + fasttext:
python ab_polarity.py --mode 1 --model HEAT --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_HEAT_ft2
# HEAT + tencent:
python ab_polarity.py --mode 1 --model HEAT --use_elmo 0 --w2v tencent --EPOCHS 5 --check_dir cp_HEAT_tc
# HEAT + elmo:
python ab_polarity.py --mode 1 --model HEAT --use_elmo 2 --EPOCHS 5 --check_dir cp_HEAT_2
# GCAE + merge:
python ab_polarity.py --mode 1 --model GCAE --use_elmo 0 --w2v merge --EPOCHS 5 --check_dir cp_GCAE_0
# GCAE + fasttext:
python ab_polarity.py --mode 1 --model GCAE --use_elmo 0 --w2v fasttext2 --EPOCHS 5 --check_dir cp_GCAE_ft2
# GCAE + tencent:
python ab_polarity.py --mode 1 --model GCAE --use_elmo 0 --w2v tencent --EPOCHS 5 --check_dir cp_GCAE_tc
# GCAE + elmo:
python ab_polarity.py --mode 1 --model GCAE --use_elmo 2 --EPOCHS 5 --check_dir cp_GCAE_2
```
最终我们得到3种网络4种embedding 在5折下的60个checkpoint保存在对应的文件夹中。
2. 微调Bert阶段:
* 和主题分类类似,但是需要一个aspect预测的结果作为输入。运行data文件夹下的build_test_for_predict.py脚本后, 将生成的test.tsv放在bert/glue_data/polarity_ensemble_online/下即可。
* 设置环境变量:
```
export GLUE_DIR=glue_data
export BERT_BASE_DIR=chinese_L-12_H-768_A-12
```
* 在bert/文件夹下运行下面的命令进行fine-tune (5cv): (需要一块8GB显存的GPU)
```
python run_classifier_ensemble_polarity.py --task_name Polarity --do_train --do_eval --do_lower_case --data_dir $GLUE_DIR/polarity_ensemble_online --vocab_file $BERT_BASE_DIR/vocab.txt --bert_config_file $BERT_BASE_DIR/bert_config.json --init_checkpoint $BERT_BASE_DIR/pytorch_model.bin --max_seq_length 128 --train_batch_size 24 --learning_rate 2e-5 --num_train_epochs 5 --output_dir $GLUE_DIR/polarity_ensemble_online --seed 42
```
* fine-tune之后会在各自的fold的文件夹下得到对应的checkpoint,最好的模型是model_best.pt
3. 使用预训练好的模型:
* 如果不做上面两步长时间的训练可以直接用我们训练好的模型
* 从百度云中下载之后解压到backup_polarity.zip得到60个checkpoint,将backup_bert.zip中的polarity_ensemble_polarity放在bert/glue_data/下。
* 注意文件夹的对应关系
4. 预测和stacking阶段:
* 我们首先用BERT模型进行预测,每个fold下有一个model_best.pt的checkpoint,我们通过下面的命令加载它们并进行预测(记得像微调时一样设置环境变量):
```
python run_classifier_ensemble_polarity.py --task_name Polarity --do_test --do_predict --do_lower_case --data_dir $GLUE_DIR/polarity_ensemble_online --vocab_file $BERT_BASE_DIR/vocab.txt --bert_config_file $BERT_BASE_DIR/bert_config.json --init_checkpoint $BERT_BASE_DIR/pytorch_model.bin --max_seq_length 128 --train_batch_size 24 --learning_rate 2e-5 --num_train_epochs 10 --output_dir polarity_output_ensemble_online/ --eval_batch_size 32
```
* 然后我们将五折的预测结果结合起来:
```
python generate_npy_for_polarity.py polarity_ensemble_online
```
我们将生成的npy文件夹拷贝到polarity_level_aspect路径下:
``` under polarity_level_aspect directory
cp -r ../bert/glue_data/polarity_ensemble_online/npy cp_Bert/
```
类似地我们先生成60个checkpoint的oof的npy:
```
python ab_polarity.py --mode 2 --use_elmo 2 --saved 0 --test_dir cp_HEAT_0#cp_AT_LSTM_0#cp_HEAT_ft2#cp_AT_LSTM_ft2#cp_HEAT_2#cp_AT_LSTM_2#cp_HEAT_tc#cp_AT_LSTM_tc#cp_GCAE_0#cp_GCAE_2#cp_GCAE_ft2#cp_GCAE_tc
```
然后将这13个模型进行最终的stacking融合:
```
python ab_polarity.py --mode 2 --saved 1 --test_dir cp_HEAT_0#cp_AT_LSTM_0#cp_HEAT_ft2#cp_AT_LSTM_ft2#cp_HEAT_2#cp_AT_LSTM_2#cp_HEAT_tc#cp_AT_LSTM_tc#cp_GCAE_0#cp_GCAE_2#cp_GCAE_ft2#cp_GCAE_tc#cp_Bert
```
这样我们最终在data文件夹下生成了test_predict_polarity_ensemble.txt文件,里面即为预测结果。
## 提交:
* 在data目录下, 运行提交脚本:
```
cd data
python submit2.py
```
生成的submit2.csv 即为我们的提交文件。
## ISSUES:
* 关于UNK的问题:
* 由于本比赛是公开测试集的,所以我们没有考虑UNK的问题,如果想把本代码用于实际应用之中,需要添加对UNK的处理,即使用prepare_w2v_with_UNK.py生成词表和词向量而不是prepare_w2v.py
## Contact:
sqfzf69(At)163.com
================================================
FILE: attribute_level/attribute.py
================================================
import codecs
# import torch
import sys
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
# import xgboost as xgb
from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain, LabelPowerset
sys.path.append("..")
from utils.data_helper import load_attr_data, load_w2v, load_pos, load_char2id, parse_json, load_test_data
import attribute_level.networks2 as networks
import utils.train_single as train_single
from utils.Data import Data, Data2
from utils.evaluate import score, label_analysis
import utils.train as train
import torch
import numpy as np
import argparse
import os
import shutil
import time
import pickle
# ---------------------------------------------------------------------------
# Command-line configuration for the attribute (aspect) classification module.
# Defaults reproduce the stacking run described in the ReadMe ("One Step").
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("--EPOCHS", type=int, default=5)  # training epochs per fold
parser.add_argument("--n_hidden", type=int, default=128)  # hidden size passed to the networks via args
parser.add_argument("--optimizer", type=str, default="Adam")
# parser.add_argument("--model", type=str, default="Average_LSTM2")
# parser.add_argument("--model", type=str, default="Binary_LSTM")
# parser.add_argument("--model", type=str, default="AttA3")
parser.add_argument("--model", type=str, default="CNN")  # network architecture name (see networks2.py)
# parser.add_argument("--model", type=str, default="Attn_LSTM")
parser.add_argument("--lr", type=float, default=0.2)
parser.add_argument("--dropout", type=float, default=0.0)
# NOTE(review): argparse `type=bool` treats any non-empty string as True,
# so "--freeze False" still yields True; only the defaults are reliable here.
parser.add_argument("--freeze", type=bool, default=True)
parser.add_argument("--use_dev", type=bool, default=False)
parser.add_argument("--mode", type=int, default=2)  # 0: leave-out training, 1: 5-fold CV training, 2: stacking/predict
parser.add_argument("--use_elmo", type=int, default=0)  # 0: word vectors only, 2: ELMo sentence representations only
parser.add_argument("--save", type=int, default=1)  # non-zero: save the best checkpoint after training
parser.add_argument("--check_dir", type=str, default="checkpoints_00")  # directory where checkpoints are written
parser.add_argument("--saved", type=int, default=1)  # stacking: 1 = reuse stored npy predictions, 0 = re-predict from checkpoints
parser.add_argument("--threshold_list", type=list, default=[0.45 for _ in range(10)])  # per-label decision thresholds (10 aspects)
parser.add_argument("--threshold", type=float, default=0.45)  # global decision threshold
parser.add_argument("--folds", type=int, default=5)
parser.add_argument("--seed", type=int, default=1024)  # seed for the fold permutation; must match the saved folds
parser.add_argument("--test_model", type=str, default="TD_3LSTM_0.7626.pt")
parser.add_argument("--train_mode", type=int, default=0)
parser.add_argument("--w2v", type=str, default="fasttext2")  # which pretrained embedding file to load
# parser.add_argument("--test_dir", type=str, default="checkpoints_00")
parser.add_argument("--test_dir", type=str, default="cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_AttA3_0#cp_AttA3_ft2#cp_AttA3_2#cp_AttA3_tc#cp_Bert")  # '#'-separated checkpoint dirs used for stacking
parser.add_argument("--meta_dir", type=str, default="cp_1024")
args = parser.parse_args()
print(args)
# seed = 314159
# torch.manual_seed(seed)
# seed = torch.initial_seed()
# print(seed)
class AttributeClassifier:  # Neural network method
    """Thin wrapper that builds, trains, saves and reloads one aspect classifier.

    The concrete network (CNN, AttA3, ...) is selected from ``args.model`` and
    lives in ``self.classifier``; training is delegated to ``utils.train.train``.
    """
    def __init__(self):
        self.classifier = None  # the underlying torch model, set by train_from_data/load_model
        self.trained = False    # NOTE(review): never updated elsewhere in this file
        pass
    def train_from_data(self, train_raw_data, test_raw_data, W, word2index, attr_dict, args, Fold=0):
        """Build the network chosen by args.model, train it, and save the best checkpoint.

        train_raw_data / test_raw_data: (texts, labels) tuples as produced by load_attr_data.
        W: embedding matrix (vocab_size x embed_dim); word2index: token -> row index.
        attr_dict: aspect-label dictionary; its size is the output dimension.
        Fold: fold number, only used in the checkpoint file name.
        """
        word_embed_dim = W.shape[1]
        hidden_size = args.n_hidden  # NOTE(review): unused local; networks read args.n_hidden themselves
        vocab_size = len(W)
        output_size = len(attr_dict)
        # Instantiate the requested architecture from networks2.py.
        if args.model == 'LSTM':
            self.classifier = networks.LSTM(word_embed_dim, output_size, vocab_size, args)
        elif args.model == 'Fasttext':
            self.classifier = networks.Fasttext(word_embed_dim, output_size, vocab_size, args)
        elif args.model == 'Average_LSTM2':
            self.classifier = networks.Average_LSTM2(word_embed_dim, output_size, vocab_size, args)
        elif args.model == 'AttA3':
            self.classifier = networks.AttA3(word_embed_dim, output_size, vocab_size, args)
            # AttA3 uses label attention: initialize its aspect-embedding table
            # (AE) with the word vectors of the aspect names themselves.
            aspect_e_l = []
            for a in attr_dict:
                # print(a)
                if a == '舒适性':
                    a = '舒适'  # this aspect name is not in the vocabulary; use its synonym's vector
                a_e = torch.FloatTensor(W[word2index[a]])
                aspect_e_l.append(a_e)
            aspect_embeds = torch.cat(aspect_e_l, 0)
            # print(aspect_embeds)
            # print(attr_dict)
            self.classifier.AE.weight = torch.nn.Parameter(aspect_embeds)
        elif args.model == 'Binary_LSTM':
            self.classifier = networks.Binary_LSTM(word_embed_dim, output_size, vocab_size, args)
        elif args.model == 'CNN':
            self.classifier = networks.CNN(word_embed_dim, output_size, vocab_size, args)
        elif args.model == 'Attn_LSTM':
            self.classifier = networks.Attn_LSTM(word_embed_dim, output_size, vocab_size, args)
        # Optionally load precomputed ELMo sentence representations keyed by the
        # tab-joined token sequence ('.' and '/' were escaped when stored).
        train_elmo, test_elmo = [], []
        if args.use_elmo != 0:
            import h5py
            elmo_dict = h5py.File('../embedding/embeddings_elmo_ly-1.hdf5', 'r')
            for s in train_raw_data[0]:
                sentence = '\t'.join(s)
                sentence = sentence.replace('.', '$period$')
                sentence = sentence.replace('/', '$backslash$')
                # print(sentence)
                embeddings = torch.from_numpy(np.asarray(elmo_dict[sentence]))
                train_elmo.append(embeddings)
            for s in test_raw_data[0]:
                sentence = '\t'.join(s)
                sentence = sentence.replace('.', '$period$')
                sentence = sentence.replace('/', '$backslash$')
                embeddings = torch.from_numpy(np.asarray(elmo_dict[sentence]))
                test_elmo.append(embeddings)
            elmo_dict.close()
            print("finish elmo")
        train_data = Data(train_raw_data, word2index, attr_dict, args)
        # if args.use_dev:
        #     dev_data = Data(args, dev_input_s, dev_input_t, dev_y_tensor)
        # else:
        #     dev_data = None
        test_data = Data(test_raw_data, word2index, attr_dict, args)
        if args.use_elmo != 0:
            train_data.add_feature(train_elmo)
            test_data.add_feature(test_elmo)
        # test_data doubles as the dev set here (same tuple passed twice).
        best_dict, max_acc = train.train(self.classifier, train_data, test_data, test_data, attr_dict, W, args=args)
        # Checkpoint name format: checkpoint_Model_score_fold.pt (see ReadMe).
        best_model = "%s/checkpoint_%s_%.6f_%d.pt" % (args.check_dir, args.model, max_acc, Fold)
        if args.save != 0:
            torch.save(best_dict, best_model)
        pass
    def load_model(self, check_point):
        """Load a whole pickled model from a checkpoint path into self.classifier.

        NOTE(review): checkpoints were saved on GPU; loading on CPU may need
        map_location (see ReadMe ISSUES section).
        """
        self.classifier = torch.load(check_point)
def split_dev(train_texts, train_labels, folds=5):
    """Hold out the last 1/`folds` of a seeded shuffle as a dev split.

    The permutation is seeded with args.seed, so repeated calls produce the
    same split. Returns (train_texts, train_labels, dev_texts, dev_labels).
    """
    n = len(train_texts)
    np.random.seed(args.seed)
    order = np.random.permutation(n).tolist()
    cut = n - n // folds  # everything before `cut` trains, the rest is held out
    head, tail = order[:cut], order[cut:]
    train_lines = [train_texts[i] for i in head]
    train_y = [train_labels[i] for i in head]
    test_lines = [train_texts[i] for i in tail]
    test_y = [train_labels[i] for i in tail]
    return train_lines, train_y, test_lines, test_y
def main():
    """Leave-out training entry point (args.mode == 0).

    Loads the preprocessed training file, splits off a dev set with split_dev,
    loads the embedding selected by --w2v, and trains a single model whose best
    checkpoint is written into args.check_dir.
    """
    f_train = "../data/train.txt"
    # f_test = "data/test_attr2.txt"
    # Map the --w2v flag to the corresponding embedding file.
    if args.w2v == "merge":
        f_w2v = "../embedding/embedding_all_merge_300.txt"
    elif args.w2v == "fasttext":
        f_w2v = "../embedding/embedding_all_fasttext_300.txt"
    elif args.w2v == "fasttext2":
        f_w2v = "../embedding/embedding_all_fasttext2_300.txt"
    elif args.w2v == "tencent":
        f_w2v = "../embedding/embedding_all_tencent_200.txt"
    else:
        print("error, no embedding")
        exit(-1)
    f_dict = "../dataset/attribute.json"
    print(f_w2v)
    train_texts, train_labels = load_attr_data(filename=f_train)
    # Note: train_texts/train_labels are rebound to the reduced training part.
    train_texts, train_labels, test_texts, test_labels = split_dev(train_texts, train_labels)
    print(len(train_texts))
    print(len(test_labels))
    # train_texts2, train_labels2, test_texts, test_labels = split_dev(train_texts, train_labels)
    if not os.path.exists("%s" % args.check_dir):
        os.mkdir("%s" % args.check_dir)
    # test_texts, test_labels = load_attr_data(filename=f_test)
    W, word2index2 = load_w2v(f_w2v)
    word2index = pickle.load(open("../data/vocabulary.pkl", 'rb'))
    # The embedding file's vocabulary must match the pickled one (see ReadMe:
    # re-run prepare_w2v if the data was re-tokenized).
    assert word2index == word2index2
    attr_list, attr_dict = parse_json(f_dict)
    print(list(attr_dict.keys()))
    model = AttributeClassifier()
    print(attr_list)
    print(attr_dict)
    # exit(-1)
    # print(train_texts)
    model.train_from_data((train_texts, train_labels), (test_texts, test_labels), W, word2index, attr_dict, args)
def kfold_split(length, k=5):
    """Deterministic k-fold split of indices 0..length-1.

    Shuffles indices with args.seed and returns a list of k
    (train_indices, test_indices) numpy-array pairs. Every consumer of the
    saved per-fold checkpoints relies on this exact, seed-reproducible split.

    NOTE(review): with l = length // k, only the first k*l positions of the
    shuffled order are ever marked as test, so when length % k != 0 the
    remaining length - k*l samples appear in every train fold but in no test
    fold (their out-of-fold rows in get_oof stay zero). Left unchanged because
    the shipped checkpoints/npy files were produced with this split.
    """
    np.random.seed(args.seed)
    index_list = np.random.permutation(length)
    l = length // k  # fold size (floor division; remainder is never tested)
    folds = []
    for i in range(k):
        # Boolean mask over positions in the shuffled order: fold i tests
        # positions [i*l, (i+1)*l) and trains on the complement.
        test_idx = np.zeros(length, dtype=bool)
        test_idx[i*l:(i+1)*l] = True
        folds.append((index_list[~test_idx], index_list[test_idx]))
    return folds
def ensemble():
    """Five-fold cross-validation training entry point (args.mode == 1).

    Trains one model per fold produced by kfold_split, saving one checkpoint
    per fold into args.check_dir; these checkpoints are later consumed by the
    stacking stage (get_oof).
    """
    f_train = "../data/train.txt"
    # f_test = "data/test_attr2.txt"
    # Map the --w2v flag to the corresponding embedding file (no plain
    # "fasttext" option here, unlike main()).
    if args.w2v == "merge":
        f_w2v = "../embedding/embedding_all_merge_300.txt"
    elif args.w2v == "fasttext2":
        f_w2v = "../embedding/embedding_all_fasttext2_300.txt"
    elif args.w2v == "tencent":
        f_w2v = "../embedding/embedding_all_tencent_200.txt"
    else:
        print("error, no embedding")
        exit(-1)
    f_dict = "../dataset/attribute.json"
    print(f_train)
    print(f_w2v)
    if not os.path.exists("%s" % args.check_dir):
        os.mkdir("%s" % args.check_dir)
    raw_texts, raw_labels = load_attr_data(filename=f_train)
    W, word2index2 = load_w2v(f_w2v)
    word2index = pickle.load(open("../data/vocabulary.pkl", 'rb'))
    # Embedding vocabulary must match the pickled project vocabulary.
    assert word2index == word2index2
    attr_list, attr_dict = parse_json(f_dict)
    kf = 0  # 1-based fold counter, used only for logging and checkpoint names
    for train_index, test_index in kfold_split(len(raw_texts), args.folds):
        kf += 1
        print("FOLD:", kf)
        print("TRAIN:", str(len(train_index)), '\n', "TEST:", str(len(test_index)))
        # train_index, test_index = train_index.tolist(), test_index.tolist()
        test_texts, test_labels = [raw_texts[i] for i in test_index], [raw_labels[i] for i in test_index]
        train_texts, train_labels = [raw_texts[i] for i in train_index], [raw_labels[i] for i in train_index]
        print(len(train_texts))
        print(len(test_labels))
        # Fresh classifier per fold; the fold's held-out part doubles as dev.
        model = AttributeClassifier()
        print(attr_list)
        print(attr_dict)
        # exit(-1)
        # print(train_texts)
        model.train_from_data((train_texts, train_labels), (test_texts, test_labels), W, word2index, attr_dict, args, kf)
    pass
def test():
    """Predict aspects for a test file with one hard-coded checkpoint and write
    the '|'-joined aspect names, one line per sample, to test_predict.txt.

    NOTE(review): file handle `fw` is never closed, and the paths here
    ("data/...") differ from the "../data/..." convention used elsewhere in
    this file — this looks like an older, stand-alone debugging entry point.
    """
    model = AttributeClassifier()
    check_point = "checkpoint_AttA3_0.8810.pt"
    model.load_model(check_point)
    test_file = "data/attribute_test.txt"
    test_texts = load_test_data(test_file)
    f_w2v = "../embedding/embedding_all_merge_300.txt"
    W, word2index = load_w2v(f_w2v)
    f_dict = "../dataset/attribute.json"
    attr_list, attr_dict = parse_json(f_dict)
    # NOTE(review): every other call site constructs Data with
    # (raw, word2index, attr_dict, args); confirm Data accepts this 2-arg form.
    test_data = Data((test_texts, None), word2index)
    test_predict = train.predict(model.classifier, test_data, args)
    print(test_predict)
    fw = codecs.open("test_predict.txt", 'w', encoding='utf-8')
    for p in test_predict:
        # Collect the names of all labels predicted non-zero for this sample.
        attributes = []
        for i,l in enumerate(p):
            if l != 0:
                attributes.append(attr_list[i])
        fw.write('|'.join(attributes)+'\n')
def dev():
    """Evaluate one hard-coded checkpoint on a single hard-coded CV fold.

    Rebuilds fold index 2 (the third fold) of kfold_split over the training
    file, predicts with the loaded model, and prints the (p, r, f, acc) score.
    """
    model = AttributeClassifier()
    check_point = "checkpoints5/checkpoint_AttA3_0.8666.pt"
    model.load_model(check_point)
    f_train = "data/attribute_data.txt"  # NOTE(review): path differs from the "../data/train.txt" used elsewhere
    # f_test = "data/test_attr2.txt"
    f_w2v = "../embedding/embedding_all_merge_300.txt"
    f_dict = "../dataset/attribute.json"
    print(f_w2v)
    raw_texts, raw_labels = load_attr_data(filename=f_train)
    W, word2index = load_w2v(f_w2v)
    attr_list, attr_dict = parse_json(f_dict)
    kf = 0  # NOTE(review): unused local
    # Take only the held-out indices of fold #2 (third fold).
    _, test_index = kfold_split(len(raw_texts), args.folds)[2]
    test_texts, test_labels = [raw_texts[i] for i in test_index], [raw_labels[i] for i in test_index]
    test_data = Data((test_texts, test_labels), word2index, attr_dict, args)
    test_predict = train.predict(model.classifier, test_data, args)
    pred_acc_t = score(test_predict, test_data.labels)
    print(pred_acc_t)
def load_elmo(test_texts):
    """Look up precomputed ELMo sentence representations for tokenized texts.

    Each text (a list of tokens) is joined with tabs to form its HDF5 key,
    with '.' and '/' escaped exactly as they were when the embeddings were
    stored. Returns one torch tensor per input sentence.
    """
    import h5py
    representations = []
    elmo_store = h5py.File('../embedding/embeddings_elmo_ly-1.hdf5', 'r')
    for tokens in test_texts:
        key = '\t'.join(tokens).replace('.', '$period$').replace('/', '$backslash$')
        representations.append(torch.from_numpy(np.asarray(elmo_store[key])))
    elmo_store.close()
    print("finish elmo")
    return representations
def get_oof(clfs, raw_texts, raw_labels, test_data, word2index, attr_dict):
    """Compute out-of-fold (OOF) predictions for a set of per-fold checkpoints.

    For each fold, the matching checkpoint predicts its held-out slice of the
    training data (filling ``oof_train``/``oof_train_y``) and the full test
    set; test logits are averaged over folds into ``oof_test``. All three
    arrays are cached as .npy files next to the checkpoints.

    :param clfs: checkpoint paths, ordered by fold (len == number of folds)
    :returns: (oof_train, oof_train_y, oof_test) as numpy arrays
    """
    NFOLDS = len(clfs)
    n_train = len(raw_texts)
    n_test = len(test_data.sentences)
    class_num = 10  # number of attribute classes
    oof_train = np.zeros((n_train, class_num))
    oof_train_y = np.zeros((n_train, class_num))
    oof_test = np.zeros((n_test, class_num))
    oof_test_skf = np.zeros((NFOLDS, n_test, class_num))
    kf = 0
    for (train_index, test_index), checkpoint in zip(kfold_split(n_train, NFOLDS), clfs):
        print(checkpoint)
        clf = torch.load(checkpoint)
        kf += 1
        print("FOLD:", kf)
        print("TRAIN:", str(len(train_index)), "TEST:", str(len(test_index)))
        dev_texts = [raw_texts[i] for i in test_index]
        dev_labels = [raw_labels[i] for i in test_index]
        dev_data = Data((dev_texts, dev_labels), word2index, attr_dict, args)
        if args.use_elmo != 0:
            dev_data.add_feature(load_elmo(dev_texts))
        with torch.no_grad():
            dev_predict, oof_dev = train.predict_with_logit(clf, dev_data, args)
        pred_acc_p = score(dev_predict, dev_data.labels)
        print("[p:%.4f, r:%.4f, f:%.4f] acc:%.4f" %
              (pred_acc_p[0], pred_acc_p[1], pred_acc_p[2], pred_acc_p[3]))
        oof_train[test_index] = oof_dev
        dev_y = [l[0].detach().numpy() for l in dev_data.labels]
        oof_train_y[test_index] = dev_y
        _, oof_test_skf[kf - 1, :, :] = train.predict_with_logit(clf, test_data, args)
    oof_test[:] = oof_test_skf.mean(axis=0)
    # BUG FIX: `dir` shadowed the builtin; also replace the exists()+mkdir()
    # pair (racy, and mkdir fails if a parent is missing) with makedirs().
    out_dir = os.path.dirname(clfs[0])
    os.makedirs(os.path.join(out_dir, 'npy'), exist_ok=True)
    print(out_dir)
    np.save(os.path.join(out_dir, 'npy', "oof_train"), oof_train)
    np.save(os.path.join(out_dir, 'npy', "oof_train_y"), oof_train_y)
    np.save(os.path.join(out_dir, 'npy', "oof_test"), oof_test)
    return oof_train, oof_train_y, oof_test
def load_oof(dir):
    """Load previously cached out-of-fold arrays from ``<dir>/npy/``."""
    npy_dir = os.path.join(dir, 'npy')
    arrays = tuple(
        np.load(os.path.join(npy_dir, name + ".npy"))
        for name in ("oof_train", "oof_train_y", "oof_test")
    )
    print("loaded from: " + dir)
    return arrays
def stacking():
    """Stack the per-fold predictions of several base models.

    For every checkpoint directory in ``args.test_dir`` (separated by '#'),
    gather out-of-fold train predictions and averaged test predictions
    (recomputing or loading the cached .npy files), then fit one
    logistic-regression meta-classifier per attribute class and write the
    ensembled test predictions, plus a timestamped backup copy.
    """
    saved = True if args.saved != 0 else False
    f_train = "../data/train.txt"
    test_file = "../data/test.txt"
    test_texts = load_test_data(test_file)
    raw_texts, raw_labels = load_attr_data(filename=f_train)
    word2index = pickle.load(open("../data/vocabulary.pkl", 'rb'))
    f_dict = "../dataset/attribute.json"
    attr_list, attr_dict = parse_json(f_dict)
    paths = args.test_dir.split('#')
    models_files = []
    for path in paths:
        models_files.append([os.path.join(path, f) for f in os.listdir(path)
                             if os.path.isfile(os.path.join(path, f))])
    test_data = Data((test_texts, None), word2index)
    if args.use_elmo != 0:
        test_data.add_feature(load_elmo(test_texts))
    x_train = []
    # BUG FIX: the old sentinel was `y_train == []`, which compares an ndarray
    # to a list elementwise once y_train is assigned; use an explicit None.
    y_train = None
    x_test = []
    for model_dir, checkpoints_per_model in zip(paths, models_files):
        print(model_dir, checkpoints_per_model)
        if saved and os.path.isfile(os.path.join(model_dir, 'npy', "oof_train.npy")):
            oof_train, oof_train_y, oof_test = load_oof(model_dir)
        else:
            NFOLDS = len(checkpoints_per_model)
            print(NFOLDS)
            assert NFOLDS == args.folds
            clfs = [None for i in range(NFOLDS)]
            for cp in checkpoints_per_model:
                # checkpoint names end in "..._<fold>.<ext>"; recover the fold id
                fold = int(cp.replace('_', '.').split('.')[-2])
                print(fold)
                clfs[fold - 1] = cp
            oof_train, oof_train_y, oof_test = get_oof(clfs, raw_texts, raw_labels, test_data, word2index, attr_dict)
        x_train.append(oof_train)
        if y_train is None:
            y_train = oof_train_y
        else:
            # every model must have seen the folds in the same order
            assert (y_train == oof_train_y).all()
        x_test.append(oof_test)
    x_train = np.stack(x_train, axis=2)
    x_test = np.stack(x_test, axis=2)
    print(x_train.shape)
    num_train = x_train.shape[0]
    num_test = x_test.shape[0]
    test_predict = []
    # one logistic-regression meta-classifier per attribute class
    for c in range(x_train.shape[1]):
        x_train_c = x_train[:, c, :].reshape(num_train, -1)
        x_test_c = x_test[:, c, :].reshape(num_test, -1)
        meta_clf_c = LogisticRegression()
        meta_clf_c.fit(x_train_c, y_train[:, c])
        test_predict.append(meta_clf_c.predict_proba(x_test_c)[:, 1])
    test_predict = np.stack(test_predict, axis=1)
    print(test_predict.shape)
    out_path = "../data/test_predict_aspect_ensemble.txt"
    with codecs.open(out_path, 'w', encoding='utf-8') as fw:
        for prob in test_predict:
            attributes = []
            voted = [0 for a in range(len(attr_list))]
            for i in range(len(prob)):
                if prob[i] > args.threshold:
                    voted[i] = 1
            if sum(voted) == 0:
                # nothing passed the threshold: fall back to the argmax class
                voted[prob.argmax()] = 1
            for i, l in enumerate(voted):
                if l != 0:
                    attributes.append(attr_list[i])
            fw.write('|'.join(attributes) + '\n')
    # BUG FIX: time.asctime()...split() produced a *list*, so the backup file
    # name was formatted like "[...'Mon', 'Jan'...]"; join it into one token.
    time_stamp = '_'.join(time.asctime().replace(':', '_').split())
    shutil.copy2(out_path,
                 "../data/backup/test_predict_aspect_ensemble_%s.txt" % time_stamp)
if __name__ == '__main__':
    # Dispatch on --mode: 0 = train, 1 = ensemble, 2 = stacking.
    if args.mode == 0:
        main()
    elif args.mode == 1:
        ensemble()
    elif args.mode == 2:
        stacking()
    else:
        # NOTE(review): best_test.sh invokes this script with --mode 3, which
        # previously fell through here and did nothing, silently. Surface the
        # unknown mode instead. TODO confirm which entry point mode 3 was
        # meant to select before mapping it.
        print("unknown mode: %s" % args.mode)
================================================
FILE: attribute_level/best_test.sh
================================================
CUDA_VISIBLE_DEVICES=2 python attribute.py --mode 3 --use_elmo 2 --test_dir cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_AttA3_0#cp_AttA3_ft2#cp_AttA3_2#cp_AttA3_tc#cp_Bert --saved 1
================================================
FILE: attribute_level/networks2.py
================================================
import torch
from torch import nn
import torch.nn.functional as F
class WordRep(nn.Module):
    """Token representation layer.

    Produces one vector per token from a word-embedding table, optionally
    combined with (or replaced by) a precomputed ELMo tensor:

    * ``args.use_elmo == 0`` — plain word embeddings
    * ``args.use_elmo == 1`` — word embeddings concatenated with ELMo
    * ``args.use_elmo == 2`` — ELMo vectors only
    """

    def __init__(self, vocab_size, word_embed_dim, char_size, args):
        # `char_size` is kept for interface compatibility; character-level
        # features are currently disabled.
        super(WordRep, self).__init__()
        self.use_elmo = args.use_elmo
        self.word_embed = nn.Embedding(vocab_size, word_embed_dim)

    def forward(self, input_tensors):
        """input_tensors: (token-index tensor, ELMo tensor or None).

        Returns a (seq_len, 1, feature_dim) tensor of token features.
        """
        sentence, elmo_tensor = input_tensors[0], input_tensors[1]
        words_embeds = self.word_embed(sentence)
        if self.use_elmo == 1:
            reshaped = elmo_tensor.view(elmo_tensor.size()[0], 1, -1)
            words_embeds = torch.cat((words_embeds, reshaped), dim=-1)
        elif self.use_elmo == 2:
            words_embeds = elmo_tensor.view(elmo_tensor.size()[0], 1, -1)
        return words_embeds
class LSTM(nn.Module):
    """Single-layer bidirectional LSTM sentence classifier.

    Encodes the token features from :class:`WordRep`, concatenates the final
    hidden states of both directions and projects them to ``output_size``
    raw logits (no activation is applied here).
    """

    def __init__(self, word_embed_dim, output_size, vocab_size, args=None):
        super(LSTM, self).__init__()
        print("LSTM")
        # BUG FIX (consistency with CNN/AttA3): account for ELMo features in
        # the RNN input size — use_elmo == 1 concatenates a 1024-d ELMo
        # vector onto the word embedding, use_elmo == 2 replaces it entirely.
        # Previously the RNN always expected word_embed_dim and crashed with a
        # size mismatch whenever ELMo was enabled.
        self.input_size = word_embed_dim if (args.use_elmo == 0) else (
            word_embed_dim + 1024 if args.use_elmo == 1 else 1024)
        self.hidden_size = args.n_hidden
        self.output_size = output_size
        self.max_length = 1
        self.word_rep = WordRep(vocab_size, word_embed_dim, None, args)
        self.rnn = nn.LSTM(self.input_size, self.hidden_size, num_layers=1, bidirectional=True)
        # 2 * hidden_size: forward and backward final hidden states
        self.decoderP = nn.Linear(self.hidden_size * 2, self.output_size)
        self.dropout = nn.Dropout(0.0)

    def forward(self, input_tensors):
        """Return a (1, output_size) logit tensor for one sentence."""
        sentence = self.word_rep(input_tensors)
        output, (hidden, _) = self.rnn(sentence)
        # (num_directions, 1, hidden) -> (1, 2 * hidden)
        hidden = hidden.view(1, -1)
        decodedP = self.decoderP(hidden).view(1, -1)
        outputP = decodedP
        return outputP
class CNN(nn.Module):
    """Multi-width 1-D convolutional sentence classifier.

    Runs parallel Conv1d filters of widths 1-4 over the token features,
    max-pools each feature map over time, concatenates the pooled vectors and
    decodes them with a two-layer MLP. Outputs per-class sigmoid
    probabilities of shape (1, output_size).

    NOTE: input sentences must contain at least ``max(filter_size)`` tokens
    for the widest convolution to be applicable.
    """

    def __init__(self, word_embed_dim, output_size, vocab_size, args=None, max_length=20):
        super(CNN, self).__init__()
        print("CNN")
        # ELMo handling mirrors WordRep: concat (mode 1) or replace (mode 2)
        self.input_size = word_embed_dim if (args.use_elmo == 0) else (
            word_embed_dim + 1024 if args.use_elmo == 1 else 1024)
        self.hidden_size = args.n_hidden
        self.output_size = output_size
        self.max_length = max_length
        self.word_rep = WordRep(vocab_size, word_embed_dim, None, args)
        self.filter_size = [1, 2, 3, 4]
        self.map_size = 300  # feature maps per filter width
        self.convs1 = nn.ModuleList([nn.Conv1d(self.input_size, self.map_size, K) for K in self.filter_size])
        self.decoder = nn.Sequential(
            nn.Linear(self.map_size*len(self.filter_size), self.hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(self.hidden_size, self.output_size)
        )
        self.dropout = nn.Dropout(args.dropout)

    def forward(self, input_tensors):
        """Return a (1, output_size) tensor of sigmoid probabilities."""
        feature = self.word_rep(input_tensors)
        # (seq_len, 1, dim) -> (1, seq_len, dim): batch of one sentence
        feature = feature.view(1, feature.size()[0], -1)
        feature = self.dropout(feature)
        # MODERNIZATION: F.tanh / F.sigmoid are deprecated aliases; use the
        # torch namespace equivalents (identical behavior).
        x = [torch.tanh(conv(feature.transpose(1, 2))) for conv in self.convs1]
        # max-pool each map over the time axis, then flatten and concatenate
        x0 = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x0 = [i.view(i.size(0), -1) for i in x0]
        x0 = torch.cat(x0, 1)
        decoded = self.decoder(x0)
        output = decoded
        output = torch.sigmoid(output.view(1, -1))
        return output

    def optimize_step(self, input_tensors, category_tensor, optimizer):
        """Run one training step on a single example; return the loss value."""
        self.zero_grad()
        self.train()
        output = self.forward(input_tensors)
        # binary cross-entropy over the per-class sigmoid outputs
        loss = F.binary_cross_entropy(output, category_tensor.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss.item()
class AttA3(nn.Module):
    """Aspect-attention sentence classifier.

    A bidirectional LSTM encodes the sentence; each of the ``output_size``
    aspects owns a learned embedding (``AE``) that attends over the hidden
    states, producing one attended representation per aspect. Each aspect
    representation is decoded by its own linear head; the output is a
    (1, output_size) tensor of sigmoid probabilities.
    """

    def __init__(self, word_embed_dim, output_size, vocab_size, args=None):
        super(AttA3, self).__init__()
        # ELMo handling mirrors WordRep: concat (mode 1) or replace (mode 2)
        self.input_size = word_embed_dim if (args.use_elmo == 0) else (
            word_embed_dim + 1024 if args.use_elmo == 1 else 1024)
        self.hidden_size = args.n_hidden
        self.output_size = output_size
        self.max_length = 1
        self.lr = 0.0005
        print(self.input_size)
        self.word_rep = WordRep(vocab_size, word_embed_dim, None, args)
        # hidden_size//2 per direction so the concatenated state is hidden_size
        self.rnn_a = nn.LSTM(self.input_size, self.hidden_size//2, num_layers=1, bidirectional=True)
        # one learned embedding per aspect, used as the attention query
        self.AE = nn.Embedding(self.output_size, word_embed_dim)
        self.W_h_a = nn.Linear(self.hidden_size, self.hidden_size)
        self.W_v_a = nn.Linear(word_embed_dim, self.hidden_size)
        self.w_a = nn.Linear(self.hidden_size, 1)
        self.W_p_a = nn.Linear(self.hidden_size, self.hidden_size)
        self.W_x_a = nn.Linear(self.hidden_size, self.hidden_size)
        # independent 1-unit decoder per aspect
        self.decoders_a = nn.ModuleList([nn.Linear(self.hidden_size, 1) for i in range(output_size)])
        self.dropout = nn.Dropout(args.dropout)

    def forward(self, input_tensors):
        """Return a (1, output_size) tensor of sigmoid probabilities."""
        sentence = self.word_rep(input_tensors)
        length = sentence.size()[0]
        output2, (ht, ct) = self.rnn_a(sentence)
        # concatenate both directions' final hidden states: (1, hidden_size)
        ht = ht.view(1, -1)
        output = output2.view(1, length, -1)
        aspect_embedding = self.AE.weight
        aspect_embedding = aspect_embedding.view(self.output_size, 1, -1)
        # broadcast both the aspect queries and the hidden states so every
        # aspect scores every token: (output_size, length, ...)
        aspect_embedding = aspect_embedding.expand(self.output_size, length, -1)
        output = output.expand(self.output_size, length, -1)
        # MODERNIZATION: F.tanh / F.sigmoid are deprecated aliases; use the
        # torch namespace equivalents (identical behavior).
        M = torch.tanh(self.W_h_a(output) + self.W_v_a(aspect_embedding))
        # per-aspect attention weights over tokens
        weights = F.softmax(self.w_a(M).view(self.output_size, -1), dim=1)
        # attended sentence representation per aspect: (output_size, hidden)
        r = torch.matmul(weights.view(self.output_size, -1), output2.view(length, -1))
        r = torch.tanh(self.W_p_a(r) + self.W_x_a(ht))
        r = self.dropout(r)
        decoded = []
        for i in range(r.size(0)):
            decoded.append(self.decoders_a[i](r[i]))
        decoded = torch.stack(decoded)
        output = torch.sigmoid(decoded.view(1, -1))
        return output

    def optimize_step(self, input_tensors, category_tensor, optimizer):
        """Run one training step on a single example; return the loss value."""
        self.zero_grad()
        self.train()
        output = self.forward(input_tensors)
        # binary cross-entropy over the per-aspect sigmoid outputs
        loss = F.binary_cross_entropy(output, category_tensor.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss.item()
================================================
FILE: bert/CONTRIBUTING.md
================================================
# How to Contribute
BERT needs to maintain permanent compatibility with the pre-trained model files,
so we do not plan to make any major changes to this library (other than what was
promised in the README). However, we can accept small patches related to
re-factoring and documentation. To submit contributions, there are just a few
small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows
[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
================================================
FILE: bert/MSRP/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: bert/MSRP/Microsoft Shared Source License.htm
================================================
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=shift_jis">
<meta name=Generator content="Microsoft Word 11 (filtered)">
<title>Microsoft Shared Source License</title>
<style>
<!--
/* Font Definitions */
@font-face
{font-family:"lr ";
panose-1:2 2 6 9 4 2 5 8 3 4;}
@font-face
{font-family:Century;
panose-1:2 4 6 4 5 5 5 2 3 4;}
@font-face
{font-family:"\@lr ";
panose-1:2 2 6 9 4 2 5 8 3 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
text-align:justify;
text-justify:inter-ideograph;
font-size:10.5pt;
font-family:Century;}
p.MsoPlainText, li.MsoPlainText, div.MsoPlainText
{margin:0cm;
margin-bottom:.0001pt;
text-align:justify;
text-justify:inter-ideograph;
font-size:10.5pt;
font-family:"lr ";}
/* Page Definitions */
@page Section1
{size:595.3pt 841.9pt;
margin:99.25pt 87.65pt 3.0cm 87.65pt;
layout-grid:18.0pt;}
div.Section1
{page:Section1;}
-->
</style>
</head>
<body lang=JA style='text-justify-trim:punctuation'>
<div class=Section1 style='layout-grid:18.0pt'>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>Microsoft
Research Paraphrase Corpus</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>This
Microsoft Research Shared Source license agreement ("MSR-SSLA") is a
legal agreement between you and Microsoft Corporation ("Microsoft" or
"we") for the software or data identified above, which may include
source code, and any associated materials, text or speech files, associated
media and "online" or electronic documentation and any updates we
provide in our discretion (together, the "Software"). </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>By
installing, copying, or otherwise using this Software, found at
http://research.microsoft.com/downloads, you agree to be bound by the terms of
this MSR-SSLA. If you do not agree, do not install copy or use the Software.
The Software is protected by copyright and other intellectual property laws and
is licensed, not sold. </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>SCOPE
OF RIGHTS:</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>You
may use, copy, reproduce, and distribute this Software for any non-commercial
purpose, subject to the restrictions in this MSR-SSLA. Some purposes which can
be non-commercial are teaching, academic research, public demonstrations and
personal experimentation. You may also distribute this Software with books or
other teaching materials, or publish the Software on websites, that are
intended to teach the use of the Software for academic or other non-commercial
purposes.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>You
may not use or distribute this Software or any derivative works in any form for
commercial purposes. Examples of commercial purposes would be running business
operations, licensing, leasing, or selling the Software, distributing the
Software for use with commercial products, using the Software in the creation
or use of commercial products or any other activity which purpose is to procure
a commercial gain to you or others.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>If the
Software includes source code or data, you may create derivative works of such
portions of the Software and distribute the modified Software for
non-commercial purposes, as provided herein.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>In
return, we simply require that you agree: </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>1.
That you will not remove any copyright or other notices from the Software.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>2.
That if any of the Software is in binary format, you will not attempt to modify
such portions of the Software, or to reverse engineer or decompile them, except
and only to the extent authorized by applicable law. </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>3.
That if you distribute the Software or any derivative works of the Software,
you will distribute them under the same terms and conditions as in this
license, and you will not grant other rights to the Software or derivative works
that are different from those provided by this MSR-SSLA. </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>4.
That if you have created derivative works of the Software, and distribute such
derivative works, you will cause the modified files to carry prominent notices
so that recipients know that they are not receiving the original Software. Such
notices must state: (i) that you have changed the Software; and (ii) the date
of any changes.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>5.
That Microsoft is granted back, without any restrictions or limitations, a
non-exclusive, perpetual, irrevocable, royalty-free, assignable and
sub-licensable license, to reproduce, publicly perform or display, install,
use, modify, distribute, make and have made, sell and transfer your
modifications to and/or derivative works of the Software source code or data,
for any purpose. </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>6.
That any feedback about the Software provided by you to us is voluntarily
given, and Microsoft shall be free to use the feedback as it sees fit without
obligation or restriction of any kind, even if the feedback is designated by
you as confidential. </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>7.
THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO
EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION,
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY
AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF
TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL
ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER
ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>8.
THAT NEITHER MICROSOFT NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR
ANY DAMAGES RELATED TO THE SOFTWARE OR THIS MSR-SSLA, INCLUDING DIRECT,
INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT
THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST
PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR
DERIVATIVE WORKS.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>9.
That we have no duty of reasonable care or lack of negligence, and we are not
obligated to (and will not) provide technical support for the Software.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>10.
That if you breach this MSR-SSLA or if you sue anyone over patents that you
think may apply to or read on the Software or anyone's use of the Software,
this MSR-SSLA (and your license and rights obtained herein) terminate
automatically. Upon any such termination, you shall destroy all of your
copies of the Software immediately. Sections 5, 6, 7, 8, 9, 10, 13 and 14
of this MSR-SSLA shall survive any termination of this MSR-SSLA.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>11.
That the patent rights, if any, granted to you in this MSR-SSLA only apply to
the Software, not to any derivative works you make.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>12.
That the Software may be subject to U.S. export jurisdiction at the time it is
licensed to you, and it may be subject to additional export or import laws in
other places. You agree to comply with all such laws and regulations that may
apply to the Software after delivery of the software to you.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>13.
That all rights not expressly granted to you in this MSR-SSLA are reserved.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>14.
That this MSR-SSLA shall be construed and controlled by the laws of the State
of Washington, USA, without regard to conflicts of law. If any provision
of this MSR-SSLA shall be deemed unenforceable or contrary to law, the rest of
this MSR-SSLA shall remain in full effect and interpreted in an enforceable
manner that most nearly captures the intent of the original language.</span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'> </span></p>
<p class=MsoPlainText><span lang=EN-US style='font-family:"Courier New"'>Copyright
© Microsoft Corporation. All rights reserved.</span></p>
</div>
</body>
</html>
================================================
FILE: bert/MSRP/Microsoft Shared Source License.rtf
================================================
{\rtf1\ansi\ansicpg932\uc2\deff0\stshfdbch11\stshfloch21\stshfhich21\stshfbi0\deflang1033\deflangfe1041{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f11\froman\fcharset128\fprq1{\*\panose 02020609040205080304}\'82\'6c\'82\'72 \'96\'be\'92\'a9{\*\falt MS Mincho};}
{\f21\froman\fcharset0\fprq2{\*\panose 02040604050505020304}Century;}{\f38\froman\fcharset128\fprq1{\*\panose 02020609040205080304}@\'82\'6c\'82\'72 \'96\'be\'92\'a9;}{\f39\froman\fcharset238\fprq2 Times New Roman CE;}
{\f40\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f42\froman\fcharset161\fprq2 Times New Roman Greek;}{\f43\froman\fcharset162\fprq2 Times New Roman Tur;}{\f44\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
{\f45\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f46\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f47\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f59\fmodern\fcharset238\fprq1 Courier New CE;}
{\f60\fmodern\fcharset204\fprq1 Courier New Cyr;}{\f62\fmodern\fcharset161\fprq1 Courier New Greek;}{\f63\fmodern\fcharset162\fprq1 Courier New Tur;}{\f64\fmodern\fcharset177\fprq1 Courier New (Hebrew);}
{\f65\fmodern\fcharset178\fprq1 Courier New (Arabic);}{\f66\fmodern\fcharset186\fprq1 Courier New Baltic;}{\f67\fmodern\fcharset163\fprq1 Courier New (Vietnamese);}{\f151\froman\fcharset0\fprq1 MS Mincho Western{\*\falt MS Mincho};}
{\f149\froman\fcharset238\fprq1 MS Mincho CE{\*\falt MS Mincho};}{\f150\froman\fcharset204\fprq1 MS Mincho Cyr{\*\falt MS Mincho};}{\f152\froman\fcharset161\fprq1 MS Mincho Greek{\*\falt MS Mincho};}
{\f153\froman\fcharset162\fprq1 MS Mincho Tur{\*\falt MS Mincho};}{\f156\froman\fcharset186\fprq1 MS Mincho Baltic{\*\falt MS Mincho};}{\f249\froman\fcharset238\fprq2 Century CE;}{\f250\froman\fcharset204\fprq2 Century Cyr;}
{\f252\froman\fcharset161\fprq2 Century Greek;}{\f253\froman\fcharset162\fprq2 Century Tur;}{\f256\froman\fcharset186\fprq2 Century Baltic;}{\f421\froman\fcharset0\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Western;}
{\f419\froman\fcharset238\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 CE;}{\f420\froman\fcharset204\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Cyr;}{\f422\froman\fcharset161\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Greek;}
{\f423\froman\fcharset162\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Tur;}{\f426\froman\fcharset186\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Baltic;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;
\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;
\red192\green192\blue192;}{\stylesheet{\qj \li0\ri0\nowidctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs21\lang1033\langfe1041\kerning2\loch\f21\hich\af21\dbch\af11\cgrid\langnp1033\langfenp1041 \snext0 Normal;}{\*\cs10 \additive
\ssemihidden Default Paragraph Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\loch\f21\hich\af21\dbch\af11\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}{
\s15\qj \li0\ri0\nowidctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs21\lang1033\langfe1041\kerning2\loch\f11\hich\af2\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext15 Plain Text;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}
{\*\rsidtbl \rsid6188942\rsid14507857\rsid16326027}{\*\generator Microsoft Word 11.0.6359;}{\info{\title [Add Name and Version of Software here]}{\author chrisbkt}{\operator chrisbkt}{\creatim\yr2005\mo3\dy1\hr16\min37}{\revtim\yr2005\mo3\dy1\hr16\min38}
{\version3}{\edmins1}{\nofpages3}{\nofwords895}{\nofchars5104}{\*\company Microsoft Corporation}{\nofcharsws5988}{\vern24703}}\paperw11906\paperh16838\margl1753\margr1753\margt1985\margb1701\gutter0 \deftab851\ftnbj\aenddoc\formshade\horzdoc\dgmargin
\dghspace180\dgvspace180\dghorigin1701\dgvorigin1984\dghshow0\dgvshow2\jcompress\lnongrid
\viewkind4\viewscale100\splytwnine\ftnlytwnine\htmautsp\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct\asianbrkrule\rsidroot6188942\newtblstyruls\nogrowautofit {\upr{\*\fchars
!%),.:\'3b?]\'7d\'81\'91\'81\'8b\'81\'66\'81\'68\'81\'f1\'81\'8c\'81\'8d\'81\'8e\'81\'41\'81\'42\'81\'58\'81\'72\'81\'74\'81\'76\'81\'78\'81\'7a\'81\'6c\'81\'4a\'81\'4b\'81\'54\'81\'55\'81\'45\'81\'52\'81\'53\'81\'49\'81\'93\'81\'6a\'81\'43\'81\'44\'81\'46\'81\'47\'81\'48\'81\'6e\'81\'70\'a1\'a3\'a4\'a5\'de\'df\'81\'91
}{\*\ud\uc0{\*\fchars
!%),.:\'3b?]\'7d{\uc2\u162 \'81\'91\'81\'8b\'81f\'81h\'81\'f1\'81\'8c\'81\'8d\'81\'8e\'81A\'81B\'81X\'81r\'81t\'81v\'81x\'81z\'81l\'81J\'81K\'81T\'81U\'81E\'81R\'81S\'81I\'81\'93\'81j\'81C\'81D\'81F\'81G\'81H\'81n\'81p\'a1\'a3\'a4\'a5\'de\'df\'81\'91}}}}
{\upr{\*\lchars $([\'5c\'7b\'81\'92\'5c\'81\'65\'81\'67\'81\'71\'81\'73\'81\'75\'81\'77\'81\'79\'81\'6b\'81\'90\'81\'69\'81\'6d\'81\'6f\'a2\'81\'92\'81\'8f}{\*\ud\uc0{\*\lchars
$([\'5c\'7b{\uc2\u163 \'81\'92}{\uc1\u165 \'5c\'81e\'81g\'81q\'81s\'81u\'81w\'81y\'81k\'81\'90\'81i\'81m\'81o\'a2\'81\'92\'81\'8f}}}}\fet0\sectd \linex0\endnhere\sectlinegrid360\sectspecifyl\sectrsid12744622\sftnbj {\*\pnseclvl1
\pnucrm\pnstart1\pnindent720\pnhang {\pntxta \dbch .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta \dbch .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta \dbch .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang
{\pntxta \dbch )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb \dbch (}
{\pntxta \dbch )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb \dbch (}{\pntxta \dbch )}}\pard\plain
\s15\qj \li0\ri0\nowidctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12744622 \fs21\lang1033\langfe1041\kerning2\loch\af11\hich\af2\dbch\af11\cgrid\langnp1033\langfenp1041 {\loch\af2\insrsid16326027 \hich\af2\dbch\af11\loch\f2 Microsoft
\hich\af2\dbch\af11\loch\f2 \hich\af2\dbch\af11\loch\f2 Research\hich\af2\dbch\af11\loch\f2 \hich\af2\dbch\af11\loch\f2 Paraphrase\hich\af2\dbch\af11\loch\f2 \hich\af2\dbch\af11\loch\f2 Corpus}{\loch\af2\insrsid6188942\charrsid6188942
\par }{\loch\af2\insrsid6188942\charrsid6188942
\par \hich\af2\dbch\af11\loch\f2 This Microsoft Research Shared Source license agreement ("MSR-SSLA") is a legal agreement between you and Microsoft Corporation ("Microsoft" or "we") for the software or data identified above, which may include so
\hich\af2\dbch\af11\loch\f2 urce code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software").
\par
\par \hich\af2\dbch\af11\loch\f2 By installing, copying, or otherwise using this Software, \hich\af2\dbch\af11\loch\f2
found at http://research.microsoft.com/downloads, you agree to be bound by the terms of this MSR-SSLA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is license
\hich\af2\dbch\af11\loch\f2 d\hich\af2\dbch\af11\loch\f2 , not sold.
\par
\par \hich\af2\dbch\af11\loch\f2 SCOPE OF RIGHTS:
\par \hich\af2\dbch\af11\loch\f2 You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this MSR-SSLA. Some purposes which can be non-commercial are teaching, academic research, public
\hich\af2\dbch\af11\loch\f2
demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purpose
\hich\af2\dbch\af11\loch\f2 s\hich\af2\dbch\af11\loch\f2 .
\par \hich\af2\dbch\af11\loch\f2
You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use
\hich\af2\dbch\af11\loch\f2 with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others.
\par \hich\af2\dbch\af11\loch\f2 If the Software includes source code or data, you may create derivative works\hich\af2\dbch\af11\loch\f2 of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein.
\par
\par \hich\af2\dbch\af11\loch\f2 In return, we simply require that you agree:
\par \hich\af2\dbch\af11\loch\f2 1. That you will not remove any copyright or other notices from the Software.
\par \hich\af2\dbch\af11\loch\f2 2. That\hich\af2\dbch\af11\loch\f2
if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law.
\par \hich\af2\dbch\af11\loch\f2 3. That if you distribute the Software or any\hich\af2\dbch\af11\loch\f2
derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this MSR-SSLA.
\par \hich\af2\dbch\af11\loch\f2 4. That i\hich\af2\dbch\af11\loch\f2
f you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i)
\hich\af2\dbch\af11\loch\f2 \hich\af2\dbch\af11\loch\f2 that you have changed the Software; and (ii) the date of any changes.
\par \hich\af2\dbch\af11\loch\f2 5. That Microsoft is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, p\hich\af2\dbch\af11\loch\f2
ublicly perform or display, install, use, modify, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose.
\par \hich\af2\dbch\af11\loch\f2 6. That any feedback about the Software provided by y\hich\af2\dbch\af11\loch\f2
ou to us is voluntarily given, and Microsoft shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential.
\par \hich\af2\dbch\af11\loch\f2 7. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES\hich\af2\dbch\af11\loch\f2
. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-
\hich\af2\dbch\af11\loch\f2 I\hich\af2\dbch\af11\loch\f2
NFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS.
\par \hich\af2\dbch\af11\loch\f2 8. THAT NEITHER MICROSOFT NOR ANY CONTRIBUTOR TO T\hich\af2\dbch\af11\loch\f2
HE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS MSR-SSLA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUS
\hich\af2\dbch\af11\loch\f2 T\hich\af2\dbch\af11\loch\f2 PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS.
\par \hich\af2\dbch\af11\loch\f2 9. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software.
\par \hich\af2\dbch\af11\loch\f2 10. T\hich\af2\dbch\af11\loch\f2
hat if you breach this MSR-SSLA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this MSR-SSLA (and your license and rights obtained herein) terminate automatically. Upon any such term
\hich\af2\dbch\af11\loch\f2 i\hich\af2\dbch\af11\loch\f2 nation, you shall destroy all of your copies of the Software immediately. Sections 5, 6, 7, 8, 9, 10, 13 and 14 of this MSR-SSLA shall survive any termination of this MSR-SSLA.
\par \hich\af2\dbch\af11\loch\f2 11. That the patent rights, if any, granted to you in this MSR-SSLA only appl\hich\af2\dbch\af11\loch\f2 y to the Software, not to any derivative works you make.
\par \hich\af2\dbch\af11\loch\f2 12. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply wit\hich\af2\dbch\af11\loch\f2
h all such laws and regulations that may apply to the Software after delivery of the software to you.
\par \hich\af2\dbch\af11\loch\f2 13. That all rights not expressly granted to you in this MSR-SSLA are reserved.
\par \hich\af2\dbch\af11\loch\f2 14. That this MSR-SSLA shall be construed and controlled by the laws of \hich\af2\dbch\af11\loch\f2
the State of Washington, USA, without regard to conflicts of law. If any provision of this MSR-SSLA shall be deemed unenforceable or contrary to law, the rest of this MSR-SSLA shall remain in full effect and interpreted in an enforceable manner that most
\hich\af2\dbch\af11\loch\f2 \hich\af2\dbch\af11\loch\f2 nearly captures the intent of the original language.
\par
\par
\par \hich\af2\dbch\af11\loch\f2 \hich\f2 Copyright \'a9\loch\f2 Microsoft Corporation. All rights reserved\hich\af2\dbch\af11\loch\f2 .}{\loch\af2\insrsid6188942
\par }}
================================================
FILE: bert/MSRP/msr_paraphrase_README.htm
================================================
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
<meta name=Generator content="Microsoft Word 11 (filtered)">
<title>This file contains pairs of sentences gleaned over a period of 18 months
from thousands of news sources on the web</title>
<style>
<!--
/* Font Definitions */
@font-face
{font-family:Courier;
panose-1:2 7 4 9 2 2 5 2 4 4;}
@font-face
{font-family:Wingdings;
panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
{font-family:"MS Mincho";
panose-1:2 2 6 9 4 2 5 8 3 4;}
@font-face
{font-family:SimSun;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:"MS Gothic";
panose-1:2 11 6 9 7 2 5 8 2 4;}
@font-face
{font-family:"MS Gothic";
panose-1:2 11 6 9 7 2 5 8 2 4;}
@font-face
{font-family:"MS Mincho";
panose-1:2 2 6 9 4 2 5 8 3 4;}
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;}
@font-face
{font-family:"Franklin Gothic Medium";
panose-1:2 11 6 3 2 1 2 2 2 4;}
@font-face
{font-family:"\@SimSun";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman";}
h1
{margin-top:12.0pt;
margin-right:0cm;
margin-bottom:3.0pt;
margin-left:0cm;
page-break-after:avoid;
font-size:16.0pt;
font-family:Arial;}
h2
{margin-top:12.0pt;
margin-right:0cm;
margin-bottom:3.0pt;
margin-left:0cm;
page-break-after:avoid;
font-size:14.0pt;
font-family:Arial;
font-style:italic;}
h3
{margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:42.55pt;
margin-bottom:.0001pt;
page-break-after:avoid;
font-size:12.0pt;
font-family:Arial;
font-weight:normal;}
h4
{margin-top:0cm;
margin-right:0cm;
margin-bottom:0cm;
margin-left:42.55pt;
margin-bottom:.0001pt;
page-break-after:avoid;
font-size:12.0pt;
font-family:"Times New Roman";}
p.MsoHeader, li.MsoHeader, div.MsoHeader
{margin:0cm;
margin-bottom:.0001pt;
layout-grid-mode:char;
font-size:12.0pt;
font-family:"Times New Roman";}
p.MsoFooter, li.MsoFooter, div.MsoFooter
{margin:0cm;
margin-bottom:.0001pt;
layout-grid-mode:char;
font-size:12.0pt;
font-family:"Times New Roman";}
p.MsoDate, li.MsoDate, div.MsoDate
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman";}
a:link, span.MsoHyperlink
{color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{color:purple;
text-decoration:underline;}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
{margin:0cm;
margin-bottom:.0001pt;
font-size:8.0pt;
font-family:Arial;}
ins
{text-decoration:none;}
span.msoIns
{text-decoration:underline;}
span.msoDel
{text-decoration:line-through;
color:red;}
/* Page Definitions */
@page Section1
{size:612.0pt 792.0pt;
margin:72.0pt 89.85pt 72.0pt 89.85pt;}
div.Section1
{page:Section1;}
/* List Definitions */
ol
{margin-bottom:0cm;}
ul
{margin-bottom:0cm;}
-->
</style>
</head>
<body lang=JA link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal align=center style='text-align:center'><b><span lang=EN-US
style='font-size:14.0pt;font-family:Arial'>Microsoft Research Paraphrase Corpus</span></b></p>
<p class=MsoNormal align=center style='text-align:center'><span lang=EN-US> </span></p>
<p class=MsoNormal align=center style='text-align:center'><b><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Bill Dolan</span></b><b><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>, Chris Brockett, and
Chris Quirk</span></b></p>
<p class=MsoNormal align=center style='text-align:center'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>Microsoft Research</span></b></p>
<p class=MsoNormal align=center style='text-align:center'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>March 2, 2005</span></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>This document provides
some information about the creation of the corpus, along with results of the
annotation effort. If you use the corpus in your research, we would appreciate
your citing one or both of the following papers, which give some details of our
work on paraphrase and our data annotation efforts. (A paper describing in
detail how this corpus was created is currently in progress.) We are continuing
to tag data, and hope to release a larger version of this corpus to the
research community in the future.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><b><span lang=EN-US
style='font-size:8.5pt;font-family:Verdana'>Quirk, C., C. Brockett, and W. B.
Dolan. 2004. <a
href="http://research.microsoft.com/copyright/accept.asp?path=http://www.research.microsoft.com/nlp/publications/Paraphrase_EMNLP2004.pdf&pub=ACL"><span
style='color:#003399'>Monolingual Machine Translation for Paraphrase Generation</span></a></span></b><span
lang=EN-US style='font-size:8.5pt;font-family:Verdana'>, In <i>Proceedings of
the 2004 Conference on Empirical Methods in Natural Language Processing</i>, Barcelona Spain. </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><b><span lang=EN-US
style='font-size:8.5pt;font-family:Verdana'>Dolan W. B., C. Quirk, and C.
Brockett. 2004. <a
href="http://research.microsoft.com/copyright/accept.asp?path=http://www.research.microsoft.com/nlp/publications/Paraphrase_Coling.pdf&pub=COLING"><span
style='color:#003399'>Unsupervised Construction of Large Paraphrase Corpora:
Exploiting Massively Parallel News Sources</span></a></span></b><span
lang=EN-US style='font-size:8.5pt;font-family:Verdana'>. <i>COLING 2004</i>, Geneva, Switzerland. </span></p>
<p class=MsoNormal><span lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:21.25pt;text-indent:-21.25pt'><b><i><span
lang=EN-US style='font-size:14.0pt;font-family:Arial'>1.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'>Introduction to the paraphrase tagging task</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>This dataset consists of
5801 pairs of sentences gleaned over a period of 18 months from thousands of
news sources on the web. Accompanying each pair is a judgment reflecting whether multiple
human annotators considered the two sentences to be close enough in meaning to
be considered close paraphrases.</span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Each pair of sentences
has been examined by 2 human judges who were asked to give a binary judgment as
to whether the two sentences could be considered semantically equivalent.
Disagreements were resolved by a 3rd judge. This annotation task was carried
out by an independent company, the Butler Hill Group, LLC. Mo Corston-Oliver
directed the effort, with Jeff Stevenson, Amy Muia, and David Rojas acting as
raters. Mo Corston-Oliver and Jeff Stevenson also helped with the preparation
of this document.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>After resolving
differences between raters, </span><span lang=EN-US>3900 </span><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>(67%) of the original </span><span
lang=EN-US>5801 </span><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'>pairs were judged semantically equivalent.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>In many instances, the
pair of sentences rated by 2 judges as semantically equivalent will in fact
diverge semantically to at least some degree. If a full paraphrase relationship
can be described as bidirectional entailment, then the majority of the
equivalent pairs in this dataset exhibit mostly bidirectional entailments,
with one sentence containing information that differs from or is not contained
in the other. Some specific rating criteria are included in a tagging
specification (Section 3), but by and large the degree of mismatch allowed
before the pair was judged non-equivalent was left to the discretion of the
individual rater: did a particular set of asymmetries alter the meanings of the
sentences enough that they couldn't be considered the same in meaning? This
task was ill-defined enough that we were surprised at how high interrater
agreement was (averaging 83%). </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>A series of experiments
aimed at making the judging task more concrete resulted in uniformly degraded interrater
agreement. Providing a checkbox to allow judges to specify that one sentence
entailed another, for instance, left the raters frustrated and had a negative
impact on agreement. Similarly, efforts to identify classes of syntactic
alternations that would not count against an equivalent judgment resulted, in
most cases, in a collapse in interrater agreement. The relatively few situations
where we found firm guidelines of this type to be helpful (e.g. in dealing with
anaphora) are included in Section 3.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>The decision to tag
sentences as being &quot;more or less semantically equivalent&quot;, rather than
&quot;semantically equivalent&quot; was ultimately a practical one: insisting on complete
sets of bidirectional entailments would have ruled out all but the most trivial
sorts of paraphrase relationships, such as sentence pairs differing only in a
single word or in the presence of titles like &quot;Mr.&quot; and &quot;Ms.&quot;. Our interest was
in identifying more complex paraphrase relationships, which required a somewhat
looser definition of what &quot;semantic equivalence&quot; means. In an effort to focus
on these more interesting pairs, the dataset was restricted to pairs with a
minimum word-based Levenshtein distance of </span><span lang=EN-US
style='font-size:10.0pt;font-family:"Franklin Gothic Medium"'>≥</span><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> 8.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Given our relatively
loose definition of equivalence, any 2 of the following sentences would probably
have been considered paraphrases, despite obvious differences in information
content:</span></p>
<p class=MsoNormal><span lang=EN-US> </span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-align:justify;text-justify:
inter-ideograph;text-indent:-36.0pt;page-break-after:avoid;layout-grid-mode:
char'><span lang=EN-US style='font-size:10.0pt;font-family:Symbol;color:black'><span
style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'>The genome of the fungal pathogen that causes Sudden Oak Death has
been sequenced by US scientists</span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-align:justify;text-justify:
inter-ideograph;text-indent:-36.0pt;page-break-after:avoid;layout-grid-mode:
char'><span lang=EN-US style='font-size:10.0pt;font-family:Symbol;color:black'><span
style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'>Researchers announced Thursday they've completed the genetic
blueprint of the blight-causing culprit responsible for sudden oak death</span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-align:justify;text-justify:
inter-ideograph;text-indent:-36.0pt;page-break-after:avoid;layout-grid-mode:
char'><span lang=EN-US style='font-size:10.0pt;font-family:Symbol;color:black'><span
style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'>Scientists have figured out the complete genetic code of a
virulent pathogen that has killed tens of thousands of California native oaks</span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-align:justify;text-justify:
inter-ideograph;text-indent:-36.0pt;layout-grid-mode:char'><span lang=EN-US
style='font-size:10.0pt;font-family:Symbol;color:black'><span
style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'>The East Bay-based Joint Genome Institute said Thursday it has
unraveled the genetic blueprint for the diseases that cause the sudden death of
oak trees</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Raters were presented
with sentences in which several classes of named entities were replaced by
generic tags, so that &quot;Tuesday&quot; became %%DAY%%, &quot;$10,000&quot; became %%MONEY%%,
and so on. The release versions, however, preserve the original strings.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Note that many of the
sentence pairs judged to be <b>not equivalent</b> will still overlap
significantly in information content and even wording. A variety of automatic
filtering techniques were used to create an initial dataset that was rich in
paraphrase relationships, and the success of these techniques meant that
approximately 70% of the pairs examined by raters were, by our criteria,
semantically equivalent. The remaining 30% represent a range of relationships,
from pairs that are completely unrelated semantically, to those that are
partially overlapping, to those that are almost-but-not-quite semantically
equivalent. For this reason, this <b>not equivalent</b> set should not be
used as negative training data.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>We have made every effort
to ensure that each sentence in this dataset has been given proper attribution.
If you encounter any errors/omissions, please contact Bill Dolan (<a
href="mailto:billdol@microsoft.com">billdol@microsoft.com</a>), and we will
promptly modify the data to reflect the correct information.</span></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'><span style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'><span style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'><span style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'><span style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal style='margin-left:21.25pt;text-indent:-21.25pt'><b><i><span
lang=EN-US style='font-size:14.0pt;font-family:Arial'>2.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'>Methodology and Results</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>This
data set consists of 5801 sentence pairs, with a binary human judgment of
whether or not the pairing constitutes a paraphrase.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>2.1.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Methodology</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>To generate the
judgments, we used 3 raters to score the sentence pairs according to a given
specification. Rater 1 scored all 5801 sentences. Rater 2 scored 3533
sentences, and Rater 3 scored 2268 sentences. For the sentences where Rater 1
and 2 did not agree on the judgment, Rater 3 gave a final judgment, while Rater
2 gave the final judgment on sentences where Rater 1 and Rater 3 did not agree.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>2.2.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Interrater
Agreement</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>To
test interrater agreement, we took a simple percentage:</span></p>
<p class=MsoNormal style='page-break-after:avoid'><span lang=EN-US> </span></p>
<div align=center>
<table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0
style='border-collapse:collapse;border:none'>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Total
scored</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Total
agreements</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Percentage
agreement</span></p>
</td>
</tr>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-top:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Raters
1 & 2</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>3533</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>2904</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>82.20</span></p>
</td>
</tr>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-top:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Raters
1 & 3</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>2268</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>1921</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>84.70</span></p>
</td>
</tr>
</table>
</div>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>2.3.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Overall
scoring results</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>We
computed scoring results for each individual (raw scores, before resolving</span><span
lang=EN-US> differences):</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<div align=center>
<table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0
style='border-collapse:collapse;border:none'>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Total
scored</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Number
yes</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-left:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Percentage
yes</span></p>
</td>
</tr>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-top:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Rater
1</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>5801</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>3601</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>62.08</span></p>
</td>
</tr>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-top:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Rater
2</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>3533</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>2589</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>73.28</span></p>
</td>
</tr>
<tr>
<td width=148 valign=top style='width:88.55pt;border:solid windowtext 1.0pt;
border-top:none;padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Rater
3</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>2268</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>1612</span></p>
</td>
<td width=148 valign=top style='width:88.55pt;border-top:none;border-left:
none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;
padding:0cm 5.4pt 0cm 5.4pt'>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>71.08</span></p>
</td>
</tr>
</table>
</div>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>After resolving
differences, we judged 3900 out of 5801 sentence pairs to be valid paraphrases,
for a final percentage of 67.23%.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:28.1pt;text-indent:-28.1pt;page-break-after:
avoid'><b><i><span lang=EN-US style='font-family:Arial'>2.4.<span
style='font:7.0pt "Times New Roman"'> </span></span></i></b><b><i><u><span
lang=EN-US style='font-family:Arial'>Test/training</span></u></i></b></p>
<p class=MsoNormal style='page-break-after:avoid'><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>We assigned a random
sequence ID to each sentence pair, sorted them, and assigned the first 30% of
the data to be training and the last 70% to be test data. For obscure technical
reasons, the final test/train percentage is inexact (29.7% (1725 sentence
pairs) vs. 70.3% (4076 sentence pairs))</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:14.0pt'> </span></b></p>
<p class=MsoNormal style='margin-left:21.25pt;text-indent:-21.25pt'><b><i><span
lang=EN-US style='font-family:Arial'>3.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-size:14.0pt;
font-family:Arial'>Detailed Tagging Guidelines</span></u></i></b><b><i><u><span
lang=EN-US style='font-family:Arial'> </span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-family:Arial'><span
style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-family:Arial'><span
style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>3.1.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Equivalent
vs. not equivalent content</span></u></i></b></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-family:Arial'><span
style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal style='margin-left:36.0pt;text-indent:-36.0pt'><span
lang=EN-US style='font-size:10.0pt;font-family:Symbol'><span style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>In
this task, we are trying to determine if two sentences express the same
content.</span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-indent:-36.0pt'><span
lang=EN-US style='font-size:10.0pt;font-family:Symbol'><span style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>As is
true for paraphrase in general, this may be realized by means of alternative
but similar syntactic constructions and lexical items, etc.</span></p>
<p class=MsoNormal style='margin-left:36.0pt;text-indent:-36.0pt'><span
lang=EN-US style='font-size:10.0pt;font-family:Symbol'><span style='font:7.0pt "Times New Roman"'>
</span></span><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>In
general, the standard as to whether two sentences express the same content should
be relatively high, meaning that many of the ambiguous cases should be marked
"<b>not equivalent</b>" rather than "<b>equivalent</b>".</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Examples
of sentences with <b>equivalent</b> content expressed via alternative lexical
items:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>The Senate Select
Committee on Intelligence is preparing a blistering report on <b>prewar
intelligence </b>on Iraq.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>American <b>intelligence
leading up to the war</b> on Iraq will be criticised by a powerful US Congressional committee due to report soon, officials said today.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>A strong geomagnetic
storm was expected to hit Earth today <b>with the potential to affect</b>
electrical grids and satellite communications.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>A strong geomagnetic
storm is expected to hit Earth sometime %%DAY%% and <b>could knock out</b>
electrical grids and satellite communications.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'>These sentences are clearly paraphrases. The different lexical
items are still expressing the same content. </span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>This type of sentence pair should be
tagged as <b>equivalent</b>.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>3.2.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Equivalent
sentence pairs with minor differences in content</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Minor differences between
sentences can be overlooked when determining if two sentences are paraphrases.
For example:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>An autopsy found
Hatab's death was caused by "strangulation/asphyxiation," Rawson said
%%DAY%%.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>An autopsy found that
Nagem Sadoon Hatab's death on %%DATE%% was caused by "strangulation/asphyxiation,
Marine spokesman %%NUMBER%% st Lt. Dan Rawson said %%DAY%%.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>The
following sentences also express <b>equivalent</b> content:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:blue'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Mr. Concannon</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier;color:black'> <u>had
been doused in petrol</u>, <b>set himself alight</b> and <b>jumped onto a bike
to leap</b> <u>eight metres</u> <b>onto a mattress</b> below.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:blue'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>A SYDNEY man</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier;color:black'> <u>suffered
serious burns</u> <b>after setting himself alight</b> before <b>attempting to
jump a BMX bike</b> <u>off a toilet block</u> <b>into a pile of mattresses</b>
, </span><b><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:red'>police said</span></b><span lang=EN-US style='font-size:10.0pt;
font-family:Courier;color:black'>.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial;color:black'>The agent (<i>Mr.
Concanon</i>), the predicated actions (<i>set himself alight, jumped a bike</i>),
and important details (<i>onto a pile of mattresses</i>) are present in both
sentences. Additional lexical material in either sentence mainly serves to
embellish the main propositions (for example, . . .<i>suffered serious burns</i>
which is logically entailed by <i>set himself alight</i>). Also notice that
the details of a given proposition need not be exact: <i>a mattress </i>(sing.)
vs. <i>a pile of mattresses</i> (plur.). </span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>Finally, notice that the second of
the sentence pairs in the previous example is attributed to the police where
the first is not. This difference between sentences is also acceptable for
purposes of tagging them as paraphrases.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>For this type of sentence
pair, we want to mark them as equivalent (paraphrases). Notice that the
sentence pairs, while clearly similar overall in content, <i>both</i> differ in
additional, modifying content. As the main content of the sentences is similar in
meaning, we allow some minor content mismatch.</span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>3.3.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Anaphora</span></u></i></b></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US></span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Sometimes the difference between
two sentences involves anaphora (NPs and pronominal). These sentences can be
tagged as paraphrases despite the (sometimes) fairly large gap between them in
terms of their corresponding full-form NPs</span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>. Examples follow.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.3.1.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'> Demonstratives</span></i></b></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>But Secretary of State Colin
Powell brushed off <b>this possibility</b> %%day%%.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Secretary of State Colin Powell
last week ruled out <b>a non-aggression treaty</b>.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.3.2.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>NP ->
pro</span></i></b></h3>
<p class=MsoNormal style='page-break-after:avoid'><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Meteorologists predicted <b>the
storm</b> would become a category %%number%% hurricane before landfall.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>It</span></b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> was predicted to become a
category 1 hurricane overnight.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'> </span></i></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.3.3.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><span lang=EN-US style='font-size:10.0pt'> <b><i>Proper
NP (+animate) -> pro</i></b></span></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Earlier, <b>he </b>told France
Inter-Radio , ''I think we can now qualify what is happening as a genuine
epidemic.''</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>''I think we can now qualify what
is happening as a genuine epidemic,'' <b>he<i>alth minister Jean-Francois
Mattei </i></b><i>said on France Inter Radio.</i></span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.3.4.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>Title +
proper NP (+animate) -> pro</span></i></b></h3>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-size:10.0pt;
font-family:Arial'><span style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>''United is continuing to deliver
major cost reductions and is now coupling that effort with significant unit
revenue improvement, '' <b>chief financial officer Jake Brace</b> said in a statement.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>''United is continuing to deliver
major cost reductions and is now coupling that effort with significant unit
revenue improvement,'' <b>he</b> said.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'> </span></i></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.3.5.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><span lang=EN-US style='font-size:10.0pt'> <b><i>NP
(-animate) -> pro</i></b></span></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>''Spoofing</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier'> is a problem faced by
any company with a trusted domain name that uses e-mail to communicate with its
customers.</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:36.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>It</span></b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> is a problem for Amazon and
others that have a trusted domain name and use e-mail to communicate with
customers.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><b><i><u><span lang=EN-US style='font-family:Arial'><span
style='text-decoration:none'> </span></span></u></i></b></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>3.4.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Inherent
ambiguity of the task</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>The relatively
holistic/vague criteria established above should work well for most sentence
pairs. In the end, we're tagging something that's not quite "paraphrase," but
something like "semantic near-equivalence" &mdash; sentence pairs that ideally
involve complete sets of bidirectional entailments, but which in fact often
have some entailment asymmetries or other mismatches. The issue here is when
those asymmetries/differences become significant enough to make the pair
different enough that you don't think they mean more or less the same thing
anymore, where "more or less" becomes a personal judgment call.</span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:1.0cm;text-indent:-1.0cm'><b><i><span
lang=EN-US style='font-family:Arial'>3.5.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-family:Arial'>Sentence
pairs with different content</span></u></i></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.5.1.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><u><span lang=EN-US style='font-size:10.0pt'></span></u></i></b><b><i><span
lang=EN-US style='font-size:10.0pt'>Different content: prototypical example</span></i></b></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'>In contrast to the examples above, the following sentences clearly
express <b>different</b> content:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:27.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Prime Minister
Junichiro Koizumi</span></b><span lang=EN-US style='font-size:10.0pt;
font-family:Courier;color:black'> did not have to dissolve </span><b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier;color:red'>parliament</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier;color:black'> until next
summer , when elections for the upper house are also due .</span></p>
<p class=MsoNormal style='margin-left:27.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:27.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Prime Minister
Junichiro Koizumi</span></b><span lang=EN-US style='font-size:10.0pt;
font-family:Courier;color:black'> has urged </span><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:red'>Nakasone</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier;color:black'> to give up
his seat in accordance with the new age rule .</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>While the principal agent
(<i><span style='color:black'>Koizumi</span></i><span style='color:black'>)<b> </b>is
the same, predicated actions, i.e. verbs (<i>dissolve / urge</i>) and other
arguments (<i>parliament / Nakasone</i>) are clearly different. The additional
material found in either sentence does not embellish the main proposition but
instead contains important content itself. These two sentence pairs should be
marked as <b>not equivalent</b> in that, while they share an agent (Koizumi),
they are about unrelated events. Again, </span>ambiguous cases should be marked
"<b>not equivalent</b>" rather than "<b>equivalent</b>".</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.5.2.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>Shared
content of the same event, etc. but lacking details (one sentence is a superset
of the other)</span></i></b></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:27.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Researchers have
identified a genetic pilot light for puberty in both mice and humans .</span></p>
<p class=MsoNormal style='margin-left:27.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'> </span></p>
<p class=MsoNormal style='margin-left:27.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>The discovery of a
gene that appears to be a key regulator of puberty in humans and mice <b>could
lead to new infertility treatments and contraceptives</b>.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial;color:black'>These
sentences are similar in content, refer to a similar key piece of information,
but cannot be marked as <b>equivalent</b>. The sentences should be tagged as
<b>not equivalent</b> because even though the </span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>content of the sentences is similar,
one sentence is a significantly larger superset of the other: all the content
of the first sentence is in the second, but not vice-versa. The superset
sentence contains important content information (above, in bold) not present in
the second sentence.</span></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'> </span></b></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'>Some
similar sentence pairs follow (missing content in superset sentence is in
bold):</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'> </span></i></p>
<p class=MsoNormal style='margin-left:18.0pt'><u><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>SOME %%NUMBER%% jobs</span></u><span
lang=EN-US style='font-size:10.0pt;font-family:Courier'> are set to go at
Cadbury Schweppes , the confectionery and drinks giant , <b>as part of a
sweeping cost reduction programme announced today .</b></span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Confectionery group Cadbury
Schweppes has warned of further cuts to <u>its %%NUMBER%% -strong UK workforce</u> .</span></p>
<p class=MsoNormal><i><span lang=EN-US style='font-size:10.0pt;font-family:
Arial;color:navy'> </span></i></p>
<p class=MsoNormal style='margin-left:18.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:black'> </span></i></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial;color:black'>This sentence
pair is difficult in that, while one sentence is a superset of the other, it is <i>also</i>
arguably the case that the sentences are almost "paraphrases" &mdash; except when we
see that</span><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> the
content of the underlined portions in the two sentences above is exclusive to
one sentence. In the end, however, the material in bold is an important
difference in content between the sentences, and adds important additional
content, leading us to prefer to tag them as <b>not equivalent</b>.</span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Please use your best
judgment in choosing to tag sentences as <b>equivalent</b> or <b>not
equivalent</b>. Many of the sentence pairs you see differ due to the way
editors eliminate language/content they deem unnecessary. Sometimes the two
sentences will differ in information that conveys important additional
information. Sentences like these should be tagged as <b>not equivalent</b>:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial;
color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>The former wife of
rapper Eminem has been electronically tagged after missing two court
appearances .</span></p>
<p class=MsoNormal style='margin-left:36.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:navy'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>After missing two
court appearances <b>in a cocaine possession case</b> , Eminem's ex-wife has
been placed under electronic house arrest .</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:black'> </span></i></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>The issue of whether or
not the extra/missing information is significant enough to warrant treating the
sentences as not equivalent amounts to a judgment call. Minor differences
between sentences can be overlooked when determining if two sentences are
paraphrases. As seen in a previous example sentence pair, the only differences
in content between the following sentences are the reduced forms of names and
adverbial modifiers (dates). <span style='color:black'>There are no major
differences in content between these sentences. They can be marked as <b>equivalent</b>.</span></span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>An autopsy found
Hatab's death was caused by "strangulation/asphyxiation," Rawson said
%%DAY%% .</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>An autopsy found that
Nagem Sadoon Hatab's death <b>on %%DATE%%</b> was caused by "
strangulation/asphyxiation , " Marine spokesman %%NUMBER%% st Lt. Dan
Rawson said %%DAY%%.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:black'> </span></i></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>The role of content
asymmetries in determining whether sentences should be marked as equivalent/not
equivalent is also linked to sentence length. In a pair of 20-word sentences,
the presence/absence of a single modifier might be lost in the noise, while in
a pair of 5 word sentences it might take on much greater significance. There is
no good way to normalize for length in such cases, so again, just depend on
your own judgment.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:black'> </span></i></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'> </span></b></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.5.3.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>Cannot
determine if sentences refer to the same event</span></i></b></h3>
<p class=MsoNormal><b><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'> </span></b></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>More than <b>%%NUMBER%%
acres</b> burned and more than %%NUMBER%% homes were destroyed in the <b>massive</b>
Cedar <b>Fire</b> .</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><b><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Major fires</span></b><span
lang=EN-US style='font-size:10.0pt;font-family:Courier'> had burned <b>%%NUMBER%%
acres</b> by early last night.</span></p>
<p class=MsoNormal><i><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'> </span></i></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>In this example, both
sentences could be about the same series of events (fires). However, these are
possibly about two events: one is about a specific fire, the other about a
cluster of fires. This should lead us to annotate these sentences as expressing
<b>not equivalent</b> content. Another such example follows:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>The spokeswoman said four soldiers
were wounded in the attack, which took place just before noon around %%NUMBER%%
km ( %%NUMBER%% miles ) north of the capital Baghdad.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'>Two US soldiers were killed in a
mortar attack near the Iraqi town of Samarra yesterday , a US military spokeswoman said.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>Notice that both
sentences report the deaths of soldiers in an attack in <i>some</i> Iraqi town.
However, it is clear that the two sentences could be describing two isolated
events. The fact that there is a discrepancy in the number of reported deaths
should add to one's suspicions that this might be the case. Since the sentences
share some content, but we cannot be sure they refer to the same event, we
should seek to err on the side of caution and mark them as <b>not equivalent</b>.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.5.4.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>Shared
content but different rhetorical structure</span></i></b></h3>
<p class=MsoNormal><i><span lang=EN-US style='font-size:10.0pt;font-family:
Arial;color:black'> </span></i></p>
<p class=MsoNormal style='margin-left:12.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>The <b>search feature</b>
works with around <b>%%NUMBER%% titles</b> from <b>%%NUMBER%% publishers</b>,
which translates into some <b>%%NUMBER%% million pages</b> of searchable text .</span></p>
<p class=MsoNormal style='margin-left:12.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'> </span></p>
<p class=MsoNormal style='margin-left:12.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>This innovative <b>search
feature</b> lets Amazon customers search the full text of a title to find a
book , supplementing the existing search by author or title .</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>In this sentence pair,
both sentences clearly make statements about a new search feature. However,
notice the emphasis placed on the amount of data in the first sentence via the
rhetorical device of reiterated citation of numbers. </span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>The two sentences are about the same
subject matter, but they are significantly different in that the first might
occur as a detailed exploration of the second</span><span lang=EN-US
style='font-size:10.0pt;font-family:Arial'>. Therefore, this leads us to mark
the sentences as <b>not equivalent</b>.</span></p>
<p class=MsoNormal><b><span lang=EN-US style='font-size:10.0pt;font-family:
Arial'> </span></b></p>
<h3 style='margin-left:35.45pt;text-indent:-35.45pt'><b><i><span lang=EN-US
style='font-size:10.0pt'>3.5.5.<span style='font:7.0pt "Times New Roman"'>
</span></span></i></b><b><i><span lang=EN-US style='font-size:10.0pt'>Same
event but details given different emphasis</span></i></b></h3>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>A Hunter Valley woman sentenced to %%NUMBER%% years jail for killing her four babies was only a
danger to children in her care, a court was told.</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>As she stood up
yesterday to receive a sentence of %%NUMBER%% years for killing her four
babies, Kathleen Folbigg showed no emotion.</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>These sentences clearly
report information related to the same event, but the first sentence emphasizes
a particular legal argument presented by the convicted woman's lawyer, while
the second focuses on her apparent mental state at the trial. This type of
sentence pair should be tagged as <b>not equivalent</b>. Given the magnitude
of the semantic divergence between these two sentences &mdash; both in terms of
content and emphasis &mdash; they should be treated as not equivalent.</span></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><i><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></i></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial'>More example sentence
pairs which, while clearly significantly overlapping in content, should be
tagged as <b>not equivalent</b>:</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Arial'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Authorities dubbed the
investigation Operation Rollback , a reference to Wal-Mart's name for price
reductions .</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Courier;
color:black'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>The ICE's
investigation , known as " Operation Rollback " , targeted workers at
%%NUMBER%% Wal-Mart stores in %%NUMBER%% states .</span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Researchers also found
that women with mutations in the BRCA1 or BRCA2 gene have a %%NUMBER%% % to
%%NUMBER%% % risk of ovarian cancer , depending on which gene is affected .</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier'> </span></p>
<p class=MsoNormal style='margin-left:18.0pt'><span lang=EN-US
style='font-size:10.0pt;font-family:Courier;color:black'>Earlier studies had
suggested that the breast cancer risk from the gene mutations ranged from
%%NUMBER%% % to %%NUMBER%% % .</span></p>
<p class=MsoNormal style='margin-left:18.0pt'><i><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:black'> </span></i></p>
<p class=MsoNormal style='text-align:justify;text-justify:inter-ideograph'><span
lang=EN-US style='font-size:10.0pt;font-family:Arial;color:black'>Note that
while the sentences may refer to the same piece of information, the inclusion
of <i>"earlier studies"</i> suggests this may not be the case. Therefore,
they should be tagged as <b>not equivalent</b>.</span></p>
</div>
</body>
</html>
================================================
FILE: bert/MSRP/msr_paraphrase_README.rtf
================================================
{\rtf1\ansi\ansicpg932\uc2\deff0\stshfdbch11\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe1041{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
{\f2\fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f5\fmodern\fcharset0\fprq1{\*\panose 02070409020205020404}Courier;}
{\f10\fnil\fcharset2\fprq2{\*\panose 05000000000000000000}Wingdings;}{\f11\froman\fcharset128\fprq1{\*\panose 02020609040205080304}\'82\'6c\'82\'72 \'96\'be\'92\'a9{\*\falt MS Mincho};}
{\f13\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}SimSun{\*\falt \'cb\'ce\'cc\'e5};}{\f15\fmodern\fcharset128\fprq1{\*\panose 020b0609070205080204}\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e{\*\falt MS Gothic};}
{\f39\fmodern\fcharset128\fprq1{\*\panose 020b0609070205080204}@\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e;}{\f40\froman\fcharset128\fprq1{\*\panose 02020609040205080304}@\'82\'6c\'82\'72 \'96\'be\'92\'a9;}
{\f43\fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Verdana;}{\f44\fswiss\fcharset0\fprq2{\*\panose 020b0603020102020204}Franklin Gothic Medium;}{\f45\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}@SimSun;}
{\f46\froman\fcharset238\fprq2 Times New Roman CE;}{\f47\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f49\froman\fcharset161\fprq2 Times New Roman Greek;}{\f50\froman\fcharset162\fprq2 Times New Roman Tur;}
{\f51\froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f52\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f53\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f54\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
{\f56\fswiss\fcharset238\fprq2 Arial CE;}{\f57\fswiss\fcharset204\fprq2 Arial Cyr;}{\f59\fswiss\fcharset161\fprq2 Arial Greek;}{\f60\fswiss\fcharset162\fprq2 Arial Tur;}{\f61\fswiss\fcharset177\fprq2 Arial (Hebrew);}
{\f62\fswiss\fcharset178\fprq2 Arial (Arabic);}{\f63\fswiss\fcharset186\fprq2 Arial Baltic;}{\f64\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f66\fmodern\fcharset238\fprq1 Courier New CE;}{\f67\fmodern\fcharset204\fprq1 Courier New Cyr;}
{\f69\fmodern\fcharset161\fprq1 Courier New Greek;}{\f70\fmodern\fcharset162\fprq1 Courier New Tur;}{\f71\fmodern\fcharset177\fprq1 Courier New (Hebrew);}{\f72\fmodern\fcharset178\fprq1 Courier New (Arabic);}
{\f73\fmodern\fcharset186\fprq1 Courier New Baltic;}{\f74\fmodern\fcharset163\fprq1 Courier New (Vietnamese);}{\f158\froman\fcharset0\fprq1 MS Mincho Western{\*\falt MS Mincho};}{\f156\froman\fcharset238\fprq1 MS Mincho CE{\*\falt MS Mincho};}
{\f157\froman\fcharset204\fprq1 MS Mincho Cyr{\*\falt MS Mincho};}{\f159\froman\fcharset161\fprq1 MS Mincho Greek{\*\falt MS Mincho};}{\f160\froman\fcharset162\fprq1 MS Mincho Tur{\*\falt MS Mincho};}
{\f163\froman\fcharset186\fprq1 MS Mincho Baltic{\*\falt MS Mincho};}{\f178\fnil\fcharset0\fprq2 SimSun Western{\*\falt \'cb\'ce\'cc\'e5};}{\f198\fmodern\fcharset0\fprq1 MS Gothic Western{\*\falt MS Gothic};}
{\f196\fmodern\fcharset238\fprq1 MS Gothic CE{\*\falt MS Gothic};}{\f197\fmodern\fcharset204\fprq1 MS Gothic Cyr{\*\falt MS Gothic};}{\f199\fmodern\fcharset161\fprq1 MS Gothic Greek{\*\falt MS Gothic};}
{\f200\fmodern\fcharset162\fprq1 MS Gothic Tur{\*\falt MS Gothic};}{\f203\fmodern\fcharset186\fprq1 MS Gothic Baltic{\*\falt MS Gothic};}{\f438\fmodern\fcharset0\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e Western;}
{\f436\fmodern\fcharset238\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e CE;}{\f437\fmodern\fcharset204\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e Cyr;}
{\f439\fmodern\fcharset161\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e Greek;}{\f440\fmodern\fcharset162\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e Tur;}
{\f443\fmodern\fcharset186\fprq1 @\'82\'6c\'82\'72 \'83\'53\'83\'56\'83\'62\'83\'4e Baltic;}{\f448\froman\fcharset0\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Western;}{\f446\froman\fcharset238\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 CE;}
{\f447\froman\fcharset204\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Cyr;}{\f449\froman\fcharset161\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Greek;}{\f450\froman\fcharset162\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Tur;}
{\f453\froman\fcharset186\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Baltic;}{\f476\fswiss\fcharset238\fprq2 Verdana CE;}{\f477\fswiss\fcharset204\fprq2 Verdana Cyr;}{\f479\fswiss\fcharset161\fprq2 Verdana Greek;}
{\f480\fswiss\fcharset162\fprq2 Verdana Tur;}{\f483\fswiss\fcharset186\fprq2 Verdana Baltic;}{\f484\fswiss\fcharset163\fprq2 Verdana (Vietnamese);}{\f486\fswiss\fcharset238\fprq2 Franklin Gothic Medium CE;}
{\f487\fswiss\fcharset204\fprq2 Franklin Gothic Medium Cyr;}{\f489\fswiss\fcharset161\fprq2 Franklin Gothic Medium Greek;}{\f490\fswiss\fcharset162\fprq2 Franklin Gothic Medium Tur;}{\f493\fswiss\fcharset186\fprq2 Franklin Gothic Medium Baltic;}
{\f498\fnil\fcharset0\fprq2 @SimSun Western;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;
\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red0\green51\blue153;}{\stylesheet{
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1041\loch\f0\hich\af0\dbch\af11\cgrid\langnp1033\langfenp1041 \snext0 Normal;}{
\s1\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0 \b\fs32\lang1033\langfe1041\kerning32\loch\f1\hich\af1\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext0 \styrsid12138041 heading 1;}{
\s2\ql \li0\ri0\sb240\sa60\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel1\adjustright\rin0\lin0\itap0 \b\i\fs28\lang1033\langfe1041\loch\f1\hich\af1\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext0 \styrsid12138041 heading 2;}{
\s3\ql \li851\ri0\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel2\adjustright\rin0\lin851\itap0 \fs24\lang1033\langfe1041\loch\f1\hich\af1\dbch\af15\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext0 \styrsid15927319 heading 3;}{
\s4\ql \li851\ri0\keepn\widctlpar\aspalpha\aspnum\faauto\outlinelevel3\adjustright\rin0\lin851\itap0 \b\fs24\lang1033\langfe1041\loch\f0\hich\af0\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext0 \styrsid15927319 heading 4;}{\*\cs10 \additive
\ssemihidden Default Paragraph Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\loch\f0\hich\af0\dbch\af11\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}{\*\cs15 \additive \ul\cf2 \sbasedon10 \styrsid3281436
Hyperlink;}{\*\ts16\tsrowd\trbrdrt\brdrs\brdrw10 \trbrdrl\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrh\brdrs\brdrw10 \trbrdrv\brdrs\brdrw10
\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0
\fs20\lang1024\langfe1024\loch\f0\hich\af0\dbch\af11\cgrid\langnp1024\langfenp1024 \sbasedon11 \snext16 \styrsid3362214 Table Grid;}{\s17\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0
\fs24\lang1033\langfe1041\loch\f0\hich\af0\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext0 \styrsid2313633 Date;}{\s18\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0
\fs16\lang1033\langfe1041\loch\f1\hich\af1\dbch\af15\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext18 \ssemihidden \styrsid14623808 Balloon Text;}{\s19\ql \li0\ri0\widctlpar
\tqc\tx4252\tqr\tx8504\aspalpha\aspnum\faauto\nosnaplinegrid\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1041\loch\f0\hich\af0\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext19 \styrsid15927319 header;}{\s20\ql \li0\ri0\widctlpar
\tqc\tx4252\tqr\tx8504\aspalpha\aspnum\faauto\nosnaplinegrid\adjustright\rin0\lin0\itap0 \fs24\lang1033\langfe1041\loch\f0\hich\af0\dbch\af11\cgrid\langnp1033\langfenp1041 \sbasedon0 \snext20 \styrsid15927319 footer;}{\*\cs21 \additive
\sbasedon10 \styrsid5994305 page number;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\listtable{\list\listtemplateid1161447578\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0
{\leveltext\leveltemplateid67698689\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext
\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693
\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689
\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}
\f2\fbias0 \fi-360\li3600\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0
\fi-360\li4320\jclisttab\tx4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698689\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li5040
\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel
\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\leveltemplateid67698693\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid431631336}
{\list\listtemplateid-1402815602{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\fbias0 \fi-390\li390\jclisttab\tx390\lin390 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat0\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\fbias0 \fi-390\li390\jclisttab\tx390\lin390 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1800\li1800\jclisttab\tx1800\lin1800 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1800\li1800\jclisttab\tx1800\lin1800 }{\listname ;}\listid457771252}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}
\fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-851\li851\jclisttab\tx851\lin851 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid550504541}
{\list\listtemplateid2133510874\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid1716412224\'01{\uc1\u-3913 ?};}{\levelnumbers;}
\loch\af3\hich\af3\dbch\af13\fbias0 \fi-360\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0
\fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698693\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li2160
\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698689\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li2880\jclisttab\tx2880\lin2880
}{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li3600\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698693\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li4320\jclisttab\tx4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698689\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li5040\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698691\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\leveltemplateid67698693\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid610820163}{\list\listtemplateid-147130004{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fs20\fbias0 \fi-360\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'01o;}{\levelnumbers;}\f2\fs20\fbias0 \fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}
\f10\fs20\fbias0 \fi-360\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li2880
\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li3600\jclisttab\tx3600\lin3600 }{\listlevel
\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li4320\jclisttab\tx4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li5040\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fs20\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid679893778}{\list\listtemplateid-829655036{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\ulnone\fbias0 \fi-555\li555\jclisttab\tx555\lin555 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat3\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers
\'01\'03;}\ulnone\fbias0 \fi-555\li555\jclisttab\tx555\lin555 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat3\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\ulnone\fbias0 \fi-720\li720
\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\ulnone\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\ulnone\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel
\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\ulnone\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel
\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\ulnone\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }{\listlevel
\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\ulnone\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\ulnone\fbias0 \fi-1800\li1800
\jclisttab\tx1800\lin1800 }{\listname ;}\listid744495498}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fbias0 \fi-425\li425
\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fbias0 \fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0
\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid761267708}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fbias0 \fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fbias0 \fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers
\'01\'03\'05\'07;}\fbias0 \fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0
\fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1134\li1134
\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1276\li1276
\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0
\fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers
\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid843324955}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fbias0 \fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fbias0
\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-709\li709\jclisttab\tx709\lin709 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid870872927}
{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fbias0 \fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fbias0 \fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid880091234}{\list\listtemplateid-1775071652{\listlevel\levelnfc0
\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat3\levelspace0\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\fbias0 \fi-660\li660\jclisttab\tx660\lin660 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat2
\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\fbias0 \fi-660\li690\jclisttab\tx690\lin690 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat4\levelspace0\levelindent0{\leveltext
\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-720\li780\jclisttab\tx780\lin780 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers
\'01\'03\'05\'07;}\fbias0 \fi-720\li810\jclisttab\tx810\lin810 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0
\fi-1080\li1200\jclisttab\tx1200\lin1200 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1080\li1230
\jclisttab\tx1230\lin1230 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1440\li1620
\jclisttab\tx1620\lin1620 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0
\fi-1440\li1650\jclisttab\tx1650\lin1650 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers
\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1800\li2040\jclisttab\tx2040\lin2040 }{\listname ;}\listid1074741133}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fbias0 \fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fbias0
\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-709\li709\jclisttab\tx709\lin709 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid1078210532
\liststyleid1217929462}{\list\listtemplateid-1780467540{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat3\levelspace0\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\f0\fbias0 \fi-408\li408\jclisttab\tx408\lin408 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat2\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\f0\fbias0 \fi-408\li408\jclisttab\tx408\lin408 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat3\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\f0\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\f0\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\f0\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\f0\fbias0 \fi-1080\li1080\jclisttab\tx1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\f0\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\f0\fbias0 \fi-1440\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\f0\fbias0 \fi-1800\li1800\jclisttab\tx1800\lin1800 }{\listname ;}\listid1111631278}{\list\listtemplateid67698719{\listlevel\levelnfc0
\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0
\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers
\'01\'03\'05;}\fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-851\li851
\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-992\li992\jclisttab\tx992\lin992 }{\listlevel
\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0
\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0
\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid1217929462
{\*\liststylename 1 / 1.1 / 1.1.1;}}{\list\listtemplateid1161447578{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li720
\jclisttab\tx720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1
\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext
\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li3600\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0
\fi-360\li4320\jclisttab\tx4320\lin4320 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3913 ?};}{\levelnumbers;}\f3\fbias0 \fi-360\li5040\jclisttab\tx5040\lin5040 }
{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01o;}{\levelnumbers;}\f2\fbias0 \fi-360\li5760\jclisttab\tx5760\lin5760 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace360\levelindent0{\leveltext\'01{\uc1\u-3929 ?};}{\levelnumbers;}\f10\fbias0 \fi-360\li6480\jclisttab\tx6480\lin6480 }{\listname ;}\listid1227111723}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}
\fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-851\li851\jclisttab\tx851\lin851 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid1410998889}
{\list\listtemplateid365342112{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat22\levelspace0\levelindent0{\leveltext\'03\'00.0;}{\levelnumbers\'01;}\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\fbias0 \fi-720\li1440\jclisttab\tx1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1
\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\fbias0 \fi-720\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\fbias0 \fi-1080\li3240\jclisttab\tx3240\lin3240 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\fbias0 \fi-1440\li4320\jclisttab\tx4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fbias0 \fi-1440\li5040\jclisttab\tx5040\lin5040 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fbias0 \fi-1800\li6120\jclisttab\tx6120\lin6120 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fbias0 \fi-1800\li6840\jclisttab\tx6840\lin6840 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fbias0 \fi-2160\li7920\jclisttab\tx7920\lin7920 }{\listname ;}\listid1716854414}{\list\listtemplateid67698719{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0
{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}
\fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\fi-851\li851\jclisttab\tx851\lin851 }
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-992\li992\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0
\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1134\li1134\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0
\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1276\li1276\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1418\li1418\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid1778790640}{\list\listtemplateid67698719
{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\fi-425\li425\jclisttab\tx425\lin425 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0
\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\fi-567\li567\jclisttab\tx567\lin567 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext
\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\fi-709\li709\jclisttab\tx709\lin709 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'08\'00.\'01.\'02.\'03.;}{\levelnumbers
\'01\'03\'05\'07;}\fi-851\li851\jclisttab\tx851\lin851 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\fi-992\li992
\jclisttab\tx992\lin992 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\fi-1134\li1134
\jclisttab\tx1134\lin1134 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\fi-1276\li1276
\jclisttab\tx1276\lin1276 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\fi-1418\li1418
\jclisttab\tx1418\lin1418 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}
\fi-1559\li1559\jclisttab\tx1559\lin1559 }{\listname ;}\listid1838499815}{\list\listtemplateid1303278200{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat2\levelspace0\levelindent0{\leveltext\'03\'00.0;}{\levelnumbers\'01;}
\fbias0 \fi-720\li720\jclisttab\tx720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\lev
gitextract_z313sz7r/
├── ReadMe.md
├── attribute_level/
│ ├── attribute.py
│ ├── best_test.sh
│ ├── cp_AttA3_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AttA3_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_Bert/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_CNN_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ └── networks2.py
├── bert/
│ ├── CONTRIBUTING.md
│ ├── MSRP/
│ │ ├── LICENSE
│ │ ├── MSR Paraphrase Corpus.lnk
│ │ ├── Microsoft Shared Source License.htm
│ │ ├── Microsoft Shared Source License.rtf
│ │ ├── msr_paraphrase_README.htm
│ │ ├── msr_paraphrase_README.rtf
│ │ ├── msr_paraphrase_data.txt
│ │ ├── msr_paraphrase_test.txt
│ │ └── msr_paraphrase_train.txt
│ ├── README.md
│ ├── __init__.py
│ ├── convert_tf_checkpoint_to_pytorch.py
│ ├── convert_tf_checkpoint_to_pytorch_raw.py
│ ├── download_MSRP.py
│ ├── extract_features.py
│ ├── glue_data/
│ │ ├── aspect_ensemble_online/
│ │ │ ├── 1/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 2/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 3/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 4/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 5/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ ├── test.tsv
│ │ │ │ └── train.tsv
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── generate_npy.py
│ │ ├── generate_npy_for_polarity.py
│ │ ├── polarity_ensemble_online/
│ │ │ ├── 1/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 2/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 3/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 4/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ ├── 5/
│ │ │ │ ├── dev.ind
│ │ │ │ ├── dev.tsv
│ │ │ │ └── train.tsv
│ │ │ └── test.tsv
│ │ └── test.tsv
│ ├── modeling.py
│ ├── notebooks/
│ │ ├── Comparing TF and PT models SQuAD predictions.ipynb
│ │ └── Comparing TF and PT models.ipynb
│ ├── optimization.py
│ ├── requirements.txt
│ ├── run_classifier.py
│ ├── run_classifier_2.py
│ ├── run_classifier_ensemble.py
│ ├── run_classifier_ensemble_polarity.py
│ ├── run_squad.py
│ ├── samples/
│ │ ├── input.txt
│ │ └── sample_text.txt
│ ├── tests/
│ │ ├── modeling_test.py
│ │ ├── optimization_test.py
│ │ └── tokenization_test.py
│ └── tokenization.py
├── data/
│ ├── aspect_ensemble_online/
│ │ ├── 1/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 2/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 3/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ ├── 4/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ ├── test.tsv
│ │ │ └── train.tsv
│ │ └── 5/
│ │ ├── dev.ind
│ │ ├── dev.tsv
│ │ ├── test.tsv
│ │ └── train.tsv
│ ├── backup/
│ │ ├── test_predict_aspect_ensemble_['Mon', 'Nov', '19', '20_30_28', '2018'].txt
│ │ ├── test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_48_57', '2018'].txt
│ │ ├── test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_49_23', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_25_00', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_26_03', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_30_35', '2018'].txt
│ │ ├── test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_42_02', '2018'].txt
│ │ └── test_predict_polarity_ensemble_['Tue', 'Nov', '13', '21_49_37', '2018'].txt
│ ├── build_test_for_predict.py
│ ├── generate_dev.py
│ ├── generate_dev_polarity.py
│ ├── generate_test.py
│ ├── polarity_ensemble_online/
│ │ ├── 1/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 2/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 3/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 4/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ ├── 5/
│ │ │ ├── dev.ind
│ │ │ ├── dev.tsv
│ │ │ └── train.tsv
│ │ └── test.tsv
│ ├── submit2.csv
│ ├── submit2.py
│ ├── submit2_stacking_all_bert.csv
│ ├── submit_example_2.csv
│ ├── test.txt
│ ├── test_predict_aspect_ensemble.txt
│ ├── test_predict_polarity_ensemble.txt
│ ├── test_public_2.csv
│ ├── train.txt
│ └── vocabulary.pkl
├── dataset/
│ ├── attribute.json
│ ├── clean_data.py
│ ├── clean_test.py
│ ├── polarity.json
│ ├── submit_example_2.csv
│ ├── test_public_2.csv
│ └── train_2.csv
├── embedding/
│ ├── embedding_all_fasttext2_300.txt
│ ├── embedding_all_merge_300.txt
│ └── embedding_all_tencent_200.txt
├── polarity_level_aspect/
│ ├── ab_polarity.py
│ ├── backup/
│ │ ├── cp_AT_LSTM_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_ft2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_AT_LSTM_tc/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_Bert/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_ft2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_GCAE_tc/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_HEAT_0/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ ├── cp_HEAT_2/
│ │ │ └── npy/
│ │ │ ├── oof_test.npy
│ │ │ ├── oof_train.npy
│ │ │ └── oof_train_y.npy
│ │ └── cp_HEAT_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── best_test.sh
│ ├── cp_AT_LSTM_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_AT_LSTM_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_Bert/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_GCAE_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_0/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_ft2/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ ├── cp_HEAT_tc/
│ │ └── npy/
│ │ ├── oof_test.npy
│ │ ├── oof_train.npy
│ │ └── oof_train_y.npy
│ └── networks.py
└── utils/
├── Data.py
├── data_helper.py
├── evaluate.py
├── prepare_w2v.py
├── prepare_w2v_with_UNK.py
├── train.py
├── train2.py
├── train_single.py
├── utils.py
└── vocabulary2.pkl
SYMBOL INDEX (450 symbols across 33 files)
FILE: attribute_level/attribute.py
class AttributeClassifier (line 66) | class AttributeClassifier: # Neural network method
method __init__ (line 67) | def __init__(self):
method train_from_data (line 72) | def train_from_data(self, train_raw_data, test_raw_data, W, word2index...
method load_model (line 141) | def load_model(self, check_point):
function split_dev (line 145) | def split_dev(train_texts, train_labels, folds=5):
function main (line 158) | def main():
function kfold_split (line 195) | def kfold_split(length, k=5):
function ensemble (line 208) | def ensemble():
function test (line 249) | def test():
function dev (line 276) | def dev():
function load_elmo (line 300) | def load_elmo(test_texts):
function get_oof (line 315) | def get_oof(clfs, raw_texts, raw_labels, test_data, word2index, attr_dict):
function load_oof (line 363) | def load_oof(dir):
function stacking (line 371) | def stacking():
FILE: attribute_level/networks2.py
class WordRep (line 6) | class WordRep(nn.Module):
method __init__ (line 7) | def __init__(self, vocab_size, word_embed_dim, char_size, args):
method forward (line 23) | def forward(self, input_tensors):
class LSTM (line 74) | class LSTM(nn.Module):
method __init__ (line 75) | def __init__(self, word_embed_dim, output_size, vocab_size, args=None):
method forward (line 96) | def forward(self, input_tensors):
class CNN (line 109) | class CNN(nn.Module):
method __init__ (line 110) | def __init__(self, word_embed_dim, output_size, vocab_size, args=None,...
method forward (line 146) | def forward(self, input_tensors):
method optimize_step (line 166) | def optimize_step(self, input_tensors, category_tensor, optimizer):
class AttA3 (line 183) | class AttA3(nn.Module):
method __init__ (line 184) | def __init__(self, word_embed_dim, output_size, vocab_size, args=None):
method forward (line 226) | def forward(self, input_tensors):
method optimize_step (line 271) | def optimize_step(self, input_tensors, category_tensor, optimizer):
FILE: bert/convert_tf_checkpoint_to_pytorch.py
function convert (line 51) | def convert():
FILE: bert/convert_tf_checkpoint_to_pytorch_raw.py
function convert (line 51) | def convert():
FILE: bert/download_MSRP.py
function download_and_extract (line 26) | def download_and_extract(task, data_dir):
function format_mrpc (line 35) | def format_mrpc(data_dir, path_to_data):
function download_diagnostic (line 79) | def download_diagnostic(data_dir):
function get_tasks (line 88) | def get_tasks(task_names):
function main (line 99) | def main(arguments):
FILE: bert/extract_features.py
class InputExample (line 41) | class InputExample(object):
method __init__ (line 43) | def __init__(self, unique_id, text_a, text_b):
class InputFeatures (line 49) | class InputFeatures(object):
method __init__ (line 52) | def __init__(self, unique_id, tokens, input_ids, input_mask, input_typ...
function convert_examples_to_features (line 60) | def convert_examples_to_features(examples, seq_length, tokenizer):
function _truncate_seq_pair (line 151) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function read_examples (line 168) | def read_examples(input_file):
function main (line 192) | def main():
FILE: bert/glue_data/generate_npy.py
function categories_from_output (line 7) | def categories_from_output(output, t = 0.45):
function kfold_split (line 24) | def kfold_split(length, k=5):
function merge_oof (line 37) | def merge_oof(dir):
FILE: bert/glue_data/generate_npy_for_polarity.py
function categories_from_output (line 7) | def categories_from_output(output, t = 0.45):
function kfold_split (line 24) | def kfold_split(length, k=5):
function merge_oof (line 37) | def merge_oof(dir):
FILE: bert/modeling.py
function gelu (line 29) | def gelu(x):
class BertConfig (line 37) | class BertConfig(object):
method __init__ (line 40) | def __init__(self,
method from_dict (line 89) | def from_dict(cls, json_object):
method from_json_file (line 97) | def from_json_file(cls, json_file):
method to_dict (line 103) | def to_dict(self):
method to_json_string (line 108) | def to_json_string(self):
class BERTLayerNorm (line 113) | class BERTLayerNorm(nn.Module):
method __init__ (line 114) | def __init__(self, config, variance_epsilon=1e-12):
method forward (line 122) | def forward(self, x):
class BERTEmbeddings (line 128) | class BERTEmbeddings(nn.Module):
method __init__ (line 129) | def __init__(self, config):
method forward (line 142) | def forward(self, input_ids, token_type_ids=None):
class BERTSelfAttention (line 159) | class BERTSelfAttention(nn.Module):
method __init__ (line 160) | def __init__(self, config):
method transpose_for_scores (line 176) | def transpose_for_scores(self, x):
method forward (line 181) | def forward(self, hidden_states, attention_mask):
class BERTSelfOutput (line 210) | class BERTSelfOutput(nn.Module):
method __init__ (line 211) | def __init__(self, config):
method forward (line 217) | def forward(self, hidden_states, input_tensor):
class BERTAttention (line 224) | class BERTAttention(nn.Module):
method __init__ (line 225) | def __init__(self, config):
method forward (line 230) | def forward(self, input_tensor, attention_mask):
class BERTIntermediate (line 236) | class BERTIntermediate(nn.Module):
method __init__ (line 237) | def __init__(self, config):
method forward (line 242) | def forward(self, hidden_states):
class BERTOutput (line 248) | class BERTOutput(nn.Module):
method __init__ (line 249) | def __init__(self, config):
method forward (line 255) | def forward(self, hidden_states, input_tensor):
class BERTLayer (line 262) | class BERTLayer(nn.Module):
method __init__ (line 263) | def __init__(self, config):
method forward (line 269) | def forward(self, hidden_states, attention_mask):
class BERTEncoder (line 276) | class BERTEncoder(nn.Module):
method __init__ (line 277) | def __init__(self, config):
method forward (line 282) | def forward(self, hidden_states, attention_mask):
class BERTPooler (line 290) | class BERTPooler(nn.Module):
method __init__ (line 291) | def __init__(self, config):
method forward (line 296) | def forward(self, hidden_states):
class BertModel (line 305) | class BertModel(nn.Module):
method __init__ (line 322) | def __init__(self, config: BertConfig):
method forward (line 333) | def forward(self, input_ids, token_type_ids=None, attention_mask=None):
class BertForSequenceClassification (line 360) | class BertForSequenceClassification(nn.Module):
method __init__ (line 381) | def __init__(self, config, num_labels):
method forward (line 399) | def forward(self, input_ids, token_type_ids, attention_mask, labels=No...
class BertForSequenceClassificationLSTM (line 412) | class BertForSequenceClassificationLSTM(nn.Module):
method __init__ (line 433) | def __init__(self, config, num_labels):
method forward (line 452) | def forward(self, input_ids, token_type_ids, attention_mask, labels=No...
class BertForMultiLabelClassification (line 473) | class BertForMultiLabelClassification(nn.Module):
method __init__ (line 494) | def __init__(self, config, num_labels):
method forward (line 512) | def forward(self, input_ids, token_type_ids, attention_mask, labels=No...
class BertForQuestionAnswering (line 525) | class BertForQuestionAnswering(nn.Module):
method __init__ (line 544) | def __init__(self, config):
method forward (line 563) | def forward(self, input_ids, token_type_ids, attention_mask, start_pos...
FILE: bert/optimization.py
function warmup_cosine (line 22) | def warmup_cosine(x, warmup=0.002):
function warmup_constant (line 27) | def warmup_constant(x, warmup=0.002):
function warmup_linear (line 32) | def warmup_linear(x, warmup=0.002):
class BERTAdam (line 44) | class BERTAdam(Optimizer):
method __init__ (line 58) | def __init__(self, params, lr, warmup=-1, t_total=-1, schedule='warmup...
method get_lr (line 78) | def get_lr(self):
method to (line 93) | def to(self, device):
method initialize_step (line 99) | def initialize_step(self, initial_step):
method step (line 114) | def step(self, closure=None):
FILE: bert/run_classifier.py
class InputExample (line 43) | class InputExample(object):
method __init__ (line 46) | def __init__(self, guid, text_a, text_b=None, label=None):
class InputFeatures (line 64) | class InputFeatures(object):
method __init__ (line 67) | def __init__(self, input_ids, input_mask, segment_ids, label_id):
class DataProcessor (line 74) | class DataProcessor(object):
method get_train_examples (line 77) | def get_train_examples(self, data_dir):
method get_dev_examples (line 81) | def get_dev_examples(self, data_dir):
method get_labels (line 85) | def get_labels(self):
method _read_tsv (line 90) | def _read_tsv(cls, input_file, quotechar=None):
class MrpcProcessor (line 100) | class MrpcProcessor(DataProcessor):
method get_train_examples (line 103) | def get_train_examples(self, data_dir):
method get_dev_examples (line 109) | def get_dev_examples(self, data_dir):
method get_labels (line 114) | def get_labels(self):
method _create_examples (line 118) | def _create_examples(self, lines, set_type):
class MnliProcessor (line 133) | class MnliProcessor(DataProcessor):
method get_train_examples (line 136) | def get_train_examples(self, data_dir):
method get_dev_examples (line 141) | def get_dev_examples(self, data_dir):
method get_labels (line 147) | def get_labels(self):
method _create_examples (line 151) | def _create_examples(self, lines, set_type):
class ColaProcessor (line 166) | class ColaProcessor(DataProcessor):
method get_train_examples (line 169) | def get_train_examples(self, data_dir):
method get_dev_examples (line 174) | def get_dev_examples(self, data_dir):
method get_labels (line 179) | def get_labels(self):
method _create_examples (line 183) | def _create_examples(self, lines, set_type):
function convert_examples_to_features (line 195) | def convert_examples_to_features(examples, label_list, max_seq_length, t...
function _truncate_seq_pair (line 292) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function accuracy (line 308) | def accuracy(out, labels):
function main (line 312) | def main():
FILE: bert/run_classifier_2.py
class InputExample (line 45) | class InputExample(object):
method __init__ (line 48) | def __init__(self, guid, text_a, text_b=None, label=None):
class InputFeatures (line 66) | class InputFeatures(object):
method __init__ (line 69) | def __init__(self, input_ids, input_mask, segment_ids, label_id):
class DataProcessor (line 76) | class DataProcessor(object):
method get_train_examples (line 79) | def get_train_examples(self, data_dir):
method get_dev_examples (line 83) | def get_dev_examples(self, data_dir):
method get_labels (line 87) | def get_labels(self):
method _read_tsv (line 92) | def _read_tsv(cls, input_file, quotechar=None):
class MrpcProcessor (line 102) | class MrpcProcessor(DataProcessor):
method get_train_examples (line 105) | def get_train_examples(self, data_dir):
method get_dev_examples (line 111) | def get_dev_examples(self, data_dir):
method get_labels (line 116) | def get_labels(self):
method _create_examples (line 120) | def _create_examples(self, lines, set_type):
class SstProcessor (line 135) | class SstProcessor(DataProcessor):
method get_train_examples (line 138) | def get_train_examples(self, data_dir):
method get_dev_examples (line 144) | def get_dev_examples(self, data_dir):
method get_labels (line 149) | def get_labels(self):
method _create_examples (line 153) | def _create_examples(self, lines, set_type):
class AspectProcessor (line 167) | class AspectProcessor(DataProcessor):
method get_train_examples (line 170) | def get_train_examples(self, data_dir):
method get_dev_examples (line 176) | def get_dev_examples(self, data_dir):
method get_labels (line 181) | def get_labels(self):
method _create_examples (line 185) | def _create_examples(self, lines, set_type):
class MnliProcessor (line 202) | class MnliProcessor(DataProcessor):
method get_train_examples (line 205) | def get_train_examples(self, data_dir):
method get_dev_examples (line 210) | def get_dev_examples(self, data_dir):
method get_labels (line 216) | def get_labels(self):
method _create_examples (line 220) | def _create_examples(self, lines, set_type):
class ColaProcessor (line 235) | class ColaProcessor(DataProcessor):
method get_train_examples (line 238) | def get_train_examples(self, data_dir):
method get_dev_examples (line 243) | def get_dev_examples(self, data_dir):
method get_labels (line 248) | def get_labels(self):
method _create_examples (line 252) | def _create_examples(self, lines, set_type):
function convert_examples_to_features (line 264) | def convert_examples_to_features(examples, label_list, max_seq_length, t...
function _truncate_seq_pair (line 369) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function accuracy (line 388) | def accuracy(out, labels):
function accuracy1 (line 397) | def accuracy1(out, labels):
function accuracy2 (line 405) | def accuracy2(out, labels):
function accuracy3 (line 416) | def accuracy3(out, labels):
function accuracy3_2 (line 427) | def accuracy3_2(out, labels):
function accuracy4 (line 438) | def accuracy4(out, labels):
function accuracy5 (line 450) | def accuracy5(out, labels):
function accuracy7 (line 463) | def accuracy7(out, labels):
function score_list (line 475) | def score_list(predicted, golden):
function categories_from_output (line 497) | def categories_from_output(output, t = 0.45):
function main (line 520) | def main():
FILE: bert/run_classifier_ensemble.py
class InputExample (line 45) | class InputExample(object):
method __init__ (line 48) | def __init__(self, guid, text_a, text_b=None, label=None):
class InputFeatures (line 66) | class InputFeatures(object):
method __init__ (line 69) | def __init__(self, input_ids, input_mask, segment_ids, label_id):
class DataProcessor (line 76) | class DataProcessor(object):
method get_train_examples (line 79) | def get_train_examples(self, data_dir):
method get_dev_examples (line 83) | def get_dev_examples(self, data_dir):
method get_labels (line 87) | def get_labels(self):
method _read_tsv (line 92) | def _read_tsv(cls, input_file, quotechar=None):
class MrpcProcessor (line 102) | class MrpcProcessor(DataProcessor):
method get_train_examples (line 105) | def get_train_examples(self, data_dir):
method get_dev_examples (line 111) | def get_dev_examples(self, data_dir):
method get_labels (line 116) | def get_labels(self):
method _create_examples (line 120) | def _create_examples(self, lines, set_type):
class SstProcessor (line 135) | class SstProcessor(DataProcessor):
method get_train_examples (line 138) | def get_train_examples(self, data_dir):
method get_dev_examples (line 144) | def get_dev_examples(self, data_dir):
method get_labels (line 149) | def get_labels(self):
method _create_examples (line 153) | def _create_examples(self, lines, set_type):
class AspectProcessor (line 167) | class AspectProcessor(DataProcessor):
method get_train_examples (line 170) | def get_train_examples(self, data_dir):
method get_dev_examples (line 176) | def get_dev_examples(self, data_dir):
method get_test_examples (line 181) | def get_test_examples(self, data_dir):
method get_labels (line 186) | def get_labels(self):
method _create_examples (line 190) | def _create_examples(self, lines, set_type):
class MnliProcessor (line 210) | class MnliProcessor(DataProcessor):
method get_train_examples (line 213) | def get_train_examples(self, data_dir):
method get_dev_examples (line 218) | def get_dev_examples(self, data_dir):
method get_labels (line 224) | def get_labels(self):
method _create_examples (line 228) | def _create_examples(self, lines, set_type):
class ColaProcessor (line 243) | class ColaProcessor(DataProcessor):
method get_train_examples (line 246) | def get_train_examples(self, data_dir):
method get_dev_examples (line 251) | def get_dev_examples(self, data_dir):
method get_labels (line 256) | def get_labels(self):
method _create_examples (line 260) | def _create_examples(self, lines, set_type):
function convert_examples_to_features (line 272) | def convert_examples_to_features(examples, label_list, max_seq_length, t...
function _truncate_seq_pair (line 380) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function accuracy (line 399) | def accuracy(out, labels):
function accuracy1 (line 408) | def accuracy1(out, labels):
function accuracy2 (line 416) | def accuracy2(out, labels):
function accuracy3 (line 427) | def accuracy3(out, labels):
function accuracy3_2 (line 438) | def accuracy3_2(out, labels):
function accuracy4 (line 449) | def accuracy4(out, labels):
function accuracy5 (line 461) | def accuracy5(out, labels):
function accuracy7 (line 474) | def accuracy7(out, labels):
function score_list (line 487) | def score_list(predicted, golden):
function categories_from_output (line 509) | def categories_from_output(output, t = 0.45):
function main (line 532) | def main():
function train (line 639) | def train(args):
FILE: bert/run_classifier_ensemble_polarity.py
class InputExample (line 45) | class InputExample(object):
method __init__ (line 48) | def __init__(self, guid, text_a, text_b=None, label=None):
class InputFeatures (line 66) | class InputFeatures(object):
method __init__ (line 69) | def __init__(self, input_ids, input_mask, segment_ids, label_id):
class DataProcessor (line 76) | class DataProcessor(object):
method get_train_examples (line 79) | def get_train_examples(self, data_dir):
method get_dev_examples (line 83) | def get_dev_examples(self, data_dir):
method get_labels (line 87) | def get_labels(self):
method _read_tsv (line 92) | def _read_tsv(cls, input_file, quotechar=None):
class MrpcProcessor (line 102) | class MrpcProcessor(DataProcessor):
method get_train_examples (line 105) | def get_train_examples(self, data_dir):
method get_dev_examples (line 111) | def get_dev_examples(self, data_dir):
method get_labels (line 116) | def get_labels(self):
method _create_examples (line 120) | def _create_examples(self, lines, set_type):
class PolarityProcessor (line 135) | class PolarityProcessor(DataProcessor):
method get_train_examples (line 138) | def get_train_examples(self, data_dir):
method get_dev_examples (line 144) | def get_dev_examples(self, data_dir):
method get_test_examples (line 149) | def get_test_examples(self, data_dir):
method get_labels (line 154) | def get_labels(self):
method _create_examples (line 158) | def _create_examples(self, lines, set_type):
class SstProcessor (line 173) | class SstProcessor(DataProcessor):
method get_train_examples (line 176) | def get_train_examples(self, data_dir):
method get_dev_examples (line 182) | def get_dev_examples(self, data_dir):
method get_test_examples (line 187) | def get_test_examples(self, data_dir):
method get_labels (line 192) | def get_labels(self):
method _create_examples (line 196) | def _create_examples(self, lines, set_type):
class AspectProcessor (line 210) | class AspectProcessor(DataProcessor):
method get_train_examples (line 213) | def get_train_examples(self, data_dir):
method get_dev_examples (line 219) | def get_dev_examples(self, data_dir):
method get_test_examples (line 224) | def get_test_examples(self, data_dir):
method get_labels (line 229) | def get_labels(self):
method _create_examples (line 233) | def _create_examples(self, lines, set_type):
class MnliProcessor (line 253) | class MnliProcessor(DataProcessor):
method get_train_examples (line 256) | def get_train_examples(self, data_dir):
method get_dev_examples (line 261) | def get_dev_examples(self, data_dir):
method get_labels (line 267) | def get_labels(self):
method _create_examples (line 271) | def _create_examples(self, lines, set_type):
class ColaProcessor (line 286) | class ColaProcessor(DataProcessor):
method get_train_examples (line 289) | def get_train_examples(self, data_dir):
method get_dev_examples (line 294) | def get_dev_examples(self, data_dir):
method get_labels (line 299) | def get_labels(self):
method _create_examples (line 303) | def _create_examples(self, lines, set_type):
function convert_examples_to_features (line 315) | def convert_examples_to_features(examples, label_list, max_seq_length, t...
function _truncate_seq_pair (line 414) | def _truncate_seq_pair(tokens_a, tokens_b, max_length):
function accuracy (line 434) | def accuracy(out, labels):
function score2 (line 439) | def score2(predicted, golden):
function main (line 455) | def main():
function do_eval (line 572) | def do_eval(model, device, eval_features, args):
function train (line 628) | def train(args):
FILE: bert/run_squad.py
class SquadExample (line 46) | class SquadExample(object):
method __init__ (line 49) | def __init__(self,
method __str__ (line 63) | def __str__(self):
method __repr__ (line 66) | def __repr__(self):
class InputFeatures (line 79) | class InputFeatures(object):
method __init__ (line 82) | def __init__(self,
function read_squad_examples (line 107) | def read_squad_examples(input_file, is_training):
function convert_examples_to_features (line 176) | def convert_examples_to_features(examples, tokenizer, max_seq_length,
function _improve_answer_span (line 329) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
function _check_is_max_context (line 366) | def _check_is_max_context(doc_spans, cur_span_index, position):
function write_predictions (line 408) | def write_predictions(all_examples, all_features, all_results, n_best_size,
function get_final_text (line 542) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
function _get_best_indexes (line 638) | def _get_best_indexes(logits, n_best_size):
function _compute_softmax (line 650) | def _compute_softmax(scores):
function main (line 673) | def main():
FILE: bert/tests/modeling_test.py
class BertModelTest (line 28) | class BertModelTest(unittest.TestCase):
class BertModelTester (line 29) | class BertModelTester(object):
method __init__ (line 31) | def __init__(self,
method create_model (line 69) | def create_model(self):
method check_output (line 104) | def check_output(self, result):
method test_default (line 111) | def test_default(self):
method test_config_to_json_string (line 114) | def test_config_to_json_string(self):
method run_tester (line 120) | def run_tester(self, tester):
method ids_tensor (line 125) | def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
FILE: bert/tests/optimization_test.py
class OptimizationTest (line 25) | class OptimizationTest(unittest.TestCase):
method assertListAlmostEqual (line 27) | def assertListAlmostEqual(self, list1, list2, tol):
method test_adam (line 32) | def test_adam(self):
FILE: bert/tests/tokenization_test.py
class TokenizationTest (line 25) | class TokenizationTest(unittest.TestCase):
method test_full_tokenizer (line 27) | def test_full_tokenizer(self):
method test_basic_tokenizer_lower (line 46) | def test_basic_tokenizer_lower(self):
method test_basic_tokenizer_no_lower (line 54) | def test_basic_tokenizer_no_lower(self):
method test_wordpiece_tokenizer (line 61) | def test_wordpiece_tokenizer(self):
method test_convert_tokens_to_ids (line 81) | def test_convert_tokens_to_ids(self):
method test_is_whitespace (line 95) | def test_is_whitespace(self):
method test_is_control (line 105) | def test_is_control(self):
method test_is_punctuation (line 113) | def test_is_punctuation(self):
FILE: bert/tokenization.py
function convert_to_unicode (line 27) | def convert_to_unicode(text):
function printable_text (line 47) | def printable_text(text):
function load_vocab (line 70) | def load_vocab(vocab_file):
function convert_tokens_to_ids (line 85) | def convert_tokens_to_ids(vocab, tokens):
function whitespace_tokenize (line 93) | def whitespace_tokenize(text):
class FullTokenizer (line 102) | class FullTokenizer(object):
method __init__ (line 105) | def __init__(self, vocab_file, do_lower_case=True):
method tokenize (line 110) | def tokenize(self, text):
method convert_tokens_to_ids (line 118) | def convert_tokens_to_ids(self, tokens):
class BasicTokenizer (line 122) | class BasicTokenizer(object):
method __init__ (line 125) | def __init__(self, do_lower_case=True):
method tokenize (line 133) | def tokenize(self, text):
method _run_strip_accents (line 157) | def _run_strip_accents(self, text):
method _run_split_on_punc (line 168) | def _run_split_on_punc(self, text):
method _tokenize_chinese_chars (line 188) | def _tokenize_chinese_chars(self, text):
method _is_chinese_char (line 201) | def _is_chinese_char(self, cp):
method _clean_text (line 223) | def _clean_text(self, text):
class WordpieceTokenizer (line 237) | class WordpieceTokenizer(object):
method __init__ (line 240) | def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=...
method tokenize (line 245) | def tokenize(self, text):
function _is_whitespace (line 299) | def _is_whitespace(char):
function _is_control (line 311) | def _is_control(char):
function _is_punctuation (line 323) | def _is_punctuation(char):
FILE: data/generate_dev.py
function kfold_split (line 5) | def kfold_split(length, k=5):
function load_attr_data (line 17) | def load_attr_data(filename):
FILE: data/generate_dev_polarity.py
function kfold_split (line 6) | def kfold_split(length, k=5):
function load_abp_raw (line 19) | def load_abp_raw(filename): # aspect_based polarity
function splits (line 24) | def splits(fo, train_index, dev_index):
function count_instance (line 52) | def count_instance(fo):
FILE: data/generate_test.py
function kfold_split (line 5) | def kfold_split(length, k=5):
function load_attr_data (line 17) | def load_attr_data(filename):
FILE: polarity_level_aspect/ab_polarity.py
class Classifier (line 65) | class Classifier: # Neural network method
method __init__ (line 66) | def __init__(self):
method train_from_data (line 71) | def train_from_data(self, train_raw_data, test_raw_data, W, word2index...
method split_dev (line 145) | def split_dev(self, train_texts, train_t, train_ow):
method predict (line 175) | def predict(self, rnn, test_raw_data, word2index, args):
function kfold_split (line 203) | def kfold_split(length, k=5):
function splits (line 216) | def splits(fo, train_index, dev_index):
function count_instance (line 244) | def count_instance(fo):
function ensemble (line 258) | def ensemble():
function main (line 299) | def main():
function test (line 339) | def test():
function load_elmo (line 388) | def load_elmo(test_texts):
function get_oof (line 403) | def get_oof(clfs, fo, test_data, word2index, polarity_dict, attr_dict):
function get_oof_test (line 454) | def get_oof_test(clfs, test_data):
function load_oof_dir (line 476) | def load_oof_dir(dir):
function load_oof_test (line 484) | def load_oof_test(dir):
function load_oof (line 490) | def load_oof(clfs, fo, test_data, word2index, polarity_dict, attr_dict):
function load_oof3 (line 503) | def load_oof3(clfs, fo, test_data, word2index, polarity_dict, attr_dict)...
function softmax (line 516) | def softmax(x):
function stacking (line 523) | def stacking():
function blending (line 602) | def blending():
FILE: polarity_level_aspect/networks.py
class WordRep (line 6) | class WordRep(nn.Module):
method __init__ (line 7) | def __init__(self, vocab_size, word_embed_dim, char_size, args):
method forward (line 23) | def forward(self, input_tensors):
class AT_LSTM (line 74) | class AT_LSTM(nn.Module):
method __init__ (line 75) | def __init__(self, word_embed_dim, output_size, vocab_size, aspect_siz...
method forward (line 104) | def forward(self, input_tensors):
class GCAE (line 155) | class GCAE(nn.Module):
method __init__ (line 156) | def __init__(self, word_embed_dim, output_size, vocab_size, aspect_siz...
method forward (line 180) | def forward(self, input_tensors):
class HEAT (line 203) | class HEAT(nn.Module):
method __init__ (line 204) | def __init__(self, word_embed_dim, output_size, vocab_size, aspect_siz...
method forward (line 246) | def forward(self, input_tensors):
FILE: utils/Data.py
class Data (line 5) | class Data:
method __init__ (line 6) | def __init__(self, train_raw_data, word2index, attr_dict=None, args=No...
method get (line 23) | def get(self, index, cuda_flag):
method add_feature (line 42) | def add_feature(self, features):
method get_input (line 45) | def get_input(self, index, cuda_flag):
method to_tensor (line 48) | def to_tensor(self, text, word2id):
method label2tensor (line 67) | def label2tensor(self, labels, attrDict):
method generate_char_tensor (line 81) | def generate_char_tensor(self, text, char2id):
class Data2 (line 102) | class Data2: # data for polarity:
method __init__ (line 103) | def __init__(self, train_raw_data, word2index, polarity_dict=None, arg...
method get (line 123) | def get(self, index, cuda_flag):
method to_tensor (line 142) | def to_tensor(self, text, word2id):
method label2tensor (line 157) | def label2tensor(self, labels, attrDict):
class Data3 (line 178) | class Data3: # data for aspect_polarity:
method __init__ (line 179) | def __init__(self, train_raw_data, word2index, polarity_dict=None, arg...
method get (line 206) | def get(self, index, cuda_flag):
method add_feature (line 231) | def add_feature(self, features):
method to_tensor (line 234) | def to_tensor(self, text, word2id):
method label2tensor (line 249) | def label2tensor(self, labels, attrDict):
FILE: utils/data_helper.py
function load_w2v (line 42) | def load_w2v(filename):
function load_attr_data (line 66) | def load_attr_data(filename):
function load_abp_data (line 85) | def load_abp_data(filename, dev=False, folds=5): # aspect_based polarity
function load_abp_raw (line 121) | def load_abp_raw(filename): # aspect_based polarity
function load_test_data (line 127) | def load_test_data(filename):
function load_ab_test (line 140) | def load_ab_test(f1, f2):
function load_pos (line 166) | def load_pos(ds):
function load_char2id (line 188) | def load_char2id(ds):
function generate_sentence_label (line 203) | def generate_sentence_label(train_texts, train_ow): # combine all ow la...
function parse_json (line 226) | def parse_json(filename):
FILE: utils/evaluate.py
function score_list (line 5) | def score_list(predicted, golden):
function score (line 27) | def score(predicted, golden):
function label_analysis (line 49) | def label_analysis(predicted, golden):
function score2 (line 63) | def score2(predicted, golden):
function label_analysis2 (line 82) | def label_analysis2(predicted, golden, label_num):
function score_aspect (line 95) | def score_aspect(predict_list, true_list):
FILE: utils/prepare_w2v.py
function prepare_w2v (line 13) | def prepare_w2v(ds=None):
function load_vocab (line 77) | def load_vocab():
function load_ft (line 112) | def load_ft():
function test_miss (line 148) | def test_miss():
function test (line 175) | def test():
FILE: utils/prepare_w2v_with_UNK.py
function prepare_w2v (line 13) | def prepare_w2v(ds=None):
function load_vocab (line 78) | def load_vocab():
function load_ft (line 113) | def load_ft():
function test_miss (line 149) | def test_miss():
function test (line 176) | def test():
FILE: utils/train.py
function train (line 15) | def train(rnn, train_data, dev_data, test_data, attr_dict, W, args):
function optimize_step (line 117) | def optimize_step(rnn, input_tensors, category_tensor, optimizer):
function customized_loss2 (line 133) | def customized_loss2(input, target):
function category_from_output (line 162) | def category_from_output(output):
function categories_from_output (line 170) | def categories_from_output(output, threshold=[0.45 for _ in range(10)]):
function predict (line 196) | def predict(rnn, dev_data, args):
function predict_with_logit (line 223) | def predict_with_logit(rnn, dev_data, args):
FILE: utils/train2.py
function train (line 15) | def train(rnn, train_data, dev_data, test_data, attr_dict, W, args):
function optimize_step (line 117) | def optimize_step(rnn, input_tensors, category_tensor, optimizer):
function customized_loss2 (line 133) | def customized_loss2(input, target):
function category_from_output (line 162) | def category_from_output(output):
function categories_from_output (line 170) | def categories_from_output(output, threshold=[0.45 for _ in range(10)]):
function predict (line 196) | def predict(rnn, dev_data, args):
function predict_with_logit (line 223) | def predict_with_logit(rnn, dev_data, args):
FILE: utils/train_single.py
function train (line 15) | def train(rnn, train_data, dev_data, test_data, attr_dict, W, args):
function optimize_step (line 138) | def optimize_step(rnn, input_tensors, category_tensor, optimizer):
function category_from_output (line 157) | def category_from_output(output):
function categories_from_output (line 165) | def categories_from_output(output):
function predict (line 183) | def predict(rnn, dev_data, args):
function predict_with_logit (line 213) | def predict_with_logit(rnn, dev_data, args):
FILE: utils/utils.py
function time_since (line 4) | def time_since(since):
Copy disabled (too large)
Download .json
Condensed preview — 261 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (62,670K chars).
[
{
"path": "ReadMe.md",
"chars": 13006,
"preview": "# 汽车行业用户观点主题及情感识别 (Just a test 团队决赛一等奖方案)\n\n\n## 注意:\n* 目前开源的代码按照下面的说明应该是可以跑通的,但是因为整个框架比较复杂所以可能有文档没有说清楚的地方,遇到问题可以给我们提issue,"
},
{
"path": "attribute_level/attribute.py",
"chars": 18265,
"preview": "import codecs\n# import torch\nimport sys\nfrom sklearn.model_selection import StratifiedKFold\nfrom sklearn.linear_model im"
},
{
"path": "attribute_level/best_test.sh",
"chars": 181,
"preview": " CUDA_VISIBLE_DEVICES=2 python attribute.py --mode 3 --use_elmo 2 --test_dir cp_CNN_0#cp_CNN_ft2#cp_CNN_2#cp_CNN_tc#cp_A"
},
{
"path": "attribute_level/networks2.py",
"chars": 11948,
"preview": "import torch\nfrom torch import nn\nimport torch.nn.functional as F\n\n\nclass WordRep(nn.Module):\n def __init__(self, voc"
},
{
"path": "bert/CONTRIBUTING.md",
"chars": 1323,
"preview": "# How to Contribute\n\nBERT needs to maintain permanent compatibility with the pre-trained model files,\nso we do not plan "
},
{
"path": "bert/MSRP/LICENSE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "bert/MSRP/Microsoft Shared Source License.htm",
"chars": 9685,
"preview": "<html>\n\n<head>\n<meta http-equiv=Content-Type content=\"text/html; charset=shift_jis\">\n<meta name=Generator content=\"Micro"
},
{
"path": "bert/MSRP/Microsoft Shared Source License.rtf",
"chars": 14468,
"preview": "{\\rtf1\\ansi\\ansicpg932\\uc2\\deff0\\stshfdbch11\\stshfloch21\\stshfhich21\\stshfbi0\\deflang1033\\deflangfe1041{\\fonttbl{\\f0\\fro"
},
{
"path": "bert/MSRP/msr_paraphrase_README.htm",
"chars": 69642,
"preview": "<html>\n\n<head>\n<meta http-equiv=Content-Type content=\"text/html; charset=windows-1252\">\n<meta name=Generator content=\"Mi"
},
{
"path": "bert/MSRP/msr_paraphrase_README.rtf",
"chars": 179214,
"preview": "{\\rtf1\\ansi\\ansicpg932\\uc2\\deff0\\stshfdbch11\\stshfloch0\\stshfhich0\\stshfbi0\\deflang1033\\deflangfe1041{\\fonttbl{\\f0\\froma"
},
{
"path": "bert/MSRP/msr_paraphrase_data.txt",
"chars": 1936771,
"preview": "Sentence ID\tString\tAuthor\tURL\tAgency\tDate\tWeb Date\n702876\tAmrozi accused his brother, whom he called \"the witness\", of "
},
{
"path": "bert/MSRP/msr_paraphrase_test.txt",
"chars": 431041,
"preview": "Quality\t#1 ID\t#2 ID\t#1 String\t#2 String\n1\t1089874\t1089925\tPCCW's chief operating officer, Mike Butcher, and Alex Arena,"
},
{
"path": "bert/MSRP/msr_paraphrase_train.txt",
"chars": 1022381,
"preview": "Quality\t#1 ID\t#2 ID\t#1 String\t#2 String\n1\t702876\t702977\tAmrozi accused his brother, whom he called \"the witness\", of de"
},
{
"path": "bert/README.md",
"chars": 14368,
"preview": "# PyTorch implementation of Google AI's BERT model with a script to load Google's pre-trained models\n\n## Introduction\n\nT"
},
{
"path": "bert/__init__.py",
"chars": 616,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 "
},
{
"path": "bert/convert_tf_checkpoint_to_pytorch.py",
"chars": 3965,
"preview": "# coding=utf-8\n# Copyright 2018 The HugginFace Inc. team.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "bert/convert_tf_checkpoint_to_pytorch_raw.py",
"chars": 3535,
"preview": "# coding=utf-8\n# Copyright 2018 The HugginFace Inc. team.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Licen"
},
{
"path": "bert/download_MSRP.py",
"chars": 7033,
"preview": "import os\nimport sys\nimport shutil\nimport argparse\nimport tempfile\nimport urllib.request\nimport zipfile\nimport codecs\n\nT"
},
{
"path": "bert/extract_features.py",
"chars": 12641,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/glue_data/aspect_ensemble_online/1/dev.ind",
"chars": 10547,
"preview": "2525\n8981\n845\n5357\n4227\n3484\n1325\n2232\n218\n9330\n3614\n9306\n1727\n6500\n8957\n7463\n7294\n7266\n741\n2315\n9639\n7432\n1713\n7035\n909"
},
{
"path": "bert/glue_data/aspect_ensemble_online/1/dev.tsv",
"chars": 161657,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "bert/glue_data/aspect_ensemble_online/1/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "bert/glue_data/aspect_ensemble_online/1/train.tsv",
"chars": 668272,
"preview": "text labels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\n是的 , 油耗 到 8.6 了 。\t油耗\nSE 是 EJ 发动机 , 不是 FB 的\t动力\n我 2.5 尊贵 , 最"
},
{
"path": "bert/glue_data/aspect_ensemble_online/2/dev.ind",
"chars": 10576,
"preview": "5865\n10073\n4769\n6202\n6546\n4100\n6532\n1645\n9530\n5425\n3500\n3875\n90\n7560\n5836\n9264\n4173\n4880\n6320\n33\n3923\n5249\n2142\n584\n7299"
},
{
"path": "bert/glue_data/aspect_ensemble_online/2/dev.tsv",
"chars": 166939,
"preview": "text labels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\n是的 , 油耗 到 8.6 了 。\t油耗\nSE 是 EJ 发动机 , 不是 FB 的\t动力\n我 2.5 尊贵 , 最"
},
{
"path": "bert/glue_data/aspect_ensemble_online/2/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "bert/glue_data/aspect_ensemble_online/2/train.tsv",
"chars": 662990,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "bert/glue_data/aspect_ensemble_online/3/dev.ind",
"chars": 10600,
"preview": "3256\n9608\n10041\n4386\n8191\n9039\n6785\n4779\n5660\n5908\n6540\n7829\n4394\n9263\n781\n2936\n9409\n7132\n3029\n6040\n8125\n6053\n7382\n7641\n"
},
{
"path": "bert/glue_data/aspect_ensemble_online/3/dev.tsv",
"chars": 163053,
"preview": "text labels\n油耗 8.6 是 怎样 做到 的 , 我 的 已经 12\t油耗\n是 个 四 个 轱辘 带 发动机 的 车 都 能 进 藏\t动力\n空调 开 了 本来 就 会 影响 自动 启 停 , 不然 压缩机 靠 什么 工作 "
},
{
"path": "bert/glue_data/aspect_ensemble_online/3/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "bert/glue_data/aspect_ensemble_online/3/train.tsv",
"chars": 666876,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "bert/glue_data/aspect_ensemble_online/4/dev.ind",
"chars": 10519,
"preview": "4183\n9886\n3203\n1623\n2978\n1412\n5380\n10303\n6359\n8627\n54\n9245\n4759\n7418\n3525\n1657\n8537\n2000\n6591\n7605\n2635\n7164\n510\n2096\n81"
},
{
"path": "bert/glue_data/aspect_ensemble_online/4/dev.tsv",
"chars": 168585,
"preview": "text labels\n我 虽然 全 款 森林 人 , 但是 我 一直 在 黑 啊 , 我 是 用 大 价钱 在 卧底 啊 , 期待 有一天 长城 赶紧 的 把 富士 重工 收购 了 , 把 水平 对 置 拿 过来 我们 就 牛逼 了"
},
{
"path": "bert/glue_data/aspect_ensemble_online/4/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "bert/glue_data/aspect_ensemble_online/4/train.tsv",
"chars": 661344,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "bert/glue_data/aspect_ensemble_online/5/dev.ind",
"chars": 10551,
"preview": "4254\n7832\n8194\n9638\n8198\n930\n4273\n1665\n1109\n931\n540\n2681\n8838\n2717\n8382\n1676\n644\n9163\n4371\n879\n2349\n8776\n6726\n4202\n5464\n"
},
{
"path": "bert/glue_data/aspect_ensemble_online/5/dev.tsv",
"chars": 169503,
"preview": "text labels\n平台 更新 , 外形 变化 不大 , 最 关键 的 是 内饰 肯定 能 好 不少 , 至少 能 保持 新翼 豹 的 水平 就 不错 。\t内饰\n上 130 - 140 还是 很 轻松 的 , 爬坡 除外 但 也 "
},
{
"path": "bert/glue_data/aspect_ensemble_online/5/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "bert/glue_data/aspect_ensemble_online/5/train.tsv",
"chars": 660426,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "bert/glue_data/generate_npy.py",
"chars": 3237,
"preview": "import numpy as np\nimport csv\nimport os\nseed =1024\nimport torch\n\ndef categories_from_output(output, t = 0.45):\n # cat"
},
{
"path": "bert/glue_data/generate_npy_for_polarity.py",
"chars": 3302,
"preview": "import numpy as np\nimport csv\nimport os\nseed =1024\nimport torch\n\ndef categories_from_output(output, t = 0.45):\n # cat"
},
{
"path": "bert/glue_data/polarity_ensemble_online/1/dev.ind",
"chars": 12784,
"preview": "3014\n10702\n950\n6368\n5014\n4117\n1582\n2647\n2648\n222\n11081\n4255\n11055\n2060\n7715\n7716\n10677\n8949\n8950\n8749\n8712\n8713\n835\n2739"
},
{
"path": "bert/glue_data/polarity_ensemble_online/1/dev.tsv",
"chars": 206294,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "bert/glue_data/polarity_ensemble_online/1/train.tsv",
"chars": 850967,
"preview": "text\taspect\tlabels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\t0\n是的 , 油耗 到 8.6 了 。\t油耗\t0\nSE 是 EJ 发动机 , 不是 FB 的\t动力\t0\n我 "
},
{
"path": "bert/glue_data/polarity_ensemble_online/2/dev.ind",
"chars": 12955,
"preview": "6943\n11905\n5724\n7350\n7802\n7803\n7804\n7805\n4846\n7780\n1952\n11311\n6436\n4137\n4563\n92\n9069\n9070\n6909\n11012\n4942\n5856\n7500\n7501"
},
{
"path": "bert/glue_data/polarity_ensemble_online/2/dev.tsv",
"chars": 214290,
"preview": "text\taspect\tlabels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\t0\n是的 , 油耗 到 8.6 了 。\t油耗\t0\nSE 是 EJ 发动机 , 不是 FB 的\t动力\t0\n我 "
},
{
"path": "bert/glue_data/polarity_ensemble_online/2/train.tsv",
"chars": 842971,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "bert/glue_data/polarity_ensemble_online/3/dev.ind",
"chars": 12934,
"preview": "3882\n11397\n11862\n5217\n5218\n9832\n10773\n8162\n5734\n6671\n6999\n7791\n7792\n7793\n7794\n9400\n5228\n5229\n11011\n880\n3531\n11169\n8552\n3"
},
{
"path": "bert/glue_data/polarity_ensemble_online/3/dev.tsv",
"chars": 207942,
"preview": "text\taspect\tlabels\n油耗 8.6 是 怎样 做到 的 , 我 的 已经 12\t油耗\t0\n是 个 四 个 轱辘 带 发动机 的 车 都 能 进 藏\t动力\t0\n空调 开 了 本来 就 会 影响 自动 启 停 , 不然 压缩机 "
},
{
"path": "bert/glue_data/polarity_ensemble_online/3/train.tsv",
"chars": 849319,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "bert/glue_data/polarity_ensemble_online/4/dev.ind",
"chars": 12887,
"preview": "4957\n4958\n11685\n3825\n3826\n3827\n3828\n1929\n3578\n1702\n1703\n6391\n12178\n12179\n7550\n10306\n55\n10991\n5713\n8897\n4162\n1965\n10212\n2"
},
{
"path": "bert/glue_data/polarity_ensemble_online/4/dev.tsv",
"chars": 214854,
"preview": "text\taspect\tlabels\n我 虽然 全 款 森林 人 , 但是 我 一直 在 黑 啊 , 我 是 用 大 价钱 在 卧底 啊 , 期待 有一天 长城 赶紧 的 把 富士 重工 收购 了 , 把 水平 对 置 拿 过来 我们 就 "
},
{
"path": "bert/glue_data/polarity_ensemble_online/4/train.tsv",
"chars": 842407,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "bert/glue_data/polarity_ensemble_online/5/dev.ind",
"chars": 12746,
"preview": "5046\n9404\n9835\n11429\n11430\n9839\n1039\n5066\n1975\n1232\n1040\n587\n3217\n10548\n3269\n10040\n1988\n717\n10909\n5200\n986\n2773\n10483\n80"
},
{
"path": "bert/glue_data/polarity_ensemble_online/5/dev.tsv",
"chars": 213695,
"preview": "text\taspect\tlabels\n平台 更新 , 外形 变化 不大 , 最 关键 的 是 内饰 肯定 能 好 不少 , 至少 能 保持 新翼 豹 的 水平 就 不错 。\t内饰\t1\n上 130 - 140 还是 很 轻松 的 , 爬坡 除"
},
{
"path": "bert/glue_data/polarity_ensemble_online/5/train.tsv",
"chars": 843566,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "bert/glue_data/polarity_ensemble_online/test.tsv",
"chars": 462510,
"preview": "sentence\taspect\tlabel\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t配置\t0\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来"
},
{
"path": "bert/glue_data/test.tsv",
"chars": 181729,
"preview": "text labels\n欧蓝德 , 价格 便宜 , 森林 人 太贵 啦 !\t价格\n楼主 什么 时候 提 的 车 , 南昌 优惠 多少 啊\t价格\n吉林 , 2.5 优惠 20000 , 送 三 年 九 次 保养 , 贴膜\t价格\n那 估计"
},
{
"path": "bert/modeling.py",
"chars": 25534,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/notebooks/Comparing TF and PT models SQuAD predictions.ipynb",
"chars": 197913,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Comparing TensorFlow (original) a"
},
{
"path": "bert/notebooks/Comparing TF and PT models.ipynb",
"chars": 52521,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Comparing TensorFlow (original) a"
},
{
"path": "bert/optimization.py",
"chars": 7624,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/requirements.txt",
"chars": 10,
"preview": "torch\ntqdm"
},
{
"path": "bert/run_classifier.py",
"chars": 24771,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/run_classifier_2.py",
"chars": 32994,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/run_classifier_ensemble.py",
"chars": 36061,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/run_classifier_ensemble_polarity.py",
"chars": 38546,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/run_squad.py",
"chars": 41350,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "bert/samples/input.txt",
"chars": 52,
"preview": "Who was Jim Henson ? ||| Jim Henson was a puppeteer\n"
},
{
"path": "bert/samples/sample_text.txt",
"chars": 4364,
"preview": "This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত\nText should be one-sentence-per-line, wi"
},
{
"path": "bert/tests/modeling_test.py",
"chars": 5443,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 "
},
{
"path": "bert/tests/optimization_test.py",
"chars": 1842,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 "
},
{
"path": "bert/tests/tokenization_test.py",
"chars": 4461,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors.\n#\n# Licensed under the Apache License, Version 2.0 "
},
{
"path": "bert/tokenization.py",
"chars": 11410,
"preview": "# coding=utf-8\n# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.\n#\n# Licensed under the"
},
{
"path": "data/aspect_ensemble_online/1/dev.ind",
"chars": 10547,
"preview": "2525\n8981\n845\n5357\n4227\n3484\n1325\n2232\n218\n9330\n3614\n9306\n1727\n6500\n8957\n7463\n7294\n7266\n741\n2315\n9639\n7432\n1713\n7035\n909"
},
{
"path": "data/aspect_ensemble_online/1/dev.tsv",
"chars": 161657,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "data/aspect_ensemble_online/1/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "data/aspect_ensemble_online/1/train.tsv",
"chars": 668272,
"preview": "text labels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\n是的 , 油耗 到 8.6 了 。\t油耗\nSE 是 EJ 发动机 , 不是 FB 的\t动力\n我 2.5 尊贵 , 最"
},
{
"path": "data/aspect_ensemble_online/2/dev.ind",
"chars": 10576,
"preview": "5865\n10073\n4769\n6202\n6546\n4100\n6532\n1645\n9530\n5425\n3500\n3875\n90\n7560\n5836\n9264\n4173\n4880\n6320\n33\n3923\n5249\n2142\n584\n7299"
},
{
"path": "data/aspect_ensemble_online/2/dev.tsv",
"chars": 166939,
"preview": "text labels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\n是的 , 油耗 到 8.6 了 。\t油耗\nSE 是 EJ 发动机 , 不是 FB 的\t动力\n我 2.5 尊贵 , 最"
},
{
"path": "data/aspect_ensemble_online/2/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "data/aspect_ensemble_online/2/train.tsv",
"chars": 662990,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "data/aspect_ensemble_online/3/dev.ind",
"chars": 10600,
"preview": "3256\n9608\n10041\n4386\n8191\n9039\n6785\n4779\n5660\n5908\n6540\n7829\n4394\n9263\n781\n2936\n9409\n7132\n3029\n6040\n8125\n6053\n7382\n7641\n"
},
{
"path": "data/aspect_ensemble_online/3/dev.tsv",
"chars": 163053,
"preview": "text labels\n油耗 8.6 是 怎样 做到 的 , 我 的 已经 12\t油耗\n是 个 四 个 轱辘 带 发动机 的 车 都 能 进 藏\t动力\n空调 开 了 本来 就 会 影响 自动 启 停 , 不然 压缩机 靠 什么 工作 "
},
{
"path": "data/aspect_ensemble_online/3/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "data/aspect_ensemble_online/3/train.tsv",
"chars": 666876,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "data/aspect_ensemble_online/4/dev.ind",
"chars": 10519,
"preview": "4183\n9886\n3203\n1623\n2978\n1412\n5380\n10303\n6359\n8627\n54\n9245\n4759\n7418\n3525\n1657\n8537\n2000\n6591\n7605\n2635\n7164\n510\n2096\n81"
},
{
"path": "data/aspect_ensemble_online/4/dev.tsv",
"chars": 168585,
"preview": "text labels\n我 虽然 全 款 森林 人 , 但是 我 一直 在 黑 啊 , 我 是 用 大 价钱 在 卧底 啊 , 期待 有一天 长城 赶紧 的 把 富士 重工 收购 了 , 把 水平 对 置 拿 过来 我们 就 牛逼 了"
},
{
"path": "data/aspect_ensemble_online/4/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "data/aspect_ensemble_online/4/train.tsv",
"chars": 661344,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "data/aspect_ensemble_online/5/dev.ind",
"chars": 10551,
"preview": "4254\n7832\n8194\n9638\n8198\n930\n4273\n1665\n1109\n931\n540\n2681\n8838\n2717\n8382\n1676\n644\n9163\n4371\n879\n2349\n8776\n6726\n4202\n5464\n"
},
{
"path": "data/aspect_ensemble_online/5/dev.tsv",
"chars": 169503,
"preview": "text labels\n平台 更新 , 外形 变化 不大 , 最 关键 的 是 内饰 肯定 能 好 不少 , 至少 能 保持 新翼 豹 的 水平 就 不错 。\t内饰\n上 130 - 140 还是 很 轻松 的 , 爬坡 除外 但 也 "
},
{
"path": "data/aspect_ensemble_online/5/test.tsv",
"chars": 377097,
"preview": "text labels\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机"
},
{
"path": "data/aspect_ensemble_online/5/train.tsv",
"chars": 660426,
"preview": "text labels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\n价格 差不多 明显 车 要好 不少\t价格\n我 有 一 辆"
},
{
"path": "data/backup/test_predict_aspect_ensemble_['Mon', 'Nov', '19', '20_30_28', '2018'].txt",
"chars": 18052,
"preview": "配置\n操控\n动力\n配置\n舒适性\n舒适性\n配置\n空间\n油耗\n操控|油耗\n价格\n舒适性\n油耗\n动力\n操控\n油耗\n动力|操控\n操控\n空间\n外观\n舒适性\n动力\n操控|舒适性\n价格|空间\n空间\n操控\n配置\n动力\n内饰\n动力\n动力\n动力\n动力\n价格\n操"
},
{
"path": "data/backup/test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_48_57', '2018'].txt",
"chars": 17511,
"preview": "配置\n操控\n动力\n配置\n舒适性\n舒适性\n配置\n动力|空间\n油耗\n油耗\n价格\n舒适性\n油耗\n动力\n操控\n油耗\n动力|操控\n操控\n空间\n外观\n舒适性\n动力\n操控|舒适性\n空间\n空间\n操控\n空间\n动力\n内饰\n动力\n动力\n动力\n动力\n价格\n操控\n内"
},
{
"path": "data/backup/test_predict_aspect_ensemble_['Tue', 'Nov', '13', '21_49_23', '2018'].txt",
"chars": 18052,
"preview": "配置\n操控\n动力\n配置\n舒适性\n舒适性\n配置\n空间\n油耗\n操控|油耗\n价格\n舒适性\n油耗\n动力\n操控\n油耗\n动力|操控\n操控\n空间\n外观\n舒适性\n动力\n操控|舒适性\n价格|空间\n空间\n操控\n配置\n动力\n内饰\n动力\n动力\n动力\n动力\n价格\n操"
},
{
"path": "data/backup/test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_25_00', '2018'].txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/backup/test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_26_03', '2018'].txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/backup/test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_30_35', '2018'].txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/backup/test_predict_polarity_ensemble_['Mon', 'Nov', '19', '20_42_02', '2018'].txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/backup/test_predict_polarity_ensemble_['Tue', 'Nov', '13', '21_49_37', '2018'].txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/build_test_for_predict.py",
"chars": 915,
"preview": "import codecs\n\nf_sentence = codecs.open(\"test.txt\", encoding='utf-8').readlines()\nfw = codecs.open(\"test.tsv\", 'w', enco"
},
{
"path": "data/generate_dev.py",
"chars": 1983,
"preview": "import codecs\nimport numpy as np\nseed = 1024\n\ndef kfold_split(length, k=5):\n np.random.seed(seed)\n index_list = np"
},
{
"path": "data/generate_dev_polarity.py",
"chars": 3325,
"preview": "import codecs\nimport numpy as np\nseed = 1024\n\n\ndef kfold_split(length, k=5):\n np.random.seed(seed)\n index_list = n"
},
{
"path": "data/generate_test.py",
"chars": 1151,
"preview": "import codecs\nimport numpy as np\nseed = 1024\n\ndef kfold_split(length, k=5):\n np.random.seed(seed)\n index_list = np"
},
{
"path": "data/polarity_ensemble_online/1/dev.ind",
"chars": 12784,
"preview": "3014\n10702\n950\n6368\n5014\n4117\n1582\n2647\n2648\n222\n11081\n4255\n11055\n2060\n7715\n7716\n10677\n8949\n8950\n8749\n8712\n8713\n835\n2739"
},
{
"path": "data/polarity_ensemble_online/1/dev.tsv",
"chars": 206294,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "data/polarity_ensemble_online/1/train.tsv",
"chars": 850967,
"preview": "text\taspect\tlabels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\t0\n是的 , 油耗 到 8.6 了 。\t油耗\t0\nSE 是 EJ 发动机 , 不是 FB 的\t动力\t0\n我 "
},
{
"path": "data/polarity_ensemble_online/2/dev.ind",
"chars": 12955,
"preview": "6943\n11905\n5724\n7350\n7802\n7803\n7804\n7805\n4846\n7780\n1952\n11311\n6436\n4137\n4563\n92\n9069\n9070\n6909\n11012\n4942\n5856\n7500\n7501"
},
{
"path": "data/polarity_ensemble_online/2/dev.tsv",
"chars": 214290,
"preview": "text\taspect\tlabels\n这样 吖 … 有 时间 试试 毕竟 习惯 左手 放 7 点 左右 右手 不 放 方向盘\t操控\t0\n是的 , 油耗 到 8.6 了 。\t油耗\t0\nSE 是 EJ 发动机 , 不是 FB 的\t动力\t0\n我 "
},
{
"path": "data/polarity_ensemble_online/2/train.tsv",
"chars": 842971,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "data/polarity_ensemble_online/3/dev.ind",
"chars": 12934,
"preview": "3882\n11397\n11862\n5217\n5218\n9832\n10773\n8162\n5734\n6671\n6999\n7791\n7792\n7793\n7794\n9400\n5228\n5229\n11011\n880\n3531\n11169\n8552\n3"
},
{
"path": "data/polarity_ensemble_online/3/dev.tsv",
"chars": 207942,
"preview": "text\taspect\tlabels\n油耗 8.6 是 怎样 做到 的 , 我 的 已经 12\t油耗\t0\n是 个 四 个 轱辘 带 发动机 的 车 都 能 进 藏\t动力\t0\n空调 开 了 本来 就 会 影响 自动 启 停 , 不然 压缩机 "
},
{
"path": "data/polarity_ensemble_online/3/train.tsv",
"chars": 849319,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "data/polarity_ensemble_online/4/dev.ind",
"chars": 12887,
"preview": "4957\n4958\n11685\n3825\n3826\n3827\n3828\n1929\n3578\n1702\n1703\n6391\n12178\n12179\n7550\n10306\n55\n10991\n5713\n8897\n4162\n1965\n10212\n2"
},
{
"path": "data/polarity_ensemble_online/4/dev.tsv",
"chars": 214854,
"preview": "text\taspect\tlabels\n我 虽然 全 款 森林 人 , 但是 我 一直 在 黑 啊 , 我 是 用 大 价钱 在 卧底 啊 , 期待 有一天 长城 赶紧 的 把 富士 重工 收购 了 , 把 水平 对 置 拿 过来 我们 就 "
},
{
"path": "data/polarity_ensemble_online/4/train.tsv",
"chars": 842407,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "data/polarity_ensemble_online/5/dev.ind",
"chars": 12746,
"preview": "5046\n9404\n9835\n11429\n11430\n9839\n1039\n5066\n1975\n1232\n1040\n587\n3217\n10548\n3269\n10040\n1988\n717\n10909\n5200\n986\n2773\n10483\n80"
},
{
"path": "data/polarity_ensemble_online/5/dev.tsv",
"chars": 213695,
"preview": "text\taspect\tlabels\n平台 更新 , 外形 变化 不大 , 最 关键 的 是 内饰 肯定 能 好 不少 , 至少 能 保持 新翼 豹 的 水平 就 不错 。\t内饰\t1\n上 130 - 140 还是 很 轻松 的 , 爬坡 除"
},
{
"path": "data/polarity_ensemble_online/5/train.tsv",
"chars": 843566,
"preview": "text\taspect\tlabels\n同感 , 我 经常 开 s 60 L , 高速 很 爽 , 动力 强 . 市区 转弯 不方便 , 停车 也 麻烦 , 就是 因为 转弯 半径 太大\t动力\t1\n价格 差不多 明显 车 要好 不少\t价格\t0"
},
{
"path": "data/polarity_ensemble_online/test.tsv",
"chars": 462510,
"preview": "sentence\taspect\tlabel\nXV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t配置\t0\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来"
},
{
"path": "data/submit2.csv",
"chars": 133034,
"preview": "content_id,subject,sentiment_value,sentiment_word\n01htTx8jcsqp3CYr,配置,1,\n01kgxZUTtfODp2hr,操控,1,\n03wYj5KyOnLcmbHe,动力,0,\n0"
},
{
"path": "data/submit2.py",
"chars": 796,
"preview": "import codecs\nf_example = codecs.open(\"test_public_2.csv\", encoding='utf-8').readlines()\nfw = codecs.open(\"submit2.csv\","
},
{
"path": "data/submit2_stacking_all_bert.csv",
"chars": 133003,
"preview": "content_id,subject,sentiment_value,sentiment_word\n01htTx8jcsqp3CYr,配置,1,\n01kgxZUTtfODp2hr,操控,1,\n03wYj5KyOnLcmbHe,动力,0,\n0"
},
{
"path": "data/submit_example_2.csv",
"chars": 134306,
"preview": "content_id,subject,sentiment_value,sentiment_word\n01htTx8jcsqp3CYr,动力,-1,\n01kgxZUTtfODp2hr,动力,-1,\n03wYj5KyOnLcmbHe,动力,-1"
},
{
"path": "data/test.txt",
"chars": 377082,
"preview": "XV 新款 低 配 比 以前 低 配 配置 方面 高 了 好多 , 森林 人 应该 不会 加 的\t\n斯巴鲁 的 操控 确实 不错 , 十一 可以 开 着 我 斯巴鲁 游玩 了 , 走 起来\t\n助力 跟着 发动机 走 ? 很 奇葩 啊 ! 最"
},
{
"path": "data/test_predict_aspect_ensemble.txt",
"chars": 18052,
"preview": "配置\n操控\n动力\n配置\n舒适性\n舒适性\n配置\n空间\n油耗\n操控|油耗\n价格\n舒适性\n油耗\n动力\n操控\n油耗\n动力|操控\n操控\n空间\n外观\n舒适性\n动力\n操控|舒适性\n价格|空间\n空间\n操控\n配置\n动力\n内饰\n动力\n动力\n动力\n动力\n价格\n操"
},
{
"path": "data/test_predict_polarity_ensemble.txt",
"chars": 30011,
"preview": "配置,1\n操控,1\n动力,0\n配置,0\n舒适性,0\n舒适性,0\n配置,0\n空间,0\n油耗,0\n操控,1\n油耗,0\n价格,0\n舒适性,0\n油耗,0\n动力,0\n操控,0\n油耗,0\n动力,0\n操控,0\n操控,-1\n空间,1\n外观,0\n舒适性,0\n"
},
{
"path": "data/test_public_2.csv",
"chars": 323142,
"preview": "content_id,content\n01htTx8jcsqp3CYr,XV新款低配比以前低配配置方面高了好多,森林人应该不会加的 \n01kgxZUTtfODp2hr,斯巴鲁的操控确实不错,十一可以开着我斯巴鲁游玩了,走起来\n03wYj"
},
{
"path": "data/train.txt",
"chars": 857079,
"preview": "因为 森林 人 即将 换代 , 这套 系统 没 必要 装 在 一 款 即将 换代 的 车型 上 , 因为 肯定 会 影响 价格 。\t价格#0\n四驱 价格 貌似 挺 高 的 , 高 的 可以 看齐 XC 60 了 , 看 实 车前 脸 有点 "
},
{
"path": "dataset/attribute.json",
"chars": 759,
"preview": "{\n\t\"message\":\"\",\n\t\"value\": [\n\t\t{\n\t\t\t\"attribute1\":\"1\",\n\t\t\t\"attribute2\":\"动力\",\n\t\t\t\"attribute3\":\"0\"\n\t\t},{\n\t\t\t\"attribute1\":\"2"
},
{
"path": "dataset/clean_data.py",
"chars": 2316,
"preview": "import codecs\nimport pandas as pd\nimport numpy as np\n# import jieba\nfrom pyhanlp import HanLP\n# from nltk.tokenize.stanf"
},
{
"path": "dataset/clean_test.py",
"chars": 2039,
"preview": "import codecs\nimport pandas as pd\nimport numpy as np\n# import jieba\nfrom pyhanlp import HanLP\n# from nltk.tokenize.stanf"
},
{
"path": "dataset/polarity.json",
"chars": 271,
"preview": "{\n\t\"message\":\"\",\n\t\"value\": [\n\t\t{\n\t\t\t\"attribute1\":\"1\",\n\t\t\t\"attribute2\":\"-1\",\n\t\t\t\"attribute3\":\"1\"\n\t\t},{\n\t\t\t\"attribute1\":\"2"
},
{
"path": "dataset/submit_example_2.csv",
"chars": 134306,
"preview": "content_id,subject,sentiment_value,sentiment_word\n01htTx8jcsqp3CYr,动力,-1,\n01kgxZUTtfODp2hr,动力,-1,\n03wYj5KyOnLcmbHe,动力,-1"
},
{
"path": "dataset/test_public_2.csv",
"chars": 323142,
"preview": "content_id,content\n01htTx8jcsqp3CYr,XV新款低配比以前低配配置方面高了好多,森林人应该不会加的 \n01kgxZUTtfODp2hr,斯巴鲁的操控确实不错,十一可以开着我斯巴鲁游玩了,走起来\n03wYj"
},
{
"path": "dataset/train_2.csv",
"chars": 780791,
"preview": "content_id,content,subject,sentiment_value,sentiment_word\n13149,因为森林人即将换代,这套系统没必要装在一款即将换代的车型上,因为肯定会影响价格。,价格,0,影响\n2288,四驱"
},
{
"path": "polarity_level_aspect/ab_polarity.py",
"chars": 26627,
"preview": "import codecs\nimport sys\n\nsys.path.append(\"..\")\nfrom utils.data_helper import load_attr_data, load_w2v, load_ab_test, lo"
},
{
"path": "polarity_level_aspect/best_test.sh",
"chars": 229,
"preview": "CUDA_VISIBLE_DEVICES=2 python ab_polarity.py --mode 3 --use_elmo 2 --test_dir cp_HEAT_0#cp_AT_LSTM_0#cp_HEAT_ft2#cp_AT_L"
},
{
"path": "polarity_level_aspect/networks.py",
"chars": 12288,
"preview": "import torch\nfrom torch import nn\nimport torch.nn.functional as F\n\n\nclass WordRep(nn.Module):\n def __init__(self, voc"
},
{
"path": "utils/Data.py",
"chars": 8659,
"preview": "import torch\nimport numpy as np\n\n\nclass Data:\n def __init__(self, train_raw_data, word2index, attr_dict=None, args=No"
},
{
"path": "utils/data_helper.py",
"chars": 7889,
"preview": "import codecs\nimport numpy as np\nimport pandas as pd\n\n\n# def process_files():\n# from gensim.models.keyedvectors impo"
},
{
"path": "utils/evaluate.py",
"chars": 5642,
"preview": "import numpy as np\nfrom sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall"
},
{
"path": "utils/prepare_w2v.py",
"chars": 6288,
"preview": "import codecs\nfrom gensim.models.keyedvectors import KeyedVectors\nimport numpy as np\nimport pickle\n\n# w2v_model = KeyedV"
},
{
"path": "utils/prepare_w2v_with_UNK.py",
"chars": 6336,
"preview": "import codecs\nfrom gensim.models.keyedvectors import KeyedVectors\nimport numpy as np\nimport pickle\n\n# w2v_model = KeyedV"
},
{
"path": "utils/train.py",
"chars": 8348,
"preview": "import torch\nfrom torch import nn\nimport time\nimport numpy as np\nimport torch.nn.functional as F\nimport copy\nimport matp"
},
{
"path": "utils/train2.py",
"chars": 8338,
"preview": "import torch\nfrom torch import nn\nimport time\nimport numpy as np\nimport torch.nn.functional as F\nimport copy\nimport matp"
},
{
"path": "utils/train_single.py",
"chars": 8116,
"preview": "import torch\nfrom torch import nn\nimport time\nimport numpy as np\nimport torch.nn.functional as F\nimport copy\nimport matp"
},
{
"path": "utils/utils.py",
"chars": 151,
"preview": "import time\n\n\ndef time_since(since):\n now = time.time()\n s = now - since\n m = math.floor(s / 60)\n s -= m * 6"
}
]
// ... and 111 more files (download for full content)
About this extraction
This page contains the full source code of the yilifzf/BDCI_Car_2018 GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 261 files (206.2 MB), approximately 8.0M tokens, and a symbol index with 450 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.