Full Code of INK-USC/DS-RelationExtraction for AI

master eebcfa7fd2ed cached

292 files

66.5 MB

12.6M tokens

1906 symbols

1 requests

Copy disabled (too large) Download .txt

Showing preview only (50,519K chars total). Download the full file to get everything.

Repository: INK-USC/DS-RelationExtraction
Branch: master
Commit: eebcfa7fd2ed
Files: 292
Total size: 66.5 MB

Directory structure:
gitextract_ogmutb0y/

├── .gitignore
├── LICENSE.txt
├── README.md
├── code/
│   ├── Classifier/
│   │   ├── CLPL.py
│   │   ├── Classifier.py
│   │   ├── Classifier_em.py
│   │   ├── DataIO.py
│   │   ├── HierarchySVM.py
│   │   ├── Logistic.py
│   │   ├── MulticlassSVM.py
│   │   ├── PLSVM.py
│   │   ├── Perceptron.py
│   │   ├── SVM.py
│   │   ├── TypeHierarchy.py
│   │   ├── liblinear.py
│   │   ├── liblinear.so.3
│   │   └── liblinearutil.py
│   ├── DataProcessor/
│   │   ├── Feature/
│   │   │   ├── __init__.py
│   │   │   ├── abstract_feature.py
│   │   │   ├── brown_feature.py
│   │   │   ├── dependency_feature.py
│   │   │   ├── em_brown_feature.py
│   │   │   ├── em_dependency_feature.py
│   │   │   ├── em_other_feature.py
│   │   │   ├── em_token_feature.py
│   │   │   ├── other_feature.py
│   │   │   └── token_feature.py
│   │   ├── __init__.py
│   │   ├── feature_generation.py
│   │   ├── liblinear_processor.py
│   │   ├── mention.py
│   │   ├── mention_reader.py
│   │   ├── ner_feature.py
│   │   ├── nlp_parse.py
│   │   ├── pruning_heuristics.py
│   │   └── statistic.py
│   ├── Evaluation/
│   │   ├── convertPredictionToJson.py
│   │   ├── emb_prediction.py
│   │   ├── emb_test.py
│   │   ├── evaluation.py
│   │   └── tune_threshold.py
│   └── Model/
│       ├── FCM/
│       │   ├── README.md
│       │   ├── all.sen
│       │   ├── code/
│       │   │   ├── BaseComponentModel.cpp
│       │   │   ├── BaseComponentModel.h
│       │   │   ├── Commons.h
│       │   │   ├── EmbeddingModel.cpp
│       │   │   ├── EmbeddingModel.h
│       │   │   ├── FctCoarseModel.cpp
│       │   │   ├── FctCoarseModel.h
│       │   │   ├── FctConvolutionModel.cpp
│       │   │   ├── FctConvolutionModel.h
│       │   │   ├── FctDeepModel.cpp
│       │   │   ├── FctDeepModel.h
│       │   │   ├── FeatureModel.cpp
│       │   │   ├── FeatureModel.h
│       │   │   ├── FullFctModel.cpp
│       │   │   ├── FullFctModel.h
│       │   │   ├── Instances.cpp
│       │   │   ├── Instances.h
│       │   │   ├── RE_FCT
│       │   │   ├── RE_FCT.cpp
│       │   │   ├── RE_FCT_fixed
│       │   │   ├── RE_FCT_fixed.cpp
│       │   │   ├── makefile
│       │   │   └── predict.fea.fullnerpair.onlyne.txt
│       │   ├── data/
│       │   │   ├── SemEval.test.fea.sst
│       │   │   ├── SemEval.test.keys
│       │   │   ├── SemEval.train.fea.sst
│       │   │   ├── semeval2010_task8_scorer-v1.2.pl
│       │   │   └── vectors.nyt2011.cbow.semeval.filtered
│       │   ├── filter.py
│       │   ├── gen_fmt.py
│       │   ├── gen_sen.py
│       │   ├── predict.txt
│       │   ├── process.py
│       │   ├── run.sh
│       │   ├── test.fmt
│       │   ├── test.fmt.tmp
│       │   ├── test.id
│       │   ├── test.sen
│       │   ├── train.fmt
│       │   ├── train.id
│       │   ├── train.sen
│       │   ├── vec.emb
│       │   ├── word2vec
│       │   └── word2vec.cpp
│       ├── baselines/
│       │   ├── hypenet/
│       │   │   ├── README.md
│       │   │   ├── data/
│       │   │   │   └── README.md
│       │   │   ├── evaluation.py
│       │   │   ├── helper.py
│       │   │   ├── lemmatize.py
│       │   │   ├── model.h5
│       │   │   ├── plot.py
│       │   │   ├── postprocess.py
│       │   │   ├── preprocess.py
│       │   │   ├── sdp.py
│       │   │   ├── sentence_normalize.py
│       │   │   ├── sentence_tokens.py
│       │   │   ├── shortest_dep.py
│       │   │   ├── split_baseline_data.py
│       │   │   └── test_corenlp.py
│       │   ├── sdp-lstm/
│       │   │   ├── README.md
│       │   │   ├── dependency/
│       │   │   │   ├── analyze.py
│       │   │   │   ├── data_utils.py
│       │   │   │   ├── emb_utils.py
│       │   │   │   ├── eval.py
│       │   │   │   ├── scorer.py
│       │   │   │   ├── sprnn_model.py
│       │   │   │   ├── train.py
│       │   │   │   ├── tree.py
│       │   │   │   └── utils.py
│       │   │   └── dependency-kbp/
│       │   │       ├── __init__.py
│       │   │       ├── analyze.py
│       │   │       ├── data_utils.py
│       │   │       ├── emb_utils.py
│       │   │       ├── eval.py
│       │   │       ├── scorer.py
│       │   │       ├── sprnn_model.py
│       │   │       ├── train-cv.py
│       │   │       └── utils.py
│       │   └── sentence-level-models/
│       │       ├── README.md
│       │       ├── cotype2json.py
│       │       ├── model.py
│       │       ├── models/
│       │       │   ├── bgru.py
│       │       │   ├── cnn.py
│       │       │   ├── lstm.py
│       │       │   ├── pcnn.py
│       │       │   └── position_aware_lstm.py
│       │       ├── tacred2json.py
│       │       ├── train-cv.py
│       │       ├── train.py
│       │       ├── utils.py
│       │       └── vocab.py
│       ├── dw/
│       │   ├── deepwalk-bipa.cpp
│       │   ├── deepwalk-hete-em.cpp
│       │   ├── deepwalk-hete.cpp
│       │   └── makefile
│       ├── multir/
│       │   ├── .classpath
│       │   ├── .project
│       │   ├── .settings/
│       │   │   └── org.eclipse.jdt.core.prefs
│       │   ├── LICENSE.txt
│       │   ├── README.txt
│       │   ├── annotations/
│       │   │   ├── sentential-byrelation.txt
│       │   │   └── sentential.txt
│       │   ├── lib/
│       │   │   └── protobuf-java-2.3.0.jar
│       │   ├── multiR.jar
│       │   ├── multiR_new.jar
│       │   ├── multir.jar
│       │   ├── run.sh
│       │   └── src/
│       │       ├── cc/
│       │       │   └── factorie/
│       │       │       └── protobuf/
│       │       │           └── DocumentProtos.java
│       │       └── edu/
│       │           └── uw/
│       │               └── cs/
│       │                   └── multir/
│       │                       ├── learning/
│       │                       │   ├── algorithm/
│       │                       │   │   ├── AveragedPerceptron.java
│       │                       │   │   ├── ConditionalInference.java
│       │                       │   │   ├── FullInference.java
│       │                       │   │   ├── Model.java
│       │                       │   │   ├── Parameters.java
│       │                       │   │   ├── Parse.java
│       │                       │   │   ├── Scorer.java
│       │                       │   │   └── Viterbi.java
│       │                       │   └── data/
│       │                       │       ├── Dataset.java
│       │                       │       ├── MILDocument.java
│       │                       │       └── MemoryDataset.java
│       │                       ├── main/
│       │                       │   ├── AggregatePrecisionRecallCurve.java
│       │                       │   ├── Main.java
│       │                       │   ├── Preprocess.java
│       │                       │   ├── ResultWriter.java
│       │                       │   ├── SententialPrecisionRecallByRelation.java
│       │                       │   ├── SententialPrecisionRecallCurve.java
│       │                       │   ├── Test.java
│       │                       │   └── Train.java
│       │                       ├── preprocess/
│       │                       │   ├── ConvertProtobufToMILDocument.java
│       │                       │   └── Mappings.java
│       │                       └── util/
│       │                           ├── DenseVector.java
│       │                           └── SparseBinaryVector.java
│       ├── pte/
│       │   ├── line.cpp
│       │   ├── linelib.cpp
│       │   ├── linelib.h
│       │   ├── makefile
│       │   ├── pte-hete.cpp
│       │   ├── ransampl.c
│       │   └── ransampl.h
│       ├── retype/
│       │   ├── hplelib.cpp
│       │   ├── hplelib.h
│       │   ├── makefile
│       │   ├── ransampl.c
│       │   ├── ransampl.h
│       │   ├── retype
│       │   ├── retype-rm
│       │   ├── retype-rm.cpp
│       │   └── retype.cpp
│       ├── seq-kernel/
│       │   ├── gen_data.py
│       │   ├── gen_fmt.py
│       │   ├── gen_lb.py
│       │   ├── gen_sen.py
│       │   ├── libsvm/
│       │   │   ├── COPYRIGHT
│       │   │   ├── FAQ.html
│       │   │   ├── Makefile
│       │   │   ├── Makefile.win
│       │   │   ├── README
│       │   │   ├── heart_scale
│       │   │   ├── java/
│       │   │   │   ├── Makefile
│       │   │   │   ├── libsvm/
│       │   │   │   │   ├── svm.java
│       │   │   │   │   ├── svm.m4
│       │   │   │   │   ├── svm_model.java
│       │   │   │   │   ├── svm_node.java
│       │   │   │   │   ├── svm_parameter.java
│       │   │   │   │   ├── svm_print_interface.java
│       │   │   │   │   └── svm_problem.java
│       │   │   │   ├── libsvm.jar
│       │   │   │   ├── svm_predict.java
│       │   │   │   ├── svm_scale.java
│       │   │   │   ├── svm_toy.java
│       │   │   │   ├── svm_train.java
│       │   │   │   └── test_applet.html
│       │   │   ├── matlab/
│       │   │   │   ├── Makefile
│       │   │   │   ├── README
│       │   │   │   ├── libsvmread.c
│       │   │   │   ├── libsvmwrite.c
│       │   │   │   ├── make.m
│       │   │   │   ├── svm_model_matlab.c
│       │   │   │   ├── svm_model_matlab.h
│       │   │   │   ├── svmpredict.c
│       │   │   │   └── svmtrain.c
│       │   │   ├── model.txt
│       │   │   ├── predict.txt
│       │   │   ├── python/
│       │   │   │   ├── Makefile
│       │   │   │   ├── README
│       │   │   │   ├── svm.py
│       │   │   │   └── svmutil.py
│       │   │   ├── svm-predict
│       │   │   ├── svm-predict.c
│       │   │   ├── svm-scale
│       │   │   ├── svm-scale.c
│       │   │   ├── svm-toy/
│       │   │   │   ├── gtk/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── callbacks.cpp
│       │   │   │   │   ├── callbacks.h
│       │   │   │   │   ├── interface.c
│       │   │   │   │   ├── interface.h
│       │   │   │   │   ├── main.c
│       │   │   │   │   └── svm-toy.glade
│       │   │   │   ├── qt/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   └── svm-toy.cpp
│       │   │   │   └── windows/
│       │   │   │       └── svm-toy.cpp
│       │   │   ├── svm-train
│       │   │   ├── svm-train.c
│       │   │   ├── svm.cpp
│       │   │   ├── svm.def
│       │   │   ├── svm.h
│       │   │   ├── tools/
│       │   │   │   ├── README
│       │   │   │   ├── checkdata.py
│       │   │   │   ├── easy.py
│       │   │   │   ├── grid.py
│       │   │   │   └── subset.py
│       │   │   └── windows/
│       │   │       ├── libsvmread.mexw64
│       │   │       ├── libsvmwrite.mexw64
│       │   │       ├── svmpredict.mexw64
│       │   │       └── svmtrain.mexw64
│       │   ├── process.py
│       │   ├── run.sh
│       │   ├── ssk_core/
│       │   │   ├── base.txt
│       │   │   ├── hs_err_pid12502.log
│       │   │   ├── hs_err_pid16669.log
│       │   │   ├── hs_err_pid18953.log
│       │   │   ├── infer.txt
│       │   │   ├── libsvm/
│       │   │   │   ├── CustomKernel.java
│       │   │   │   ├── Makefile
│       │   │   │   ├── libsvm.jar
│       │   │   │   ├── svm.java
│       │   │   │   ├── svm_model.java
│       │   │   │   ├── svm_node.java
│       │   │   │   ├── svm_parameter.java
│       │   │   │   └── svm_problem.java
│       │   │   ├── readme.txt
│       │   │   └── ssk/
│       │   │       ├── FeatureDictionary.java
│       │   │       ├── InstanceExample.java
│       │   │       ├── Makefile
│       │   │       ├── SubsequenceKernel.java
│       │   │       ├── intex_node.java
│       │   │       └── ssk.jar
│       │   ├── test.lb
│       │   ├── test.sen
│       │   ├── test.txt
│       │   ├── train.shuf
│       │   ├── train.smp
│       │   ├── train.smp.lb
│       │   └── train.smp.sen
│       └── warp/
│           ├── makefile
│           └── warp.cpp
├── data/
│   └── source/
│       ├── BioInfer/
│       │   └── README.md
│       ├── KBP/
│       │   └── README.md
│       └── NYT/
│           └── README.md
└── run.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
**/eigen-3.2.5/
*.pyc
*.DS_Store
*.o
*.zip
DataProcessor/stanford-corenlp-python/
Intermediate/*
Results/*
Data/*


================================================
FILE: LICENSE.txt
================================================
MIT License

Copyright (c) [year] [fullname]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# USC Distantly-supervised Relation Extraction System
This repository puts together recent models and data sets for **sentence-level relation extraction** *using knowledge bases (i.e., distant supervision)*. In particular, it contains the source code for WWW'17 paper *[CoType: Joint Extraction of Typed Entities and Relations with Knowledge Bases](https://arxiv.org/pdf/1610.08763.pdf)*.

**Please also check out our new repository on [handling shifted label distribution in distant supervision](https://github.com/INK-USC/shifted-label-distribution)**

**Task**: Given a text corpus with entity mentions *detected* and *heuristically labeled* using distant supervision, the task aims to identify relation types/labels between a pair of entity mentions based on the sentence context where they co-occur.

## Quick Start
- [Blog Posts](#blog-posts)
- [Data](#data)
- [Benchmark](#benchmark)
- [Usage](#usage)
- [Customized Run](#customized-run)
- [Baselines](#baselines)
- [References](#references)
- [Contributors](#contributors)

## Blog Posts
* [08/2017] [Indirect Supervision for Relation Extraction Using Question-Answer Pairs](https://ellenmellon.github.io/ReQuest/)
* [08/2016] [Heterogeneous Supervision for Relation Extraction](https://liyuanlucasliu.github.io/ReHession/)


## Data
For evaluating on sentence-level extraction, we [processed](https://github.com/shanzhenren/StructMineDataPipeline) (using our [data pipeline](https://github.com/shanzhenren/StructMineDataPipeline)) three public datasets to our JSON format. We ran [Stanford NER](https://nlp.stanford.edu/software/CRF-NER.shtml) on the training set to detect entity mentions, mapped entity names to Freebase entities using [DBpediaSpotlight](https://github.com/dbpedia-spotlight/dbpedia-spotlight), aligned Freebase facts to sentences, and assigned entity types of Freebase entities to their mapped names in sentences:

   * **PubMed-BioInfer**: 100k PubMed paper abstracts as training data and 1,530 manually labeled biomedical paper abstracts from [BioInfer](http://mars.cs.utu.fi/BioInfer/) ([Pyysalo et al., 2007](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-8-50)) as test data. It consists of 94 relation types (protein-protein interactions) and over 2,000 entity types (from MESH ontology). ([Download](https://drive.google.com/drive/folders/0B--ZKWD8ahE4RmFBTjR6aUJjTkU?usp=sharing))
   
   * **NYT-manual**: 1.18M sentences sampled from 294K New York Times news articles which were then aligned with Freebase facts by ([Riedel et al., ECML'10](https://pdfs.semanticscholar.org/db55/0f7af299157c67d7f1874bf784dca10ce4a9.pdf)) ([link](http://iesl.cs.umass.edu/riedel/ecml/) to Riedel's data). For test set, 395 sentences are manually annotated with 24 relation types and 47 entity types ([Hoffmann et al., ACL'11](http://raphaelhoffmann.com/publications/acl2011.pdf)) ([link](http://raphaelhoffmann.com/mr/) to Hoffmann's data). ([Download](https://drive.google.com/drive/folders/0B--ZKWD8ahE4UktManVsY1REOUk?usp=sharing))
   
   * **Wiki-KBP**: the training corpus contains 1.5M sentences sampled from 780k [Wikipedia articles](https://github.com/xiaoling/figer) ([Ling & Weld, 2012](http://xiaoling.github.io/pubs/ling-aaai12.pdf)) plus ~7,000 sentences from 2013 KBP corpus. Test data consists of 14k system-labeled sentences from [2013 KBP slot filling](http://surdeanu.info/kbp2013/) assessment results. It has 7 relation types and 126 entity types after filtering of numeric value relations. ([Download](https://drive.google.com/drive/folders/0B--ZKWD8ahE4RjFLUkVQTm93WVU?usp=sharing))

Please put the data files in corresponding subdirectories under `data/source`



## Benchmark
Performance comparison with several *relation extraction* systems over KBP 2013 dataset (**sentence-level extraction**). 

Method | Precision | Recall | F1 
-------|-----------|--------|----
Mintz (our implementation, [Mintz et al., 2009](http://web.stanford.edu/~jurafsky/mintz.pdf)) | 0.296 | 0.387 | 0.335 
LINE + Dist Sup ([Tang et al., 2015](https://arxiv.org/pdf/1503.03578.pdf)) | **0.360** | 0.257 | 0.299 
MultiR ([Hoffmann et al., 2011](http://raphaelhoffmann.com/publications/acl2011.pdf)) | 0.325 | 0.278 | 0.301 
FCM + Dist Sup ([Gormley et al., 2015](http://www.aclweb.org/anthology/D15-1205)) | 0.151 | 0.498 | 0.300 
HypeNet (our implementation, [Shwartz et al., 2016](http://www.aclweb.org/anthology/P16-1226)) | 0.210 | 0.315 | 0.252
CNN (our implementation, [Zeng et al., 2014](http://www.aclweb.org/anthology/C14-1220))| 0.198 | 0.334 | 0.242
PCNN (our implementation, [Zeng et al., 2015](http://www.aclweb.org/anthology/D15-1203))| 0.220 | 0.452 | 0.295
LSTM (our implementation) | 0.274 | 0.500 | 0.350
Bi-GRU (our implementation) | 0.301 | 0.465 | 0.362
SDP-LSTM (our implementation, [Xu et al., 2015](http://www.aclweb.org/anthology/D15-1206)) | 0.300 | 0.436 | 0.356
Position-Aware LSTM ([Zhang et al., 2017](http://www.aclweb.org/anthology/D17-1004))| 0.265 | **0.598** | 0.367
CoType-RM ([Ren et al., 2017](https://arxiv.org/pdf/1610.08763v1.pdf)) | 0.303 | 0.407 | 0.347
**CoType** ([Ren et al., 2017](https://arxiv.org/pdf/1610.08763v1.pdf)) | 0.348 | 0.406 | **0.369**

**Note**: for models that are trained on sentences annotated with a single label (HypeNet, CNN/PCNN, LSTM, SDP/PA-LSTMs, Bi-GRU), we form one training instance for each sentence-label pair based on their DS-annotated data.

## Usage

### Dependencies
We will take Ubuntu for example.

* python 2.7
* Python library dependencies
```
$ pip install pexpect ujson tqdm
```

* [stanford coreNLP 3.7.0](http://stanfordnlp.github.io/CoreNLP/) and its [python wrapper](https://github.com/stanfordnlp/stanza). Please put the library under `code/DataProcessor/`.

```
$ cd code/DataProcessor/
$ git clone git@github.com:stanfordnlp/stanza.git
$ cd stanza
$ pip install -e .
$ wget http://nlp.stanford.edu/software/stanford-corenlp-full-2016-10-31.zip
$ unzip stanford-corenlp-full-2016-10-31.zip
```
* [eigen 3.2.5](http://bitbucket.org/eigen/eigen/get/3.2.5.tar.bz2) (already included). 

We have included compiled binaries. If you need to re-compile `retype.cpp` under your own g++ environment:
```
$ cd code/Model/retype; make
```

### Default Run
As an example, we show how to run CoType on the Wiki-KBP dataset

Start the Stanford corenlp server for the python wrapper.
```
$ java -mx4g -cp "code/DataProcessor/stanford-corenlp-full-2016-10-31/*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer
```

Feature extraction, embedding learning on training data, and evaluation on test data.
```
$ ./run.sh  
```

For relation classification, the "none"-labeled instances need to be first removed from train/test JSON files. The hyperparameters for embedding learning are included in the run.sh script.

### Parameters
Dataset to run on.
```
Data="KBP"
```
- Hyperparameters for *relation extraction*:
```
- KBP: -negative 3 -iters 400 -lr 0.02 -transWeight 1.0
- NYT: -negative 5 -iters 700 -lr 0.02 -transWeight 7.0
- BioInfer: -negative 5 -iters 700 -lr 0.02 -transWeight 7.0
```
Hyperparameters for *relation classification* are included in the run.sh script.

### Evaluation
Evaluates relation extraction performance (precision, recall, F1): produce predictions along with their confidence score; filter the predicted instances by tuning the thresholds.
```
$ python code/Evaluation/emb_test.py extract KBP retype cosine 0.0
$ python code/Evaluation/tune_threshold.py extract KBP emb retype cosine
```

### In-text Prediction
The last command in *run.sh* generates json file for predicted results, in the same format as test.json in data/source/$DATANAME, except that we only output the predicted relation mention labels. Replace the second parameter with whatever threshold you would like.
```
$ python code/Evaluation/convertPredictionToJson.py $Data 0.0
```

## Customized Run
Code for producing the JSON files from a raw corpus for running CoType and baseline models is [here](https://github.com/shanzhenren/StructMineDataPipeline).

## Baselines
You can find our implementation of some recent relation extraction models under the [Code/Model/](https://github.com/shanzhenren/DS-RelationExtraction/tree/master/code/Model) directory.

## References
* Xiang Ren, Zeqiu Wu, Wenqi He, Meng Qu, Clare R. Voss, Heng Ji, Tarek F. Abdelzaher, Jiawei Han. "[CoType: Joint Extraction of Typed Entities and Relations with Knowledge Bases](https://arxiv.org/pdf/1610.08763.pdf)", WWW 2017.
* Meng Qu, Xiang Ren, Yu Zhang, Jiawei Han. “[Weakly-supervised Relation Extraction by Pattern-enhanced Embedding Learning](https://arxiv.org/abs/1711.03226)”, WWW 2018.
* Liyuan Liu*, Xiang Ren*, Qi Zhu, Shi Zhi, Huan Gui, Heng Ji, Jiawei Han. "[Heterogeneous Supervision for Relation Extraction: A Representation Learning Approach](https://arxiv.org/abs/1707.00166)", EMNLP 2017.
* Ellen Wu, Xiang Ren, Frank Xu, Ji Li, Jiawei Han. "[Indirect Supervision for Relation Extraction using Question-Answer Pairs](https://arxiv.org/abs/1710.11169)", WSDM 2018.


## Contributors
* Ellen Wu
* Meng Qu
* Frank Xu
* Wenqi He
* Maosen Zhang
* Qinyuan Ye
* Xiang Ren


================================================
FILE: code/Classifier/CLPL.py
================================================
__author__ = 'wenqihe'


import sys
import random


class CLPL:
    """Multi-label classifier that reduces label prediction to one binary SVM.

    Every (feature, label) pair is mapped to the index
    ``feature + feature_size * label`` of a joint weight vector of size
    ``feature_size * label_size`` which is learned by a ``Pegasos`` solver.
    """

    def __init__(self, feature_size, label_size, type_hierarchy, lambda_reg=0.1, max_iter=500, threshold=0.0, batch_size=1000, sample_size=10):
        """
        :param feature_size: number of distinct feature indices
        :param label_size: number of distinct label indices
        :param type_hierarchy: object whose ``_type_hierarchy`` attribute maps
            a child label index to its parent label index
        :param lambda_reg: regularization strength passed to Pegasos
        :param max_iter: iteration bound passed to Pegasos
        :param threshold: minimum score for a sub-type to be predicted
        :param batch_size: batch size passed to Pegasos
        :param sample_size: number of negative labels sampled per example
        """
        self._feature_size = feature_size
        self._label_size = label_size
        self._type_hierarchy = type_hierarchy
        self._threshold = threshold
        self._sample_size = sample_size
        self._svm = Pegasos(feature_size*label_size, lambda_reg, max_iter, batch_size)

    def fit(self, train_x, train_y):
        """
        Build the binary training set and train the underlying SVM.

        For each example one positive instance is created that joins the
        features of all candidate labels (each weighted by ``1/len(y)``), plus
        one negative instance per sampled non-candidate label.

        :param train_x: list of list of feature indices
        :param train_y: list of list of candidate label indices
        :return: None
        """
        new_train_x = []
        new_train_value = []
        new_train_y = []
        for i in range(len(train_y)):
            x = train_x[i]
            y = train_y[i]
            candidates = set(y)
            ny = [k for k in range(self._label_size) if k not in candidates]
            # add positive examples
            pos_weight = 1.0 / len(y) if y else 0.0
            new_x = []
            val_x = []
            for label in y:
                offset = self._feature_size * label
                for feature in x:
                    new_x.append(feature + offset)
                    val_x.append(pos_weight)
            new_train_x.append(new_x)
            new_train_value.append(val_x)
            new_train_y.append(1)
            # sample negative examples; random.sample raises ValueError when
            # asked for more items than the population holds, so cap the
            # sample at the number of labels that are actually negative.
            sample_nys = random.sample(ny, min(self._sample_size, len(ny)))

            for sample_ny in sample_nys:
                offset = self._feature_size * sample_ny
                new_train_x.append([feature + offset for feature in x])
                new_train_value.append([1.0 for _ in x])
                new_train_y.append(-1)
        print('Start train svm with %d examples' % len(new_train_y))
        self._svm.fit(new_train_x, new_train_value, new_train_y)

    def predict(self, x):
        """
        Predict a set of label indices for one example.

        The result contains the best-scoring label, every ancestor of it found
        through the child -> parent mapping, and the chain of best-scoring
        sub-types whose score exceeds the threshold.

        :param x: list of feature indices
        :return: set of label indices
        """
        labels = set()
        parent_mapping = self._type_hierarchy._type_hierarchy
        scores = []
        max_score = float('-inf')
        max_index = -1

        for label in range(self._label_size):
            offset = self._feature_size * label
            new_x = [feature + offset for feature in x]
            val_x = [1.0 for _ in new_x]
            value = self._svm.predict_prob(new_x, val_x)
            scores.append(value)
            if value > max_score:
                max_score = value
                max_index = label
        labels.add(max_index)
        # Add parent of max_index if any
        temp = max_index
        while temp in parent_mapping:
            labels.add(parent_mapping[temp])
            temp = parent_mapping[temp]
        # add child of max_index if meeting threshold
        temp = max_index
        while temp != -1:
            max_sub_index = -1
            max_sub_score = float('-inf')
            for child in parent_mapping:
                # check the maximum subtype
                if parent_mapping[child] == temp:
                    if child < self._label_size:
                        if max_sub_score < scores[child]:
                            max_sub_index = child
                            max_sub_score = scores[child]
            if max_sub_index != -1 and max_sub_score > self._threshold:
                labels.add(max_sub_index)
            # descend to the best sub-type even if it was below the threshold
            temp = max_sub_index
        return labels


class Pegasos:
    """Sparse linear SVM trained with Pegasos-style stochastic sub-gradient steps.

    Examples are given as parallel lists of feature indices and feature values.
    """

    # Floor for the multiplicative shrink factor used by L2_regularize, so a
    # large step size can never zero out or flip the sign of the weights.
    _MIN_SCALING_FACTOR = 1e-7

    def __init__(self, feature_size, lambda_reg=0.1, max_iter=500, batch_size=1000):
        """
        :param feature_size: length of the weight vector
        :param lambda_reg: regularization strength
        :param max_iter: upper bound (exclusive) of the iteration counter t
        :param batch_size: number of random examples visited per iteration
        """
        self._feature_size = feature_size
        self._weight = [0 for col in range(feature_size)]
        self._lambda_reg = lambda_reg
        self._max_iter = max_iter
        self._batch_size = batch_size

    def fit(self, train_x, val_x, train_y):
        """
        :param train_x: list of list of feature indices
        :param val_x: list of list of feature values (parallel to train_x)
        :param train_y: list of 1/-1
        :return: None
        """
        m = len(train_y)
        if m == 0:
            # nothing to learn from; random.randint(0, -1) would raise
            return
        # t starts at 1 because the step size is 1/(lambda*t)
        for t in range(1, self._max_iter):
            eta_t = 1.0/(self._lambda_reg*t)
            shrink = 1-eta_t*self._lambda_reg
            for _ in range(self._batch_size):
                # randomly choose an example
                i = random.randint(0, m-1)
                x = train_x[i]
                val = val_x[i]
                y = train_y[i]
                p = self.predict_prob(x, val)
                if y*p < 1:
                    # margin violated: shrink and move towards the example
                    for k in range(len(x)):
                        feature = x[k]
                        self._weight[feature] = shrink*self._weight[feature]+eta_t*y*val[k]
                else:
                    # margin satisfied: only shrink the active weights
                    for feature in x:
                        self._weight[feature] *= shrink
            sys.stdout.write('{0} iteration done.\r'.format(t))
            sys.stdout.flush()

    def predict(self, x, val):
        """Return 1 when the raw score is non-negative, otherwise -1."""
        prob = self.predict_prob(x, val)
        if prob >= 0:
            return 1
        else:
            return -1

    def L2_regularize(self, eta_t):
        """Scale every weight by ``1 - eta_t * lambda_reg``, floored at a small positive value."""
        scaling_factor = 1.0 - (eta_t * self._lambda_reg)
        if scaling_factor < self._MIN_SCALING_FACTOR:
            scaling_factor = self._MIN_SCALING_FACTOR
        for i in range(self._feature_size):
            self._weight[i] *= scaling_factor

    def predict_prob(self, x, val):
        """Return the raw linear score: sum of weight[x[k]] * val[k]."""
        result = 0.0
        for k in range(len(x)):
            feature = x[k]
            result += self._weight[feature] * val[k]
        # return result+self._threshold
        return result

================================================
FILE: code/Classifier/Classifier.py
================================================
__author__ = 'xiang'
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import time
import json
from DataIO import *
from Perceptron import MultilabelPerceptron
from HierarchySVM import HierarchySVM
from PLSVM import PLSVM
from CLPL import CLPL
from Logistic import Logistic
from TypeHierarchy import TypeSet # TypeHierarchy,

def classify(classifier, feature_size, label_size, train_x, train_y, learning_rate, max_iter, type_hierarchy, threshold=None):
    """
    Build the classifier selected by name, train it and return it.

    :param classifier: one of 'perceptron', 'plsvm', 'svm-pegasos', 'logistic'
    :param feature_size: number of distinct features
    :param label_size: number of distinct labels
    :param train_x: training features, passed straight to ``model.fit``
    :param train_y: training labels, passed straight to ``model.fit``
    :param learning_rate: learning rate (used as lambda_reg for 'svm-pegasos')
    :param max_iter: maximum number of training iterations
    :param type_hierarchy: type hierarchy object used by 'plsvm' and 'svm-pegasos'
    :param threshold: decision threshold for 'perceptron' and 'logistic'.
        When omitted, the module-level ``_threshold`` is used if it exists
        (the previous behaviour), otherwise 0.0.
    :return: the fitted model; the process exits if the name is unknown
    """
    if threshold is None:
        # Earlier versions read the module global set by the script entry
        # point; keep honouring it so existing callers see no change.
        threshold = globals().get('_threshold', 0.0)

    model = None
    timestamp1 = time.time()
    # Use the function's own arguments rather than module globals, so the
    # function also works when this module is imported instead of executed.
    if classifier == 'perceptron':
        model = MultilabelPerceptron(feature_size=feature_size,
                                     label_size=label_size,
                                     learning_rate=learning_rate,
                                     max_iter=max_iter,
                                     threshold=threshold)
    elif classifier == 'plsvm':
        model = PLSVM(feature_size=feature_size, label_size=label_size, type_hierarchy=type_hierarchy, lambda_reg=0.1, max_iter=max_iter, threshold=0, batch_size=1000)
    # elif classifier == 'clpl':
    #     model = CLPL(feature_size=feature_size, label_size=label_size, type_hierarchy=type_hierarchy, lambda_reg=0.1, max_iter=max_iter, threshold=10, batch_size=1000)
    elif classifier == 'svm-pegasos':
        model = HierarchySVM(feature_size=feature_size, type_hierarchy=type_hierarchy._subtype_mapping, current_types=type_hierarchy._root, level=0, lambda_reg=learning_rate, max_iter=max_iter, threshold=-100)
    elif classifier == 'logistic':
        model = Logistic(feature_size=feature_size, label_size=label_size, threshold=threshold)
    if model:
        model.fit(train_x, train_y)
    else:
        print('Wrong classifier name given!')
        # NOTE(review): exit status 0 is kept for compatibility with existing
        # scripts, although this is an error path.
        exit(0)
    timestamp2 = time.time()
    print("This took %f seconds" % (timestamp2 - timestamp1))
    return model

def predict_em(model, test_x, type_hierarchy, _threshold):
    """
    Predict label sets and extend each one with the labels' type paths.

    :param model: fitted classifier whose ``predict(x)`` returns a set of labels
    :param test_x: indexable collection of test examples
    :param type_hierarchy: object providing ``get_type_path(label)``
    :param _threshold: value assigned to the class-level ``threshold`` of
        ``MultilabelPerceptron`` and ``Logistic`` before predicting
    :return: list with one label set per test example
    """
    MultilabelPerceptron.threshold = _threshold
    Logistic.threshold = _threshold
    type_distrubtion = {}
    test_y = []
    for idx in range(len(test_x)):
        predicted = model.predict(test_x[idx])
        # collect every label on the type path of each prediction, but only
        # for paths that contain more than the label itself
        ancestors = set()
        for label in predicted:
            path = type_hierarchy.get_type_path(label)
            if len(path) > 1:
                ancestors.update(path)
        predicted.update(ancestors)
        # the set may be empty (see predict in Perceptron.py)
        test_y.append(predicted)
        for label in predicted:
            type_distrubtion[label] = type_distrubtion.get(label, 0) + 1
    print('type distribution %s' % (type_distrubtion,))
    return test_y


def predict(model, test_x, type_hierarchy, _threshold):
    """
    Predict labels for every test example and report how often each occurs.

    :param model: fitted classifier whose ``predict(x)`` returns an iterable
        of pairs; the first element of each pair is counted as the label
    :param test_x: indexable collection of test examples
    :param type_hierarchy: unused here; kept for a uniform signature
    :param _threshold: value assigned to the class-level ``threshold`` of
        ``MultilabelPerceptron`` and ``Logistic`` before predicting
    :return: list with the raw prediction of each test example
    """
    MultilabelPerceptron.threshold = _threshold
    Logistic.threshold = _threshold
    type_distrubtion = {}
    test_y = []
    for idx in range(len(test_x)):
        predicted = model.predict(test_x[idx])
        # the prediction may be empty (see predict in Perceptron.py)
        test_y.append(predicted)
        for pair in predicted:
            label = pair[0]
            type_distrubtion[label] = type_distrubtion.get(label, 0) + 1
    print('type distribution %s' % (type_distrubtion,))
    return test_y


def casestudy(filename, output, mention_mapping, label_mapping, clean_mentions):
    """
    Render every JSON sentence of ``filename`` as annotated text in ``output``.

    Each input line is parsed as one JSON object and passed to ``putback``;
    every non-empty rendering is written followed by a newline.

    :param filename: path of the input file, one JSON sentence per line.
    :param output: path of the text file to write.
    :param mention_mapping: mention name -> mention id.
    :param label_mapping: label id -> label name.
    :param clean_mentions: mention id -> predicted label ids.
    """
    with open(filename) as f, open(output, 'w') as g:
        for line in f:
            sent = json.loads(line.strip('\r\n'))
            result = putback(sent, mention_mapping, label_mapping, clean_mentions)
            # Compare by value: ``is not ''`` checks object identity, which is
            # an interpreter implementation detail for strings.
            if result != '':
                g.write(result+'\n')


def putback(sent_json, mention_mapping, label_mapping, clean_mentions):
    """
    Rebuild a sentence as text with mentions bracketed and labels attached.

    Mentions are processed left to right (longest first when several share a
    start). A mention that starts before the end of an already emitted one is
    skipped. Emitted mentions are rendered as ``[tokens]`` and, when a
    prediction is known for them, followed by ``:[label,label,...]``.

    :param sent_json: dict with keys 'fileid', 'senid', 'tokens' and 'mentions'
        (each mention is a dict with 'start' and 'end' token offsets).
    :param mention_mapping: mention name ``fileid_senid_start_end`` -> mention id.
    :param label_mapping: label id -> label name.
    :param clean_mentions: mention id -> iterable of predicted label ids.
    :return: ``"<fileid>:<senid>\\t<annotated sentence>\\n"``.
    """
    fileid = sent_json['fileid']
    senid = sent_json['senid']
    tokens = sent_json['tokens']
    pivot = 0  # index of the first token that has not been emitted yet
    result = []
    mentions = sent_json['mentions']
    # Same ordering as the former cmp function: start ascending, end descending.
    sorted_m = sorted(mentions, key=lambda m: (m['start'], -m['end']))
    for m in sorted_m:
        start = m['start']
        end = m['end']
        if end - start == 1:
            mention_name = '[%s]' % (tokens[start])
        else:
            mention_name = '[%s]' % (' '.join(tokens[start:end]))
        if pivot <= start:
            result.extend(tokens[pivot:start])
            result.append(mention_name)
            # find predicted labels if any
            m_name = '%s_%d_%d_%d'%(fileid, senid, start, end)
            if m_name in mention_mapping:
                m_id = mention_mapping[m_name]
                if m_id in clean_mentions:
                    clean_labels = [label_mapping[l] for l in clean_mentions[m_id]]
                    result.append(':'+'['+','.join(clean_labels)+']')
            # Advance only when the mention was emitted; a skipped nested
            # mention must not pull the pivot backwards, otherwise tokens that
            # were already written would be written a second time.
            pivot = end
    if pivot < len(tokens):
        result.extend(tokens[pivot:])
    result = ' '.join([x for x in result if x is not None])
    return fileid+':'+str(senid)+'\t'+result + '\n'

def compare(item1, item2):
    """
    cmp-style ordering for mentions: by ascending 'start', and for equal
    starts by descending 'end' (the longer mention sorts first).

    :return: negative, zero or positive number as expected from a cmp function.
    """
    if item1['start'] == item2['start']:
        return item2['end'] - item1['end']
    return item1['start'] - item2['start']


# Script entry point: train the classifier named on the command line on the
# 'rm' data of the given dataset, predict on the test mentions and save the
# predictions.
# NOTE: `_feature_size`, `_label_size` and `_threshold` are assigned at module
# level on purpose -- classify() reads them as globals -- so they must keep
# these names and be set before classify() is called.
if __name__ == "__main__":
    # exactly five arguments are required (sys.argv[0] is the script itself)
    if len(sys.argv) != 6:
        print 'Usage: Classifier.py -CLASSIFIER (perceptron) -DATA(nyt_candidates) -LEARNING_RATE(0.003) -MAX_ITER(20) -THRESHOLD'
        exit(-1)

    model_name = sys.argv[1]
    # input and output directories are derived from the dataset name
    indir = 'data/intermediate/' + sys.argv[2] + '/rm'
    outdir = 'data/results/' + sys.argv[2] + '/rm'

    train_x_file = indir + '/mention_feature.txt'
    train_y_file = indir + '/mention_type.txt'

    test_x_file = indir + '/mention_feature_test.txt'
    test_y_file = outdir + '/prediction_' + model_name + '_null_null.txt'
    output = outdir + '/predictionInText_' + model_name +'_null_null.txt'

    # hierarchy_file = indir + '/supertype.txt'
    feature_file = indir + '/feature.txt'
    type_file = indir + '/type.txt'
    mention_file = indir + '/mention.txt'
    json_file = indir + '/test_new.json'

    _learning_rate = float(sys.argv[3])
    _max_iter = int(sys.argv[4])
    _threshold = float(sys.argv[5])

    # vocabulary sizes are the line counts of the feature and type files
    _feature_size = file_len(feature_file)
    _label_size = file_len(type_file)
    print 'Feature: %d, type: %d' %(_feature_size, _label_size)

    # load_as_list returns (indexes, data); element [1] is the per-mention data
    train_x = load_as_list(train_x_file)
    train_y = load_as_list(train_y_file)

    ### Train
    assert len(train_x[1]) == len(train_y[1])
    print 'Total number of training examples: %d' % len(train_x[1])
    print 'Start training'
    type_hierarchy = TypeSet(type_file, _label_size)
    model = classify(model_name, _feature_size, _label_size, train_x[1], train_y[1], _learning_rate, _max_iter, type_hierarchy)

    ### Test
    indexes, test_x = load_as_list(test_x_file)
    test_y = predict(model, test_x, type_hierarchy, _threshold)
    # save_from_list(test_y_file, indexes, test_y)
    # predictions are (label, score) pairs, hence the tuple writer
    save_from_tuples(test_y_file, indexes, test_y)

    ### Write inText Results
    # mention_mapping = load_map(mention_file, 'mention')
    # label_mapping = load_map(type_file, 'label')
    # clean_mentions = load_mention_type(test_y_file)
    # casestudy(json_file, output, mention_mapping, label_mapping, clean_mentions)



================================================
FILE: code/Classifier/Classifier_em.py
================================================
__author__ = 'xiang'
import sys
reload(sys)
sys.setdefaultencoding('utf8')

import json
from DataIO import *
from Perceptron import MultilabelPerceptron
from HierarchySVM import HierarchySVM
from PLSVM import PLSVM
from CLPL import CLPL
from Logistic import Logistic
from TypeHierarchy import TypeHierarchy

def classify(classifier, feature_size, label_size, train_x, train_y, learning_rate, max_iter, type_hierarchy, threshold=None):
	"""
	Build the classifier selected by name and train it with ``fit_em``.

	:param classifier: one of 'perceptron', 'plsvm', 'svm-pegasos', 'logistic'.
	:param feature_size: number of distinct features.
	:param label_size: number of distinct labels.
	:param train_x: list of feature-id lists, one per example.
	:param train_y: list of label-id lists, one per example.
	:param learning_rate: perceptron learning rate; used as ``lambda_reg`` for 'svm-pegasos'.
	:param max_iter: maximum number of training iterations.
	:param type_hierarchy: type hierarchy object handed to the hierarchical models.
	:param threshold: decision threshold for 'perceptron' and 'logistic'. When
		omitted, the module-level ``_threshold`` is used, as before.
	:return: the trained model. The process exits if the name is unknown.
	"""
	def _resolve_threshold():
		# Looked up lazily so that classifiers which do not need a threshold
		# never touch the module-level fallback.
		return _threshold if threshold is None else threshold

	model = None
	if classifier == 'perceptron':
		# use the arguments of this call rather than same-named module globals
		model = MultilabelPerceptron(feature_size=feature_size,
									 label_size=label_size,
									 learning_rate=learning_rate,
									 max_iter=max_iter,
									 threshold=_resolve_threshold())
	elif classifier == 'plsvm':
		model = PLSVM(feature_size=feature_size, label_size=label_size, type_hierarchy=type_hierarchy, lambda_reg=0.1, max_iter=max_iter, threshold=0, batch_size=1000)
	elif classifier == 'svm-pegasos':
		model = HierarchySVM(feature_size=feature_size, type_hierarchy=type_hierarchy._subtype_mapping, current_types=type_hierarchy._root, level=0, lambda_reg=learning_rate, max_iter=max_iter, threshold=-100)
	elif classifier == 'logistic':
		model = Logistic(feature_size=feature_size, label_size=label_size, threshold=_resolve_threshold())

	if model is None:
		print('Wrong classifier name given!')
		exit(0)
	model.fit_em(train_x, train_y)

	return model

def predict_em(model, test_x, type_hierarchy, _threshold):
	"""
	Predict a label set for every example and add the ancestors of each label.

	:param model: trained classifier whose ``predict_em(x)`` returns a mutable set of label ids.
	:param test_x: list of feature-id lists, one per example.
	:param type_hierarchy: object providing ``get_type_path(label)``.
	:param _threshold: not used by this function.
	:return: list with one label set per example.
	"""
	test_y = []
	for x in test_x:
		labels = model.predict_em(x)
		parents = set()
		for l in labels:
			p = type_hierarchy.get_type_path(l)
			# a path longer than one element means the label has ancestors
			if len(p) > 1:
				parents.update(p)
		labels.update(parents)
		test_y.append(labels) # "labels" could be empty set (see predict in Perceptron.py)
	return test_y


# Script entry point: train the classifier named on the command line on the
# 'em' data of the given dataset, predict label sets for the test mentions and
# save them.
# NOTE: `_feature_size`, `_label_size`, `_learning_rate`, `_max_iter` and
# `_threshold` are assigned at module level on purpose -- classify() reads them
# as globals -- so they must keep these names and be set before it is called.
if __name__ == "__main__":
	# exactly five arguments are required (sys.argv[0] is the script itself)
	if len(sys.argv) != 6:
		print 'Usage: Classifier_em.py -CLASSIFIER (perceptron) -DATA(nyt_candidates) -LEARNING_RATE(0.003) -MAX_ITER(20) -THRESHOLD'
		exit(-1)

	model_name = sys.argv[1]
	# input and output directories are derived from the dataset name
	indir = 'data/intermediate/' + sys.argv[2] + '/em'
	outdir = 'data/results/' + sys.argv[2] + '/em'

	train_x_file = indir + '/mention_feature.txt'
	train_y_file = indir + '/mention_type.txt'

	test_x_file = indir + '/mention_feature_test.txt'
	test_y_file = outdir + '/prediction_' + model_name + '_null_null.txt'

	hierarchy_file = indir + '/supertype.txt'
	feature_file = indir + '/feature.txt'
	type_file = indir + '/type.txt'
	mention_file = indir + '/mention.txt'
	json_file = indir + '/test_new.json'

	_learning_rate = float(sys.argv[3])
	_max_iter = int(sys.argv[4])
	_threshold = float(sys.argv[5])

	# vocabulary sizes are the line counts of the feature and type files
	_feature_size = file_len(feature_file)
	_label_size = file_len(type_file)
	print '#Features: %d, #Types: %d' %(_feature_size, _label_size)

	# load_as_list returns (indexes, data); element [1] is the per-mention data
	train_x = load_as_list(train_x_file)
	train_y = load_as_list(train_y_file)

	### Train
	assert len(train_x[1]) == len(train_y[1])
	print 'Total number of training examples: %d' % len(train_x[1])
	print 'Start training'
	type_hierarchy = TypeHierarchy(hierarchy_file, _label_size)
	model = classify(model_name, _feature_size, _label_size, train_x[1], train_y[1], _learning_rate, _max_iter, type_hierarchy)

	### Test
	indexes, test_x = load_as_list(test_x_file)
	test_y = predict_em(model, test_x, type_hierarchy, _threshold)
	# predictions are plain label ids, hence the list writer
	save_from_list(test_y_file, indexes, test_y)
	# save_from_tuples(test_y_file, indexes, test_y)

	### Write inText Results
	# mention_mapping = load_map(mention_file, 'mention')
	# label_mapping = load_map(type_file, 'label')
	# clean_mentions = load_mention_type(test_y_file)
	# casestudy(json_file, output, mention_mapping, label_mapping, clean_mentions)



================================================
FILE: code/Classifier/DataIO.py
================================================
__author__ = 'wenqihe'
from collections import defaultdict

def load_as_list(filename):
    """
    Load a two-column, tab-separated file of "index<TAB>value" integer rows.

    Consecutive rows that share the same index are grouped together.
    Blank lines are ignored and an empty file yields two empty lists.

    :param filename: path of the file to read.
    :return: tuple ``(indexes, data)`` where ``indexes[k]`` is the index of the
        k-th group and ``data[k]`` the sorted list of its values,
        e.g. ``([0, 1], [[0, 1, 2], [1, 2]])``.
    """
    indexes = []
    data = []
    current = None   # index of the group being collected
    features = []    # values collected for `current`
    with open(filename) as f:
        for line in f:
            line = line.strip('\r\n')
            if not line:
                continue
            seg = line.split('\t')
            index = int(seg[0])
            feature = int(seg[1])
            if features and index != current:
                # the index changed: close the previous group
                data.append(sorted(features))
                indexes.append(current)
                features = []
            current = index
            features.append(feature)
    if features:
        data.append(sorted(features))
        indexes.append(current)
    return indexes, data

def save_from_tuples(filename, indexes, data):
    """
    Write scored predictions as "index<TAB>label<TAB>score" lines.

    :param filename: path of the file to (over)write.
    :param indexes: list of mention indexes.
    :param data: ``data[i]`` is a collection of ``(label, score)`` pairs
        predicted for ``indexes[i]``; an empty collection writes nothing.
    :return: None
    """
    with open(filename, 'w') as f:
        # enumerate instead of xrange so the loop also runs on Python 3
        for i, index in enumerate(indexes):
            ### only detected RMs are written
            for pair in data[i]:
                f.write(str(index) + '\t' +str(pair[0]) + '\t' + str(pair[1]) + '\n')


def save_from_list(filename, indexes, data):
    """
    Write unscored predictions as "index<TAB>label<TAB>1" lines.

    :param filename: path of the file to (over)write.
    :param indexes: list of mention indexes.
    :param data: ``data[i]`` is the collection of labels predicted for
        ``indexes[i]``; an empty collection writes nothing.
    :return: None
    """
    with open(filename, 'w') as f:
        # enumerate instead of xrange so the loop also runs on Python 3
        for i, index in enumerate(indexes):
            ### only detected RMs are written
            for l in data[i]:
                f.write(str(index) + '\t' +str(l) + '\t1\n')

def load_as_dict(filename):
    """
    Load a two-column, tab-separated file of "index<TAB>feature id" rows as
    sparse feature dictionaries.

    Consecutive rows that share the same index are grouped together; each
    feature id is shifted by one and mapped to the value 1.
    Blank lines are ignored and an empty file yields two empty lists.

    :param filename: path of the file to read.
    :return: tuple ``(indexes, data)`` where ``data[k]`` is the dict
        ``{feature_id + 1: 1, ...}`` of the group with index ``indexes[k]``.
    """
    indexes = []
    data = []
    current = None   # index of the group being collected
    features = {}    # sparse vector collected for `current`
    with open(filename) as f:
        for line in f:
            line = line.strip('\r\n')
            if not line:
                continue
            seg = line.split('\t')
            index = int(seg[0])
            fid = int(seg[1]) + 1
            if features and index != current:
                # the index changed: close the previous group
                data.append(features)
                indexes.append(current)
                features = {}
            current = index
            features[fid] = 1
    if features:
        data.append(features)
        indexes.append(current)
    return indexes, data


def load_map(filename, mode):
    """
    Read a tab-separated two-column file into a dictionary.

    :param filename: path of the file to read.
    :param mode: 'mention' maps column 0 -> column 1, 'label' maps
        column 1 -> column 0; any other value yields an empty dictionary.
    :return: dict of strings.
    """
    if mode == 'mention':
        key_col, value_col = 0, 1
    elif mode == 'label':
        key_col, value_col = 1, 0
    else:
        key_col = value_col = None

    table = {}
    with open(filename) as handle:
        for raw in handle:
            fields = raw.strip('\r\n').split('\t')
            if key_col is not None:
                table[fields[key_col]] = fields[value_col]
    return table

def load_mention_type(filename):
    """
    Collect, for every value of the first tab-separated column, the set of
    values seen in the second column.

    :param filename: path of the file to read.
    :return: ``defaultdict(set)`` keyed by the first column (strings).
    """
    types_by_mention = defaultdict(set)
    with open(filename) as handle:
        rows = (raw.strip('\r\n').split('\t') for raw in handle)
        for row in rows:
            types_by_mention[row[0]].add(row[1])
    return types_by_mention

def file_len(filename):
    """
    Count the lines of a file.

    :param filename: path of the file to read.
    :return: number of lines; 0 for an empty file.
    """
    with open(filename) as f:
        # Summing avoids relying on a loop variable that is never bound when
        # the file has no lines.
        return sum(1 for _ in f)


================================================
FILE: code/Classifier/HierarchySVM.py
================================================
__author__ = 'wenqihe'

from MulticlassSVM import MulticlassSVM


class HierarchySVM:
    """
    Tree of one-vs-all multiclass SVMs that follows a type hierarchy.

    Every node classifies among ``current_types``; nodes below the root have an
    additional "other" class (type id -1, class id 0). A child node is created
    for each type that has subtypes in ``type_hierarchy``.
    """

    def __init__(self, feature_size, type_hierarchy, current_types, level=0, lambda_reg=0.1, max_iter=5000, threshold=0.1):
        """
        :param feature_size: number of distinct features.
        :param type_hierarchy: mapping type id -> collection of subtype ids.
        :param current_types: type ids handled by this node.
        :param level: depth of this node; 0 is the root.
        :param lambda_reg: regularisation strength passed to the SVMs.
        :param max_iter: iteration count passed to the SVMs.
        :param threshold: minimum score for accepting this node's prediction.
        """
        # nodes below the root reserve one extra class for "other"
        class_count = len(current_types) if level == 0 else len(current_types) + 1
        self._svm = MulticlassSVM(feature_size, class_count, lambda_reg, max_iter, 'ova')
        self._typemapping = {}  # map type_id to class_id in this level
        self._classmapping = {}  # map class_id to type_id in this level
        self._children = {}  # map type_id to subtype classifier if exits
        self._level = level
        self._threshold = threshold
        class_id = 0
        # add other class
        if level != 0:
            self._typemapping[-1] = class_id
            self._classmapping[class_id] = -1
            class_id += 1
        for t in current_types:
            self._typemapping[t] = class_id
            self._classmapping[class_id] = t
            # check if t has subtypes
            if t in type_hierarchy:
                # NOTE(review): `threshold` is not forwarded, so child nodes
                # always use the default 0.1 -- confirm this is intended.
                self._children[t] = HierarchySVM(feature_size, type_hierarchy, type_hierarchy[t], level+1, lambda_reg, max_iter)
            class_id += 1

    def fit_em(self, train_x, train_y):
        """
        Train this node and, recursively, all child nodes.

        :param train_x: list of feature-id lists, one per example.
        :param train_y: list of label-id collections, one per example.
        :return: None
        """
        new_train_x = []
        new_train_y = []
        for i, y in enumerate(train_y):
            x = train_x[i]
            matched = False
            # an example is added once per label known at this level
            for l in y:
                if l in self._typemapping:
                    matched = True
                    new_train_x.append(x)
                    new_train_y.append(self._typemapping[l])
            if not matched:
                # No label of this level: train as class 0 ("other" below the
                # root). NOTE(review): at the root, class 0 is the first real
                # type -- confirm root examples always carry a root label.
                new_train_x.append(x)
                new_train_y.append(0)
        if len(new_train_y)>0:
            self._svm.fit(new_train_x, new_train_y)

        # train children svm
        for child in self._children:
            # a child only sees the examples labelled with its parent type
            picked = [i for i, y in enumerate(train_y) if child in y]
            child_x = [train_x[i] for i in picked]
            child_y = [train_y[i] for i in picked]
            print("Train child svm for label %d, example:#%d" % (child, len(child_y)))
            self._children[child].fit_em(child_x, child_y)

    def predict_em(self, x):
        """
        Predict the set of type ids for one example.

        :param x: list of feature ids.
        :return: set of type ids; empty when this node predicts "other".
        """
        labels = set()
        c,score = self._svm.predict(x)
        label = self._classmapping[c]
        if label == -1:
            return labels
        if score>self._threshold:
            labels.add(label)
            if label in self._children:
                # descend into the subtype classifier
                labels.update(self._children[label].predict_em(x))
        elif self._level==0:
            # the root always contributes its best type, even below threshold
            labels.add(label)
        return labels


================================================
FILE: code/Classifier/Logistic.py
================================================
__author__ = 'xiang'

import sys
from liblinearutil import *

class Logistic:
	"""L2-regularised logistic regression backed by liblinear."""

	def __init__(self, feature_size, label_size, threshold):
		"""
		:param feature_size: number of distinct features.
		:param label_size: number of distinct labels.
		:param threshold: minimum score for a label to be returned by predict_em.
		"""
		self._feature_size = feature_size
		self._label_size = label_size
		self.model = None
		self.threshold = threshold
		# self._max_iter = 50
		# print 'max_iter = ', max_iter

	def fit(self, train_x, train_y):
		"""
		train_x: list of feature ids
		train_y: list of [labels]

		An example with several labels is added once per label.
		"""
		assert len(train_x) == len(train_y)
		y = []
		x = []
		for i in range(len(train_x)):
			# liblinear feature indices start at 1, hence the shift
			feature = {}
			for fid in train_x[i]:
				feature[fid + 1] = 1.0
			for j in range(len(train_y[i])):
				y.append(float(train_y[i][j]))
				x.append(feature)

		prob  = problem(y, x)
		param = parameter('-s 0 -c 1 -n 35 -q')
		self.model = train(prob, param) # L2-Logistic
		print('Finish training.')

	def fit_em(self, train_x, train_y):
		"""Same training procedure as fit."""
		self.fit(train_x, train_y)

	### give the best label
	def predict(self, train_x):
		"""
		Predict the single best label of one example.

		:param train_x: list of feature ids of the example.
		:return: set holding one ``(label, value)`` pair, or an empty set when
			the value could not be looked up.
		"""
		x = {}
		last_fid = None  # reported on failure; stays None for an empty example
		for fid in train_x:
			x[fid + 1] = 1.0
			last_fid = fid
		p_label, p_acc, p_vals = predict([], [x], self.model, '-q')
		labels = set()
		try:
			# NOTE(review): the value is looked up at position int(label);
			# confirm the model's internal label order matches the label ids.
			labels.add((p_label[0], p_vals[0][int(p_label[0])]))
		except Exception:
			# Best effort: report and return no label. A narrow handler keeps
			# KeyboardInterrupt/SystemExit working, and last_fid is always bound.
			print('rm:  %s failed!!' % (last_fid,))
		return labels

	# predict multiple labels for an EM
	def predict_em(self, train_x):
		"""
		Predict every label whose probability estimate exceeds the threshold.

		:param train_x: list of feature ids of the example.
		:return: set of positions in the probability vector.
		"""
		x = {}
		for fid in train_x:
			x[fid + 1] = 1.0
		p_label, p_acc, p_vals = predict([], [x], self.model, '-b 1 -q')

		labels = set()
		### over threshold
		# NOTE(review): positions are returned as label ids; confirm they
		# coincide with the model's label order.
		for i in range(len(p_vals[0])):
			if p_vals[0][i] > self.threshold:
				labels.add(i)

		return labels










================================================
FILE: code/Classifier/MulticlassSVM.py
================================================
__author__ = 'wenqihe'

from SVM import SVM


class MulticlassSVM:
    """
    Multiclass classifier assembled from binary SVMs, either one-vs-all
    ('ova', one model per label) or all-vs-all ('ava', one model per label pair).
    """

    def __init__(self, feature_size, label_size, lambda_reg=0.1, max_iter=5000, mode='ova'):
        """
        :param feature_size: number of distinct features.
        :param label_size: number of classes.
        :param lambda_reg: regularisation strength passed to every SVM.
        :param max_iter: iteration count passed to every SVM.
        :param mode: 'ova' or 'ava'; any other value terminates the process.
        """
        self._feature_size = feature_size
        self._label_size = label_size
        self._lambda_reg = lambda_reg
        self._max_iter = max_iter
        self._models = list()
        self._mode = mode
        if self._mode == 'ova':
            for i in range(self._label_size):
                self._models.append(self._new_svm())
        elif self._mode == 'ava':
            # row i holds the models for the pairs (i, i+1) ... (i, label_size-1)
            for i in range(self._label_size-1):
                self._models.append([self._new_svm() for j in range(i+1, self._label_size)])
        else:
            print('Parameter error: only support one-vs-all and all-vs-all')
            exit(1)

    def _new_svm(self):
        # Build one binary SVM with this classifier's hyper-parameters.
        return SVM(feature_size=self._feature_size, lambda_reg=self._lambda_reg, max_iter=self._max_iter)

    def fit(self, train_x, train_y):
        """
        Train all binary models.

        :param train_x: list of list. [[1,2,4],[2,3],[1,4],[0,4,5,6],[2]]. Each row is an example.
        :param train_y: list. [1,0,2,3,4]
        :return:
        """
        m = len(train_y)
        if self._mode == 'ova':
            for i in range(self._label_size):
                # label i is the positive class, everything else is negative
                new_train_y = [1 if train_y[j] == i else -1 for j in range(m)]
                self._models[i].fit(train_x, new_train_y)
        elif self._mode == 'ava':
            for i in range(self._label_size-1):
                for j in range(i+1, self._label_size):
                    # keep only the examples of the two competing labels
                    new_train_x = []
                    new_train_y = []
                    for k in range(m):
                        if train_y[k] == i:
                            new_train_x.append(train_x[k])
                            new_train_y.append(1)
                        elif train_y[k] == j:
                            new_train_x.append(train_x[k])
                            new_train_y.append(-1)
                    self._models[i][j-i-1].fit(new_train_x, new_train_y)


    def predict(self, x):
        """
        Predict the class of one example.

        :param x: list of feature ids.
        :return: ``(label, value)``; the value is the winning SVM score in 'ova'
            mode and the number of pairwise wins in 'ava' mode. Ties go to the
            lowest label.
        """
        if self._mode == 'ova':
            max_label = 0
            max_prob = self._models[0].predict_prob(x)
            for i in range(1, self._label_size):
                p = self._models[i].predict_prob(x)
                if p > max_prob:
                    max_label = i
                    max_prob = p
            return max_label, max_prob
        elif self._mode == 'ava':
            win = [0 for row in range(self._label_size)]
            for i in range(self._label_size-1):
                for j in range(i+1, self._label_size):
                    # the pair model votes for i on +1 and for j otherwise
                    if self._models[i][j-i-1].predict(x) == 1:
                        win[i]+=1
                    else:
                        win[j]+=1
            max_label = 0
            max_prob = win[0]
            for i in range(1, self._label_size):
                if win[i] > max_prob:
                    max_label = i
                    max_prob = win[i]
            return max_label, max_prob



================================================
FILE: code/Classifier/PLSVM.py
================================================
from __future__ import division
__author__ = 'wenqihe'

import sys
import random
import math


class PLSVM:
    """
    Partial-label linear SVM trained with stochastic sub-gradient steps.

    One weight vector is kept per label. Each step pushes up the best-scoring
    candidate label of a sampled example and pushes down its best-scoring
    non-candidate label.
    """

    def __init__(self, feature_size, label_size, type_hierarchy, lambda_reg=0.1, max_iter=5000, threshold=0.5, batch_size=100):
        """
        :param feature_size: number of distinct features.
        :param label_size: number of distinct labels.
        :param type_hierarchy: object whose ``_type_hierarchy`` maps a type id to its parent id.
        :param lambda_reg: regularisation strength.
        :param max_iter: upper bound (exclusive) of the iteration counter, which starts at 1.
        :param threshold: minimum score for adding a subtype in predict.
        :param batch_size: number of examples sampled per iteration.
        """
        self._feature_size = feature_size
        self._label_size = label_size
        self._type_hierarchy = type_hierarchy
        # every weight starts at a value drawn uniformly from [0, 1]
        self._weight = [[random.uniform(0, 1) for col in range(feature_size)] for row in range(label_size)]
        self._lambda_reg = lambda_reg
        self._max_iter = max_iter
        self._threshold = threshold
        self._batch_size = batch_size

    def fit(self, train_x, train_y):
        """
        :param train_x: list of list
        :param train_y: list of list
        :return:
        """
        m = len(train_y)
        if m == 0:
            # nothing to sample from
            return
        for t in range(1, self._max_iter):
            eta_t = 1.0/(self._lambda_reg*t)
            shrink = 1-eta_t*self._lambda_reg

            for j in range(self._batch_size):
                i = random.randint(0, m-1)
                x = train_x[i]
                y = train_y[i]
                ny = [k for k in range(self._label_size) if k not in y]
                if not y or not ny:
                    # no candidate label, or no competing label: no update
                    continue
                yi = self.find_max(y, x)
                nyi = self.find_max(ny, x)
                for feature in x:
                    self._weight[yi][feature] = self._weight[yi][feature]*shrink + eta_t
                    self._weight[nyi][feature] = self._weight[nyi][feature]*shrink - eta_t

            sys.stdout.write('{0} iteration done.\r'.format(t))
            sys.stdout.flush()

    def fit_em(self, train_x, train_y):
        """Alias of fit, offering the interface the EM classifier script calls."""
        self.fit(train_x, train_y)

    def predict(self, x):
        """
        Predict the label set of one example.

        The best-scoring label is returned together with all of its ancestors
        and with the chain of best-scoring subtypes; a subtype is added only
        when its score exceeds the threshold.

        :param x: list of feature ids.
        :return: set of label ids.
        """
        labels = set()
        parent_mapping = self._type_hierarchy._type_hierarchy
        scores = []
        max_index = 0
        max_value = self.inner_prod(self._weight[0], x)
        scores.append(max_value)
        for i in range(1, self._label_size):
            temp = self.inner_prod(self._weight[i], x)
            scores.append(temp)
            if temp>max_value:
                max_index = i
                max_value = temp
        labels.add(max_index)
        # Add parent of max_index if any
        temp = max_index
        while temp in parent_mapping:
            labels.add(parent_mapping[temp])
            temp = parent_mapping[temp]

        # add child of max_index if meeting threshold
        temp = max_index
        while temp != -1:
            max_sub_index = -1
            max_sub_score = float('-inf')
            for child in parent_mapping:
                # check the maximum subtype
                if parent_mapping[child] == temp:
                    if child < self._label_size:
                        if max_sub_score < scores[child]:
                            max_sub_index = child
                            max_sub_score = scores[child]
            if max_sub_index != -1 and max_sub_score > self._threshold:
                labels.add(max_sub_index)
            # keep descending through the best subtype until a leaf is reached
            temp = max_sub_index
        return labels

    def predict_em(self, x):
        """Alias of predict, offering the interface the EM classifier script calls."""
        return self.predict(x)

    def find_max(self, Y, x):
        """
        Return the label of ``Y`` with the highest score for ``x``.

        Candidates are visited in random order so that ties are broken at
        random. ``Y`` itself is left untouched.

        :param Y: non-empty collection of label ids.
        :param x: list of feature ids.
        :return: label id.
        """
        # shuffle a copy: Y is usually a row of the caller's training labels
        candidates = list(Y)
        random.shuffle(candidates)
        y = candidates[0]
        max_value = self.inner_prod(self._weight[y], x)
        for i in range(1, len(candidates)):
            temp = self.inner_prod(self._weight[candidates[i]], x)
            if temp > max_value:
                y = candidates[i]
                max_value = temp
        return y

    def update_weight(self, dW, eta_t, m):
        """
        Apply a dense gradient step: ``W = W*(1 - eta_t*lambda) + eta_t*dW/m``.

        :param dW: label_size x feature_size matrix of accumulated updates.
        :param eta_t: step size.
        :param m: number of examples that dW was accumulated over.
        """
        for i in range(self._label_size):
            for j in range(self._feature_size):
                self._weight[i][j] = self._weight[i][j]*(1-eta_t*self._lambda_reg) + eta_t*dW[i][j]/m

    @staticmethod
    def inner_prod(weight, x):
        """Sum the weights of the feature ids listed in ``x``."""
        result = 0
        for feature in x:
            result += weight[feature]
        return result

    @staticmethod
    def kernel(x1, x2):
        """Count the ids shared by two sorted feature-id lists."""
        i1 = 0
        i2 = 0
        result = 0
        while i1<len(x1) and i2<len(x2):
            if x1[i1] == x2[i2]:
                result += 1
                i1 += 1
                i2 += 1
            elif x1[i1] < x2[i2]:
                i1 += 1
            else:
                i2 += 1
        return result


================================================
FILE: code/Classifier/Perceptron.py
================================================
__author__ = 'wenqihe'
import sys



class MultilabelPerceptron:
    """
    Multi-label perceptron over sparse binary features.

    One weight vector is kept per label; the score of a label is the sum of
    its weights over the feature ids of the example.
    """

    def __init__(self, feature_size, label_size, weights=None, learning_rate=0.003, max_iter=1, threshold=0.3):
        """
        :param feature_size: number of distinct features.
        :param label_size: number of distinct labels.
        :param weights: optional label_size x feature_size weight matrix; zeros when omitted.
        :param learning_rate: step added to or subtracted from a weight on a mistake.
        :param max_iter: number of passes over the training data.
        :param threshold: minimum score for a label to be predicted.
        """
        if weights is None:
            self._weights = [[0 for col in range(feature_size)] for row in range(label_size)]
        else:
            self._weights = weights
        self._feature_size = feature_size
        self._label_size = label_size
        self._learning_rate = learning_rate
        self._max_iter = max_iter
        self.threshold = threshold
        print('max_iter =  %s' % (max_iter,))

    def _score(self, label, x):
        # Score of `label` for the example with feature ids `x`.
        row = self._weights[label]
        result = 0
        for feature in x:
            result += row[feature]
        return result

    def _adjust(self, label, x, delta):
        # Add `delta` to the weights of `label` for every feature id in `x`.
        row = self._weights[label]
        for feature in x:
            row[feature] += delta

    def fit(self, train_x, train_y):
        """
        Train with the single-best-label predictor (see predict).

        :param train_x: list of feature-id lists, one per example.
        :param train_y: list of label-id collections, one per example.
        """
        for epoch in range(self._max_iter):
            sys.stdout.write('{0} iteration done.\r'.format(epoch))
            sys.stdout.flush()
            for i in range(len(train_x)):
                x = train_x[i]
                y = train_y[i]
                l_set = set()
                for pair in self.predict(x):
                    l = pair[0]
                    l_set.add(l)
                    if l not in y:
                        # predicted but wrong: push the score down
                        self._adjust(l, x, -self._learning_rate)
                for l in y:
                    if l not in l_set:
                        # correct but missed: push the score up
                        self._adjust(l, x, self._learning_rate)
        print('Finish training.')


    def fit_em(self, train_x, train_y):
        """
        Train with the multi-label predictor (see predict_em).

        :param train_x: list of feature-id lists, one per example.
        :param train_y: list of label-id collections, one per example.
        """
        for epoch in range(self._max_iter):
            sys.stdout.write('{0} iteration done.\r'.format(epoch))
            sys.stdout.flush()
            for i in range(len(train_x)):
                x = train_x[i]
                y = train_y[i]
                predictions = self.predict_em(x)
                for l in predictions:
                    if l not in y:
                        self._adjust(l, x, -self._learning_rate)
                for l in y:
                    if l not in predictions:
                        self._adjust(l, x, self._learning_rate)
        print('Finish training.')

    ### give the best label
    def predict(self, x):
        """
        Predict the best label of one example.

        :param x: list of feature ids.
        :return: set holding the ``(label, score)`` pair of the best-scoring
            label when its score exceeds the threshold, otherwise an empty set.
        """
        labels = set()
        maxid = 0
        # start below every possible score; a fixed sentinel such as -1 would
        # wrongly elect label 0 whenever all scores are <= -1
        maxscore = float('-inf')
        for i in range(self._label_size):
            result = self._score(i, x)
            if result > maxscore:
                maxid = i
                maxscore = result
        if maxscore > self.threshold:
            labels.add((maxid, maxscore))
        return labels

    def predict_em(self, x):
        """
        Predict the label set of one example.

        :param x: list of feature ids.
        :return: set of all labels scoring above the threshold, or, when there
            is none, the single best-scoring label.
        """
        labels = set()
        maxid = 0
        maxscore = float('-inf')  # see predict
        for i in range(self._label_size):
            result = self._score(i, x)
            if result > self.threshold:
                labels.add(i)
            if result > maxscore:
                maxid = i
                maxscore = result
        if len(labels) == 0:
            labels.add(maxid)
        return labels


================================================
FILE: code/Classifier/SVM.py
================================================
from __future__ import division
__author__ = 'wenqihe'

import random

MIN_SCALING_FACTOR = 0.0000001


class SVM:
    """
    Use pegasos algorithm to train SVM.

    Binary linear classifier over sparse binary features: the score of an
    example is the sum of the weights of its feature ids.
    """
    def __init__(self, feature_size, lambda_reg=0.1, max_iter=50):
        """
        :param feature_size: number of distinct features.
        :param lambda_reg: regularisation strength.
        :param max_iter: upper bound (exclusive) of the iteration counter, which starts at 1.
        """
        self._feature_size = feature_size
        self._weight = [0 for col in range(feature_size)]
        self._lambda_reg = lambda_reg
        self._max_iter = max_iter


    def fit(self, train_x, train_y):
        """
        :param train_x: list of list
        :param train_y: list of 1/-1
        :return:
        """
        m = len(train_y)
        if m == 0:
            # nothing to sample from
            return
        for t in range(1, self._max_iter):
            # step size and shrink factor only depend on t
            eta_t = 1.0/(self._lambda_reg*t)
            shrink = 1-eta_t*self._lambda_reg
            # 1000 randomly drawn examples per iteration
            for temp in range(1000):
                i = random.randint(0, m-1)
                x = train_x[i]
                y = train_y[i]
                p = self.predict_prob(x)
                if y*p < 1:
                    # margin violated: shrink and move towards the label
                    for feature in x:
                        self._weight[feature] = shrink*self._weight[feature]+eta_t*y
                else:
                    for feature in x:
                        self._weight[feature] *= shrink

    def predict(self, x):
        """Return 1 when the score of ``x`` is non-negative, otherwise -1."""
        prob = self.predict_prob(x)
        if prob >= 0:
            return 1
        else:
            return -1

    def L2_regularize(self, eta_t):
        """Scale all weights by ``1 - eta_t*lambda``, bounded below by MIN_SCALING_FACTOR."""
        scaling_factor = 1.0 - (eta_t * self._lambda_reg)
        if scaling_factor < MIN_SCALING_FACTOR:
            scaling_factor = MIN_SCALING_FACTOR
        for i in range(self._feature_size):
            self._weight[i] *= scaling_factor

    def predict_prob(self, x):
        """Return the raw score of ``x``: the sum of the weights of its feature ids."""
        result = 0.0
        for feature in x:
            result += self._weight[feature]
        return result


    @staticmethod
    def kernel(x1, x2):
        """Count the ids shared by two sorted feature-id lists."""
        i1 = 0
        i2 = 0
        result = 0
        while i1<len(x1) and i2<len(x2):
            if x1[i1] == x2[i2]:
                result += 1
                i1 += 1
                i2 += 1
            elif x1[i1] < x2[i2]:
                i1 += 1
            else:
                i2 += 1
        return result


================================================
FILE: code/Classifier/TypeHierarchy.py
================================================
__author__ = 'xiang'

from collections import defaultdict

class TypeSet:
    """
    Flat set of types read from a "name<TAB>id<TAB>frequency" file.

    It offers the same interface as TypeHierarchy, but the parent and subtype
    mappings stay empty, so every type is treated as a root type.
    """

    def __init__(self, file_name, number_of_types):
        """
        :param file_name: path of the type file.
        :param number_of_types: not used by this class.
        """
        self._type_hierarchy = {} # type -> [parent type]
        self._subtype_mapping = defaultdict(list) # type -> [subtype]
        self._root = set() # root types (on 1-level)
        with open(file_name) as f:
            for line in f:
                try:
                    type_name, tid, freq = line.strip('\r\n').split('\t')
                    self._root.add(int(tid))
                except ValueError as e:
                    # Best effort: a line without exactly three fields, or with
                    # a non-integer id, is reported and skipped.
                    print(e)

    def get_type_path(self, label):
        """
        Return the path from the top-most ancestor of ``label`` down to ``label``.

        :param label: type id.
        :return: list of type ids; ``[label]`` when the label has no parent.
        """
        if label in self._type_hierarchy:  # label has super type
            path = [label]
            while label in self._type_hierarchy:
                path.append(self._type_hierarchy[label])
                label = self._type_hierarchy[label]
            path.reverse()
            return path
        else:  # label is the root type
            return [label]

    def get_subtypes(self, label):
        """Return the list of direct subtypes of ``label``, or None if it has none."""
        if label in self._subtype_mapping:
            return self._subtype_mapping[label]
        else:
            return None

class TypeHierarchy:
    """Tree of integer type ids loaded from a ``child<TAB>parent`` file."""

    def __init__(self, file_name, number_of_types):
        """Read the child/parent pairs and derive the list of root types.

        Roots are the ids in ``range(number_of_types)`` that never appear in
        the child column of the file.
        """
        self._type_hierarchy = {}  # child id -> parent id
        self._subtype_mapping = defaultdict(list)  # parent id -> [child ids]
        children = set()
        with open(file_name) as handle:
            for raw_line in handle:
                fields = raw_line.strip('\r\n').split('\t')
                parent = int(fields[1])
                child = int(fields[0])
                self._type_hierarchy[child] = parent
                self._subtype_mapping[parent].append(child)
                children.add(child)
        self._root = list(set(range(0, number_of_types)).difference(children))

    def get_type_path(self, label):
        """Return the chain of types from the top-most ancestor down to ``label``."""
        chain = [label]
        node = label
        while node in self._type_hierarchy:
            node = self._type_hierarchy[node]
            chain.append(node)
        chain.reverse()
        return chain

    def get_subtypes(self, label):
        """Return the direct subtypes of ``label``, or None if it has none."""
        # dict.get does not trigger the defaultdict factory, so no key is created.
        return self._subtype_mapping.get(label)


================================================
FILE: code/Classifier/liblinear.py
================================================
#!/usr/bin/env python

from ctypes import *
from ctypes.util import find_library
from os import path
import sys

__all__ = ['liblinear', 'feature_node', 'gen_feature_nodearray', 'problem',
           'parameter', 'model', 'toPyModel', 'L2R_LR', 'L2R_L2LOSS_SVC_DUAL',
           'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL', 'MCSVM_CS', 
           'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', 'L2R_L2LOSS_SVR', 
           'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL', 'print_null']

# Load the LIBLINEAR shared library.  The copy located relative to this file is
# tried first; if anything at all goes wrong there (the bare ``except`` catches
# every exception), fall back to whatever ctypes can find on the system.
try:
	dirname = path.dirname(path.abspath(__file__))
	if sys.platform == 'win32':
		liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll'))
	else:
		liblinear = CDLL(path.join(dirname, './liblinear.so.3'))
except:
# For unix the prefix 'lib' is not considered.
	if find_library('linear'):
		liblinear = CDLL(find_library('linear'))
	elif find_library('liblinear'):
		liblinear = CDLL(find_library('liblinear'))
	else:
		raise Exception('LIBLINEAR library not found.')

# Solver identifiers accepted by the ``-s`` option (stored in parameter.solver_type).
# Values 0-7 are the classification solvers, 11-13 the regression (SVR) solvers.
L2R_LR = 0
L2R_L2LOSS_SVC_DUAL = 1 
L2R_L2LOSS_SVC = 2 
L2R_L1LOSS_SVC_DUAL = 3
MCSVM_CS = 4 
L1R_L2LOSS_SVC = 5 
L1R_LR = 6 
L2R_LR_DUAL = 7  
L2R_L2LOSS_SVR = 11
L2R_L2LOSS_SVR_DUAL = 12
L2R_L1LOSS_SVR_DUAL = 13

# ctypes callback type for the print hook: takes a C string, returns nothing.
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
def print_null(s):
	"""Discard ``s``; used as the print hook for quiet mode."""
	pass

def genFields(names, types):
	"""Pair field names with field types as a list of (name, type) tuples."""
	return [(field_name, field_type) for field_name, field_type in zip(names, types)]

def fillprototype(f, restype, argtypes):
	"""Attach the return type and argument types to the function object ``f``."""
	for attribute, value in (('restype', restype), ('argtypes', argtypes)):
		setattr(f, attribute, value)

class feature_node(Structure):
	"""ctypes structure holding one sparse feature as an (index, value) pair."""
	_names = ["index", "value"]
	_types = [c_int, c_double]
	_fields_ = genFields(_names, _types)

	def __str__(self):
		# Rendered as "index:value".
		pair = (self.index, self.value)
		return '%d:%g' % pair

def gen_feature_nodearray(xi, feature_max=None, issparse=True):
	"""Convert one instance into a ctypes array of feature_node.

	xi          -- dict {index: value}, or a list/tuple of values whose first
	               element is treated as feature index 1.
	feature_max -- when truthy, features with a larger index are dropped.
	issparse    -- when true, zero-valued features are dropped.

	Returns (array, max_idx).  The array has two extra trailing nodes, both
	with index -1: the second-to-last is the slot later overwritten with the
	bias node, the last one terminates the array.  max_idx is the largest
	feature index kept (0 if none).

	Raises TypeError when xi is not a dict, list or tuple.
	"""
	if isinstance(xi, dict):
		index_range = xi.keys()
	elif isinstance(xi, (list, tuple)):
		# list(xi) is required: "[0] + xi" raises TypeError when xi is a tuple,
		# although tuples are explicitly accepted by the isinstance check.
		xi = [0] + list(xi)  # idx should start from 1
		index_range = range(1, len(xi))
	else:
		raise TypeError('xi should be a dictionary, list or tuple')

	if feature_max:
		assert(isinstance(feature_max, int))
		index_range = filter(lambda j: j <= feature_max, index_range)
	if issparse:
		index_range = filter(lambda j:xi[j] != 0, index_range)

	# sorted() also materialises the filter iterators returned on Python 3.
	index_range = sorted(index_range)
	ret = (feature_node * (len(index_range)+2))()
	ret[-1].index = -1 # for bias term
	ret[-2].index = -1
	for idx, j in enumerate(index_range):
		ret[idx].index = j
		ret[idx].value = xi[j]
	max_idx = 0
	if index_range :
		max_idx = index_range[-1]
	return ret, max_idx

class problem(Structure):
	"""ctypes structure describing a training set for the LIBLINEAR library.

	Fields: l (number of instances), n (largest feature index, including the
	bias feature when one is active), y (labels), x (per-instance feature_node
	arrays) and bias.
	"""
	_names = ["l", "n", "y", "x", "bias"]
	_types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
	_fields_ = genFields(_names, _types)

	def __init__(self, y, x, bias = -1):
		"""Build the problem from labels ``y`` and instances ``x``.

		Each element of ``x`` is converted with gen_feature_nodearray.
		Raises ValueError when ``y`` and ``x`` differ in length.
		"""
		if len(y) != len(x) :
			raise ValueError("len(y) != len(x)")
		self.l = l = len(y)
		self.bias = -1

		max_idx = 0
		# x_space keeps the Python-side references to the node arrays alive.
		x_space = self.x_space = []
		for xi in x:
			tmp_xi, tmp_idx = gen_feature_nodearray(xi)
			x_space += [tmp_xi]
			max_idx = max(max_idx, tmp_idx)
		self.n = max_idx

		self.y = (c_double * l)()
		for i, yi in enumerate(y): self.y[i] = yi

		self.x = (POINTER(feature_node) * l)()
		for i, xi in enumerate(self.x_space): self.x[i] = xi

		self.set_bias(bias)

	def set_bias(self, bias):
		"""Set the bias term and rewrite the bias slot of every instance.

		A non-negative bias is stored as an extra feature with index ``n``
		(``n`` grows by one when a bias feature is first enabled and shrinks by
		one when it is disabled).  A negative bias writes index -1 into the
		slot.
		"""
		if self.bias == bias:
			return
		# All four sign combinations are handled; previously ``node`` stayed
		# unbound when old and new bias differed but had the same sign
		# (e.g. -1 -> -2 or 1 -> 2), raising UnboundLocalError below.
		if bias >= 0:
			if self.bias < 0:
				self.n += 1
			node = feature_node(self.n, bias)
		else:
			if self.bias >= 0:
				self.n -= 1
			node = feature_node(-1, bias)

		for xi in self.x_space:
			xi[-2] = node
		self.bias = bias


class parameter(Structure):
	"""Training options for the LIBLINEAR library.

	The entries of ``_names`` are ctypes fields of the structure; the other
	attributes set in set_to_default_values (bias, the flag_* switches,
	nr_fold, print_func) live only on the Python object.
	"""
	_names = ["solver_type", "eps", "C", "nr_thread", "nr_weight", "weight_label", "weight", "p", "init_sol"]
	_types = [c_int, c_double, c_double, c_int, c_int, POINTER(c_int), POINTER(c_double), c_double, POINTER(c_double)]
	_fields_ = genFields(_names, _types)

	def __init__(self, options = None):
		"""Parse ``options`` (a string or a list of tokens); None means no options."""
		self.parse_options('' if options is None else options)

	def __str__(self):
		"""List every structure field and Python-side attribute as "name: value" lines."""
		attrs = parameter._names + list(self.__dict__.keys())
		text = ''.join(' %s: %s\n' % (attr, getattr(self, attr)) for attr in attrs)
		return text.strip()

	def set_to_default_values(self):
		"""Reset every option to its default."""
		# Structure fields.
		self.solver_type = L2R_L2LOSS_SVC_DUAL
		self.eps = float('inf')  # sentinel: replaced by a per-solver default in parse_options
		self.C = 1
		self.p = 0.1
		self.nr_thread = 1
		self.nr_weight = 0
		self.weight_label = None
		self.weight = None
		self.init_sol = None
		# Python-only attributes.
		self.bias = -1
		for flag_name in ('flag_cross_validation', 'flag_C_specified',
		                  'flag_solver_specified', 'flag_find_C', 'flag_omp'):
			setattr(self, flag_name, False)
		self.nr_fold = 0
		self.print_func = cast(None, PRINT_STRING_FUN)

	def parse_options(self, options):
		"""Reset to defaults, then apply the command-line style ``options``.

		Raises TypeError when ``options`` is neither a list nor a str, and
		ValueError for an unknown option, a fold count below 2, or a solver
		that is incompatible with ``-C`` / ``-n``.
		"""
		if isinstance(options, str):
			argv = options.split()
		elif isinstance(options, list):
			argv = options
		else:
			raise TypeError("arg 1 should be a list or a str.")
		self.set_to_default_values()
		self.print_func = cast(None, PRINT_STRING_FUN)
		weight_label = []
		weight = []

		pos = 0
		while pos < len(argv):
			opt = argv[pos]
			if opt == "-s":
				pos += 1
				self.solver_type = int(argv[pos])
				self.flag_solver_specified = True
			elif opt == "-c":
				pos += 1
				self.C = float(argv[pos])
				self.flag_C_specified = True
			elif opt == "-p":
				pos += 1
				self.p = float(argv[pos])
			elif opt == "-e":
				pos += 1
				self.eps = float(argv[pos])
			elif opt == "-B":
				pos += 1
				self.bias = float(argv[pos])
			elif opt == "-v":
				pos += 1
				self.flag_cross_validation = 1
				self.nr_fold = int(argv[pos])
				if self.nr_fold < 2:
					raise ValueError("n-fold cross validation: n must >= 2")
			elif opt == "-n":
				pos += 1
				self.flag_omp = True
				self.nr_thread = int(argv[pos])
			elif opt.startswith("-w"):
				# "-w<label> <weight>": the label is embedded in the option itself.
				pos += 1
				self.nr_weight += 1
				weight_label.append(int(opt[2:]))
				weight.append(float(argv[pos]))
			elif opt == "-q":
				self.print_func = PRINT_STRING_FUN(print_null)
			elif opt == "-C":
				self.flag_find_C = True
			else:
				raise ValueError("Wrong options")
			pos += 1

		liblinear.set_print_string_function(self.print_func)
		# Copy the collected per-class weights into C arrays.
		self.weight_label = (c_int*self.nr_weight)()
		self.weight = (c_double*self.nr_weight)()
		for k in range(self.nr_weight):
			self.weight[k] = weight[k]
			self.weight_label[k] = weight_label[k]

		# default solver for parameter selection is L2R_L2LOSS_SVC
		if self.flag_find_C:
			if not self.flag_cross_validation:
				self.nr_fold = 5
			if not self.flag_solver_specified:
				self.solver_type = L2R_L2LOSS_SVC
				self.flag_solver_specified = True
			elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC]:
				raise ValueError("Warm-start parameter search only available for -s 0 and -s 2")
		if self.flag_omp:
			if not self.flag_solver_specified:
				self.solver_type = L2R_L2LOSS_SVC
				self.flag_solver_specified = True
			elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC, L2R_L2LOSS_SVR, L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL]:
				raise ValueError("Parallel LIBLINEAR is only available for -s 0, 1, 2, 3, 11 now")

		# eps still at the sentinel means "-e" was not given: use the solver default.
		if self.eps == float('inf'):
			default_eps = {
				L2R_LR: 0.01,
				L2R_L2LOSS_SVC: 0.01,
				L2R_L2LOSS_SVR: 0.001,
				L2R_L2LOSS_SVC_DUAL: 0.1,
				L2R_L1LOSS_SVC_DUAL: 0.1,
				MCSVM_CS: 0.1,
				L2R_LR_DUAL: 0.1,
				L1R_L2LOSS_SVC: 0.01,
				L1R_LR: 0.01,
				L2R_L2LOSS_SVR_DUAL: 0.1,
				L2R_L1LOSS_SVR_DUAL: 0.1,
			}
			if self.solver_type in default_eps:
				self.eps = default_eps[self.solver_type]

class model(Structure):
	"""ctypes structure for a model handled by the LIBLINEAR library."""
	_names = ["param", "nr_class", "nr_feature", "w", "label", "bias"]
	_types = [parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double]
	_fields_ = genFields(_names, _types)

	def __init__(self):
		# Marks the object as allocated by Python (toPyModel sets 'C' instead).
		self.__createfrom__ = 'python'

	def __del__(self):
		# free memory created by C to avoid memory leak
		if getattr(self, '__createfrom__', None) == 'C':
			liblinear.free_and_destroy_model(pointer(self))

	def get_nr_feature(self):
		"""Return the feature count reported by the library."""
		nr_feature = liblinear.get_nr_feature(self)
		return nr_feature

	def get_nr_class(self):
		"""Return the class count reported by the library."""
		nr_class = liblinear.get_nr_class(self)
		return nr_class

	def get_labels(self):
		"""Return the class labels as a Python list."""
		count = self.get_nr_class()
		buf = (c_int * count)()
		liblinear.get_labels(self, buf)
		return buf[:count]

	def get_decfun_coef(self, feat_idx, label_idx=0):
		"""Return one decision-function coefficient."""
		coef = liblinear.get_decfun_coef(self, feat_idx, label_idx)
		return coef

	def get_decfun_bias(self, label_idx=0):
		"""Return the decision-function bias for ``label_idx``."""
		bias = liblinear.get_decfun_bias(self, label_idx)
		return bias

	def get_decfun(self, label_idx=0):
		"""Return (w, b): coefficients for features 1..nr_feature and the bias."""
		coefs = []
		for feat_idx in range(1, self.nr_feature+1):
			coefs.append(liblinear.get_decfun_coef(self, feat_idx, label_idx))
		offset = liblinear.get_decfun_bias(self, label_idx)
		return (coefs, offset)

	def is_probability_model(self):
		"""True when the library's check_probability_model returns 1."""
		return liblinear.check_probability_model(self) == 1

	def is_regression_model(self):
		"""True when the library's check_regression_model returns 1."""
		return liblinear.check_regression_model(self) == 1

def toPyModel(model_ptr):
	"""
	toPyModel(model_ptr) -> model

	Dereference a ctypes POINTER(model) and tag the result as C-allocated.
	Raises ValueError for a null pointer.
	"""
	if not model_ptr:
		raise ValueError("Null pointer")
	py_model = model_ptr.contents
	py_model.__createfrom__ = 'C'
	return py_model

# Declare the return type and argument types of every library entry point used
# by this wrapper, so ctypes converts arguments and results correctly.

# Training, parameter search and cross validation.
fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)])
fillprototype(liblinear.find_parameter_C, None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double)])
fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)])

# Prediction.
fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])
fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)])
fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)])

# Model persistence.
fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)])
fillprototype(liblinear.load_model, POINTER(model), [c_char_p])

# Model accessors.
fillprototype(liblinear.get_nr_feature, c_int, [POINTER(model)])
fillprototype(liblinear.get_nr_class, c_int, [POINTER(model)])
fillprototype(liblinear.get_labels, None, [POINTER(model), POINTER(c_int)])
fillprototype(liblinear.get_decfun_coef, c_double, [POINTER(model), c_int, c_int])
fillprototype(liblinear.get_decfun_bias, c_double, [POINTER(model), c_int])

# Cleanup, validation and the print hook.
fillprototype(liblinear.free_model_content, None, [POINTER(model)])
fillprototype(liblinear.free_and_destroy_model, None, [POINTER(POINTER(model))])
fillprototype(liblinear.destroy_param, None, [POINTER(parameter)])
fillprototype(liblinear.check_parameter, c_char_p, [POINTER(problem), POINTER(parameter)])
fillprototype(liblinear.check_probability_model, c_int, [POINTER(model)])
fillprototype(liblinear.check_regression_model, c_int, [POINTER(model)])
fillprototype(liblinear.set_print_string_function, None, [CFUNCTYPE(None, c_char_p)])


================================================
FILE: code/Classifier/liblinearutil.py
================================================
#!/usr/bin/env python

import os, sys
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path 
from liblinear import *
from liblinear import __all__ as liblinear_all
from ctypes import c_double

__all__ = ['svm_read_problem', 'load_model', 'save_model', 'evaluations',
           'train', 'predict'] + liblinear_all


def svm_read_problem(data_file_name):
	"""
	svm_read_problem(data_file_name) -> [y, x]

	Read LIBSVM-format data from data_file_name and return labels y
	and data instances x.

	Each non-blank line is "label index:value index:value ..."; y is a list of
	float labels and x a list of {int index: float value} dicts.  Blank lines
	are skipped.  A malformed label or feature raises ValueError.
	"""
	prob_y = []
	prob_x = []
	# The with-block closes the file even when a line fails to parse.
	with open(data_file_name) as data_file:
		for line in data_file:
			line = line.split(None, 1)
			if not line:
				# Blank line (e.g. trailing newline at end of file): nothing to read.
				continue
			# In case an instance with all zero features
			if len(line) == 1: line += ['']
			label, features = line
			xi = {}
			for e in features.split():
				ind, val = e.split(":")
				xi[int(ind)] = float(val)
			prob_y.append(float(label))
			prob_x.append(xi)
	return (prob_y, prob_x)

def load_model(model_file_name):
	"""
	load_model(model_file_name) -> model

	Load a LIBLINEAR model from model_file_name.  When the library returns a
	null pointer a message is printed and None is returned.
	"""
	model_ptr = liblinear.load_model(model_file_name.encode())
	if model_ptr:
		return toPyModel(model_ptr)
	print("can't open model file %s" % model_file_name)
	return None

def save_model(model_file_name, model):
	"""
	save_model(model_file_name, model) -> None

	Write the LIBLINEAR model to model_file_name; the library's return code
	is not inspected.
	"""
	encoded_name = model_file_name.encode()
	liblinear.save_model(encoded_name, model)

def evaluations(ty, pv):
	"""
	evaluations(ty, pv) -> (ACC, MSE, SCC)

	Calculate accuracy, mean squared error and squared correlation coefficient
	using the true values (ty) and predicted values (pv).

	ACC is a percentage.  SCC is NaN when it is undefined (zero variance in
	either sequence).  Raises ValueError when the two sequences differ in
	length.
	"""
	if len(ty) != len(pv):
		raise ValueError("len(ty) must equal to len(pv)")
	total_correct = total_error = 0
	sumv = sumy = sumvv = sumyy = sumvy = 0
	for v, y in zip(pv, ty):
		if y == v:
			total_correct += 1
		total_error += (v-y)*(v-y)
		sumv += v
		sumy += y
		sumvv += v*v
		sumyy += y*y
		sumvy += v*y
	l = len(ty)
	ACC = 100.0*total_correct/l
	# float() forces true division: with integer inputs Python 2 would
	# otherwise truncate the mean squared error (e.g. 1/2 -> 0).
	MSE = total_error/float(l)
	try:
		numerator = (l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy)
		denominator = (l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy)
		SCC = float(numerator)/denominator
	except (ZeroDivisionError, OverflowError):
		# Only arithmetic failures mean "undefined"; anything else
		# (e.g. KeyboardInterrupt) must propagate.
		SCC = float('nan')
	return (ACC, MSE, SCC)

def train(arg1, arg2=None, arg3=None):
	"""
	train(y, x [, options]) -> model | ACC | MSE | (best_C, best_rate)
	train(prob [, options]) -> model | ACC | MSE | (best_C, best_rate)
	train(prob, param) -> model | ACC | MSE | (best_C, best_rate)

	Train a model from data (y, x) or a problem prob using
	'options' or a parameter param.
	With '-C' the parameter search is run and (best_C, best_rate) is returned.
	Otherwise, with '-v' (cross validation) the accuracy (ACC) is returned for
	classification solvers and the mean-squared error (MSE) for the
	regression solvers.  Without either, the trained model is returned.

	Raises TypeError for unsupported argument types and ValueError when the
	library rejects the parameters.

	options:
		-s type : set type of solver (default 1)
		  for multi-class classification
			 0 -- L2-regularized logistic regression (primal)
			 1 -- L2-regularized L2-loss support vector classification (dual)
			 2 -- L2-regularized L2-loss support vector classification (primal)
			 3 -- L2-regularized L1-loss support vector classification (dual)
			 4 -- support vector classification by Crammer and Singer
			 5 -- L1-regularized L2-loss support vector classification
			 6 -- L1-regularized logistic regression
			 7 -- L2-regularized logistic regression (dual)
		  for regression
			11 -- L2-regularized L2-loss support vector regression (primal)
			12 -- L2-regularized L2-loss support vector regression (dual)
			13 -- L2-regularized L1-loss support vector regression (dual)
		-c cost : set the parameter C (default 1)
		-p epsilon : set the epsilon in loss function of SVR (default 0.1)
		-e epsilon : set tolerance of termination criterion
			-s 0 and 2
				|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
				where f is the primal function, (default 0.01)
			-s 11
				|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
			-s 1, 3, 4, and 7
				Dual maximal violation <= eps (default 0.1)
			-s 5 and 6
				|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
				where f is the primal function (default 0.01)
			-s 12 and 13
				|f'(alpha)|_1 <= eps |f'(alpha0)|,
				where f is the dual function (default 0.1)
		-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
		-wi weight: weights adjust the parameter C of different classes (see README for details)
		-v n: n-fold cross validation mode
		-C : find parameter C (only for -s 0 and 2; uses 5 folds unless -v is given)
		-n nr_thread : parallel version with [nr_thread] threads (default 1; only for -s 0, 1, 2, 3, 11)
		-q : quiet mode (no outputs)
	"""
	# Normalise the three supported call shapes into (prob, param).
	prob = param = None
	if isinstance(arg1, problem):
		prob = arg1
		param = arg2 if isinstance(arg2, parameter) else parameter(arg2)
	elif isinstance(arg1, (list, tuple)):
		assert isinstance(arg2, (list, tuple))
		prob = problem(arg1, arg2)
		param = parameter(arg3)
	if prob is None or param is None:
		raise TypeError("Wrong types for the arguments")

	prob.set_bias(param.bias)
	liblinear.set_print_string_function(param.print_func)
	err_msg = liblinear.check_parameter(prob, param)
	if err_msg:
		raise ValueError('Error: %s' % err_msg)

	# Mode 1: search for the best C.
	if param.flag_find_C:
		best_C = c_double()
		best_rate = c_double()
		# A negative start value is passed when -c was not given.
		start_C = param.C if param.flag_C_specified else -1.0
		max_C = 1024
		liblinear.find_parameter_C(prob, param, param.nr_fold, start_C, max_C, best_C, best_rate)
		print("Best C = %lf  CV accuracy = %g%%\n"% (best_C.value, 100.0*best_rate.value))
		return best_C.value,best_rate.value

	# Mode 2: cross validation.
	if param.flag_cross_validation:
		l = prob.l
		target = (c_double * l)()
		liblinear.cross_validation(prob, param, param.nr_fold, target)
		ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
		if param.solver_type not in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
			print("Cross Validation Accuracy = %g%%" % ACC)
			return ACC
		print("Cross Validation Mean squared error = %g" % MSE)
		print("Cross Validation Squared correlation coefficient = %g" % SCC)
		return MSE

	# Mode 3: plain training.
	return toPyModel(liblinear.train(prob, param))

def predict(y, x, m, options=""):
	"""
	predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

	Predict data (y, x) with the model m.  When y is empty, all-zero labels
	are substituted before the evaluation.
	options:
	    -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
	    -q quiet mode (no outputs)

	The return tuple contains
	p_labels: a list of predicted labels
	p_acc: a tuple including  accuracy (for classification), mean-squared
	       error, and squared correlation coefficient (for regression).
	p_vals: a list of decision values or probability estimates (if '-b 1'
	        is specified). If k is the number of classes, decision values
	        hold one entry per class, or a single entry when k <= 2.
	        Probability estimates always hold k entries.

	Raises ValueError for an unknown option and TypeError when probabilities
	are requested from a model that is not a probability model.
	"""

	def info(s):
		print(s)

	want_probability = 0
	argv = options.split()
	pos = 0
	while pos < len(argv):
		opt = argv[pos]
		if opt == '-b':
			pos += 1
			want_probability = int(argv[pos])
		elif opt == '-q':
			info = print_null
		else:
			raise ValueError("Wrong options")
		pos += 1

	nr_class = m.get_nr_class()
	nr_feature = m.get_nr_feature()
	is_prob_model = m.is_probability_model()
	bias = m.bias
	# The bias node goes into the reserved second-to-last slot of each instance.
	bias_index = nr_feature+1 if bias >= 0 else -1
	biasterm = feature_node(bias_index, bias)

	# Pick the library routine and the size of its output buffer once.
	if want_probability:
		if not is_prob_model:
			raise TypeError('probability output is only supported for logistic regression')
		nr_outputs = nr_class
		c_predict = liblinear.predict_probability
	else:
		nr_outputs = 1 if nr_class <= 2 else nr_class
		c_predict = liblinear.predict_values
	out_buffer = (c_double * nr_outputs)()

	pred_labels = []
	pred_values = []
	for instance in x:
		nodes, max_idx = gen_feature_nodearray(instance, feature_max=nr_feature)
		nodes[-2] = biasterm
		pred_labels.append(c_predict(m, nodes, out_buffer))
		pred_values.append(out_buffer[:nr_outputs])

	if len(y) == 0:
		y = [0] * len(x)
	ACC, MSE, SCC = evaluations(y, pred_labels)
	l = len(y)
	if m.is_regression_model():
		info("Mean squared error = %g (regression)" % MSE)
		info("Squared correlation coefficient = %g (regression)" % SCC)
	else:
		info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))

	return pred_labels, (ACC, MSE, SCC), pred_values


================================================
FILE: code/DataProcessor/Feature/__init__.py
================================================
__author__ = 'wenqihe'

from token_feature import HeadFeature, EntityMentionTokenFeature, BetweenEntityMentionTokenFeature, ContextFeature, ContextGramFeature
from other_feature import PosFeature, DistanceFeature, EntityMentionOrderFeature, NumOfEMBetweenFeature, SpecialPatternFeature, EMTypeFeature
from dependency_feature import DependencyFeature
from brown_feature import BrownFeature
from em_token_feature import EMHeadFeature, EMTokenFeature, EMContextFeature, EMContextGramFeature
from em_other_feature import EMPosFeature, EMLengthFeature, EMWordShapeFeature, EMCharacterFeature
from em_dependency_feature import EMDependencyFeature
from em_brown_feature import EMBrownFeature


================================================
FILE: code/DataProcessor/Feature/abstract_feature.py
================================================
__author__ = 'wenqihe'


class AbstractFeature(object):
    """Base class for feature extractors; subclasses override ``apply``."""

    def apply(self, sentence, mention, features):
        """Append features for ``mention`` in ``sentence`` to ``features``.

        The base implementation always raises NotImplementedError.
        """
        message = 'Should have implemented this'
        raise NotImplementedError(message)


================================================
FILE: code/DataProcessor/Feature/brown_feature.py
================================================
__author__ = 'wenqihe'

from abstract_feature import AbstractFeature
from token_feature import get_lemma


class BrownFeature(AbstractFeature):
    """Emit cluster-prefix features for every token of the sentence.

    The cluster file has one tab-separated ``cluster<TAB>word`` entry per line.
    """

    def __init__(self, brown_file):
        self.len = [4, 8, 12, 20]  # prefix lengths emitted per cluster string
        self.mapping = {}  # word -> cluster string
        with open(brown_file) as handle:
            for raw_line in handle:
                fields = raw_line.strip('\r\n').split('\t')
                cluster, word = fields[0], fields[1]
                self.mapping[word] = cluster

    def apply(self, sentence, mention, features):
        """Append BROWN_* features for each sentence token whose lemma has a cluster."""
        for i, token in enumerate(sentence.tokens):
            word = get_lemma(token, sentence.pos[i])
            if word not in self.mapping:
                continue
            cluster = self.mapping[word]
            # One feature per prefix length that the cluster string is long enough for.
            features.extend('BROWN_%d_%s' % (l, cluster[0:l])
                            for l in self.len if len(cluster) >= l)
            features.append('BROWN_ALL_%s' % cluster)


================================================
FILE: code/DataProcessor/Feature/dependency_feature.py
================================================
__author__ = 'wenqihe'

from abstract_feature import AbstractFeature
from token_feature import HeadFeature, get_lemma


class DependencyFeature(AbstractFeature):
    """Emit DEP_* features for dependency arcs that touch the mention span.

    ``sentence.dep`` is iterated as (dep_type, governor index, dependent
    index) triples.
    """
    accepted_deps=[ "nn", "agent", "dobj", "nsubj", "amod", "nsubjpass", "poss", "appos"]

    def apply(self, sentence, mention, features):
        """Append one feature per accepted arc with an endpoint inside the mention.

        For each arc the governor side is examined first, then the dependent
        side; the feature describes the token at the *other* end of the arc.
        """
        start = mention.start
        end = mention.end
        for dep_type, gov, dep in sentence.dep:
            # (endpoint inside the mention?, other endpoint, feature prefix)
            for inside, other, role in ((gov, dep, "gov:"), (dep, gov, "dep:")):
                if not (start <= inside < end):
                    continue
                if not (0 <= other < sentence.size()):
                    continue
                token = get_lemma(sentence.tokens[other], sentence.pos[other])
                pos = sentence.pos[other]
                if self.accept_pos(pos) and self.accept_dep(dep_type):
                    key = role + dep_type + ":" + token + "=" + pos[0]
                    features.append("DEP_" + key)

    def accept_pos(self, pos):
        """True when the POS tag starts with 'N' or 'V'."""
        return pos[0] in ('N', 'V')

    def accept_dep(self, dep):
        """True for any 'prep*' relation or one listed in accepted_deps."""
        if dep.startswith('prep'):
            return True
        return dep in self.accepted_deps



================================================
FILE: code/DataProcessor/Feature/em_brown_feature.py
================================================
__author__ = 'wenqihe'

from abstract_feature import AbstractFeature
from em_token_feature import get_lemma


class EMBrownFeature(AbstractFeature):
    """Emit cluster-prefix features for the tokens inside the mention span.

    The cluster file has one tab-separated ``cluster<TAB>word`` entry per line.
    """

    def __init__(self, brown_file):
        self.len = [4, 8, 12, 20]  # prefix lengths emitted per cluster string
        self.mapping = {}  # word -> cluster string
        with open(brown_file) as handle:
            for raw_line in handle:
                fields = raw_line.strip('\r\n').split('\t')
                cluster, word = fields[0], fields[1]
                self.mapping[word] = cluster

    def apply(self, sentence, mention, features):
        """Append BROWN_* features for each mention token whose lemma has a cluster."""
        for i in xrange(mention.start, mention.end):
            word = get_lemma(sentence.tokens[i], sentence.pos[i])
            if word not in self.mapping:
                continue
            cluster = self.mapping[word]
            # One feature per prefix length that the cluster string is long enough for.
            features.extend('BROWN_%d_%s' % (l, cluster[0:l])
                            for l in self.len if len(cluster) >= l)
            features.append('BROWN_ALL_%s' % cluster)


================================================
FILE: code/DataProcessor/Feature/em_dependency_feature.py
================================================
__author__ = 'wenqihe'

from abstract_feature import AbstractFeature
from em_token_feature import EMHeadFeature, get_lemma


class EMDependencyFeature(AbstractFeature):
    """Emit DEP_* features for dependency arcs that touch the mention span.

    ``sentence.dep`` is iterated as (dep_type, governor index, dependent
    index) triples.
    """
    accepted_deps=[ "nn", "agent", "dobj", "nsubj", "amod", "nsubjpass", "poss", "appos"]

    def apply(self, sentence, mention, features):
        """Append one feature per accepted arc with an endpoint inside the mention.

        For each arc the governor side is examined first, then the dependent
        side; the feature describes the token at the *other* end of the arc.
        """
        start = mention.start
        end = mention.end
        for dep_type, gov, dep in sentence.dep:
            # (endpoint inside the mention?, other endpoint, feature prefix)
            for inside, other, role in ((gov, dep, "gov:"), (dep, gov, "dep:")):
                if not (start <= inside < end):
                    continue
                if not (0 <= other < sentence.size()):
                    continue
                token = get_lemma(sentence.tokens[other], sentence.pos[other])
                pos = sentence.pos[other]
                if self.accept_pos(pos) and self.accept_dep(dep_type):
                    key = role + dep_type + ":" + token + "=" + pos[0]
                    features.append("DEP_" + key)

    def accept_pos(self, pos):
        """True when the POS tag starts with 'N' or 'V'."""
        return pos[0] in ('N', 'V')

    def accept_dep(self, dep):
        """True for any 'prep*' relation or one listed in accepted_deps."""
        if dep.startswith('prep'):
            return True
        return dep in self.accepted_deps



================================================
FILE: code/DataProcessor/Feature/em_other_feature.py
================================================
__author__ = 'wenqihe'

import re
from abstract_feature import AbstractFeature
from em_token_feature import EMHeadFeature

class EMPosFeature(AbstractFeature):
    """Emit one POS_<tag> feature per token of the mention."""

    def apply(self, sentence, mention, features):
        tags = sentence.pos
        features.extend('POS_%s' % tags[i]
                        for i in xrange(mention.start, mention.end))


class EMLengthFeature(AbstractFeature):
    """Emit the mention length in tokens, bucketing everything above five."""

    def apply(self, sentence, mention, features):
        span = mention.end - mention.start
        label = 'LENGTH_%d' % span if span <= 5 else 'LENGTH_>5'
        features.append(label)


class EMWordShapeFeature(AbstractFeature):
    """Emit a SHAPE_<pattern> feature for every token of the mention.

    The pattern collapses runs of lowercase letters to 'a', uppercase letters
    to 'A', digits to '0' and punctuation to '.'.
    """

    # The standard-library ``re`` module has no ``\p{P}`` Unicode-property
    # class, so the previous pattern never matched punctuation (and its
    # ``ur""`` prefix is a syntax error on Python 3).  This class lists the
    # ASCII characters that belong to the Unicode punctuation categories;
    # non-ASCII punctuation is left untouched.
    _PUNCT_RUN = re.compile(r"""[!"#%&'()*,\-./:;?@\[\\\]_{}]+""")

    def get_word_shape(self, token):
        """Return the shape pattern of ``token`` (e.g. "McDonald's" -> "AaAa.a")."""
        result = re.sub('[a-z]+', 'a', token)
        result = re.sub('[A-Z]+', 'A', result)
        result = re.sub('[0-9]+', '0', result)
        result = self._PUNCT_RUN.sub('.', result)
        return result

    def apply(self, sentence, mention, features):
        """Append the shape feature of each mention token to ``features``."""
        for i in xrange(mention.start, mention.end):
            features.append('SHAPE_%s' % self.get_word_shape(sentence.tokens[i]))


class EMCharacterFeature(AbstractFeature):
    """Emit character trigram features of the mention's head token."""

    def apply(self, sentence, mention, features):
        head = sentence.tokens[EMHeadFeature.get_head(sentence, mention)]
        size = len(head)
        if size < 3:
            # Heads shorter than three characters produce no features.
            return
        # Every inner trigram, then the two-character prefix and suffix markers.
        features.extend('CHAR_%s' % head[k:(k + 3)] for k in xrange(size - 2))
        features.append('CHAR_:%s' % head[:2])
        features.append('CHAR_%s:' % head[(size - 2):])


================================================
FILE: code/DataProcessor/Feature/em_token_feature.py
================================================
__author__ = 'wenqihe'

import re
from nltk.stem.wordnet import WordNetLemmatizer
from abstract_feature import AbstractFeature


cached = {}
lmtzr = WordNetLemmatizer()


def get_lemma(word, pos):
    """Return the lemma of ``word`` for POS tag ``pos``, memoised in ``cached``.

    Only purely alphabetic words are lemmatised: as a noun when the tag starts
    with 'N', as a verb when it starts with 'V'.  Everything else is returned
    unchanged.
    """
    key = word + '_' + pos
    try:
        return cached[key]
    except KeyError:
        pass
    lemma = word
    if re.match('[a-zA-Z]+$', word) is not None:
        if pos.startswith('N'):
            lemma = lmtzr.lemmatize(word, 'n')
        elif pos.startswith('V'):
            lemma = lmtzr.lemmatize(word, 'v')
    cached[key] = lemma
    return lemma


class EMHeadFeature(AbstractFeature):
    """Emit a HEAD_<lemma> feature for the head token of the mention."""

    @staticmethod
    def get_head(sentence, mention):
        """Return the index of the mention's head token.

        The head is the last token tagged N* seen before the scan stops at an
        'IN' or ',' tag; when no such token is seen it is the mention's last
        token.
        """
        head = mention.end - 1
        for i in xrange(mention.start, mention.end):
            tag = sentence.pos[i]
            if tag.startswith('N'):
                head = i
                continue
            if tag in ('IN', ','):
                break
        return head

    def apply(self, sentence, mention, features):
        index = EMHeadFeature.get_head(sentence, mention)
        lemma = get_lemma(sentence.tokens[index], sentence.pos[index])
        features.append('HEAD_%s' % lemma)


class EMTokenFeature(AbstractFeature):
    """Emit one TKN_<lemma> feature per token of the mention."""

    def apply(self, sentence, mention, features):
        for i in xrange(mention.start, mention.end):
            lemma = get_lemma(sentence.tokens[i], sentence.pos[i])
            features.append('TKN_%s' % lemma)


class EMContextFeature(AbstractFeature):
    """Unigram context features around an entity mention."""

    def __init__(self, window_size=1):
        # number of tokens inspected on each side of the mention
        self.window_size = window_size

    def apply(self, sentence, mention, features):
        """
        Append 'CTXT_LEFT_<lemma>' for up to window_size tokens before the
        mention and 'CTXT_RIGHT_<lemma>' for up to window_size tokens after
        it, clipped to the sentence bounds.
        """
        width = self.window_size
        left_from = max(0, mention.start - width)
        for idx in xrange(left_from, mention.start):
            features.append('CTXT_LEFT_%s' % get_lemma(sentence.tokens[idx], sentence.pos[idx]))
        right_to = min(sentence.size(), mention.end + width)
        for idx in xrange(mention.end, right_to):
            features.append('CTXT_RIGHT_%s' % get_lemma(sentence.tokens[idx], sentence.pos[idx]))


class EMContextGramFeature(AbstractFeature):
    """Bigram features inside and around an entity mention."""

    def __init__(self, window_size=1):
        # number of context tokens considered on each side of the mention
        self.window_size = window_size

    def apply(self, sentence, mention, features):
        """
        Append one bigram feature for every adjacent token pair in the window.
        A pair whose first token lies before the mention is tagged
        'CTXT_LEFT_GRM_', a pair lying wholly inside the mention 'GRM_', and
        every other pair 'CTXT_RIGHT_GRM_'.
        """
        first = max(0, mention.start - self.window_size)
        last = min(sentence.size() - 1, mention.end - 1 + self.window_size)
        for left in xrange(first, last):
            right = left + 1
            lemma_a = get_lemma(sentence.tokens[left], sentence.pos[left])
            lemma_b = get_lemma(sentence.tokens[right], sentence.pos[right])
            if left < mention.start:
                template = 'CTXT_LEFT_GRM_%s_%s'
            elif left < mention.end - 1:
                template = 'GRM_%s_%s'
            else:
                template = 'CTXT_RIGHT_GRM_%s_%s'
            features.append(template % (lemma_a, lemma_b))
        # left
        # if mention.start-2 >= 0:
        #     token1 = get_lemma(sentence.tokens[mention.start-2], sentence.pos[mention.start-2])
        #     token2 = get_lemma(sentence.tokens[mention.start-1], sentence.pos[mention.start-1])
        #     features.append('CTXT_LEFT_GRM_%s_%s' % (token1, token2))
        # # right
        # if mention.end + 1 < len(sentence.tokens):
        #     token1 = get_lemma(sentence.tokens[mention.end], sentence.pos[mention.end])
        #     token2 = get_lemma(sentence.tokens[mention.end+1], sentence.pos[mention.end+1])
        #     features.append('CTXT_RIGHT_GRM_%s_%s' % (token1, token2))


================================================
FILE: code/DataProcessor/Feature/other_feature.py
================================================
__author__ = 'wenqihe'

import re
from abstract_feature import AbstractFeature
from token_feature import HeadFeature

class PosFeature(AbstractFeature):
    """POS tags of the tokens lying between the two entity mentions."""

    def apply(self, sentence, mention, features):
        """Append 'POS_<tag>' for each token between the two mentions, in sentence order."""
        if mention.em1Start > mention.em2Start:
            # second mention comes first in the sentence
            lo, hi = mention.em2End, mention.em1Start
        else:
            lo, hi = mention.em1End, mention.em2Start
        for idx in xrange(lo, hi):
            features.append('POS_%s' % sentence.pos[idx])


class DistanceFeature(AbstractFeature):
    """Token distance between the two entity mentions."""

    def apply(self, sentence, mention, features):
        """Append 'DISTANCE_<n>' where n is the gap between the end of the earlier mention and the start of the later one."""
        if mention.em2Start < mention.em1Start:
            gap = mention.em1Start - mention.em2End
        else:
            gap = mention.em2Start - mention.em1End
        features.append('DISTANCE_%d' % gap)

class EntityMentionOrderFeature(AbstractFeature):
    """Records which of the two entity mentions starts first."""

    def apply(self, sentence, mention, features):
        """Append 'EM1_BEFORE_EM2' or 'EM2_BEFORE_EM1'; nothing when both start at the same index."""
        first_start, second_start = mention.em1Start, mention.em2Start
        if first_start == second_start:
            return
        if first_start < second_start:
            features.append('EM1_BEFORE_EM2')
        else:
            features.append('EM2_BEFORE_EM1')

class NumOfEMBetweenFeature(AbstractFeature):
    """Exposes the relation mention's numOfEMBetween value as a feature."""

    def apply(self, sentence, mention, features):
        """Append 'NUM_EMS_BTWEEN_<n>' built from mention.numOfEMBetween."""
        features.append('NUM_EMS_BTWEEN_%d' % mention.numOfEMBetween)

class EMTypeFeature(AbstractFeature):
    """Labels of the entity mentions that make up a relation mention."""

    def apply(self, sentence, mention, features):
        """
        For every entity mention of the sentence whose (start, end) equals
        the span of the first / second argument, append 'EM1_TYPE_<label>' /
        'EM2_TYPE_<label>' for each of its labels.
        """
        first_span = (mention.em1Start, mention.em1End)
        second_span = (mention.em2Start, mention.em2End)
        for candidate in sentence.entityMentions:
            span = (candidate.start, candidate.end)
            if span == first_span:
                for label in candidate.labels:
                    features.append('EM1_TYPE_%s' % label)
            if span == second_span:
                for label in candidate.labels:
                    features.append('EM2_TYPE_%s' % label)

class SpecialPatternFeature(AbstractFeature):
    """Detects the '<mention> in <mention>' surface pattern."""

    def apply(self, sentence, mention, features):
        """
        Append 'EM1_IN_EM2' when exactly one token, equal to 'in', separates
        the end of the first mention from the start of the second, and
        'EM2_IN_EM1' for the mirrored case.
        """
        tokens = sentence.tokens
        if mention.em1End + 1 == mention.em2Start and tokens[mention.em1End] == 'in':
            features.append('EM1_IN_EM2')
        if mention.em2End + 1 == mention.em1Start and tokens[mention.em2End] == 'in':
            features.append('EM2_IN_EM1')



================================================
FILE: code/DataProcessor/Feature/token_feature.py
================================================
__author__ = 'wenqihe'

import re
from nltk.stem.wordnet import WordNetLemmatizer
from abstract_feature import AbstractFeature


# Memo of lemmas already computed by get_lemma, keyed by '<word>_<pos>'.
cached = {}
# Single WordNet lemmatizer instance shared by every get_lemma call.
lmtzr = WordNetLemmatizer()


def get_lemma(word, pos):
    """
    Look up (or compute and memoise in `cached`) the lemma of `word`.
    Only words consisting solely of ASCII letters are lemmatised: as a noun
    when `pos` starts with 'N', as a verb when it starts with 'V'. In every
    other case the word itself is returned.
    """
    memo_key = word + '_' + pos
    if memo_key not in cached:
        lemma = word
        is_alpha = re.match('[a-zA-Z]+$', word) is not None
        if is_alpha and pos.startswith('N'):
            lemma = lmtzr.lemmatize(word, 'n')
        elif is_alpha and pos.startswith('V'):
            lemma = lmtzr.lemmatize(word, 'v')
        cached[memo_key] = lemma
    return cached[memo_key]


class HeadFeature(AbstractFeature):
    """Head-token lemma of each of the two entity mentions of a relation mention."""

    @staticmethod
    def get_head(sentence, start, end):
        """
        Return the head token index for the span [start, end).
        The most recent token whose POS tag starts with 'N' is kept while
        scanning left to right; scanning stops at the first 'IN' or ','
        tag. Defaults to the last token of the span.
        """
        head_idx = end - 1
        for idx in xrange(start, end):
            tag = sentence.pos[idx]
            if tag.startswith('N'):
                head_idx = idx
                continue
            if tag in ('IN', ','):
                break
        return head_idx

    def apply(self, sentence, mention, features):
        """Append 'HEAD_EM1_<lemma>' followed by 'HEAD_EM2_<lemma>' to `features`."""
        spans = (
            ('HEAD_EM1_%s', mention.em1Start, mention.em1End),
            ('HEAD_EM2_%s', mention.em2Start, mention.em2End),
        )
        for template, span_start, span_end in spans:
            idx = HeadFeature.get_head(sentence, span_start, span_end)
            token = sentence.tokens[idx]
            tag = sentence.pos[idx]
            features.append(template % get_lemma(token, tag))



class EntityMentionTokenFeature(AbstractFeature):
    """Token lemmas inside each of the two entity mentions."""

    def apply(self, sentence, mention, features):
        """Append 'TKN_EM1_<lemma>' for the first mention's tokens, then 'TKN_EM2_<lemma>' for the second's."""
        spans = (
            ('TKN_EM1_%s', mention.em1Start, mention.em1End),
            ('TKN_EM2_%s', mention.em2Start, mention.em2End),
        )
        for template, span_start, span_end in spans:
            for idx in xrange(span_start, span_end):
                features.append(template % get_lemma(sentence.tokens[idx], sentence.pos[idx]))

class BetweenEntityMentionTokenFeature(AbstractFeature):
    """Token lemmas found between the two entity mentions."""

    def apply(self, sentence, mention, features):
        """
        For each token between the mentions append 'TKN_BTWN_<lemma>'; the
        first such token is preceded by 'FIRST_TKN_BTWN_<lemma>' and the
        last one by 'LAST_TKN_BTWN_<lemma>'.
        """
        if mention.em1Start > mention.em2Start:
            # second mention precedes the first one in the sentence
            lo, hi = mention.em2End, mention.em1Start
        else:
            lo, hi = mention.em1End, mention.em2Start
        final = hi - 1
        for idx in xrange(lo, hi):
            lemma = get_lemma(sentence.tokens[idx], sentence.pos[idx])
            if idx == lo:
                features.append('FIRST_TKN_BTWN_%s' % lemma)
            if idx == final:
                features.append('LAST_TKN_BTWN_%s' % lemma)
            features.append('TKN_BTWN_%s' % lemma)

class ContextFeature(AbstractFeature):
    """Unigram context features around both entity mentions."""

    def __init__(self, window_size=1):
        # number of tokens inspected on each side of a mention
        self.window_size = window_size

    def apply(self, sentence, mention, features):
        """
        For the first and then the second mention, append
        'CTXT_EM<k>_LEFT_<lemma>' for up to window_size tokens before it and
        'CTXT_EM<k>_RIGHT_<lemma>' for up to window_size tokens after it,
        clipped to the sentence bounds.
        """
        width = self.window_size
        sides = (
            ('CTXT_EM1_LEFT_%s', 'CTXT_EM1_RIGHT_%s', mention.em1Start, mention.em1End),
            ('CTXT_EM2_LEFT_%s', 'CTXT_EM2_RIGHT_%s', mention.em2Start, mention.em2End),
        )
        for left_template, right_template, span_start, span_end in sides:
            for idx in xrange(max(0, span_start - width), span_start):
                features.append(left_template % get_lemma(sentence.tokens[idx], sentence.pos[idx]))
            for idx in xrange(span_end, min(sentence.size(), span_end + width)):
                features.append(right_template % get_lemma(sentence.tokens[idx], sentence.pos[idx]))


class ContextGramFeature(AbstractFeature):
    """Bigram features inside and around both entity mentions."""

    def __init__(self, window_size=1):
        # number of context tokens considered on each side of a mention
        self.window_size = window_size

    def apply(self, sentence, mention, features):
        """
        For the first and then the second mention, append one bigram feature
        per adjacent token pair in its window. A pair starting before the
        mention is tagged 'CTXT_EM<k>_LEFT_GRM_', a pair wholly inside the
        mention 'GRM_EM<k>_', and any other pair 'CTXT_EM<k>_RIGHT_GRM_'.
        """
        plans = (
            (mention.em1Start, mention.em1End,
             'GRM_EM1_%s_%s', 'CTXT_EM1_LEFT_GRM_%s_%s', 'CTXT_EM1_RIGHT_GRM_%s_%s'),
            (mention.em2Start, mention.em2End,
             'GRM_EM2_%s_%s', 'CTXT_EM2_LEFT_GRM_%s_%s', 'CTXT_EM2_RIGHT_GRM_%s_%s'),
        )
        for span_start, span_end, inner, left_ctx, right_ctx in plans:
            first = max(0, span_start - self.window_size)
            last = min(sentence.size() - 1, span_end - 1 + self.window_size)
            for left in xrange(first, last):
                pair = (get_lemma(sentence.tokens[left], sentence.pos[left]),
                        get_lemma(sentence.tokens[left + 1], sentence.pos[left + 1]))
                if left < span_start:
                    template = left_ctx
                elif left < span_end - 1:
                    template = inner
                else:
                    template = right_ctx
                features.append(template % pair)

        # left
        # if mention.start-2 >= 0:
        #     token1 = get_lemma(sentence.tokens[mention.start-2], sentence.pos[mention.start-2])
        #     token2 = get_lemma(sentence.tokens[mention.start-1], sentence.pos[mention.start-1])
        #     features.append('CTXT_LEFT_GRM_%s_%s' % (token1, token2))
        # # right
        # if mention.end + 1 < len(sentence.tokens):
        #     token1 = get_lemma(sentence.tokens[mention.end], sentence.pos[mention.end])
        #     token2 = get_lemma(sentence.tokens[mention.end+1], sentence.pos[mention.end+1])
        #     features.append('CTXT_RIGHT_GRM_%s_%s' % (token1, token2))


================================================
FILE: code/DataProcessor/__init__.py
================================================
__author__ = 'wenqihe'



================================================
FILE: code/DataProcessor/feature_generation.py
================================================
__author__ = 'ZeqiuWu'
import sys
import os
import math
from multiprocessing import Process, Lock
from nlp_parse import parse
#from postagger_parse import parse
from ner_feature import pipeline, filter, pipeline_test
from pruning_heuristics import prune
from statistic import supertype

def get_number(filename):
    """Return the number of lines in the file at `filename`."""
    with open(filename) as handle:
        return sum(1 for _ in handle)

def _parse_in_parallel(raw_json, parsed_json, num_processes, flag):
    """
    Split the lines of `raw_json` into `num_processes` contiguous chunks and
    run `parse` on each chunk in its own process, all writing to `parsed_json`.
    :param raw_json: path of the input file, one sentence per line
    :param parsed_json: path of the output file shared by all workers
    :param num_processes: number of worker processes (>= 1)
    :param flag: fifth positional argument forwarded to `parse`
                 (this script passes True for the train set, False for the test set)
    """
    with open(raw_json, 'r') as raw_file:
        sentences = raw_file.readlines()
    per_process = int(math.floor(len(sentences)*1.0/num_processes))
    lock = Lock()
    # Opened unbuffered (Python 2 form), exactly as before; presumably so that
    # writes made by the worker processes are not held back in per-process
    # buffers -- TODO confirm against nlp_parse.parse.
    parsed_file = open(parsed_json, 'w', 0)
    try:
        workers = []
        for i in range(num_processes):
            if i == num_processes - 1:
                # the last worker also takes the remainder of the division
                chunk = sentences[i*per_process:]
            else:
                chunk = sentences[i*per_process:(i+1)*per_process]
            worker = Process(target=parse, args=(chunk, parsed_file, lock, i, flag))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
    finally:
        parsed_file.close()


if __name__ == "__main__":
    usage = 'Usage:feature_generation.py -DATA -numOfProcesses -emtypeFlag(0 or 1) -negWeight (1.0)'
    if len(sys.argv) != 5:
        print(usage)
        exit(1)
    indir = 'data/source/%s' % sys.argv[1]
    if int(sys.argv[3]) == 1:
        outdir = 'data/intermediate/%s_emtype/rm' % sys.argv[1]
        requireEmType = True
    elif int(sys.argv[3]) == 0:
        outdir = 'data/intermediate/%s/rm' % sys.argv[1]
        requireEmType = False
    else:
        print(usage)
        exit(1)
    numOfProcesses = int(sys.argv[2])
    if numOfProcesses < 1:
        # zero workers would otherwise surface as a ZeroDivisionError below
        print(usage)
        exit(1)
    # Parse the weight up front so a malformed value fails before the long
    # parsing stage instead of after it.
    negWeight = float(sys.argv[4])
    outdir_em = 'data/intermediate/%s/em' % sys.argv[1]
    # NLP parse
    raw_train_json = indir + '/train.json'
    raw_test_json = indir + '/test.json'
    train_json = outdir + '/train_new.json'
    test_json = outdir + '/test_new.json'

    ### Generate features using Python wrapper (disabled if using run_nlp.sh)
    print('Start nlp parsing')
    _parse_in_parallel(raw_train_json, train_json, numOfProcesses, True)
    print('Train set parsing done')
    _parse_in_parallel(raw_test_json, test_json, numOfProcesses, False)
    print('Test set parsing done')

    print('Start em feature extraction')
    pipeline(train_json, indir + '/brown', outdir_em, requireEmType=requireEmType, isEntityMention=True)

    filter(outdir_em+'/feature.map', outdir_em+'/train_x.txt', outdir_em+'/feature.txt', outdir_em+'/train_x_new.txt')

    pipeline_test(test_json, indir + '/brown', outdir_em+'/feature.txt',outdir_em+'/type.txt', outdir_em, requireEmType=requireEmType, isEntityMention=True)
    supertype(outdir_em)

    ### Perform no pruning to generate training data
    print('Start em training and test data generation')
    feature_number = get_number(outdir_em + '/feature.txt')
    type_number = get_number(outdir_em + '/type.txt')
    prune(outdir_em, outdir_em, 'no', feature_number, type_number, neg_label_weight=negWeight, isRelationMention=False, emDir='')

    print('Start rm feature extraction')
    pipeline(train_json, indir + '/brown', outdir, requireEmType=requireEmType, isEntityMention=False)
    filter(outdir+'/feature.map', outdir+'/train_x.txt', outdir+'/feature.txt', outdir+'/train_x_new.txt')

    pipeline_test(test_json, indir + '/brown', outdir+'/feature.txt',outdir+'/type.txt', outdir, requireEmType=requireEmType, isEntityMention=False)

    ### Perform no pruning to generate training data
    print('Start rm training and test data generation')
    feature_number = get_number(outdir + '/feature.txt')
    type_number = get_number(outdir + '/type.txt')
    prune(outdir, outdir, 'no', feature_number, type_number, neg_label_weight=negWeight, isRelationMention=True, emDir=outdir_em)



================================================
FILE: code/DataProcessor/liblinear_processor.py
================================================
__author__ = 'xiang'
import sys
reload(sys)
sys.setdefaultencoding('utf8')

def load_as_list(filename):
    """
    Load a tab-separated file of "<index>\\t<value>" rows and group the
    values of consecutive rows that share the same index.
    :param filename: path of the file to read
    :return: (indexes, data) where indexes[k] is the index of the k-th group
             and data[k] the sorted list of its integer values,
             e.g. data = [[0,1,2],[1,2]]. An empty file yields ([], []).
    :raises ValueError: if a field of a row is not an integer
    """
    indexes = []
    data = []
    group = []
    current_index = None
    with open(filename) as f:
        for line in f:
            seg = line.strip('\r\n').split('\t')
            row_index = int(seg[0])
            if group and row_index != current_index:
                # index changed: close the group collected so far
                data.append(sorted(group))
                indexes.append(current_index)
                group = []
            current_index = row_index
            group.append(int(seg[1]))
    if group:
        data.append(sorted(group))
        indexes.append(current_index)
    return indexes, data

def write_train_as_liblinear(train_x, train_y, filename):
    """
    Write training rows to `filename`, one per line, as
    "<label> <feature+1>:1.0 <feature+1>:1.0 ...".
    The label of row i is the first element of train_y[i]; feature ids are
    shifted by one before being written.
    """
    with open(filename, 'w') as out:
        for row, feature_ids in enumerate(train_x):
            out.write(str(train_y[row][0]) + ' ')
            pairs = [str(fid + 1) + ':1.0' for fid in feature_ids]
            out.write(' '.join(pairs) + '\n')

def write_test_as_liblinear(test_x, filename):
    """
    Write test rows to `filename`, one per line, as
    "-1 <feature+1>:1.0 <feature+1>:1.0 ...". Every row gets the label -1;
    feature ids are shifted by one before being written.
    """
    with open(filename, 'w') as out:
        for feature_ids in test_x:
            out.write('-1 ')
            pairs = [str(fid + 1) + ':1.0' for fid in feature_ids]
            out.write(' '.join(pairs) + '\n')

if __name__ == "__main__":
    # Convert the relation-mention feature/type files of one dataset into
    # liblinear-formatted train and test files.
    if len(sys.argv) != 2:
        print('Usage: liblinear_processor.py -DATA(nyt_candidates)')
        exit(-1)

    base_dir = 'data/intermediate/' + sys.argv[1] + '/rm'

    ### Train
    _, train_features = load_as_list(base_dir + '/mention_feature.txt')
    _, train_labels = load_as_list(base_dir + '/mention_type.txt')
    write_train_as_liblinear(train_features, train_labels, base_dir + '/liblinear_train.txt')

    ### Test
    _, test_features = load_as_list(base_dir + '/mention_feature_test.txt')
    write_test_as_liblinear(test_features, base_dir + '/liblinear_test.txt')






================================================
FILE: code/DataProcessor/mention.py
================================================
__author__ = 'ZeqiuWu'


class RelationMention(object):
    """
    Wrap a relation mention. Each entity mention text of the relation mention is sentence.tokens[start:end].
    Attributes
    ==========
    em1Start : int
        The start index of the first entity mention.
    em1End : int
        The end index of the first entity mention.
    em2Start : int
        The start index of the second entity mention.
    em2End : int
        The end index of the second entity mention.
    numOfEMBetween : int
        Stored as given; presumably the number of entity mentions lying
        between the two arguments (confirm against the data producer).
    labels : iterable of string
        The labels of this relation mention.
    """
    def __init__(self, em1Start, em1End, em2Start, em2End, numOfEMBetween, labels):
        self.em1Start = em1Start
        self.em1End = em1End
        self.em2Start = em2Start
        self.em2End = em2End
        self.numOfEMBetween = numOfEMBetween
        self.labels = labels

    def __str__(self):
        """Return the two spans on one line followed by all labels concatenated."""
        header = 'EM1 : start: %d, end: %d ; EM2 : start: %d, end: %d\n' % (self.em1Start, self.em1End, self.em2Start, self.em2End)
        return header + ''.join(self.labels)

class EntityMention(object):
    """
    Wrap an entity mention. The mention text is sentence.tokens[start:end].
    Attributes
    ==========
    start : int
        The start index of the entity mention.
    end : int
        The end index of the entity mention.
    labels : iterable of string
        The labels of this entity mention.
    """
    def __init__(self, start, end, labels):
        self.start = start
        self.end = end
        self.labels = labels

    def __str__(self):
        """Return the span on one line followed by all labels concatenated."""
        header = 'start: %d, end: %d\n' % (self.start, self.end)
        return header + ''.join(self.labels)


class Sentence(object):
    """
    Wrap a sentence.
    Attributes
    ==========
    articleId : string
        The article id.
    sentId : string
        The sentence id.
    tokens : list
        The token list of this sentence.
    entityMentions : list
        EntityMention objects added through add_entityMention.
    relationMentions : list
        RelationMention objects added through add_relationMention.
    pos : list
        POS tags; empty until assigned by the caller.
    """
    def __init__(self, articleId, sentId, tokens):
        self.articleId = articleId
        self.sentId = sentId
        self.tokens = tokens
        self.entityMentions = []
        self.relationMentions = []
        self.pos = []

    def __str__(self):
        """
        Return the ids, the space-separated tokens, then one entry per entity
        mention followed by one entry per relation mention.
        """
        # The previous implementation iterated over a non-existent
        # `self.mentions` attribute and therefore always raised AttributeError.
        result = 'articleId: %s, sentId: %s\n'%(self.articleId, self.sentId)
        result += ''.join(token + ' ' for token in self.tokens)
        result += '\n'
        for m in self.entityMentions + self.relationMentions:
            result += str(m) + '\n'
        return result

    def add_relationMention(self, relationMention):
        """Append a RelationMention to this sentence."""
        assert isinstance(relationMention, RelationMention)
        self.relationMentions.append(relationMention)

    def add_entityMention(self, entityMention):
        """Append an EntityMention to this sentence."""
        assert isinstance(entityMention, EntityMention)
        self.entityMentions.append(entityMention)

    def size(self):
        """Return the number of positions that have both a token and a POS tag."""
        return min(len(self.tokens),len(self.pos))




================================================
FILE: code/DataProcessor/mention_reader.py
================================================
__author__ = 'wenqihe'

import json
from mention import RelationMention, EntityMention, Sentence


class MentionReader:
    """
    Mention reader. Cache one sentence in advance.
    Attributes
    ==========
    mention_file : string
        mention file.
    current : Sentence
        current sentence (None once the end of the file has been reached).
    input : File
        input stream.
    """
    def __init__(self, mention_file):
        self.mention_file = mention_file
        self.input = open(mention_file, 'rb')
        # read ahead so has_next() can answer without touching the file
        self.current = self._decode(self.input.readline())

    def close(self):
        """Close the underlying input stream."""
        self.input.close()

    def has_next(self):
        """
        Check if there is more sentence to read.
        :return: true if there is more sentence to read
        """
        return self.current is not None

    def next(self):
        """
        :return: the next sentence object
        """
        result = self.current
        self.current = self._decode(self.input.readline())
        return result

    @staticmethod
    def _decode(mention_json):
        """
        Decode a json string of a sentence.
        e.g.,  {"articleId":"doc1",
                "sentId":40,
                "tokens":["Raymond","Jung",",","51",",","of","Federal","Way","."],
                "pos":["NNP","NNP",",","CD",",","IN","NNP","NNP","."],
                "entityMentions":[{"start":0,"end":2,"labels":["/person"]},
                                  {"start":6,"end":8,"labels":["/location/city","/location"]}],
                "relationMentions":[{"em1Start":0,"em1End":2,"em2Start":6,"em2End":8,
                                     "numOfEMBetween":0,"labels":["None"]}]}
        The "pos" key is optional; every other key shown is required.
        :param mention_json: string; an empty line marks the end of the input
        :return: a sentence instance with all mentions appearing in this
                 sentence, or None for an empty line
        """
        # readline() returns an empty str/bytes object at end of file; a
        # truthiness test covers both, whereas `== ''` misses empty bytes.
        if not mention_json:
            return None
        decoded = json.loads(mention_json)
        sentence = Sentence(decoded['articleId'], int(decoded['sentId']), decoded['tokens'])
        for rm in decoded['relationMentions']:
            sentence.add_relationMention(RelationMention(int(rm['em1Start']), int(rm['em1End']), int(rm['em2Start']), int(rm['em2End']), rm['numOfEMBetween'], rm['labels']))
        for em in decoded['entityMentions']:
            sentence.add_entityMention(EntityMention(int(em['start']), int(em['end']), em['labels']))
        if 'pos' in decoded:
            sentence.pos = decoded['pos']
        # Dependency parsing is currently disabled:
        # if 'dep' in decoded:
        #     for dep in decoded['dep']:
        #         sentence.dep.append((dep['type'], dep['gov'], dep['dep']))
        return sentence


================================================
FILE: code/DataProcessor/ner_feature.py
================================================
__author__ = 'wenqihe'

from Feature import *
import sys
from mention_reader import MentionReader
reload(sys)
sys.setdefaultencoding('utf8')

class NERFeature(object):
    """
    Runs a fixed list of feature extractors over a mention and maps the
    resulting feature names and the mention's labels to integer ids.
    """

    def __init__(self, is_train, brown_file, requireEmType, isEntityMention, feature_mapping=None, label_mapping=None):
        """
        :param is_train: when true, unseen feature/label names receive new ids;
                         otherwise they are ignored
        :param brown_file: path handed to the Brown-cluster feature extractor
        :param requireEmType: add EMTypeFeature (relation-mention mode only)
        :param isEntityMention: choose the entity-mention extractor set instead
                                of the relation-mention one
        :param feature_mapping: {feature_name: [feature_id, feature_frequency]};
                                a fresh dict is created when omitted
        :param label_mapping: {label_name: [label_id, label_frequency]};
                              a fresh dict is created when omitted
        """
        self.is_train = is_train
        self.feature_count = 0
        self.label_count = 0
        self.feature_list = []
        # The mappings are mutated by extract(), so a literal `{}` default
        # would be shared between every instance built without arguments.
        if feature_mapping is None:
            feature_mapping = {}
        if label_mapping is None:
            label_mapping = {}
        self.feature_mapping = feature_mapping # {feature_name: [feature_id, feature_frequency]}
        self.label_mapping = label_mapping # {label_name: [label_id, label_frequency]}
        if isEntityMention:
            # head feature
            self.feature_list.append(EMHeadFeature())
            # token feature
            self.feature_list.append(EMTokenFeature())
            # context unigram
            self.feature_list.append(EMContextFeature(window_size=3))
            # context bigram
            self.feature_list.append(EMContextGramFeature(window_size=3))
            # pos feature
            self.feature_list.append(EMPosFeature())
            # word shape feature
            self.feature_list.append(EMWordShapeFeature())
            # length feature
            self.feature_list.append(EMLengthFeature())
            # character feature
            self.feature_list.append(EMCharacterFeature())
            # brown clusters
            self.feature_list.append(EMBrownFeature(brown_file))
            # dependency feature
            #self.feature_list.append(EMDependencyFeature())
        else:
            # head feature
            self.feature_list.append(HeadFeature())
            # token features (inside and between the two mentions)
            self.feature_list.append(EntityMentionTokenFeature())
            self.feature_list.append(BetweenEntityMentionTokenFeature())
            # context unigram
            self.feature_list.append(ContextFeature(window_size=3))
            # context bigram
            self.feature_list.append(ContextGramFeature(window_size=3))
            # pos feature
            self.feature_list.append(PosFeature())
            # mention order feature
            self.feature_list.append(EntityMentionOrderFeature())
            # distance feature
            self.feature_list.append(DistanceFeature())
            # number of entity mentions in between
            self.feature_list.append(NumOfEMBetweenFeature())
            # special surface patterns
            self.feature_list.append(SpecialPatternFeature())
            # brown clusters
            self.feature_list.append(BrownFeature(brown_file))
            if requireEmType:
                # entity mention types of the two arguments
                self.feature_list.append(EMTypeFeature())


    def extract(self, sentence, mention):
        """
        Extract the features of `mention` and map features and labels to ids.
        Known names have their frequency incremented; unknown names get the
        next free id only when is_train is set and are skipped otherwise.
        :return: (feature_ids, label_ids), both sets of int
        """
        # extract feature strings
        feature_str = []
        for f in self.feature_list:
            f.apply(sentence, mention, feature_str)
        # map feature_names and label_names
        feature_ids = set()
        label_ids = set()
        for s in feature_str:
            if s in self.feature_mapping:
                feature_ids.add(self.feature_mapping[s][0])
                self.feature_mapping[s][1] += 1  # add frequency
            elif self.is_train:
                feature_ids.add(self.feature_count)
                self.feature_mapping[s] = [self.feature_count, 1]
                self.feature_count += 1
        for l in mention.labels:
            if l in self.label_mapping:
                label_ids.add(self.label_mapping[l][0])
                self.label_mapping[l][1] += 1  # add frequency
            elif self.is_train:
                label_ids.add(self.label_count)
                self.label_mapping[l] = [self.label_count, 1]
                self.label_count += 1

        return feature_ids, label_ids


def pipeline(json_file, brown_file, outdir, requireEmType, isEntityMention):
    """
    Generate training features for every mention found in `json_file`.
    Writes into `outdir`:
      train_x.txt       "<mention id>\\t<comma separated feature ids>"
      train_y.txt       "<mention id>\\t<comma separated label ids>"
      feature.map       feature name, id and frequency
      type.txt          label name, id and frequency
      label_counts.txt  "<number of labels>\\t<number of mentions>"
    :param json_file: sentence file read through MentionReader
    :param brown_file: path forwarded to NERFeature
    :param outdir: existing output directory
    :param requireEmType: forwarded to NERFeature
    :param isEntityMention: process entity mentions when true, relation
                            mentions otherwise
    Any exception raised while processing a mention is reported and re-raised.
    """
    reader = MentionReader(json_file)
    try:
        ner_feature = NERFeature(is_train=True, brown_file=brown_file, requireEmType=requireEmType, isEntityMention=isEntityMention, feature_mapping={}, label_mapping={})
        count = 0
        mentionCountByNumOfLabels = {}
        # `with` closes every output (label_counts.txt used to be left open)
        # even when a mention makes the loop raise.
        with open(outdir+'/train_x.txt', 'w') as gx, \
                open(outdir+'/train_y.txt', 'w') as gy, \
                open(outdir+'/feature.map', 'w') as f, \
                open(outdir+'/type.txt', 'w') as t, \
                open(outdir+'/label_counts.txt', 'w') as label_counts_file:
            print('start train feature generation')
            while reader.has_next():
                if count%10000 == 0:
                    sys.stdout.write('process ' + str(count) + ' lines\r')
                    sys.stdout.flush()
                sentence = reader.next()
                if isEntityMention:
                    mentions = sentence.entityMentions
                else:
                    mentions = sentence.relationMentions
                for mention in mentions:
                    try:
                        if isEntityMention:
                            m_id = '%s_%s_%d_%d'%(sentence.articleId, sentence.sentId, mention.start, mention.end)
                        else:
                            m_id = '%s_%d_%d_%d_%d_%d'%(sentence.articleId, sentence.sentId, mention.em1Start, mention.em1End, mention.em2Start, mention.em2End)
                        feature_ids, label_ids = ner_feature.extract(sentence, mention)
                        num_labels = len(label_ids)
                        mentionCountByNumOfLabels[num_labels] = mentionCountByNumOfLabels.get(num_labels, 0) + 1
                        gx.write(m_id+'\t'+','.join([str(x) for x in feature_ids])+'\n')
                        gy.write(m_id+'\t'+','.join([str(x) for x in label_ids])+'\n')
                        count += 1
                    except Exception as e:
                        print('%s %s' % (e.message, e.args))
                        print('%s %s %s' % (sentence.articleId, sentence.sentId, len(sentence.tokens)))
                        print(mention)
                        raise
            print('\n')
            print('mention :%d'%count)
            print('feature :%d'%len(ner_feature.feature_mapping))
            print('label :%d'%len(ner_feature.label_mapping))
            # key= gives the same ordering as the former cmp= comparator
            for item in sorted(mentionCountByNumOfLabels.items(), key=lambda pair: pair[0]):
                label_counts_file.write(str(item[0])+'\t'+str(item[1])+'\n')
            write_map(ner_feature.feature_mapping, f)
            write_map(ner_feature.label_mapping, t)
    finally:
        reader.close()


def pipeline_test(json_file, brown_file, featurefile, labelfile, outdir, requireEmType, isEntityMention):
    """
    Generate test features for every mention found in `json_file`, using the
    feature and label ids stored in `featurefile` and `labelfile`.
    Writes into `outdir`:
      test_x.txt     "<mention id>\\t<comma separated feature ids>"
      test_y.txt     "<mention id>\\t<comma separated label ids>"
      type_test.txt  label name, id and frequency observed on the test set
    A mention whose processing raises is reported and skipped.
    :param json_file: sentence file read through MentionReader
    :param brown_file: path forwarded to NERFeature
    :param featurefile: feature map readable by load_map
    :param labelfile: label map readable by load_map
    :param outdir: existing output directory
    :param requireEmType: forwarded to NERFeature
    :param isEntityMention: process entity mentions when true, relation
                            mentions otherwise
    """
    #  load feature mapping and label mapping
    feature_map = load_map(featurefile)
    label_map = load_map(labelfile)

    reader = MentionReader(json_file)
    try:
        ner_feature = NERFeature(is_train=False, brown_file=brown_file, requireEmType=requireEmType, isEntityMention=isEntityMention, feature_mapping=feature_map, label_mapping=label_map)
        count = 0
        with open(outdir+'/test_x.txt', 'w') as gx, open(outdir+'/test_y.txt', 'w') as gy:
            print('start test feature generation')
            while reader.has_next():
                if count%10000 == 0 and count != 0:
                    sys.stdout.write('process ' + str(count) + ' lines\r')
                    sys.stdout.flush()
                sentence = reader.next()
                if isEntityMention:
                    mentions = sentence.entityMentions
                else:
                    mentions = sentence.relationMentions
                for mention in mentions:
                    try:
                        if isEntityMention:
                            m_id = '%s_%s_%d_%d'%(sentence.articleId, sentence.sentId, mention.start, mention.end)
                        else:
                            m_id = '%s_%d_%d_%d_%d_%d'%(sentence.articleId, sentence.sentId, mention.em1Start, mention.em1End, mention.em2Start, mention.em2End)
                        feature_ids, label_ids = ner_feature.extract(sentence, mention)
                        gx.write(m_id+'\t'+','.join([str(x) for x in feature_ids])+'\n')
                        gy.write(m_id+'\t'+','.join([str(x) for x in label_ids])+'\n')
                        count += 1
                    except Exception as e:
                        # best effort: report the offending mention and carry on
                        print('%s %s' % (e.message, e.args))
                        print('%s %s' % (sentence.articleId, sentence.sentId))
                        print(mention)
                        continue
            # type_test.txt used to be opened and never closed
            with open(outdir+'/type_test.txt', 'w') as type_test:
                write_map(ner_feature.label_mapping, type_test)
            print('\n')
    finally:
        reader.close()


def load_map(input):
    """
    Read a tab-separated "<name>\\t<id>[\\t...]" file into a dict of
    {name: [id, 0]}; the second slot is a frequency counter starting at zero.
    """
    handle = open(input)
    mapping = {}
    for raw_line in handle:
        fields = raw_line.strip('\r\n').split('\t')
        name = fields[0]
        mapping[name] = [int(fields[1]), 0]
    handle.close()
    return mapping


def write_map(mapping, output):
    """
    Write a mapping to an open file, one entry per line, ordered by index.

    Each line has the form ``key<TAB>index<TAB>second_slot``.

    :param mapping: dict of key -> sequence whose first two items are written
    :param output: writable file-like object (it is not closed here)
    """
    # key= gives the same ascending-by-index order as the former cmp= lambda,
    # and the sort stays stable; cmp= only exists in Python 2.
    sorted_map = sorted(mapping.items(), key=lambda item: item[1][0])
    for tup in sorted_map:
        output.write(tup[0]+'\t'+str(tup[1][0])+'\t'+str(tup[1][1])+'\n')


def filter(featurefile, trainfile, featureout,trainout, min_frequency=1):
    """
    Drop rare features and renumber the survivors in the training file.

    ``featurefile`` lines are ``name<TAB>old_id<TAB>frequency``. Features whose
    frequency is at least ``min_frequency`` receive consecutive new ids in file
    order and are written to ``featureout`` through ``write_map2``. Every line
    of ``trainfile`` (``mention_id<TAB>comma-separated old ids``) is then
    rewritten to ``trainout`` with the old ids replaced by the new ones; ids of
    dropped features are omitted and duplicates collapse.

    :param featurefile: path of the feature map to read
    :param trainfile: path of the training feature file to read
    :param featureout: path of the filtered feature map to write
    :param trainout: path of the rewritten training feature file to write
    :param min_frequency: minimum frequency a feature needs to be kept
                          (default 1, the previously hard-coded threshold)
    """
    featuremap = {}
    old2new = {}
    count = 0
    # Context managers close every file even if a malformed line raises.
    with open(featurefile) as f:
        for line in f:
            seg = line.strip('\r\n').split('\t')
            frequency = int(seg[2])
            if frequency>=min_frequency:
                featuremap[seg[0]] = (count,seg[2])
                # old ids stay strings: they are matched against raw file text below
                old2new[seg[1]] = count
                count+=1
    print('Feature after filter: %d'%count)
    with open(featureout,'w') as g:
        write_map2(featuremap, g)

    # scan the training set and filter features
    with open(trainfile) as f, open(trainout,'w') as g:
        for line in f:
            seg = line.strip('\r\n').split('\t')
            features = seg[1].split(',')
            newfeatures = set()
            for feature in features:
                if feature in old2new:
                    newfeatures.add(old2new[feature])
            g.write(seg[0]+'\t'+','.join([str(x) for x in newfeatures])+'\n')


def write_map2(mapping, output):
    """
    Write ``key<TAB>index`` lines to an open file, ordered by index.

    :param mapping: dict of key -> sequence whose first item is the index
    :param output: writable file-like object (it is not closed here)
    """
    # key= gives the same ascending-by-index order as the former cmp= lambda;
    # cmp= only exists in Python 2.
    sorted_map = sorted(mapping.items(), key=lambda item: item[1][0])
    for tup in sorted_map:
        output.write(tup[0]+'\t'+str(tup[1][0])+'\n')

if __name__ == "__main__":
    # Script entry point: four positional arguments are required.
    if len(sys.argv) != 5:
        print('Usage:ner_feature.py -TRAIN_JSON -TEST_JSON -BROWN_FILE -OUTDIR')
        exit(1)
    train_json, test_json, brown_file, outdir = sys.argv[1:5]
    # Run the training pipeline, then filter and renumber its features,
    # then generate test features against the filtered map.
    pipeline(train_json, brown_file, outdir)
    filter(
        featurefile=outdir+'/feature.map',
        trainfile=outdir+'/train_x.txt',
        featureout=outdir+'/feature.txt',
        trainout=outdir+'/train_x_new.txt')
    pipeline_test(test_json, brown_file, outdir+'/feature.txt', outdir+'/type.txt', outdir)


================================================
FILE: code/DataProcessor/nlp_parse.py
================================================
__author__ = 'ZeqiuWu'

import ujson as json
from stanza.nlp.corenlp import CoreNLPClient
from tqdm import tqdm
import sys
import time
import unicodedata
import re
from unidecode import unidecode
#from corenlp import StanfordCoreNLP


class NLPParser(object):
    """
    Thin wrapper around a CoreNLP client that splits text into sentences and
    returns tokens with their part-of-speech tags.

    Attributes
    ==========
    parser: CoreNLPClient
        client created with the 'ssplit', 'tokenize' and 'pos' annotators
    """
    def __init__(self):
        self.parser = CoreNLPClient(default_annotators=['ssplit', 'tokenize', 'pos'])

    def parse(self, sent):
        """
        Annotate ``sent`` and return one (tokens, pos_tags) tuple per sentence
        found by the annotator.
        """
        annotated = self.parser.annotate(sent)
        parsed = []
        for sentence in annotated.sentences:
            # single pass over the sentence, reading word then tag per token
            pairs = [(token.word, token.pos) for token in sentence]
            words = [word for word, _ in pairs]
            tags = [tag for _, tag in pairs]
            parsed.append((words, tags))
        return parsed


def parse(sentences, g, lock, procNum, isTrain, parsePOSBeforehand=False):
    rmCount = 0
    discardRmCount = 0
    parser = NLPParser()
    posAndTokensMap = None
    if parsePOSBeforehand:
        posAndTokensMap = createPosAndTokensMap(sentences, parser)
    count=0
    buffered = []
    start = time.time()
    for line in sentences:
        sentence = json.loads(line.strip('\r\n'))
        buffered.append(sentence)
        count += 1
        if(len(buffered) == 5):
            rmCount, discardRmCount = process(buffered, parser, g, lock, isTrain, rmCount, discardRmCount)
            buffered = []
            sys.stdout.write("Process %d, parsed %d sentences, Time: %d sec\r" % (procNum, count, time.time() - start) )
            sys.stdout.flush()
    if(len(buffered) > 0):
        rmCount, discardRmCount = process(buffered, parser, g, lock, isTrain, rmCount, discardRmCount, posAndTokensMap)
    print procNum, rmCount, discardRmCount, '\n'


def process(buffered, parser, g, lock, isTrain, rmCount, discardRmCount, posAndTokensMap=None):
    """
    Convert a batch of raw sentence records to token-indexed records and write them to ``g``.

    For each sentence the text is tokenised and POS-tagged (or looked up in
    ``posAndTokensMap``), every entity mention text is located in the token
    list, and every relation mention is rewritten in terms of those token
    offsets. One JSON line per sentence is written to ``g`` while ``lock`` is
    held. A sentence that raises is reported on stdout and skipped.

    :param buffered: list of dicts with keys 'sentText', 'articleId', 'sentId',
                     'entityMentions' (items with 'text', 'label') and
                     'relationMentions' (items with 'em1Text', 'em2Text', 'label')
    :param parser: object whose ``parse(text)`` returns a list of (tokens, pos) tuples
    :param g: writable file-like object receiving one JSON document per line
    :param lock: object with ``acquire()`` / ``release()`` guarding writes to ``g``
    :param isTrain: if true, a relation mention is anchored on the last located
                    occurrence of each entity text; otherwise on the first
                    occurrence pair not yet used in this sentence
    :param rmCount: running count of relation mentions seen so far
    :param discardRmCount: running count of relation mentions dropped because
                           of an exception
    :param posAndTokensMap: optional dict (articleId, sentId) -> (tokens, pos)
    :return: the updated (rmCount, discardRmCount)
    """

    for sent in buffered:
        sentText = sent['sentText']
        try:
            if not posAndTokensMap:
                # only the first sentence found by the parser is used
                tuples = parser.parse(sentText)
                pos = tuples[0][1]
                tokens = tuples[0][0]
            else:
                key = (sent['articleId'],sent['sentId'])
                pos = posAndTokensMap[key][1]
                tokens = posAndTokensMap[key][0]

            relationMentions = []
            entityMentions = []
            emStartIndexes = set()
            emIndexByText = {}
            for em in sent['entityMentions']:
                emText = unicodedata.normalize('NFKD', em['text']).encode('ascii','ignore')
                if emText not in emIndexByText:
                    start, end = find_index(tokens, emText.split())
                else:
                    # repeated text: search after the end of its previous occurrence
                    offset = emIndexByText[emText][-1][1]
                    start, end = find_index(tokens[offset:], emText.split())
                    start += offset
                    end += offset
                if start != -1 and end != -1:
                    # also rejects a failed offset search, where start == end == offset - 1
                    if end <= start:
                        continue
                    emStartIndexes.add(start)
                    if emText not in emIndexByText:
                        emIndexByText[emText] = [(start, end)]
                    else:
                        emIndexByText[emText].append((start, end))
                    entityMentions.append({'start':start, 'end':end, 'labels':em['label'].split(',')})
            # rank of each mention start, used to count mentions lying between a pair
            emStartIndexes = sorted(list(emStartIndexes))
            orderByStartIdxMap = {}
            for i in range(len(emStartIndexes)):
                orderByStartIdxMap[emStartIndexes[i]] = i
            visitedEmPairs = {}
            numOfEMBetweenMap = {}
            for rm in sent['relationMentions']:
                rmCount += 1
                try:
                    start1 = -1
                    end1 = -1
                    start2 = -1
                    end2 = -1
                    em1 = unicodedata.normalize('NFKD', rm['em1Text']).encode('ascii','ignore')
                    em2 = unicodedata.normalize('NFKD', rm['em2Text']).encode('ascii','ignore')
                    if isTrain:
                        start1 = emIndexByText[em1][-1][0]
                        end1 = emIndexByText[em1][-1][1]
                        start2 = emIndexByText[em2][-1][0]
                        end2 = emIndexByText[em2][-1][1]
                    else:
                        for em1Index in emIndexByText[em1]:
                            flag = False
                            for em2Index in emIndexByText[em2]:
                                if (em1Index, em2Index) not in visitedEmPairs:
                                    start1 = em1Index[0]
                                    end1 = em1Index[1]
                                    start2 = em2Index[0]
                                    end2 = em2Index[1]
                                    flag = True
                                    break
                            if flag:
                                break
                    numOfEMBetween = 0
                    if start2 > start1:
                        numOfEMBetween = orderByStartIdxMap[start2] - orderByStartIdxMap[start1] - 1
                    elif start2 < start1:
                        numOfEMBetween = orderByStartIdxMap[start1] - orderByStartIdxMap[start2] - 1
                    if start1 != -1 and end1 != -1 and start2 != -1 and end2 != -1:
                        numOfEMBetweenMap[(start1, end1), (start2, end2)] = numOfEMBetween
                        # labels of relation mentions sharing the same span pair are merged
                        if ((start1, end1), (start2, end2)) in visitedEmPairs:
                            visitedEmPairs[((start1, end1), (start2, end2))].update(set(rm['label'].split(',')))
                        else:
                            visitedEmPairs[((start1, end1), (start2, end2))] = set(rm['label'].split(','))
                except Exception as e:
                    # e.g. an entity text that was never located: count and skip
                    discardRmCount += 1
            if len(visitedEmPairs) > 0:
                for emPair in visitedEmPairs:
                    relationMentions.append({'em1Start':emPair[0][0], 'em1End':emPair[0][1], 'em2Start':emPair[1][0], 'em2End':emPair[1][1], 'numOfEMBetween':numOfEMBetweenMap[emPair], 'labels':list(visitedEmPairs[emPair])})
            newsent = dict()
            newsent['articleId'] = sent['articleId']
            newsent['sentId'] = sent['sentId']
            newsent['tokens'] = tokens
            newsent['pos'] = pos
            newsent['relationMentions'] = relationMentions
            newsent['entityMentions'] = entityMentions
            lock.acquire()
            try:
                g.write(json.dumps(newsent)+'\n')
            finally:
                # always release, otherwise a failed write would leave the lock
                # held and block every other writer
                lock.release()
        except Exception as e:
            print 'parse error: ', e.message, e.args
            print sent['articleId'], sent['sentId']
    return rmCount, discardRmCount

def find_index(sen_split, word_split):
    """
    Locate the first occurrence of a token sequence inside a token list.

    :param sen_split: list of sentence tokens
    :param word_split: list of tokens to look for
    :return: (start, end) with ``end`` exclusive, or (-1, -1) when the
             sequence is empty or does not occur
    """
    width = len(word_split)
    if width == 0:
        # nothing to look for; previously this raised IndexError
        return -1, -1
    # Only start positions that leave room for the whole phrase are tried.
    # The old loop clamped its cursor at the last token, so a phrase running
    # past the end could "match" by re-comparing that last token and return
    # an end index beyond the sentence.
    for i in range(len(sen_split) - width + 1):
        # same str()-based first-token pre-check as before
        if str(sen_split[i]) != str(word_split[0]):
            continue
        if not any(word_split[j] != sen_split[i + j] for j in range(width)):
            return i, i + width
    return -1, -1

def createPosAndTokensMap(sentences, parser):
    """
    Parse all sentences in one call and index the result by sentence id.

    The sentence texts are joined with newlines and handed to ``parser.parse``;
    the i-th parsed sentence is attributed to the i-th input record.

    :param sentences: iterable of JSON lines with 'articleId', 'sentId' and 'sentText'
    :param parser: object whose ``parse(text)`` returns a list of (tokens, pos) tuples
    :return: dict (articleId, sentId) -> (tokens, pos)
    :raises Exception: if the parser returns a different number of sentences
                       than were supplied, or if an id occurs twice
    """
    keys = []
    chunks = []
    for raw in sentences:
        record = json.loads(raw.strip('\r\n'))
        keys.append((record['articleId'], record['sentId']))
        chunks.append(record['sentText'].strip('\r\n') + '\n')
    parsed = parser.parse(''.join(chunks))
    if len(keys) != len(parsed):
        print(len(keys),len(parsed))
        raise Exception('ids and parsed sentenses should have the same size!!!')
    lookup = {}
    for key, entry in zip(keys, parsed):
        if key in lookup:
            raise Exception('duplicate id found: %s' % str(key))
        lookup[key] = (entry[0], entry[1])
    return lookup

if __name__ == "__main__":
    # Command-line entry point: expects an input path and an output path.
    if len(sys.argv) != 3:
        print('Usage: nlp_parse.py -INPUT -OUTPUT')
        exit(1)
    # NOTE(review): parse() in this file is declared as
    # parse(sentences, g, lock, procNum, isTrain, parsePOSBeforehand=False),
    # so this two-argument call raises TypeError as written - confirm the
    # intended entry point (open files, lock, isTrain) before relying on it.
    parse(sys.argv[1], sys.argv[2])


================================================
FILE: code/DataProcessor/pruning_heuristics.py
================================================
__author__ = 'wenqihe'

import os
import operator
import sys
from collections import defaultdict
reload(sys)
sys.setdefaultencoding('utf8')

class PruneStrategy:
    """
    Holds the label-pruning callable used by ``prune``.

    The requested strategy name is stored, but ``pruner`` is always bound to
    ``no_prune``.
    """
    def __init__(self, strategy):
        self.pruner = self.no_prune
        self._strategy = strategy

    def no_prune(self, fileid, is_ground, labels):
        """Return the distinct labels as a list; ``fileid`` and ``is_ground`` are unused."""
        return list(set(labels))

def prune(indir, outdir, strategy, feature_number, type_number, neg_label_weight, isRelationMention, emDir):
    prune_strategy = PruneStrategy(strategy=strategy)

    type_file = open((os.path.join(indir+'/type.txt')), 'r')
    negLabelIndex = -1
    for line in type_file:
        seg = line.strip('\r\n').split('\t')
        if seg[0] == "None":
            negLabelIndex = int(seg[1])
            print "neg label : ", negLabelIndex
            break

    mids = {}
    ground_truth = set()
    count = 0
    train_y = os.path.join(indir+'/train_y.txt')
    train_x = os.path.join(indir+'/train_x_new.txt')
    test_x = os.path.join(indir+'/test_x.txt')
    test_y = os.path.join(indir+ '/test_y.txt')
    mention_file = os.path.join(outdir+ '/mention.txt')
    mention_type = os.path.join(outdir+ '/mention_type.txt')
    mention_feature = os.path.join(outdir+ '/mention_feature.txt')
    mention_type_test = os.path.join(outdir+'/mention_type_test.txt')
    mention_feature_test = os.path.join(outdir+ '/mention_feature_test.txt')
    feature_type = os.path.join(outdir+ '/feature_type.txt')
    # generate mention_type, and mention_feature for the training corpus
    with open(train_x) as fx, open(train_y) as fy, open(test_y) as ft, \
        open(mention_type,'w') as gt, open(mention_feature,'w') as gf:
        for line in ft:
            seg = line.strip('\r\n').split('\t')
            ground_truth.add(seg[0])
        # generate mention_type and mention_feature
        for line in fy:
            line2 = fx.readline()
            seg = line.strip('\r\n').split('\t')
            seg_split = seg[0].split('_')
            fileid = '_'.join(seg_split[:-3])
            labels = [int(x) for x in seg[1].split(',')]
            new_labels = prune_strategy.pruner(fileid=fileid, is_ground=(seg[0] in ground_truth), labels=labels)
            if new_labels is not None:
                seg2 = line2.strip('\r\n').split('\t')
                if len(seg2) != 2:
                    print seg2
                features = seg2[1].split(',')
                if seg[0] in mids:
                    continue
                for l in new_labels:
                    if l == negLabelIndex:  # discount weight for None label (index is 1)
                        gt.write(str(count)+'\t'+str(l)+'\t' + str(neg_label_weight) + '\n')
                    else:
                        gt.write(str(count)+'\t'+str(l)+'\t1\n')
                for f in features:
                    gf.write(str(count)+'\t'+f+'\t1\n')
                mids[seg[0]] = count
                count += 1
                if count%200000==0:
                    print count
    # generate mention_type_test, and mention_feature_test for the test corpus
    print count
    print 'start test'
    with open(test_x) as fx, open(test_y) as fy,\
        open(mention_type_test,'w') as gt, open(mention_feature_test, 'w') as gf:
        # generate mention_type and mention_feature
        for line in fy:
            line2 = fx.readline()
            seg = line.strip('\r\n').split('\t')
            try:
                labels = [int(x) for x in seg[1].split(',')]
            except:
                labels = [] ### if it's negative example (no type label), make it a []
            seg2 = line2.strip('\r\n').split('\t')
            features = seg2[1].split(',')
            if seg[0] in mids:
                mid = mids[seg[0]]
            else:
                mid = count
               # print line2
                mids[seg[0]] = count
                count += 1
            for l in labels:
                gt.write(str(mid)+'\t'+str(l)+'\t1\n')
            for f in features:
                gf.write(str(mid)+'\t'+f+'\t1\n')
    print count
    print 'start mention part'
    # generate mention.txt
    with open(mention_file,'w') as m:
        sorted_mentions = sorted(mids.items(), key=operator.itemgetter(1))
        for tup in sorted_mentions:
            m.write(tup[0]+'\t'+str(tup[1])+'\n')
    if isRelationMention:
        entity_mention_file = os.path.join(emDir+ '/mention.txt')
        triples_file = os.path.join(outdir+ '/triples.txt')
        with open(entity_mention_file, 'r') as emFile, open(triples_file, 'w') as triplesFile:
            emIdByString ={}
            for line in emFile.readlines():
                seg = line.strip('\r\n').split('\t')
                emIdByString[seg[0]] = seg[1]
            for tup in sorted_mentions:
                seg = tup[0].split('_')
                em1id = emIdByString['_'.join(seg[:-2])]
                em2id = emIdByString['_'.join(seg[:2]+seg[-2:])]
                rmid = tup[1]
                triplesFile.write(em1id+'\t'+em2id+'\t'+str(rmid)+'\n')

    print 'start feature_type part'
    with open(mention_feature) as f1, open(mention_type) as f2,\
        open(feature_type,'w') as g:
        fm = defaultdict(set)
        tm = defaultdict(set)
        for line in f1:
            seg = line.strip('\r\n').split('\t')
            i = int(seg[0])
            j = int(seg[1])
            fm[j].add(i)
        for line in f2:
            seg = line.strip('\r\n').split('\t')
            i = int(seg[0])
            j = int(seg[1])
            tm[j].add(i)
        for i in xrange(feature_number):
            for j in xrange(type_number):
                if j == negLabelIndex:  ### discount weight for None label "1"
                    temp = len(fm[i]&tm[j]) * neg_label_weight
                else:
                    temp = len(fm[i]&tm[j])
                if temp > 0:
                    g.write(str(i)+'\t'+str(j)+'\t'+str(temp)+'\n')


================================================
FILE: code/DataProcessor/statistic.py
================================================
__author__ = 'wenqihe'
import json
import sys
from collections import defaultdict

reload(sys)
sys.setdefaultencoding('utf8')


def distribution(indir):
    """
    Write, per document, the labels that occur at least twice in train_y.txt.

    Reads ``indir/train_y.txt`` (``mention_id<TAB>comma-separated labels``) and
    writes ``indir/distribution_per_doc.txt`` with one
    ``doc_id<TAB>comma-separated labels`` line for every document that has such
    a label. The document id is the mention id without its last three
    '_'-separated fields.
    """
    with open(indir+'/train_y.txt') as f, \
         open(indir+'/distribution_per_doc.txt','w') as g:
        per_doc = defaultdict(dict)
        for line in f:
            fields = line.strip('\r\n').split('\t')
            doc_id = '_'.join(fields[0].split('_')[:-3])
            label_ids = fields[1].split(',')
            counts = per_doc[doc_id]
            for label in label_ids:
                counts[label] = counts.get(label, 0) + 1
        for doc_id in per_doc:
            counts = per_doc[doc_id]
            frequent = [label for label in counts if counts[label] >= 2]
            if frequent:
                g.write(doc_id+'\t'+",".join(frequent)+'\n')


def supertype(indir):
    """
    Derive direct child/parent pairs from the type names in type.txt.

    Reads ``indir/type.txt`` (name in the first column, id in the second) and
    writes ``child_id<TAB>parent_id`` lines to ``indir/supertype.txt``. The
    first character of each name is dropped and the rest is split on '/';
    a type is a child of another when its path is exactly one segment longer
    and starts with the other's path.
    """
    with open(indir+'/type.txt') as f, \
         open(indir+'/supertype.txt','w') as g:
        name2id = {}
        for line in f:
            seg = line.strip('\r\n').split('\t')
            name2id[seg[0]] = seg[1]

        for child in name2id:
            for parent in name2id:
                if child == parent:
                    continue
                child_path = child[1:].split('/')
                parent_path = parent[1:].split('/')
                one_level_deeper = len(child_path) == len(parent_path)+1
                if one_level_deeper and child_path[:len(parent_path)] == parent_path:
                    g.write(name2id[child]+'\t'+name2id[parent]+'\n')


    


================================================
FILE: code/Evaluation/convertPredictionToJson.py
================================================
import sys
import json

# Usage: convertPredictionToJson.py DATA THRESHOLD
# Turns the per-mention predictions for DATA into one JSON document per test
# sentence, replacing any prediction scored below THRESHOLD with the 'None' type.
data = sys.argv[1]
predictionFile = 'data/results/'+data+'/rm/prediction_emb_retype_cosine.txt'
testJson = 'data/intermediate/'+data+'/rm/test_new.json'
predictionJson = 'data/results/'+data+'/rm/prediction.json'
mentionMapFile = 'data/intermediate/'+data+'/rm/mention.txt'
typeMapFile = 'data/intermediate/'+data+'/rm/type.txt'
threshold = float(sys.argv[2])

# type id -> type name; also remember the id of the 'None' type
tid2Name = {}
with open(typeMapFile) as typeF:
  for row in typeF:
    fields = row.strip('\r\n').split('\t')
    typeName, typeId = fields[0], fields[1]
    tid2Name[typeId] = typeName
    if typeName == 'None':
      noneid = typeId

# mention string -> mention id
mention2id = {}
with open(mentionMapFile) as menF:
  for row in menF:
    fields = row.strip('\r\n').split('\t')
    mention2id[fields[0]] = fields[1]

# mention id -> predicted type id, falling back to 'None' below the threshold
mid2tid = {}
with open(predictionFile, 'r') as predF:
  for row in predF:
    fields = row.strip('\r\n').split('\t')
    mid, tid = fields[0], fields[1]
    mid2tid[mid] = noneid if float(fields[2]) < threshold else tid

with open(testJson) as testJ, open(predictionJson, 'w') as predJ:
  for row in testJ:
    sentDic = json.loads(row.strip('\r\n'))
    aid = str(sentDic['articleId'])
    sid = str(sentDic['sentId'])
    tokens = sentDic['tokens']
    new_rms = []
    for rm in sentDic['relationMentions']:
      new_rm = dict()
      new_rm['em1Text'] = tokens[rm['em1Start']: rm['em1End']]
      new_rm['em2Text'] = tokens[rm['em2Start']: rm['em2End']]
      offsets = [rm['em1Start'], rm['em1End'], rm['em2Start'], rm['em2End']]
      mention = '_'.join([aid, sid] + [str(offset) for offset in offsets])
      mid = mention2id[mention]
      # mentions without a prediction line are labelled 'None'
      tid = mid2tid[mid] if mid in mid2tid else noneid
      new_rm['label'] = tid2Name[tid]
      new_rms.append(new_rm)
    newSentDic = dict()
    newSentDic['sentText'] = ' '.join(tokens)
    newSentDic['sentId'] = sentDic['sentId']
    newSentDic['articleId'] = sentDic['articleId']
    newSentDic['relationMentions'] = new_rms
    predJ.write(json.dumps(newSentDic)+'\n')



================================================
FILE: code/Evaluation/emb_prediction.py
================================================
__author__ = 'xiang'

import sys
reload(sys)
sys.setdefaultencoding('utf8')
sys.path.append('code/Classifier/')

import json
import os
from collections import defaultdict
from math import sqrt
import operator
from DataIO import *
from Classifier import casestudy

def sim_func(v1, v2, _MODE):
    """
    Similarity between two equal-length vectors.

    :param v1: first vector; its length decides how many components are used
    :param v2: second vector, at least as long as ``v1``
    :param _MODE: 'dot' for the dot product, 'cosine' for cosine similarity;
                  any other value yields 0.0
    :return: the similarity. In 'cosine' mode 0.0 is returned when either
             vector has zero norm instead of raising ZeroDivisionError
    """
    val = 0.0
    dims = range(len(v1))
    if _MODE == 'dot':
        ### dot product:
        val = sum( [v1[i]*v2[i] for i in dims] )
    elif _MODE == 'cosine':
        ### cosine sim:
        norm1 = sqrt(sum( [v1[i]*v1[i] for i in dims] ))
        norm2 = sqrt(sum( [v2[i]*v2[i] for i in dims] ))
        if norm1 == 0.0 or norm2 == 0.0:
            # an all-zero vector (e.g. a mention without usable features) has
            # no direction; report "no similarity" rather than dividing by zero
            return 0.0
        val = sum( [v1[i]*v2[i]/norm1/norm2 for i in dims] )
    return val

# Embedding of different nodes
class Embedding:
    """
    Embedding vectors loaded from a text file.

    The first line holds ``node_count vector_size`` separated by a space. Every
    following line is tab-separated; its second column is the node index and
    its third column the space-separated vector components. Nodes absent from
    the file keep an empty list.
    """
    def __init__(self, file_name):
        self._embs = []
        self._node_size = 0
        self._vector_size = 0
        with open(file_name) as f:
            header = f.readline().split(' ')
            self._node_size = int(header[0])
            self._vector_size = int(header[1])
            self._embs = [[] for _ in range(self._node_size)]
            for row in f:
                cols = row.strip().split('\t')
                node_index = int(cols[1])
                components = cols[2].split(' ')
                self._embs[node_index] = [float(value) for value in components]
        print(' '.join(['emb:', str(self._node_size), str(self._vector_size)]))


    def get_embedding(self, index):
        """Return the vector stored for node ``index``."""
        return self._embs[index]

class Network:
    """
    Adjacency lists read from a tab-separated edge file.

    The first two columns of each line are integer node ids; the second is
    appended to the neighbour list of the first.
    """
    def __init__(self, file_name):
        self._network = defaultdict(list)
        with open(file_name) as f:
            for row in f:
                cols = row.strip('\r\n').split('\t')
                source = int(cols[0])
                self._network[source].append(int(cols[1]))

    def get_neighbors(self, idx):
        """Return the neighbour list of ``idx`` (an empty list if it has none)."""
        return self._network[idx]


# Predict types from feature embeddings
class Predicter_useFeatureEmb:
    """
    Predicts a type for a mention from the embeddings of the mention's features.

    ``typefile`` is accepted by the constructor but not used.
    """
    def __init__(self, embs_feature, embs_type, network_mention_feature, typefile, sim_func):
        self._embs_feature = Embedding(embs_feature)
        self._embs_type = Embedding(embs_type)
        # feature and type vectors must live in the same space
        assert self._embs_feature._vector_size == self._embs_type._vector_size
        self._network_mention_feature = Network(network_mention_feature)
        self._sim_func = sim_func

    def get_mention_embedding(self, mention_id):
        """
        Return the mean of the embeddings of the mention's features; an
        all-zero vector (with a warning printed) when it has no features.
        """
        feature_list = self._network_mention_feature.get_neighbors(mention_id)
        if len(feature_list) == 0:
            print('No feature for this test mention!')
        width = self._embs_feature._vector_size
        divisor = float(len(feature_list))
        mention_vector = [0.0] * width
        for feature_id in feature_list:
            for i in range(width):
                mention_vector[i] += self._embs_feature.get_embedding(feature_id)[i] / divisor
        return mention_vector

    def predict_types_for_rm_maximum(self, mention_id, candidate):
        """
        Return (type_id, score) of the candidate type most similar to the
        mention embedding; (-1, -sys.maxint) when ``candidate`` is empty.
        """
        mention_vector = self.get_mention_embedding(mention_id)
        best_index, best_score = -1, -sys.maxint
        for type_id in candidate:
            type_vector = self._embs_type.get_embedding(type_id)
            score = sim_func(mention_vector, type_vector, self._sim_func)
            # strict comparison: the first candidate wins ties
            if score > best_score:
                best_index, best_score = type_id, score

        return best_index, best_score


def predict(indir, outdir, _method, _sim_func, _threshold, output, none_label_index):
    """
    Predict one type per test mention and write the confident ones to ``output``.

    Every distinct mention id in ``indir/mention_feature_test.txt`` is scored
    against all types listed in ``indir/type.txt``. Scores are rescaled with
    ``min_max_normalization`` and only mentions whose rescaled score exceeds
    ``_threshold`` are written as ``mention_id<TAB>type_id<TAB>score``.

    :param indir: directory with mention_feature_test.txt and type.txt
    :param outdir: directory with emb_<method>_feature.txt and emb_<method>_type.txt
    :param _method: method name used in the embedding file names
    :param _sim_func: similarity mode handed to the predicter
    :param _threshold: minimum rescaled score (exclusive) for a prediction to be written
    :param output: path of the prediction file to write
    :param none_label_index: if not None, mentions scoring exactly 0.0 are
                             labelled with this type id
    """
    test_feature_path = os.path.join(indir + '/mention_feature_test.txt')
    type_path = os.path.join(indir + '/type.txt')

    predicter = Predicter_useFeatureEmb(
        embs_feature=os.path.join(outdir + '/emb_' + _method + '_feature.txt'),
        embs_type=os.path.join(outdir + '/emb_' + _method + '_type.txt'),
        network_mention_feature=test_feature_path,
        typefile=type_path,
        sim_func=_sim_func)

    with open(test_feature_path) as f,\
         open(output, 'w') as g:
        mentions_ids = load_mentionids(test_feature_path)
        all_candidates = load_all_candidates(type_path, mentions_ids)
        seen = set()
        results = []  # (mention_id, label, raw score) in first-seen order
        for line in f:
            mention_id = int(line.strip('\r\n').split('\t')[0])
            # the file has one line per (mention, feature); score each mention once
            if mention_id in seen:
                continue
            seen.add(mention_id)
            label, score = predicter.predict_types_for_rm_maximum(mention_id, all_candidates[mention_id])
            if none_label_index is not None and score == 0.0:
                label = none_label_index
            results.append((mention_id, label, score))

        scores_normalized = min_max_normalization([entry[2] for entry in results])
        for entry, normalized in zip(results, scores_normalized):
            if normalized > _threshold:
                g.write(str(entry[0])+'\t'+str(entry[1])+'\t'+ str(normalized) + '\n')

def min_max_normalization(scores):
    """
    Rescale scores by the range spanned by the scores together with 0.0.

    Both bounds start at 0.0, so the lower bound is never above zero and the
    upper bound never below it. A small epsilon keeps the divisor non-zero.

    :param scores: sequence of numbers
    :return: list of rescaled scores, in input order
    """
    low = high = 0.0
    for value in scores:
        high = value if value > high else high
        low = value if value < low else low
    span = high - low + 1e-8
    return [(value - low) / span for value in scores]

def load_mentionids(filename):
    """
    Collect the distinct integer ids found in the first tab-separated column
    of ``filename`` and return them as a set, e.g. set([0, 1, 2]).
    """
    with open(filename) as f:
        return set(int(row.strip('\r\n').split('\t')[0]) for row in f)

def load_candidates(filename, indexes):
    """
    Read ``id<TAB>value`` pairs and group the values of the wanted ids.

    :param filename: tab-separated file; the first two columns are integers
    :param indexes: container of ids to keep
    :return: defaultdict(list) of id -> values in file order,
             e.g. {0: [0, 1, 2], 1: [1, 2]}
    """
    with open(filename) as f:
        grouped = defaultdict(list)
        for row in f:
            cols = row.strip('\r\n').split('\t')
            key = int(cols[0])
            if key not in indexes:
                continue
            grouped[key].append(int(cols[1]))
        return grouped

def load_all_candidates(filename, indexes):
    """
    Give every id in ``indexes`` the full list of type ids from ``filename``.

    Only lines with exactly three tab-separated columns contribute; the type
    id is taken from the second column. All ids share the same list object.

    :return: defaultdict(list) of id -> list of all type ids,
             e.g. {0: [0, 1, 2], 1: [0, 1, 2]}
    """
    with open(filename) as f:
        rows = [row.strip('\r\n').split('\t') for row in f]
    type_list = [int(cols[1]) for cols in rows if len(cols) == 3]

    candidates = defaultdict(list)
    for mention_id in indexes:
        candidates[mention_id] = type_list
    return candidates




if __name__ == "__main__":

    # Exactly four positional arguments are required.
    if len(sys.argv) != 5:
        print('Usage: emb_prediction.py -DATA(nyt_candidates) -METHOD(retypeRM) -SIM(cosine/dot) -THRESHOLD')
        exit(-1)

    # do prediction here
    _data, _method, _sim_func = sys.argv[1:4]
    _threshold = float(sys.argv[4])

    indir = 'data/intermediate/' + _data + '/rm'
    outdir = 'data/results/' + _data + '/rm'

    ### Prediction
    type_file = indir + '/type.txt'
    mention_file = indir + '/mention.txt'
    json_file = indir + '/test_new.json'
    output = outdir +'/prediction_emb_' + _method + '_' + _sim_func + '.txt'
    # intext_output = outdir +'/predictionInText_emb_' + _method + '_' + _sim_func + '.txt'

    # Data set names containing '_neg' get the "None" label for zero-score mentions.
    if '_neg' in _data:
        none_label_index = find_none_index(indir + '/type.txt')
    else:
        none_label_index = None
    predict(indir, outdir, _method, _sim_func, _threshold, output, none_label_index)

    ### Write inText Results
    # mention_mapping = load_map(mention_file, 'mention')
    # label_mapping = load_map(type_file, 'label')
    # clean_mentions = load_mention_type(output)
    # casestudy(json_file, intext_output, mention_mapping, label_mapping, clean_mentions)


================================================
FILE: code/Evaluation/emb_test.py
================================================
# Script to predict and evaluate in a pipeline
__author__ = 'xiang'

import sys
from collections import  defaultdict
from evaluation import *
from emb_prediction import *

if __name__ == "__main__":

    # Exactly five positional arguments are required.
    if len(sys.argv) != 6:
        print('Usage: emb_test.py -TASK (classify/extract) \
        -DATA(BioInfer/NYT/Wiki) -METHOD(retype) -SIM(cosine/dot) -THRESHOLD')
        exit(-1)

    # do prediction here
    _task, _data, _method, _sim_func = sys.argv[1:5]
    _threshold = float(sys.argv[5])

    indir = 'data/intermediate/' + _data + '/rm'
    outdir = 'data/results/' + _data + '/rm'

    output = outdir +'/prediction_emb_' + _method + '_' + _sim_func + '.txt'
    ground_truth = load_labels(indir + '/mention_type_test.txt')

    ### Prediction
    is_extract = (_task == 'extract')
    if not is_extract and _task != 'classify':
        print('wrong TASK argument!')
        exit(1)
    # only the 'extract' task maps zero-score mentions to the "None" label
    if is_extract:
        none_label_index = find_none_index(indir + '/type.txt')
    else:
        none_label_index = None
    predict(indir, outdir, _method, _sim_func, _threshold, output, none_label_index)

    ### Evaluate embedding predictions
    predictions = load_labels(output)
    print('Evalaution:')
    if is_extract:
        none_label_index = find_none_index(indir + '/type.txt')
        prec, rec, f1 = evaluate_rm_neg(predictions, ground_truth, none_label_index)
    else:
        prec, rec, f1 = evaluate_rm(predictions, ground_truth)


================================================
FILE: code/Evaluation/evaluation.py
================================================
__author__ = 'xiang'
import sys
from collections import  defaultdict

def find_none_index(file_name):
    """
    Return the integer in the second column of the first line of ``file_name``
    whose first tab-separated column is 'None'.

    When no such line exists a warning is printed and None is returned.
    """
    with open(file_name) as f:
        rows = (line.strip('\r\n').split('\t') for line in f)
        match = next((row for row in rows if row[0] == 'None'), None)
        if match is not None:
            return int(match[1])
        print('No None label!!!')
        return None

def load_labels(file_name):
    """
    Read ``mention_id<TAB>label`` lines into a mapping of id -> set of labels.

    A label that is not a plain integer (e.g. '3.0') is parsed as a float and
    truncated to int. Further columns are ignored.

    :return: defaultdict(set) keyed by integer mention id
    """
    ### To Do: "None" RMs should NOT in ground_truth (double check whether we will have that)
    labels = defaultdict(set)
    with open(file_name) as f:
        for row in f:
            cols = row.strip('\r\n').split('\t')
            bucket = labels[int(cols[0])]
            try:
                value = int(cols[1])
            except:
                value = int(float(cols[1]))
            bucket.add(value)
    return labels

def load_raw_labels(file_name, ground_truth):
    """
    Read 'mention_id<TAB>label' lines, keeping only ids present in ground_truth.

    :param file_name: path of the label file
    :param ground_truth: container tested with ``in`` to decide which ids to keep
    :return: defaultdict(set) mapping each kept mention id to its labels
    """
    kept = defaultdict(set)
    with open(file_name) as handle:
        for raw in handle:
            fields = raw.strip('\r\n').split('\t')
            mention_id = int(fields[0])
            if mention_id not in ground_truth:
                continue
            kept[mention_id].add(int(fields[1]))
    return kept

def load_label_score(file_name):
    """
    Read 'mention_id<TAB>label<TAB>score' lines into id -> (label, score).

    The label is parsed as int, falling back to float-then-int for values such
    as '3.0'. A score field equal to the literal '-Infinity' is stored as 0.0;
    any other score is parsed with float(). When an id occurs more than once
    the last line wins. Lines that contain only whitespace are skipped.

    :param file_name: path of the prediction file
    :return: defaultdict(tuple) mapping mention id to a (label, score) tuple
    """
    labels = defaultdict(tuple)
    with open(file_name) as f:
        for line in f:
            if not line.strip():
                # blank line: nothing to parse
                continue
            seg = line.strip('\r\n').split('\t')
            try:
                label = int(seg[1])
            except ValueError:
                # label written in float notation, e.g. '3.0'
                label = int(float(seg[1]))
            # '-Infinity' marks "no score" in the prediction file and is mapped to 0.0
            score = 0.0 if seg[2] == '-Infinity' else float(seg[2])
            labels[int(seg[0])] = (label, score)
    # the with-statement closes the file; no explicit close() is needed
    return labels

###
def evaluate_rm(prediction, ground_truth):
    """
    Precision / recall / F1 where every predicted id counts as a positive.

    A prediction is a hit when its id exists in ground_truth and its label
    collection compares equal (==) to the ground-truth one. Precision divides
    the hits by the number of predictions, recall by the number of
    ground-truth entries; 1e-8 is added to each denominator.

    :param prediction: mapping id -> labels, e.g. {0: set([1, 0]), 1: set([2])}
    :param ground_truth: mapping id -> labels
    :return: tuple (precision, recall, f1)
    """
    eps = 1e-8
    num_gold = float(len(ground_truth))
    # every key of prediction is counted as a predicted positive
    num_pred = float(len(prediction))
    num_hit = float(sum(
        1 for key in prediction
        if key in ground_truth and prediction[key] == ground_truth[key]
    ))

    precision = num_hit / (num_pred + eps)
    recall = num_hit / (num_gold + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)

    return precision, recall, f1

###
def evaluate_rm_neg(prediction, ground_truth, none_label_index):
    """
    Precision / recall / F1 that ignores entries labelled only with 'None'.

    An entry is positive when its label set differs from {none_label_index}.
    Positives are counted separately in ground_truth and in prediction; a
    predicted positive is a hit when its id exists in ground_truth and the two
    label sets are equal. 1e-8 is added to each denominator.

    :param prediction: mapping id -> set of labels
    :param ground_truth: mapping id -> set of labels (never modified here)
    :param none_label_index: index of the 'None' label
    :return: tuple (precision, recall, f1)
    """
    none_only = set([none_label_index])

    pos_pred = 0.0
    pos_gt = 0.0
    true_pos = 0.0
    for i in ground_truth:
        if ground_truth[i] != none_only:
            pos_gt += 1.0

    for i in prediction:
        if prediction[i] != none_only:
            # classified as pos example (Is-A-Relation)
            pos_pred += 1.0
            # Check membership before indexing: a plain dict would raise
            # KeyError for an unknown id, and a defaultdict would silently
            # grow an empty entry that is later counted as a ground-truth
            # positive when the same mapping is evaluated again.
            if i in ground_truth and prediction[i] == ground_truth[i]:
                true_pos += 1.0

    precision = true_pos / (pos_pred + 1e-8)
    recall = true_pos / (pos_gt + 1e-8)
    f1 = 2 * precision * recall / (precision + recall + 1e-8)

    return precision, recall, f1


if __name__ == "__main__":
    # Command-line entry point: score an existing prediction file against the
    # test labels and print the metrics for the requested task.

    if len(sys.argv) != 6:
        print('Usage: evaluation.py  -TASK (classify/extract) -DATA(nyt_candidates) -MODE(classifier/emb) -METHOD(retypeRM) -SIM(cosine/dot)')
        # sys.exit instead of the site-provided exit(), which is meant for the
        # interactive interpreter and is absent when Python runs with -S.
        sys.exit(-1)

    # do prediction here
    _task = sys.argv[1] # classify / extract
    _data = sys.argv[2]
    _mode = sys.argv[3] # emb or classifier/method name
    _method = sys.argv[4] # emb method or null
    _sim_func = sys.argv[5] # similarity function or null

    # Reject an unknown task before reading any input file.
    if _task not in ('extract', 'classify'):
        print('wrong TASK argument.')
        sys.exit(1)

    indir = 'data/intermediate/' + _data + '/rm'
    outdir = 'data/results/' + _data + '/rm'

    output = outdir +'/prediction_' + _mode + '_' + _method + '_' + _sim_func + '.txt'
    ground_truth = load_labels(indir + '/mention_type_test.txt')
    predictions = load_labels(output)

    # '%s' formatting prints the same text as the former multi-argument print
    # statement while remaining valid on both Python 2 and Python 3.
    if _task == 'extract':
        none_label_index = find_none_index(indir + '/type.txt')
        prec, rec, f1 = evaluate_rm_neg(predictions, ground_truth, none_label_index)
        print('precision: %s' % prec)
        print('recall: %s' % rec)
        print('f1: %s' % f1)
    else:
        # _task == 'classify': only the precision value is reported, as accuracy
        prec, rec, f1 = evaluate_rm(predictions, ground_truth)
        print('accuracy: %s' % prec)
 






================================================
FILE: code/Evaluation/tune_threshold.py
================================================
__author__ = 'xiang'

import sys, os
from collections import  defaultdict
from emb_prediction import *
from evaluation import *

def min_max_nomalization(prediction):
	"""
	Rescale the score of every prediction with min-max normalization.

	Each value of ``prediction`` is indexed as value[0] (label) and value[1]
	(score). The result maps the same keys to (label, normalized_score) with
	normalized_score = (score - min) / (max - min + 1e-8).

	:param prediction: mapping id -> (label, score)
	:return: defaultdict(tuple) mapping id -> (label, normalized score);
	         empty when ``prediction`` is empty
	"""
	prediction_normalized = defaultdict(tuple)
	if not prediction:
		# min()/max() reject empty sequences; nothing to normalize anyway
		return prediction_normalized
	# Builtin min/max replace the sys.maxint sentinels, which exist only on
	# Python 2 and are not needed to find the score range.
	scores = [prediction[i][1] for i in prediction]
	min_val = min(scores)
	max_val = max(scores)
	# 1e-8 keeps the division defined when all scores are equal
	score_range = max_val - min_val + 1e-8
	for i in prediction:
		score_normalized = (prediction[i][1] - min_val) / score_range
		prediction_normalized[i] = (prediction[i][0], score_normalized)
	return prediction_normalized

def evaluate_threshold(_threshold, ground_truth):
	"""
	Score the predictions whose confidence exceeds a cut-off with evaluate_rm.

	Reads the module-level ``prediction`` mapping (id -> value indexed as
	[0] label, [1] score); it is not passed in as an argument.

	:param _threshold: entries are kept only when score > _threshold
	:param ground_truth: forwarded unchanged to evaluate_rm
	:return: the result of evaluate_rm on the filtered predictions
	"""
	kept = defaultdict(set)
	for mention_id in prediction:
		label_and_score = prediction[mention_id]
		if not label_and_score[1] > _threshold:
			continue
		kept[mention_id] = set([label_and_score[0]])
	return evaluate_rm(kept, ground_truth)

def evaluate_threshold_neg(_threshold, ground_truth, none_label_index):
	"""
	Score the predictions above a cut-off with evaluate_rm_neg.

	Reads the module-level ``prediction`` mapping (id -> value indexed as
	[0] label, [1] score); it is not passed in as an argument.

	:param _threshold: entries are kept only when score > _threshold
	:param ground_truth: forwarded unchanged to evaluate_rm_neg
	:param none_label_index: forwarded unchanged to evaluate_rm_neg
	:return: the result of evaluate_rm_neg on the filtered predictions
	"""
	above_cutoff = defaultdict(set)
	above_cutoff.update(
		(mention_id, set([prediction[mention_id][0]]))
		for mention_id in prediction
		if prediction[mention_id][1] > _threshold
	)
	return evaluate_rm_neg(above_cutoff, ground_truth, none_label_index)

def tune_threshold(_threshold_list, ground_truth, none_label_index):
	"""
	Evaluate every candidate threshold and collect the results.

	When ``none_label_index`` is None each threshold is scored with
	evaluate_threshold, otherwise with evaluate_threshold_neg.

	:param _threshold_list: iterable of thresholds to try
	:param ground_truth: forwarded to the per-threshold evaluation
	:param none_label_index: index of the 'None' label, or None
	:return: defaultdict(tuple) mapping threshold -> evaluation result
	"""
	outcome = defaultdict(tuple)
	for cutoff in _threshold_list:
		if none_label_index is not None:
			outcome[cutoff] = evaluate_threshold_neg(cutoff, ground_truth, none_label_index)
		else:
			outcome[cutoff] = evaluate_threshold(cutoff, ground_truth)
	return outcome

if __name__ == "__main__":
	# Command-line entry point: sweep thresholds 0.00 .. 1.00 over normalized
	# prediction scores, write one 'recall<TAB>precision' line per threshold
	# and print the threshold with the highest F1.

	if len(sys.argv) != 6:
		print('Usage: tune_threshold.py -TASK (classify/extract) -DATA(KBP/NYT/BioInfer) -MODE (emb) -METHOD(retype) -SIM(cosine/dot)')
		# sys.exit instead of the site-provided exit(), which is meant for the
		# interactive interpreter and is absent when Python runs with -S.
		sys.exit(-1)

	# do prediction here
	_task = sys.argv[1]
	_data = sys.argv[2]
	_mode = sys.argv[3]
	_method = sys.argv[4]
	_sim_func = sys.argv[5]

	indir = 'data/intermediate/' + _data + '/rm'
	outdir = 'data/results/' + _data + '/rm'
	ground_truth = load_labels(indir + '/mention_type_test.txt')
	# NOTE: `prediction` must stay a module-level name; evaluate_threshold and
	# evaluate_threshold_neg read it as a global.
	prediction = load_label_score(outdir + '/prediction_' + _mode + '_' + _method + '_' + _sim_func + '.txt')
	file_name = outdir + '/tune_thresholds_' + _mode + '_' + _method + '_' + _sim_func +'.txt'
	# print _data, _mode, _method, _sim_func


	step_size = 1
	prediction = min_max_nomalization(prediction)
	threshold_list = [float(i)/100.0 for i in range(0, 101, step_size)]
	# print threshold_list[0], 'to', threshold_list[-1], ', step-size:', step_size / 100.0

	# Any task other than 'extract' is evaluated without a None label.
	if _task == 'extract':
		none_label_index = find_none_index(indir + '/type.txt')
		# print '[None] label index: ', none_label_index
		result = tune_threshold(threshold_list, ground_truth, none_label_index)
	else:
		result = tune_threshold(threshold_list, ground_truth, None)


	### Output
	# float('-inf') replaces the Python-2-only sys.maxint sentinels; the first
	# threshold always overwrites these initial values.
	max_f1 = float('-inf')
	max_prec = float('-inf')
	max_recall = float('-inf')
	max_threshold = float('-inf')
	for _threshold in threshold_list:
		precision, recall, f1 = result[_threshold]
		if max_f1 < f1:
			max_f1 = f1
			max_prec = precision
			max_recall = recall
			max_threshold = _threshold

	with open(file_name, 'w') as f0:
		for _threshold in threshold_list:
			precision, recall, f1 = result[_threshold]
			# The former per-method branches ('line', 'retype', other) only
			# differed by str(float(str(x))) round trips, which yield the same
			# text as str(x), so a single write covers all of them.
			f0.write(str(recall) + '\t' + str(precision) + '\n')

	# '%s' formatting reproduces the spacing of the former multi-argument print
	# statement while remaining valid on both Python 2 and Python 3.
	print('Best threshold: %s .\tPrecision: %s .\tRecall: %s .\tF1: %s' % (max_threshold, max_prec, max_recall, max_f1))


================================================
FILE: code/Model/FCM/README.md
================================================
FCM_nips_workshop
=================
Basic version of FCT model for relation extraction.
The package has two executable files: RE_FCT_fixed corresponds to the log-linear model, and RE_FCT to the log-quadratic (log-bilinear) model.
The data directory

Install:
make

Usage:
./RE_FCT[_fixed] trainfile devfile resfile baseline_embfile num-iter learning-rate

Use the following command to reproduce the results I reported:
./RE_FCT ../data/SemEval.train.fea.sst ../data/SemEval.test.fea.sst predict.fea.fullnerpair.onlyne.txt ../data/vectors.nyt2011.cbow.semeval.filtered 30 0.005 &> training.log &

Note that early stopping currently has to be done manually :)

I did not tune the learning rate much. You can try a grid search; better results can hopefully be achieved that way.

When running on SemEval, it is better to disable the sub-models with sst-pair features,
    since when WordNet supersense tags are used instead of NE tags,
    the model has many more entity-type pairs, which leads to overfitting.



================================================
FILE: code/Model/FCM/all.sen
================================================
he was captured in baghdad late monday night . 
mostly they believe in those northern iraq areas brent sadler was just talking about before the break . 
that is especially true if you have a loved one in the war zone . 
he was convicted of hijacking the `` achille lauro '' in 1985 . 
it was a complete whitewash that would never have been possible without his father 's help . 
reporter : who was that nice man strolling with his wife ? 
is a key japanese life insurance unit of us conglomerate general electric co . 
one of our viewers , though , thinks there 's a sure fire way to get that al qaeda ring leader to talk . 
disabled american tourist kling klinghoffer was on board . 
but as our candy crowley reminds us , life does and must go on . 
north korea also hinted during the meetings that they have nuclear weapons and may conduct a test , a u.s. official said . 
there , you see , the big picture of the southern part of asia . 
it lies just 12 miles from baghdad and will be a key forward base for u.s. troops as they prepare for a push on the capital . 
the cruel and ruthless iraqi dictator , saddam hussein . 
no officials at the aig tokyo office or ge edison were immediately available to comment on the reports . 
tim ewart of itn news reports on evidence of atrocities at an abandoned iraqi military base . 
kennedy entered the navy earning hero status for saving his ship mates when his boat pt-109 was sunk by the japanese . 
basson was accused of directing south africa 's former apartheid regime 's biochemical efforts aimed at destroying its opponents . 
some of these ladies and gentlemen are going to be saying hello to their families . 
but the more important presence is to go after what intelligence delivers and that 's where these key leaders are located . 
tim cross , the top british member of garner 's team . 
toyota 's top posts held by cho and chairman hiroshi okuda remain unchanged . 
founded by former mayor gholamhossein karbaschi , hamshahri was quick to become iran 's biggest-selling daily with a circulation of 450,000 . 
but first , we want to go somewhere else in the region to tell you that dozens were injured today in the first suicide bombing in israel in three weeks . 
u.s. forces in baghdad are staging spot raids around that city , searching for holdout members of saddam 's regime . 
they need to send a message to their families abroad just to tell them , `` we are alive , '' says this woman . 
more than 300 iraqi soldiers have been left dead . 
romanticly strolling with his first wife on a snowy path , trendy leather coat , furry hat . 
u.s.-led forces are approaching iraq 's capital after destroying the medina and baghdad divisions of the republican guard . 
you know , as a music lover that i am , there 's five acts all playing at the same time on five different stages that i want to see . 
ge edison would be aig 's third life insurance unit in japan following alico japan and aig star life insurance co . 
here , they wait outside a restaurant where journalists hang out . 
cnn 's kelly wallace reports on today 's attack in netanya . 
and as part of that effort , u.s. special forces today raided the home of the iraqi microbiologist known as dr . 
british troops found the bodies at an abandoned iraqi military base on the outskirts of azubayr . 
i 'm christa demore ( ph ) from monrovia , maryland . 
some republican guard troops fought fiercely on the northern side of the airport complex . 
kennedy told a friend before he entered politics , you 've got to live every day like it 's your last day on earth . 
gently kissing her good-bye like a good husband at the now famous saddam hussein international airport . 
but they do believe the pictures out of baghdad today , on the one hand , send as powerful a message as you can send to the people in the arab world who have criticized this war . 
reporter : there are some suggestions after the viz it yesterday of colin powell meeting with ariel sharon , palestinian prime minister abbas said more progress may be made on implementation of the u.s. administration 's road map to peace . 
british bank barclays announced on thursday it had agreed to buy spanish rival banco zaragozano for 1.14 billion euros ( 1.29 billion dollars ) in cash . 
beijing remains the most affected part of the country , but figures show this disease is also spreading in northern china . 
we happen to be at a very nice spot by the beach where this is a chance for people to get away from cnn coverage , everything , and kind of relax 
jonathan karl , cnn , washington . 
people in baghdad celebrating . 
well , like , alabama from here , my sister 's is about six hundred miles from me , the one down there . 
central command says it is investigating iraqi allegations that u.s. attacks are to blame for hundreds of civilians casualties in hillah . 
police say a suicide bomber blew himself up outside this cafe , called the london cafe , earlier sunday afternoon . 
kiichiro toyoda founded the automaker in 1937 , transforming the loom manufacturer started by his father into an automaker . 
at least ten people have been injured and at least five homes have burned in benton harbor since the rioting began . 
20 people were killed in that state . 
happily chatting with peasants behind the wheel of his white mercedes . 
the barber at hinsville georgia larries that . 
the woman who ran iraq 's secret biological warfare laboratory . 
she wants to call her pregnant daughter saba ( ph ) in sweden to see if she has delivered . 
we are now between the lines , a no man 's land of abandoned iraqi bunkers and an unnerving silence , broken by the roar of warplanes and the distant surge of air strikes . 
it 's not clear how long the bodies had lain here , but they were clearly not from this war . 
baghdad , iraq ( ap ) 
20 million years ago , they were formed when india broke off what was then a piece of africa and slammed into that portion of asia , creating this tremendous mountain range , with of course mt . 
iraq was due to present a 30-page report on biological weapons it said it destroyed in 1991 , according to diplomatic sources in baghdad , who said a second batch of documents on anthrax would follow in the next few days . 
but city officials say several people have been arrested on civil disobedience charges . 
and i 'm curious , how can democrats claim that president bush is ignoring the war on terror if al qaeda 's second key operative was just caught ? 
u.s. troops assisting in pulling down that statue of saddam hussein . 
germ , who directed iraq 's biological weapons program . 
that 's about 60 miles south of baghdad . 
well , uh , you know , colin powell is a -- pretty moderate guy and would 've been nice to see him there or maybe -- the , uh , deputy -- secret of state . 
barry diller resigned as co-chief executive of vivendi universal entertainment , saying it was appropriate for him to step down while paris-based vivendi universal entertains bids for universal studios , universal 's theme parks and other entertainment assets . 
the highest peak of them all . 
earlier , secretary of state colin powell had told a gathering of experts from the asia-pacific region that the talks had concluded , leaving the impression there would not be a third day of discussions on friday . 
in kansas , authorities say seven people were killed when tornadoes cut across the southeastern part of the state . 
people here tell us the cafe was filled with people enjoying the spring weather . 
yeah , you had to cross a little walking , swinging bridge to get to this man 's house , you know . 
'' sooner or later iraq will fall and russia and the united sates will resume normal relations , '' the senior moscow official , who was reported to be close to putin , told the paper . 
garner 's office of reconstruction and humanitarian assistance is charged with coordinating humanitarian assistance , rebuilding infrastructure shattered by years of war and u.n. sanctions , and gradually handing back power to iraqis leading a democratically elected government . 
there was no immediate claim of responsibility for monday 's attack at the shaarei amakim mall in the working class town of afula , which has been targeted repeatedly by palestinian militants because of its proximity to the west bank . 
her research was conducted here at a location well-known to u.n. arms inspectors . 
inside a neighboring building , there was evidence of cells and a catalog of photographs of the dead . 
across the fence , a 21-year-old u.s. marine named jason cook ( ph ) is after the same phones . 
another key mission for the 173rd could be to keep order in northern iraq , which is controlled by two semi-autonomous kurdish factions but also includes several splinter groups and a base for the al-qaida-linked ansar al-islam . 
even as the secretary of homeland security was putting his people on high alert last month , a 30-foot cuban patrol boat with four heavily armed men landed on american shores , utterly undetected by the coast guard secretary ridge now leads . 
these are videotapes from the '80s found in a palace of saddam hussein 's first wife . 
in the second apache helicopter airstrike that occurred within the past hour or so , we 're now told by palestinians , three people died , 33 others were wounded , six are in critical condition , three dead were civilians , according to palestinian sources . 
one person was arrested for assault with a deadly weapon . 
as we told you at the top of this newscast , an oklahoma judge has ruled that terry nichols will indeed stand trial in that state court on 160 counts of first-degree murder . 
do you travel to meet up with family or friends during the holidays ? 
'' the acquisition of banco zaragozano builds on our existing business creating the sixth largest private sector banking group in spain '' by assets , added jacobo gonzalez-robatto , chief executive of barclays spain . 
many here though say they are relieved only one person dead , the suicide bomber . 
life on the homefront seems the same at farmer 's market in los angeles , but it 's more angst ridden . 
they 're going to be defended fanatically but intelligence is so crucial from aerial platforms and special ops personnel to give the three assets the intel which they need to close with and eliminate final remnants of the regime in those citys . 
but u.s. state department officials , asking not to be identified , said there may be more meetings on friday . 
he wants to call his mom in houston . 
reporter : taha then described the success of their tests using an anthrax sim lant and artillery rockets . 
the acquisition of ge edison is expected to boost aig 's annual life insurance premium revenue in japan to about 1.1 trillion yen , ranking it sixth in the japanese life insurance business . 
the oil capital of northern iraq is visible behind me , just over there , a harmless flare of burning gases is easily identifiable . 
i 'm kate in pennsylvania . 
two southwest airline pilots were reportedly fired for stripping down . 
the oklahoma city bombing conspirator is already serving a life term in federal prison . 
the pentagon says that they do have air dominance as they now call it over the vast majority of the country that there are certain areas and only certain areas of downtown iraq where battle management radars linked to surface to air missiles are still in tact . 
this is judy from mayfield , kentucky . 
what i should tell you is there are about 15 possible routes to get you to the top of mt . 
one of the sites proving to be the most deadly was the law enforcement center in the western tennessee city of jackson . 
a member of the international committee of red cross visited the local hospital there , and he says it 's a horrible scene . 
a `` new york times '' reporter lends cook his mobile sat phone . 
diller was appointed interim ceo of vivendi universal 's u.s.-based entertainment assets last year while vivendi universal concentrated on repaying a massive debt . 
dozens , though , injured , including some israeli soldiers , because according to an eyewitness , he said many soldiers were here also enjoying the spring weather . 
reporter : and she reveals success in a 1989 test , using aerial bombs to disperse biological agents . 
`` usa today '' says the pilots were dismissed for allegedly taking off all or most of their clothes in the cockpit . 
but , uh , at at least for the ceremony in egypt i think , uh , i can understand that no one would want to go into -- palestinian territory . 
the members of the board of zaragozano have voted in favour of the transaction and have committed to sell their shares to barclays . 
sheik ayad jamal al din , a shiite religious leader from nasiriyah , urged the delegates to craft a secular government , according to a pool report . 
the ruling by judges piet streicher and mohamed navsa of the supreme court of appeal , the highest appeals court in the country , means that basson can not be tried again and his acquittal stands , the south african press association reported . 
my boyfriend has been watching the tv like every single day , every time it comes on , he 's watching it . 
iraq 's precious oil field remain intact . 
( on camera ) : today 's discovery seems to provide shocking evidence of atrocities under saddam hussein 's regime . 
mike boettcher , cnn , baghdad . 
an emergency official tells cnn that ten people died there . 
now , why has our president placed homeland security in the hands of republican political hacks instead of professionals , by the way ? 
across the arab world , the administration hopes to send a signal that the united states is there to help the iraqi people regain control of their own country , as a liberty , not as any imperial or occupying force . 
a u.s. official said the north koreans implied during the talks that they have nuclear weapons and that they may conduct a test . 
tariq aziz 's sun ziad looks worried . 
u.s. forces tightened their grip throughout baghdad , entering the city for the first time from the north , seizing a military airport and battling snipers and republican guard fighters . 
a texas woman is in jail this morning accused of killing her two sons and severely beating another the day before mother 's day . 
iraqi exile hatem mukhliss quoted president john f . 
a father in new york is accused of dropping his baby out of a seven - story apartment window , here 's jordena ginsberg with more from affiliate news-12 westchester . 
the associated press is reporting that u.s. troops have launched an attack on saddam international airport . 
i 'm ben charlton ( ph ) from east lansing , michigan . 
sam waksal has been sentenced to more that seven years in prison , ordered to pay $ 4 million in fines and back taxes in connection with the insider trading scandal . 
but when they do , british soldiers believe , more horrors will come to light . 
prosecutors say longo stood his family because they stood before him and the life he craved . 
tense of thousands of additional u.s. troops are getting ready to join coalition forces fighting the war in iraq . 
amid talk of delaying or even shelving a us-british resolution giving iraqi president saddam hussein until monday to disarm or face war , oil prices in asia pulled back , shares rose and the us dollar eased . 
as we continue to watch air force one , now on the ground outside belfast , northern ireland , we 'll be watching president bush emerge from the plane momentarily . 
about nine miles from the iraqi capital . 
joining us on the phone is the mayor of jackson , tennessee , charles farmer . 
according to the official , a north korean nuclear detonation would deplete by half their estimated stockpile of two weapons . 
tim ewart , itv news , azubayr . 
the afula explosion _ the 95th in 32 months of fighting _ went off at 5:14 p.m. ( 1414 gmt ) at the entrance to the mall , reportedly as shoppers were waiting in line for a security check . 
vivendi also is holding talks with viacom , mgm and other buyers interested in acquiring all or parts of its entertainment companies . 
shareholders , including board members , owning 54 percent of the ordinary share capital of zaragozano have given irrevocable commitments to accept the offer . 
iraq 's armed opposition , including these kurds , waits on the sidelines to join the fray , if america asks . 
the comments indicate that russia 's nuanced position over the war in iraq was becoming ever more scrambled , with putin pushing to protect his budding friendship with us president george w . 
turkey has said it may send more troops into northern iraq to prevent refugees from moving north , while u.s. officials have said they advised turkey against sending large additional forces into northern iraq . 
the alleged intended victims included former president nelson mandela and several african national congress leaders who are now high-ranking government officials . 
meanwhile , the u.s. military faced sharp criticism over assaults that killed three foreign journalists . 
the baby 's aunt did n't want her face shown on tv , but told us she was stunned . 
i 've talked to people from pennsylvania , yes . 
secretary of homeland security tom ridge is another career politician who was passed over by mr . 
mayor farmer , can you give us the latest on your town . 
the first departures are already underway at fort hood texas . 
pentagon officials briefed reporters who refused to comment about any activity at the airport . 
he says his father 's night-time handover was dignified , that u.s. forces offered medical support for his father 's heart condition , that has caused two heart attacks recently . 
u.s. troops work to clear that area of iraqi weapons , coming up . 
copenhagen , denmark ( ap ) 
death of arafat ( 2 ) plo leader plo leader yasser arafat died in a paris hospital last week , after a lot of controversy surrounding his illness and death . 
diller served as co-ceo with vivendi universal chairman jean-rene fourtou . 
defense secretary rumsfeld and vice president cheney among those saying the toughest fighting might still lie ahead , still unanswered questions about the whereabouts of saddam hussein and other key members of the iraqi leadership . 
hanoi , vietnam ( ap ) 
there had been many terrorist attacks here in netanya , including the deadliest suicide bombing ever in israel almost exactly a year ago , march 27 , at the park hotel during passover , 29 people killed in that attack . 
and , says the iraqi opposition , in winning this war . 
by u.s. forces and which appears to be run by members of the the war began in iraq several weeks ago . 
outside , u.s. troops provide occasional protection . 
reporter : now , willie williams , the little girl 's father is charged with attempted murder . 
cnn 's jamie kolbe is there reporting , jamie on these families who are saying good-bye to their loved ones . 
bush in the face of strident opposition from the russian media and other top kremlin officials . 
in gaza , a pair of israeli gun ship attack , aerial attacks that have threatened to derail the road map to peace , as it is called . 
now to central iraq , where coalition forces say they are now in charge . 
i actually responded a few hours after the sheriff 's department did . 
palestinian security forces returned monday to the positions they held in the gaza strip before the outbreak of the 33-month palestinian uprising as israel removed all major checkpoints in the coastal territory , a palestinian security source said . 
pentagon officials said tuesday they did n't know whether iraqi president saddam hussein died in the u.s. airstrike on a baghdad neighborhood the day before . 
our founder here at cnn , ted turner , has sold more than half 0 his stake in aol time warner . 
the reason that they 're still in tact according to today 's briefing was that they had not turned them on and as long as they do n't turn them on , the u.s. has trouble locating them . 
witnesses say iraqi troops are dug in trying to fend off coalition troops making their way into the capital 's main airport . 
and we must not underestimate the desperation of whatever forces remain loyal to the dictator . 
earlier monday , a 19-year-old palestinian riding a bicycle detonated a 30-kilo ( 66-pound ) bomb near a military jeep in the gaza strip , injuring three soldiers . 
should president bush or a senior us official have attended arafat 's funeral ? 
another neighbor comes to our car to give his opinion . 
brent sadler , cnn , near kirkuk in northern iraq . 
the area around mosul described as uncertain in terms of its stability with deliberate agitation . 
as for this attack , the palestinian militant group islamic jihad is claiming responsibility . 
here , the remains of an iraqi armored car , and by its side the body of an iraqi soldier , all that remains of a column that pushed up the airport road last night in a final effort to defend the airport . 
but first , our own little moonwalk across the political landscape , the `` crossfire political alert . '' the foreign ministers of france , germany and russia today issued a joint declaration that they will not allow a second american-backed resolution to pass the united nations security council . 
and deputy secretary of homeland security asa hutchinson is yet another career politician and a graduate of the disgraceful bob jones university . 
the caches were apparently abandoned by retreating iraqi forces . 
the takeover , which is subject to approval by 75.01 percent of zaragozano shareholders , is expected to be completed in july . 
in beijing , chief u.s. negotiator , assistant secretary of state james kelly , declined to answer questions , saying only the sides `` had talks . '' 
iraqi opposition leaders fear the united states is trying to force ahmed chalabi , head of the london-based umbrella iraqi national congress , on them as leader of a new iraqi administration . 
`` the los angeles times '' reporting that israeli and palestinian officials may be very close to reaching a deal for security control , not just of a small part of gaza , but returning all of the gaza strip to palestinian control , that 's a very significant step , indeed , plus the west bank city of bethlehem . 
should israel have let arafat be buried in jerusalem ? 
what sir edmund hillary did and norgay did was go along this route that takes them across this valley -- you have to turn the picture this way a little bit , shoot back over this range , and bring you up in here . 
i 'm in the middle of baghdad , just patrolling up and down the streets . 
u.s. marines battled snipers as they fought deeper into the capital from the east . 
they were collecting evidence as you were there , the law enforcement authorities ? 
his friend adds , if all the leaders stayed in baghdad , then the americans will catch them . 
former danish international stig toefting dropped his appeal of an assault conviction and will serve a four-month jail sentence , denmark 's tv2 reported monday . 
we 're paying attention to all those details for us , chris plante at the pentagon . 
seven people convicted last week in vietnam 's biggest-ever criminal trial , including two former senior government officials , have requested an appeal of the verdicts , a court official said tuesday . 
u.s. troops are now knocking on the door of baghdad . 
hello , th - i 'm marco from illinois . 
police say it all started at around 11:30 saturday night when williams got into a fight with the baby 's mother jasmine gil yo . 
175 people have died trying to reach the summit of mt . 
they seized the rasheed airport and captured enough ammunition for an estimated 3,000 troops . 
in the first attack , a car carrying rantissi , a senior member of the hamas group , a militant organization , was targeted . 
cook has n't spoken to his family since super bowl sunday , january 26 . 
the group is also claiming that islamic jihad militants are already inside iraq to carry out attacks against u.s. and british forces . 
a reuters correspondent said dozens of iraqi civilians and soldiers were killed in what witnesses called a barrage of u.s. artillery . 
with u.s. forces rolling closer and closer to baghdad , the military seems to be zeroing in on the ground war . 
hi , i 'm mike from new jersey . 
who is in kathmandu . 
`` now that vivendi universal has begun a formal process in reviewing options for its entertainment assets , it is appropriate to step aside from any direct management responsibility . '' 
he wanted to date her again and she was n't interested . 
one such cache turned up in the city of karbala , a strategic military point about 60 miles to the south of baghdad . 
now in your administration , the clinton administration , there were these members of the cabinet who by your definition were professional politicians -- lloyd bentsen , les aspin , william s . 
israel holds the palestinian leader responsible for the latest violence , even though the recent attacks were carried out by islamic militants . 
a bomb exploded today in a courtroom in southern yemen , injuring four people , one of them a judge . 
british efforts to provide a way out of the diplomatic mire at the united nations ended without agreement late thursday , but with london 's ambassador to the un , jeremy greenstock , saying he expected talks to go on into the weekend . 
malaysia 's prison department has agreed to allow jailed politician anwar ibrahim to attend his daughter 's wedding ceremony friday after his bail hearing , his lawyer said thursday . 
that 's when police say he grabbed their daughter . 
coming up , coalition forces control several parts of southern iraq but they still face some serious problems including pockets of resistance . 
police officers took up their positions across the gaza strip , enjoying a freedom of movement they have not known since the intifada exploded in september 2000 , the source said . 
let 's check in with cnn 's gary tuchman at an air base near iraq . 
( on camera ) : the patchy , somewhat disorganized defense of the airport last night has left coalition commanders with several questions . 
well , what - where - how far do you travel for your holiday ? 
ominously , they also took a prison where they found u.s. army uniforms and chemical weapons suits possibly belonging to american pows . 
allan chernoff live from the new york stock exchange with more . 
president bush wants the security council to explicitly authorize force against iraq , and his press secretary today remained optimistic that in the end , that 's just what he 'll get . 
on this one sidewalk in baghdad , they were all doing the most natural thing during war , trying to reach their loved ones . 
toefting was convicted in october 2002 of assaulting a pair of restaurant workers during a night out with national squad teammates in the capital , copenhagen . 
when walter rodgers , our embedded reporter with the 3-7th cavalry says three battalions of troops are on the ground , inside baghdad itself , have taken up positions they 're going to spend the night there presumably , how many soldiers are we talking about right now ? 
then the baby 's mother made a frantic call to the police . 
we come to you from a base near the iraqi border . 
chinese foreign ministry spokesman liu jianchao refused to provide details , but said the talks were `` conducive to mutual understanding and finding ways to resolve the north korean nuclear question peacefully . '' 
aol time warner , of course , our parent company . 
analysts have long argued that putin is far keener to preserve friendly relations with the united states than the pro-european , anti-war camps embedded in much of the russian media and the foreign and defense ministries . 
cnn 's ryan chilcote is embedded with the 101st airborne division , and he has the story . 
go to tennessee to see my , uh , two sisters . 
a man with a pistol was arrested outside of the courthouse and police say he admits planning the bombs . 
the biggest problem tariq aziz ' family says they now face is that aziz is now safely in u.s. custody , they do n't know how safe they are out on baghdad 's streets . 
'' palestinian police have taken over all the posts and positions they held before september 28 , 2000 . 
since marching into iraq , coalition troops have engaged in some fierce battles with saddam hussein 's forces . 
and these bozos let four armed cubans land on our shores when they 're trying to make a high terrorist alert . 
chalabi was the first top iraqi opposition leader to be airlifted by the u.s. military into southern iraq as the fighting wound down , and he and other top members of his group plan to meet soon in baghdad . 
many pilots are flying these a-10 attack planes behind me . 
some army units routed iraqi fighters from a republican guard headquarters inside the capital . 
my brother go - we go together . 
i 'm kelly wallace , cnn , reporting from netanyahu , israel . 
then police say the baby 's mother pulled out a kitchen knife . 
the seven include former state radio director tran mai hanh and former vice national chief prosecutor pham sy chien , both convicted of receiving bribes , the court official said on condition of anonymity . 
nic robertson , cnn , baghdad . 
the unrest continues there and this follows up the shooting we talked about yesterday in which at least seven iraqis were killed by u.s. fire after they apparently came under fire trying to secure a certain part of the central part of mosul . 
the president , quoted by ari fleischer , his spokesman , is saying he 's concerned the strike will undermine efforts by palestinian authorities to bring an end to terrorist attacks and does not contribute to the security of israel . 
hamas said monday it has no intention of halting attacks , despite egypt 's efforts to have palestinian militant groups agree to a one-year suspension of shootings and bombings . 
it is the equipment of choice for a task force like this to go into baghdad . 
they also fly f-16s . 
bill gates and dick parson , the ceos of these two companies , smoking a peace pipe right now . 
chitchat in the time life building here at aol and all over wall street , ted turner selling 60 million shares for $ 800 million , more than half of his stake in the company , he opens 40 million plus now . 
it was the same courtroom where a suspected al qaeda militant got a death sentence last week for killing three american missionaries . 
my brother , his wife , my husband and myself . 
discussions were scheduled to end friday , when kelly was to fly to seoul the same day to meet with south korean officials . 
our president has put homeland security in the hands of failed republican hacks . 
u.s. officials said chalabi was brought in because he offered forces to the coalition . 
tv2 , quoting toefting 's lawyer anders nemeth , reported that that 33-year-old midfielder was ready to serve his sentence immediately . 
at that point the baby 's mother stabbed him right in the hand . 
reporter david bowden is with the british forces in umm qasr in southern iraq . 
it can detect enemy radar or missiles and destroy them instantly . 
and you were talking about the makers , where some of these weapons are coming from , and you were expressing surprise to me . 
it is the first time they have had freedom of movement with cars and weapons since the start of the intifada , '' the source said . 
anwar will be taken to the appeal court early friday for a bail application pending his appeal to the country 's highest federal court againt his sodomy conviction , counsel sankara nair said . 
others discovered a 12-room complex inside a cave , complete with white marble floors and fluorescent lighting . 
up next , pitching great roger clemens talks about his experiences meeting u.s. service members in afghanistan and the persian gulf . 
then you spend the day there ? 
aol time warnerings our corporate parent , will get $ 750 million from microsoft . 
these are probably mechanized heavy task forces that have overhead aerial platforms to provide direct support . 
earlier this month , toefting said english premier league club bolton let him go so he could prepare for the appeal . 
a majority of these are coming from -- ( unintelligible ) on this box , but jordan and france . 
still bleeding , he ran downstairs and scooped up the baby , and took the baby to the hospital . 
several news organizations have filed for -- their reporters had with scott peterson . 
clearly some frustration with the sharon government in israel in the wake of these attacks in gaza . 
also , news about another half-broth of saddam hussein apparently now apprehended overnight in baghdad with the i assistance of local iraqis . 
we can tell you that a lot of pilots of this base are not flying to another base in iraq . 
so it 's ok -- it 's ok to have professional politicians at the justice department and the pentagon ... 
they were a big part in `` operation iraqi freedom . '' you are seeing the pictures , everybody is home . 
diller , 61 , remains chief executive officer of usa interactive , which operates the home shopping network , ticketmaster , match.com and expedia , among other companies , and has a 5.4 percent stake in vue . 
president bush is said to be focusing on one fence-sitting security council member , the african nation of guinea , because he said -- quote -- `` i used to have one of their pigs . '' so -- ok , i made that up . 
the protesters held banners written in english and arabic saying the `` hawza , '' or the shiite religious seminary in najaf , represents them . 
in a renewed mediation attempt , egyptian envoys have been holding meetings in gaza and damascus in recent days with leaders of the militant groups . 
the trial by a ho chi minh city court was seen as a litmus test of the communist government 's resolve to fight widespread corruption . 
the question , i 'm an aol shareholder sitting at home , hearing this news , done this set off a few alarms ? 
it was unclear whether toefting would return to bolton after serving his sentence . 
the north retaliated by withdrawing from the nuclear non-proliferation treaty , expelling u.n. inspectors and restarting a plutonium-producing reactor . 
his us counterpart , john negroponte , said : `` i ca n't say we are much farther along today than we were yesterday , '' after hours of talks on six british benchmarks to test iraq 's compliance with council demands . 
i got an older brother and a little brother and sister . 
every 26 months , mars and earth fly in close orbital formation . 
i always think about that like -- or a lot of i think college students think about that like with some office jobs , you know ? 
14 fathers have not seen their babies . 
not sure where in baghdad , personal the person western neighborhood of mansour . 
yes , and , uh , christmas eve we spend - my husband and i spend with my mother-in-law and her - and his family . 
have they done so much damage now to iraqi command and control centers that they can no longer put up a proper fight , or are the best troops simply withdrawing into the city , to fight there street by street ? 
it 's a base that was taken over by the coalition last week . 
diller will continue to play a critical role in the future of vivendi 's entertainment arm . 
toefting transferred to bolton in february 2002 from german club hamburg . 
rula amin , cnn , baghdad . 
a man in new york is facing attempted murder charges after allegedly throwing his baby seven stories to the ground below . 
my husband 's family . 
`` we will press for any iraqi civilian administration regardless of what the americans say . 
they all reunited with their families . 
secretary of state colin powell today responded to saddam hussein 's latest attempts to fool the world . 
peterson was arrested for the murder of his wife and unborn son after their bodies washed ashore in california . 
and we spent part of the day at that base . 
he personally owns 1.5 percent of the joint venture . 
we have a very scattered , limited , and russian equipment , but like i said , france and jordan are the main suppliers right here . 
russia 's foreign minister expressed outrage at suggestions from a top washington official last week that moscow should forgive the eight billion dollars in soviet-era debt that baghdad owes it , as a gesture of good will . 
he was sentenced to face a firing squad , and his lawyer has said he also plans to appeal . 
jordeena ginsburg from news 12 westchester has the story . 
as of an hour or so ago , we got word from the battlefield that u.s. forces were just 15 miles away from the southern edge of baghdad . 
welcome back to `` american morning . '' overcaes -- overseas , president bush will leave france in a few hour 's time after a second meeting face-to-face in private with french president jacque chirac today . 
this is talil , iraq . 
the answer to that could well decide how quickly they make a final push into the city . 
the armed groups say they might agree to a truce if israel promises to stop hunting militants _ a proposal israeli prime minister ariel sharon has turned down . 
interesting point here also , people are suggesting he may be raising this money to buy those atlanta sports teams that apparently are on the block . 
diller also owns preferred stock in vivendi universal entertainment . 
two americans who gave their lives to operation iraqi freedom were buried at arlington national cemetery today . 
an administration by garner is not acceptable , '' said mowaffak al-rubaie , an iraqi physician and opposition activist . 
in 1999 he was given a 20-day suspended sentence for assaulting a fan who berated him for playing with german club duisburg . 
yeah , well , you got to make your employees happy , otherwise you 're not going to come out with the next product . 
so that means we can bring more pictures , more information about mars back to the people of the earth , in this particular landing opportunity . 
welcome back n case you 're just tuning in , our top story this hour , the discovery of suspect chemical weapons materials at an agricultural complex in central iraq . 
the toughest fight , though , may lie ahead in the heart of the iraqi capital . 
earlier we talk about a new book claiming that president john kennedy had an affair with a white house intern early 1960s . 
house clearance royal marine style , troops from ( unintelligible ) commando task to seek out the last pockets of iraqi resistance in umm qasr go in hard to arrest suspected regime sympathizers and search for weapons . 
but the are no more iraqis here , the coalition took over this base last week . 
prison authorities have given the nod for anwar to be taken home later in the afternoon to marry his eldest daughter , nurul izzah , to engineer raja ahmad sharir iskandar in a traditional malay ceremony , he said . 
james mates , itv news , with the u.s. marines , south of baghdad . 
it 's -- yes , it 's extremely dangerous , because once you enter these buildings , we found , actually , enemy , you know , guarding these sites . 
it 's ok to put democratic career politicians at the pentagon and the justice department if they 're democrats but not if they 're republicans , is that right ? 
reporter : police measure the spot where ten months old miana williams landed after she was thrown from a seventh floor window at this apartment building . 
the tests were designed to sway six uncommitted council members behind a draft resolution which would declare that iraq has missed its last chance to disarm peacefully unless the council decides otherwise by monday . 
powell called iraqi steps toward disarmament -- quote -- `` too little , too late '' gestures , meant to split international resolve to force baghdad to give up its weapons of mass destruction . 
marine lance corporal patrick nixon was one of 17 killed in an ambush near nasiriyah on march 23 . 
they 're not going to work so hard for ya . 
a hamas leader , abdel aziz rantisi , said monday that `` legitimate resistance will continue as long as occupation exists on our holy land . '' 
diller also is believed to be interested in his own bid for one or more of the entertainment unit 's assets , most likely the film studio . 
coalition fighters , though , use a different kind of shield . 
you might remember last week when another half-brother of saddam hussein was picked up trying to cross into syria and that time , we have been told syria has closed its border with iraq to prevent any of that ongoing . 
embattled imclone founder sam waksal sentenced to more than seven years in prison for insider trading . 
following the withdrawal of israeli troops from the northern town of beit hanoun late sunday , police took up positions there and in the neighbouring town of beit lahiya , he said . 
this is the first time her father got to hold her and look in her eyes since he came back from `` operation iraqi freedom . '' 130 sailors really excited to be home back on land after being at sea for nearly 10 months carrying out `` operation iraqi freedom . '' some remarkable stories . 
he said american officials have outlined what garner 's administration would look like : each ministry would be headed by an american , either military or civilian . 
pentagon sources say the first phase of the final push into baghdad is now under way . 
he and his loyal finance minister suggested friday that moscow might be prepared to waiver some of the iraqi debts in return for better russia-us relations . 
and idaho air national guardsman major gregory stone was buried with honors at arlington . 
talking about bill bennet this morning one of the country 's leading crusaders for morality author of `` virtues '' and admits he has a gambling problem . 
troops from the u.s. army 's 101st airborne division went to the site on friday , finding a number of large drums buried in bunkers . 
in today 's daily news , a 60-year-old woman acknowledges she was the 19-year-old white house intern who slept with jfk . 
the baby 's aunt did n't want her face shown on tv but told us she was stunned . 
north korea , which is seeking a guarantee from the united states that it wo n't be attacked , ratcheted up the threat of conflict in a statement issued by its official korean central news agency accusing washington of escalating tensions . 
you know you are in iraq when you drive down the road coming into the base and see the portrait of saddam hussein . 
they also deployed along the border with israel . 
diller has maintained his top interest is serving as head of usa interactive . 
they are coalition bodyguards over baghdad , an airborne shield to u.s. marines . 
first , to the pentagon and barbara starr to pick up what 's happening . 
reporter : now willie williams the girl 's father is qharthd attempted murder . 
we 're looking at now , a sailor seeing his baby for the first time , just remarkable . 
the north 's leaders fear they could be the next target for u.s. military action . 
attacking on the ground and calling in air strikes , kurdish fighters and u.s. troops have dislodged stubborn iraqi soldiers at a key bridge in northern iraq . 
their more intriguing argument is that one of the charges against malvo is under virginia 's anti-terrorism statute and they argue that every resident of fairfax county is potentially a member of the victim 's class . 
we have given an assurance that there will not be a huge crowd at the house as the guests will only comprise of family members , '' he added . 
waksal admitted advising his daughter to dump her stock , stewart is being accused of unloading her stock after also l

Download .txt

gitextract_ogmutb0y/

├── .gitignore
├── LICENSE.txt
├── README.md
├── code/
│   ├── Classifier/
│   │   ├── CLPL.py
│   │   ├── Classifier.py
│   │   ├── Classifier_em.py
│   │   ├── DataIO.py
│   │   ├── HierarchySVM.py
│   │   ├── Logistic.py
│   │   ├── MulticlassSVM.py
│   │   ├── PLSVM.py
│   │   ├── Perceptron.py
│   │   ├── SVM.py
│   │   ├── TypeHierarchy.py
│   │   ├── liblinear.py
│   │   ├── liblinear.so.3
│   │   └── liblinearutil.py
│   ├── DataProcessor/
│   │   ├── Feature/
│   │   │   ├── __init__.py
│   │   │   ├── abstract_feature.py
│   │   │   ├── brown_feature.py
│   │   │   ├── dependency_feature.py
│   │   │   ├── em_brown_feature.py
│   │   │   ├── em_dependency_feature.py
│   │   │   ├── em_other_feature.py
│   │   │   ├── em_token_feature.py
│   │   │   ├── other_feature.py
│   │   │   └── token_feature.py
│   │   ├── __init__.py
│   │   ├── feature_generation.py
│   │   ├── liblinear_processor.py
│   │   ├── mention.py
│   │   ├── mention_reader.py
│   │   ├── ner_feature.py
│   │   ├── nlp_parse.py
│   │   ├── pruning_heuristics.py
│   │   └── statistic.py
│   ├── Evaluation/
│   │   ├── convertPredictionToJson.py
│   │   ├── emb_prediction.py
│   │   ├── emb_test.py
│   │   ├── evaluation.py
│   │   └── tune_threshold.py
│   └── Model/
│       ├── FCM/
│       │   ├── README.md
│       │   ├── all.sen
│       │   ├── code/
│       │   │   ├── BaseComponentModel.cpp
│       │   │   ├── BaseComponentModel.h
│       │   │   ├── Commons.h
│       │   │   ├── EmbeddingModel.cpp
│       │   │   ├── EmbeddingModel.h
│       │   │   ├── FctCoarseModel.cpp
│       │   │   ├── FctCoarseModel.h
│       │   │   ├── FctConvolutionModel.cpp
│       │   │   ├── FctConvolutionModel.h
│       │   │   ├── FctDeepModel.cpp
│       │   │   ├── FctDeepModel.h
│       │   │   ├── FeatureModel.cpp
│       │   │   ├── FeatureModel.h
│       │   │   ├── FullFctModel.cpp
│       │   │   ├── FullFctModel.h
│       │   │   ├── Instances.cpp
│       │   │   ├── Instances.h
│       │   │   ├── RE_FCT
│       │   │   ├── RE_FCT.cpp
│       │   │   ├── RE_FCT_fixed
│       │   │   ├── RE_FCT_fixed.cpp
│       │   │   ├── makefile
│       │   │   └── predict.fea.fullnerpair.onlyne.txt
│       │   ├── data/
│       │   │   ├── SemEval.test.fea.sst
│       │   │   ├── SemEval.test.keys
│       │   │   ├── SemEval.train.fea.sst
│       │   │   ├── semeval2010_task8_scorer-v1.2.pl
│       │   │   └── vectors.nyt2011.cbow.semeval.filtered
│       │   ├── filter.py
│       │   ├── gen_fmt.py
│       │   ├── gen_sen.py
│       │   ├── predict.txt
│       │   ├── process.py
│       │   ├── run.sh
│       │   ├── test.fmt
│       │   ├── test.fmt.tmp
│       │   ├── test.id
│       │   ├── test.sen
│       │   ├── train.fmt
│       │   ├── train.id
│       │   ├── train.sen
│       │   ├── vec.emb
│       │   ├── word2vec
│       │   └── word2vec.cpp
│       ├── baselines/
│       │   ├── hypenet/
│       │   │   ├── README.md
│       │   │   ├── data/
│       │   │   │   └── README.md
│       │   │   ├── evaluation.py
│       │   │   ├── helper.py
│       │   │   ├── lemmatize.py
│       │   │   ├── model.h5
│       │   │   ├── plot.py
│       │   │   ├── postprocess.py
│       │   │   ├── preprocess.py
│       │   │   ├── sdp.py
│       │   │   ├── sentence_normalize.py
│       │   │   ├── sentence_tokens.py
│       │   │   ├── shortest_dep.py
│       │   │   ├── split_baseline_data.py
│       │   │   └── test_corenlp.py
│       │   ├── sdp-lstm/
│       │   │   ├── README.md
│       │   │   ├── dependency/
│       │   │   │   ├── analyze.py
│       │   │   │   ├── data_utils.py
│       │   │   │   ├── emb_utils.py
│       │   │   │   ├── eval.py
│       │   │   │   ├── scorer.py
│       │   │   │   ├── sprnn_model.py
│       │   │   │   ├── train.py
│       │   │   │   ├── tree.py
│       │   │   │   └── utils.py
│       │   │   └── dependency-kbp/
│       │   │       ├── __init__.py
│       │   │       ├── analyze.py
│       │   │       ├── data_utils.py
│       │   │       ├── emb_utils.py
│       │   │       ├── eval.py
│       │   │       ├── scorer.py
│       │   │       ├── sprnn_model.py
│       │   │       ├── train-cv.py
│       │   │       └── utils.py
│       │   └── sentence-level-models/
│       │       ├── README.md
│       │       ├── cotype2json.py
│       │       ├── model.py
│       │       ├── models/
│       │       │   ├── bgru.py
│       │       │   ├── cnn.py
│       │       │   ├── lstm.py
│       │       │   ├── pcnn.py
│       │       │   └── position_aware_lstm.py
│       │       ├── tacred2json.py
│       │       ├── train-cv.py
│       │       ├── train.py
│       │       ├── utils.py
│       │       └── vocab.py
│       ├── dw/
│       │   ├── deepwalk-bipa.cpp
│       │   ├── deepwalk-hete-em.cpp
│       │   ├── deepwalk-hete.cpp
│       │   └── makefile
│       ├── multir/
│       │   ├── .classpath
│       │   ├── .project
│       │   ├── .settings/
│       │   │   └── org.eclipse.jdt.core.prefs
│       │   ├── LICENSE.txt
│       │   ├── README.txt
│       │   ├── annotations/
│       │   │   ├── sentential-byrelation.txt
│       │   │   └── sentential.txt
│       │   ├── lib/
│       │   │   └── protobuf-java-2.3.0.jar
│       │   ├── multiR.jar
│       │   ├── multiR_new.jar
│       │   ├── multir.jar
│       │   ├── run.sh
│       │   └── src/
│       │       ├── cc/
│       │       │   └── factorie/
│       │       │       └── protobuf/
│       │       │           └── DocumentProtos.java
│       │       └── edu/
│       │           └── uw/
│       │               └── cs/
│       │                   └── multir/
│       │                       ├── learning/
│       │                       │   ├── algorithm/
│       │                       │   │   ├── AveragedPerceptron.java
│       │                       │   │   ├── ConditionalInference.java
│       │                       │   │   ├── FullInference.java
│       │                       │   │   ├── Model.java
│       │                       │   │   ├── Parameters.java
│       │                       │   │   ├── Parse.java
│       │                       │   │   ├── Scorer.java
│       │                       │   │   └── Viterbi.java
│       │                       │   └── data/
│       │                       │       ├── Dataset.java
│       │                       │       ├── MILDocument.java
│       │                       │       └── MemoryDataset.java
│       │                       ├── main/
│       │                       │   ├── AggregatePrecisionRecallCurve.java
│       │                       │   ├── Main.java
│       │                       │   ├── Preprocess.java
│       │                       │   ├── ResultWriter.java
│       │                       │   ├── SententialPrecisionRecallByRelation.java
│       │                       │   ├── SententialPrecisionRecallCurve.java
│       │                       │   ├── Test.java
│       │                       │   └── Train.java
│       │                       ├── preprocess/
│       │                       │   ├── ConvertProtobufToMILDocument.java
│       │                       │   └── Mappings.java
│       │                       └── util/
│       │                           ├── DenseVector.java
│       │                           └── SparseBinaryVector.java
│       ├── pte/
│       │   ├── line.cpp
│       │   ├── linelib.cpp
│       │   ├── linelib.h
│       │   ├── makefile
│       │   ├── pte-hete.cpp
│       │   ├── ransampl.c
│       │   └── ransampl.h
│       ├── retype/
│       │   ├── hplelib.cpp
│       │   ├── hplelib.h
│       │   ├── makefile
│       │   ├── ransampl.c
│       │   ├── ransampl.h
│       │   ├── retype
│       │   ├── retype-rm
│       │   ├── retype-rm.cpp
│       │   └── retype.cpp
│       ├── seq-kernel/
│       │   ├── gen_data.py
│       │   ├── gen_fmt.py
│       │   ├── gen_lb.py
│       │   ├── gen_sen.py
│       │   ├── libsvm/
│       │   │   ├── COPYRIGHT
│       │   │   ├── FAQ.html
│       │   │   ├── Makefile
│       │   │   ├── Makefile.win
│       │   │   ├── README
│       │   │   ├── heart_scale
│       │   │   ├── java/
│       │   │   │   ├── Makefile
│       │   │   │   ├── libsvm/
│       │   │   │   │   ├── svm.java
│       │   │   │   │   ├── svm.m4
│       │   │   │   │   ├── svm_model.java
│       │   │   │   │   ├── svm_node.java
│       │   │   │   │   ├── svm_parameter.java
│       │   │   │   │   ├── svm_print_interface.java
│       │   │   │   │   └── svm_problem.java
│       │   │   │   ├── libsvm.jar
│       │   │   │   ├── svm_predict.java
│       │   │   │   ├── svm_scale.java
│       │   │   │   ├── svm_toy.java
│       │   │   │   ├── svm_train.java
│       │   │   │   └── test_applet.html
│       │   │   ├── matlab/
│       │   │   │   ├── Makefile
│       │   │   │   ├── README
│       │   │   │   ├── libsvmread.c
│       │   │   │   ├── libsvmwrite.c
│       │   │   │   ├── make.m
│       │   │   │   ├── svm_model_matlab.c
│       │   │   │   ├── svm_model_matlab.h
│       │   │   │   ├── svmpredict.c
│       │   │   │   └── svmtrain.c
│       │   │   ├── model.txt
│       │   │   ├── predict.txt
│       │   │   ├── python/
│       │   │   │   ├── Makefile
│       │   │   │   ├── README
│       │   │   │   ├── svm.py
│       │   │   │   └── svmutil.py
│       │   │   ├── svm-predict
│       │   │   ├── svm-predict.c
│       │   │   ├── svm-scale
│       │   │   ├── svm-scale.c
│       │   │   ├── svm-toy/
│       │   │   │   ├── gtk/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   ├── callbacks.cpp
│       │   │   │   │   ├── callbacks.h
│       │   │   │   │   ├── interface.c
│       │   │   │   │   ├── interface.h
│       │   │   │   │   ├── main.c
│       │   │   │   │   └── svm-toy.glade
│       │   │   │   ├── qt/
│       │   │   │   │   ├── Makefile
│       │   │   │   │   └── svm-toy.cpp
│       │   │   │   └── windows/
│       │   │   │       └── svm-toy.cpp
│       │   │   ├── svm-train
│       │   │   ├── svm-train.c
│       │   │   ├── svm.cpp
│       │   │   ├── svm.def
│       │   │   ├── svm.h
│       │   │   ├── tools/
│       │   │   │   ├── README
│       │   │   │   ├── checkdata.py
│       │   │   │   ├── easy.py
│       │   │   │   ├── grid.py
│       │   │   │   └── subset.py
│       │   │   └── windows/
│       │   │       ├── libsvmread.mexw64
│       │   │       ├── libsvmwrite.mexw64
│       │   │       ├── svmpredict.mexw64
│       │   │       └── svmtrain.mexw64
│       │   ├── process.py
│       │   ├── run.sh
│       │   ├── ssk_core/
│       │   │   ├── base.txt
│       │   │   ├── hs_err_pid12502.log
│       │   │   ├── hs_err_pid16669.log
│       │   │   ├── hs_err_pid18953.log
│       │   │   ├── infer.txt
│       │   │   ├── libsvm/
│       │   │   │   ├── CustomKernel.java
│       │   │   │   ├── Makefile
│       │   │   │   ├── libsvm.jar
│       │   │   │   ├── svm.java
│       │   │   │   ├── svm_model.java
│       │   │   │   ├── svm_node.java
│       │   │   │   ├── svm_parameter.java
│       │   │   │   └── svm_problem.java
│       │   │   ├── readme.txt
│       │   │   └── ssk/
│       │   │       ├── FeatureDictionary.java
│       │   │       ├── InstanceExample.java
│       │   │       ├── Makefile
│       │   │       ├── SubsequenceKernel.java
│       │   │       ├── intex_node.java
│       │   │       └── ssk.jar
│       │   ├── test.lb
│       │   ├── test.sen
│       │   ├── test.txt
│       │   ├── train.shuf
│       │   ├── train.smp
│       │   ├── train.smp.lb
│       │   └── train.smp.sen
│       └── warp/
│           ├── makefile
│           └── warp.cpp
├── data/
│   └── source/
│       ├── BioInfer/
│       │   └── README.md
│       ├── KBP/
│       │   └── README.md
│       └── NYT/
│           └── README.md
└── run.sh

Download .txt

SYMBOL INDEX (1906 symbols across 163 files)

FILE: code/Classifier/CLPL.py
  class CLPL (line 8) | class CLPL:
    method __init__ (line 10) | def __init__(self, feature_size, label_size, type_hierarchy, lambda_re...
    method fit (line 18) | def fit(self, train_x, train_y):
    method predict (line 60) | def predict(self, x):
  class Pegasos (line 102) | class Pegasos:
    method __init__ (line 104) | def __init__(self, feature_size, lambda_reg=0.1, max_iter=500, batch_s...
    method fit (line 111) | def fit(self, train_x, val_x, train_y):
    method predict (line 138) | def predict(self, x, val):
    method L2_regularize (line 145) | def L2_regularize(self, eta_t):
    method predict_prob (line 152) | def predict_prob(self, x, val):

FILE: code/Classifier/Classifier.py
  function classify (line 15) | def classify(classifier, feature_size, label_size, train_x, train_y, lea...
  function predict_em (line 41) | def predict_em(model, test_x, type_hierarchy, _threshold):
  function predict (line 65) | def predict(model, test_x, type_hierarchy, _threshold):
  function casestudy (line 84) | def casestudy(filename, output, mention_mapping, label_mapping, clean_me...
  function putback (line 93) | def putback(sent_json, mention_mapping, label_mapping, clean_mentions):
  function compare (line 124) | def compare(item1, item2):

FILE: code/Classifier/Classifier_em.py
  function classify (line 15) | def classify(classifier, feature_size, label_size, train_x, train_y, lea...
  function predict_em (line 37) | def predict_em(model, test_x, type_hierarchy, _threshold):

FILE: code/Classifier/DataIO.py
  function load_as_list (line 4) | def load_as_list(filename):
  function save_from_tuples (line 31) | def save_from_tuples(filename, indexes, data):
  function save_from_list (line 47) | def save_from_list(filename, indexes, data):
  function load_as_dict (line 62) | def load_as_dict(filename):
  function load_map (line 86) | def load_map(filename, mode):
  function load_mention_type (line 97) | def load_mention_type(filename):
  function file_len (line 105) | def file_len(filename):

FILE: code/Classifier/HierarchySVM.py
  class HierarchySVM (line 6) | class HierarchySVM:
    method __init__ (line 8) | def __init__(self, feature_size, type_hierarchy, current_types, level=...
    method fit_em (line 32) | def fit_em(self, train_x, train_y):
    method predict_em (line 72) | def predict_em(self, x):

FILE: code/Classifier/Logistic.py
  class Logistic (line 6) | class Logistic:
    method __init__ (line 7) | def __init__(self, feature_size, label_size, threshold):
    method fit (line 15) | def fit(self, train_x, train_y):
    method fit_em (line 36) | def fit_em(self, train_x, train_y):
    method predict (line 40) | def predict(self, train_x):
    method predict_em (line 53) | def predict_em(self, train_x):

FILE: code/Classifier/MulticlassSVM.py
  class MulticlassSVM (line 6) | class MulticlassSVM:
    method __init__ (line 8) | def __init__(self, feature_size, label_size, lambda_reg=0.1, max_iter=...
    method fit (line 28) | def fit(self, train_x, train_y):
    method predict (line 62) | def predict(self, x):

FILE: code/Classifier/PLSVM.py
  class PLSVM (line 9) | class PLSVM:
    method __init__ (line 11) | def __init__(self, feature_size, label_size, type_hierarchy, lambda_re...
    method fit (line 24) | def fit(self, train_x, train_y):
    method predict (line 52) | def predict(self, x):
    method find_max (line 91) | def find_max(self, Y, x):
    method update_weight (line 102) | def update_weight(self, dW, eta_t, m):
    method inner_prod (line 115) | def inner_prod(weight, x):
    method kernel (line 122) | def kernel(x1, x2):

FILE: code/Classifier/Perceptron.py
  class MultilabelPerceptron (line 6) | class MultilabelPerceptron:
    method __init__ (line 8) | def __init__(self, feature_size, label_size, weights=None, learning_ra...
    method fit (line 20) | def fit(self, train_x, train_y):
    method fit_em (line 42) | def fit_em(self, train_x, train_y):
    method predict (line 61) | def predict(self, x):
    method predict_em (line 76) | def predict_em(self, x):

FILE: code/Classifier/SVM.py
  class SVM (line 9) | class SVM:
    method __init__ (line 13) | def __init__(self, feature_size, lambda_reg=0.1, max_iter=50):
    method fit (line 20) | def fit(self, train_x, train_y):
    method predict (line 49) | def predict(self, x):
    method L2_regularize (line 56) | def L2_regularize(self, eta_t):
    method predict_prob (line 63) | def predict_prob(self, x):
    method kernel (line 71) | def kernel(x1, x2):

FILE: code/Classifier/TypeHierarchy.py
  class TypeSet (line 5) | class TypeSet:
    method __init__ (line 6) | def __init__(self, file_name, number_of_types):
    method get_type_path (line 20) | def get_type_path(self, label):
    method get_subtypes (line 31) | def get_subtypes(self, label):
  class TypeHierarchy (line 37) | class TypeHierarchy:
    method __init__ (line 38) | def __init__(self, file_name, number_of_types):
    method get_type_path (line 50) | def get_type_path(self, label):
    method get_subtypes (line 61) | def get_subtypes(self, label):

FILE: code/Classifier/liblinear.py
  function print_null (line 42) | def print_null(s):
  function genFields (line 45) | def genFields(names, types):
  function fillprototype (line 48) | def fillprototype(f, restype, argtypes):
  class feature_node (line 52) | class feature_node(Structure):
    method __str__ (line 57) | def __str__(self):
  function gen_feature_nodearray (line 60) | def gen_feature_nodearray(xi, feature_max=None, issparse=True):
  class problem (line 87) | class problem(Structure):
    method __init__ (line 92) | def __init__(self, y, x, bias = -1):
    method set_bias (line 114) | def set_bias(self, bias):
  class parameter (line 129) | class parameter(Structure):
    method __init__ (line 134) | def __init__(self, options = None):
    method __str__ (line 139) | def __str__(self):
    method set_to_default_values (line 149) | def set_to_default_values(self):
    method parse_options (line 168) | def parse_options(self, options):
  class model (line 258) | class model(Structure):
    method __init__ (line 263) | def __init__(self):
    method __del__ (line 266) | def __del__(self):
    method get_nr_feature (line 271) | def get_nr_feature(self):
    method get_nr_class (line 274) | def get_nr_class(self):
    method get_labels (line 277) | def get_labels(self):
    method get_decfun_coef (line 283) | def get_decfun_coef(self, feat_idx, label_idx=0):
    method get_decfun_bias (line 286) | def get_decfun_bias(self, label_idx=0):
    method get_decfun (line 289) | def get_decfun(self, label_idx=0):
    method is_probability_model (line 294) | def is_probability_model(self):
    method is_regression_model (line 297) | def is_regression_model(self):
  function toPyModel (line 300) | def toPyModel(model_ptr):

FILE: code/Classifier/liblinearutil.py
  function svm_read_problem (line 13) | def svm_read_problem(data_file_name):
  function load_model (line 35) | def load_model(model_file_name):
  function save_model (line 48) | def save_model(model_file_name, model):
  function evaluations (line 56) | def evaluations(ty, pv):
  function train (line 85) | def train(arg1, arg2=None, arg3=None):
  function predict (line 186) | def predict(y, x, m, options=""):

FILE: code/DataProcessor/Feature/abstract_feature.py
  class AbstractFeature (line 4) | class AbstractFeature(object):
    method apply (line 5) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/brown_feature.py
  class BrownFeature (line 7) | class BrownFeature(AbstractFeature):
    method __init__ (line 9) | def __init__(self, brown_file):
    method apply (line 17) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/dependency_feature.py
  class DependencyFeature (line 7) | class DependencyFeature(AbstractFeature):
    method apply (line 13) | def apply(self, sentence, mention, features):
    method accept_pos (line 46) | def accept_pos(self, pos):
    method accept_dep (line 49) | def accept_dep(self, dep):

FILE: code/DataProcessor/Feature/em_brown_feature.py
  class EMBrownFeature (line 7) | class EMBrownFeature(AbstractFeature):
    method __init__ (line 9) | def __init__(self, brown_file):
    method apply (line 17) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/em_dependency_feature.py
  class EMDependencyFeature (line 7) | class EMDependencyFeature(AbstractFeature):
    method apply (line 13) | def apply(self, sentence, mention, features):
    method accept_pos (line 46) | def accept_pos(self, pos):
    method accept_dep (line 49) | def accept_dep(self, dep):

FILE: code/DataProcessor/Feature/em_other_feature.py
  class EMPosFeature (line 7) | class EMPosFeature(AbstractFeature):
    method apply (line 9) | def apply(self, sentence, mention, features):
  class EMLengthFeature (line 14) | class EMLengthFeature(AbstractFeature):
    method apply (line 16) | def apply(self, sentence, mention, features):
  class EMWordShapeFeature (line 24) | class EMWordShapeFeature(AbstractFeature):
    method get_word_shape (line 25) | def get_word_shape(self, token):
    method apply (line 32) | def apply(self, sentence, mention, features):
  class EMCharacterFeature (line 37) | class EMCharacterFeature(AbstractFeature):
    method apply (line 38) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/em_token_feature.py
  function get_lemma (line 12) | def get_lemma(word, pos):
  class EMHeadFeature (line 28) | class EMHeadFeature(AbstractFeature):
    method get_head (line 31) | def get_head(sentence, mention):
    method apply (line 41) | def apply(self, sentence, mention, features):
  class EMTokenFeature (line 48) | class EMTokenFeature(AbstractFeature):
    method apply (line 50) | def apply(self, sentence, mention, features):
  class EMContextFeature (line 55) | class EMContextFeature(AbstractFeature):
    method __init__ (line 57) | def __init__(self, window_size=1):
    method apply (line 60) | def apply(self, sentence, mention, features):
  class EMContextGramFeature (line 69) | class EMContextGramFeature(AbstractFeature):
    method __init__ (line 71) | def __init__(self, window_size=1):
    method apply (line 74) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/other_feature.py
  class PosFeature (line 7) | class PosFeature(AbstractFeature):
    method apply (line 9) | def apply(self, sentence, mention, features):
  class DistanceFeature (line 19) | class DistanceFeature(AbstractFeature):
    method apply (line 21) | def apply(self, sentence, mention, features):
  class EntityMentionOrderFeature (line 27) | class EntityMentionOrderFeature(AbstractFeature):
    method apply (line 29) | def apply(self, sentence, mention, features):
  class NumOfEMBetweenFeature (line 35) | class NumOfEMBetweenFeature(AbstractFeature):
    method apply (line 37) | def apply(self, sentence, mention, features):
  class EMTypeFeature (line 41) | class EMTypeFeature(AbstractFeature):
    method apply (line 42) | def apply(self, sentence, mention, features):
  class SpecialPatternFeature (line 51) | class SpecialPatternFeature(AbstractFeature):
    method apply (line 53) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/Feature/token_feature.py
  function get_lemma (line 12) | def get_lemma(word, pos):
  class HeadFeature (line 28) | class HeadFeature(AbstractFeature):
    method get_head (line 32) | def get_head(sentence, start, end):
    method apply (line 42) | def apply(self, sentence, mention, features):
  class EntityMentionTokenFeature (line 54) | class EntityMentionTokenFeature(AbstractFeature):
    method apply (line 56) | def apply(self, sentence, mention, features):
  class BetweenEntityMentionTokenFeature (line 62) | class BetweenEntityMentionTokenFeature(AbstractFeature):
    method apply (line 64) | def apply(self, sentence, mention, features):
  class ContextFeature (line 77) | class ContextFeature(AbstractFeature):
    method __init__ (line 79) | def __init__(self, window_size=1):
    method apply (line 82) | def apply(self, sentence, mention, features):
  class ContextGramFeature (line 98) | class ContextGramFeature(AbstractFeature):
    method __init__ (line 100) | def __init__(self, window_size=1):
    method apply (line 103) | def apply(self, sentence, mention, features):

FILE: code/DataProcessor/feature_generation.py
  function get_number (line 12) | def get_number(filename):

FILE: code/DataProcessor/liblinear_processor.py
  function load_as_list (line 6) | def load_as_list(filename):
  function write_train_as_liblinear (line 33) | def write_train_as_liblinear(train_x, train_y, filename):
  function write_test_as_liblinear (line 43) | def write_test_as_liblinear(test_x, filename):

FILE: code/DataProcessor/mention.py
  class RelationMention (line 4) | class RelationMention(object):
    method __init__ (line 20) | def __init__(self, em1Start, em1End, em2Start, em2End, numOfEMBetween,...
    method __str__ (line 28) | def __str__(self):
  class EntityMention (line 34) | class EntityMention(object):
    method __init__ (line 50) | def __init__(self, start, end, labels):
    method __str__ (line 55) | def __str__(self):
  class Sentence (line 62) | class Sentence(object):
    method __init__ (line 74) | def __init__(self, articleId, sentId, tokens):
    method __str__ (line 82) | def __str__(self):
    method add_relationMention (line 91) | def add_relationMention(self, relationMention):
    method add_entityMention (line 95) | def add_entityMention(self, entityMention):
    method size (line 99) | def size(self):

FILE: code/DataProcessor/mention_reader.py
  class MentionReader (line 7) | class MentionReader:
    method __init__ (line 19) | def __init__(self, mention_file):
    method close (line 24) | def close(self):
    method has_next (line 27) | def has_next(self):
    method next (line 34) | def next(self):
    method _decode (line 43) | def _decode(mention_json):

FILE: code/DataProcessor/ner_feature.py
  class NERFeature (line 9) | class NERFeature(object):
    method __init__ (line 11) | def __init__(self, is_train, brown_file, requireEmType, isEntityMentio...
    method extract (line 64) | def extract(self, sentence, mention):
  function pipeline (line 96) | def pipeline(json_file, brown_file, outdir, requireEmType, isEntityMenti...
  function pipeline_test (line 153) | def pipeline_test(json_file, brown_file, featurefile, labelfile, outdir,...
  function load_map (line 198) | def load_map(input):
  function write_map (line 208) | def write_map(mapping, output):
  function filter (line 214) | def filter(featurefile, trainfile, featureout,trainout):
  function write_map2 (line 250) | def write_map2(mapping, output):

FILE: code/DataProcessor/nlp_parse.py
  class NLPParser (line 14) | class NLPParser(object):
    method __init__ (line 22) | def __init__(self):
    method parse (line 26) | def parse(self, sent):
  function parse (line 38) | def parse(sentences, g, lock, procNum, isTrain, parsePOSBeforehand=False):
  function process (line 62) | def process(buffered, parser, g, lock, isTrain, rmCount, discardRmCount,...
  function find_index (line 162) | def find_index(sen_split, word_split):
  function createPosAndTokensMap (line 180) | def createPosAndTokensMap(sentences, parser):

FILE: code/DataProcessor/pruning_heuristics.py
  class PruneStrategy (line 10) | class PruneStrategy:
    method __init__ (line 11) | def __init__(self, strategy):
    method no_prune (line 15) | def no_prune(self, fileid, is_ground, labels):
  function prune (line 19) | def prune(indir, outdir, strategy, feature_number, type_number, neg_labe...

FILE: code/DataProcessor/statistic.py
  function distribution (line 10) | def distribution(indir):
  function supertype (line 30) | def supertype(indir):

FILE: code/Evaluation/emb_prediction.py
  function sim_func (line 16) | def sim_func(v1, v2, _MODE):
  class Embedding (line 29) | class Embedding:
    method __init__ (line 30) | def __init__(self, file_name):
    method get_embedding (line 50) | def get_embedding(self, index):
  class Network (line 53) | class Network:
    method __init__ (line 54) | def __init__(self, file_name):
    method get_neighbors (line 66) | def get_neighbors(self, idx):
  class Predicter_useFeatureEmb (line 71) | class Predicter_useFeatureEmb:
    method __init__ (line 72) | def __init__(self, embs_feature, embs_type, network_mention_feature, t...
    method get_mention_embedding (line 80) | def get_mention_embedding(self, mention_id):
    method predict_types_for_rm_maximum (line 92) | def predict_types_for_rm_maximum(self, mention_id, candidate):
  function predict (line 108) | def predict(indir, outdir, _method, _sim_func, _threshold, output, none_...
  function min_max_normalization (line 152) | def min_max_normalization(scores):
  function load_mentionids (line 166) | def load_mentionids(filename):
  function load_candidates (line 178) | def load_candidates(filename, indexes):
  function load_all_candidates (line 192) | def load_all_candidates(filename, indexes):

FILE: code/Evaluation/evaluation.py
  function find_none_index (line 5) | def find_none_index(file_name):
  function load_labels (line 14) | def load_labels(file_name):
  function load_raw_labels (line 27) | def load_raw_labels(file_name, ground_truth):
  function load_label_score (line 37) | def load_label_score(file_name):
  function evaluate_rm (line 56) | def evaluate_rm(prediction, ground_truth):
  function evaluate_rm_neg (line 82) | def evaluate_rm_neg(prediction, ground_truth, none_label_index):

FILE: code/Evaluation/tune_threshold.py
  function min_max_nomalization (line 8) | def min_max_nomalization(prediction):
  function evaluate_threshold (line 22) | def evaluate_threshold(_threshold, ground_truth):
  function evaluate_threshold_neg (line 32) | def evaluate_threshold_neg(_threshold, ground_truth, none_label_index):
  function tune_threshold (line 42) | def tune_threshold(_threshold_list, ground_truth, none_label_index):

FILE: code/Model/FCM/code/BaseComponentModel.h
  type std (line 22) | typedef std::tr1::unordered_map<string, int> feat2int;
  type std (line 23) | typedef std::tr1::unordered_map<string, string> word2clus;
  function class (line 25) | class BaseComponentModel

FILE: code/Model/FCM/code/Commons.h
  type word_info (line 21) | struct word_info {

FILE: code/Model/FCM/code/EmbeddingModel.cpp
  function pairCompare (line 25) | bool pairCompare(pair<int, string> a, pair<int, string> b) {

FILE: code/Model/FCM/code/EmbeddingModel.h
  type std (line 31) | typedef std::tr1::unordered_map<string, int> word2int;
  type real (line 32) | typedef float real;
  function class (line 78) | class EmbeddingModel

FILE: code/Model/FCM/code/FctCoarseModel.h
  function class (line 14) | class FctCoarseModel: public BaseComponentModel

FILE: code/Model/FCM/code/FctConvolutionModel.h
  function class (line 14) | class FctConvolutionModel: public BaseComponentModel

FILE: code/Model/FCM/code/FctDeepModel.h
  function class (line 14) | class FctDeepModel: public BaseComponentModel

FILE: code/Model/FCM/code/FeatureModel.h
  function class (line 21) | class FeatureModel

FILE: code/Model/FCM/code/FullFctModel.cpp
  function string (line 1325) | string FullFctModel::ProcSenseTag(string input_type) {
  function string (line 1331) | string FullFctModel::ProcNeTag(string input_type) {
  function string (line 1342) | string FullFctModel::ToLower(string& s) {

FILE: code/Model/FCM/code/FullFctModel.h
  function class (line 18) | class FeaParams {
  function class (line 58) | class FullFctModel

FILE: code/Model/FCM/code/Instances.h
  function class (line 22) | class BaseInstance{
  function class (line 78) | class CoarseFctInstance {
  function class (line 128) | class RealFctPathInstance {
  function class (line 164) | class FctConvolutionInstance {
  function class (line 186) | class FeatureInstance {

FILE: code/Model/FCM/code/RE_FCT.cpp
  function ArgPos (line 33) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 45) | int main(int argc, char **argv) {

FILE: code/Model/FCM/code/RE_FCT_fixed.cpp
  function ArgPos (line 33) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 45) | int main(int argc, char **argv) {

FILE: code/Model/FCM/word2vec.cpp
  type vocab_word (line 31) | struct vocab_word {
  type vocab_word (line 39) | struct vocab_word
  function InitUnigramTable (line 52) | void InitUnigramTable() {
  function ReadWord (line 71) | void ReadWord(char *word, FILE *fin) {
  function GetWordHash (line 95) | int GetWordHash(char *word) {
  function SearchVocab (line 103) | int SearchVocab(char *word) {
  function ReadWordIndex (line 114) | int ReadWordIndex(FILE *fin) {
  function AddWordToVocab (line 122) | int AddWordToVocab(char *word) {
  function VocabCompare (line 141) | int VocabCompare(const void *a, const void *b) {
  function SortVocab (line 146) | void SortVocab() {
  function ReduceVocab (line 177) | void ReduceVocab() {
  function CreateBinaryTree (line 200) | void CreateBinaryTree() {
  function LearnVocabFromTrainFile (line 269) | void LearnVocabFromTrainFile() {
  function SaveVocab (line 306) | void SaveVocab() {
  function ReadVocab (line 313) | void ReadVocab() {
  function InitNet (line 346) | void InitNet() {
  function TrainModel (line 589) | void TrainModel() {
  function ArgPos (line 669) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 681) | int main(int argc, char **argv) {

FILE: code/Model/baselines/hypenet/evaluation.py
  function find_none_index (line 5) | def find_none_index(file_name):
  function load_labels (line 15) | def load_labels(file_name):
  function load_raw_labels (line 29) | def load_raw_labels(file_name, ground_truth):
  function load_label_score (line 40) | def load_label_score(file_name):
  function evaluate_em (line 59) | def evaluate_em(prediction, ground_truth):
  function evaluate_rm (line 100) | def evaluate_rm(prediction, ground_truth):
  function evaluate_rm_gold (line 126) | def evaluate_rm_gold(prediction, ground_truth):
  function evaluate_rm_neg (line 152) | def evaluate_rm_neg(prediction, ground_truth, none_label_index):
  function min_max_nomalization (line 184) | def min_max_nomalization(prediction):
  function evaluate_threshold (line 199) | def evaluate_threshold(_threshold, ground_truth, prediction):
  function evaluate_threshold_neg (line 210) | def evaluate_threshold_neg(_threshold, ground_truth, none_label_index, p...
  function tune_threshold (line 221) | def tune_threshold(_threshold_list, ground_truth, none_label_index, pred...

FILE: code/Model/baselines/hypenet/helper.py
  function lst_2_dic (line 94) | def lst_2_dic(lst):
  function sequence_from_dic (line 121) | def sequence_from_dic(lst, dic):
  function train_val_test_split (line 130) | def train_val_test_split(X, id_file=None):
  function train_val_test_split_auto (line 142) | def train_val_test_split_auto(X, y):
  function get_none_id (line 148) | def get_none_id(type_filename):
  function get_class_num (line 156) | def get_class_num(type_filename):
  function evaluate_rm_neg (line 166) | def evaluate_rm_neg(prediction, ground_truth, none_label_index):

FILE: code/Model/baselines/hypenet/lemmatize.py
  function get_stanford_annotations (line 10) | def get_stanford_annotations(text, port=9000,

FILE: code/Model/baselines/hypenet/preprocess.py
  function is_overlap (line 4) | def is_overlap(a, b):
  function process_file (line 11) | def process_file(in_file, out_file, rel_types, training=True):
  function process (line 39) | def process(fin1, fin2, fout):

FILE: code/Model/baselines/hypenet/sentence_normalize.py
  function get_stanford_annotations (line 10) | def get_stanford_annotations(text, port=9000,

FILE: code/Model/baselines/hypenet/sentence_tokens.py
  function get_stanford_annotations (line 8) | def get_stanford_annotations(text, port=9000,

FILE: code/Model/baselines/hypenet/shortest_dep.py
  function get_vocab_dic_split (line 11) | def get_vocab_dic_split(filename):
  function get_vocab_dic (line 21) | def get_vocab_dic(filename):
  function get_stanford_annotations (line 30) | def get_stanford_annotations(text, port=9000,

FILE: code/Model/baselines/hypenet/test_corenlp.py
  function get_stanford_annotations (line 8) | def get_stanford_annotations(text, annotators='tokenize,ssplit,pos,lemma...

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/analyze.py
  function get_unk_count_in_dataset (line 18) | def get_unk_count_in_dataset(loader):
  function analyze_unk (line 34) | def analyze_unk():
  function main (line 54) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/data_utils.py
  function load_datasets (line 167) | def load_datasets(fname, labelfname, maxlen=MAX_SEQ_LEN, lowercase=True):
  function build_vocab (line 204) | def build_vocab(datasets, use_count):
  function build_vocab_for_field (line 222) | def build_vocab_for_field(datasets, fieldname):
  function convert_words_to_ids (line 239) | def convert_words_to_ids(datasets, word2id):
  function convert_fields_to_ids (line 250) | def convert_fields_to_ids(datasets, field2map):
  function preprocess (line 267) | def preprocess():
  function dump_to_file (line 318) | def dump_to_file(filename, obj):
  function load_from_dump (line 324) | def load_from_dump(filename):
  class DataLoader (line 330) | class DataLoader():
    method __init__ (line 331) | def __init__(self, dump_name, batch_size, pad_len, shuffle=True, subsa...
    method next_batch (line 346) | def next_batch(self):
    method get_residual (line 366) | def get_residual(self):
    method reset_pointer (line 380) | def reset_pointer(self):
    method corrupt_sentence (line 383) | def corrupt_sentence(self, tokens):
    method write_keys (line 392) | def write_keys(self, key_file, id2label=None, include_residual=False):
  function main (line 407) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/emb_utils.py
  function _load_glove_vec (line 13) | def _load_glove_vec(fname, vocab, dim):
  function _load_bin_vec (line 28) | def _load_bin_vec(fname, vocab):
  function _add_random_vec (line 52) | def _add_random_vec(word_vecs, vocab, dim=300, scale=1.0):
  function prepare_pretrained_embedding (line 61) | def prepare_pretrained_embedding(fname, word2id, dim):
  function main (line 74) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/eval.py
  function evaluate (line 36) | def evaluate():
  function _write_prediction_file (line 107) | def _write_prediction_file(preds, confs, all_probs, id2label, pred_file,...
  function _get_model (line 120) | def _get_model(is_train):
  function main (line 128) | def main(_):

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/scorer.py
  function score (line 6) | def score(key_file, pred_files, f_measure=1, verbose=False):

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/sprnn_model.py
  function _variable_on_cpu (line 34) | def _variable_on_cpu(name, shape, initializer):
  function _get_lstm_graph_info (line 39) | def _get_lstm_graph_info():
  function _create_embedding_layer (line 54) | def _create_embedding_layer(name, vocab_size, dim, inputs, is_train):
  function _get_rnn_cell (line 61) | def _get_rnn_cell(hidden_size, num_layers, is_train, dropout):
  function max_over_time (line 71) | def max_over_time(inputs, index, seq_lens):
  function attention_over_time (line 84) | def attention_over_time(inputs, hidden, params, index, seq_lens):
  function _create_attention_layer (line 106) | def _create_attention_layer(rnn_outputs, rnn_final_hidden, seq_lens, dim...
  function _create_rnn_along_subpath (line 121) | def _create_rnn_along_subpath(subpath_sent_batch, seq_lens, dim, batch_s...
  class SPRNNModel (line 155) | class SPRNNModel(object):
    method __init__ (line 161) | def __init__(self, vocab_size, is_train=True):
    method build_graph (line 166) | def build_graph(self):
    method assign_lr (line 247) | def assign_lr(self, session, lr_value):
    method assign_embedding (line 250) | def assign_embedding(self, session, pretrained):

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/train-cv.py
  function train (line 51) | def train():
  function _get_training_info (line 259) | def _get_training_info():
  function _get_model (line 272) | def _get_model(is_train, vocab_size):
  function _summary_for_scalar (line 281) | def _summary_for_scalar(name, value):
  function _write_prediction_file (line 285) | def _write_prediction_file(preds, confs, id2label, pred_file):
  function main (line 293) | def main(argv=None):

FILE: code/Model/baselines/sdp-lstm/dependency-kbp/utils.py
  function _get_feed_dict_for_others (line 3) | def _get_feed_dict_for_others(model, x_batch, y_batch, x_lens, use_pos=T...
  function _get_feed_dict_for_sprnn (line 13) | def _get_feed_dict_for_sprnn(model, x_batch, y_batch, x_lens, use_pos=Tr...

FILE: code/Model/baselines/sdp-lstm/dependency/analyze.py
  function get_unk_count_in_dataset (line 18) | def get_unk_count_in_dataset(loader):
  function analyze_unk (line 34) | def analyze_unk():
  function main (line 54) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency/data_utils.py
  function load_datasets (line 165) | def load_datasets(fnames, lowercase=True):
  function convert_to_dependency_path (line 177) | def convert_to_dependency_path(dataset):
  function add_dep_path_to_dataset (line 207) | def add_dep_path_to_dataset(node_indices, ancestor_index, original_row, ...
  function build_vocab (line 220) | def build_vocab(datasets, use_count):
  function build_vocab_for_field (line 238) | def build_vocab_for_field(datasets, fieldname):
  function filter_seqlen (line 256) | def filter_seqlen(fnames, maxlen):
  function convert_words_to_ids (line 271) | def convert_words_to_ids(datasets, word2id):
  function convert_fields_to_ids (line 282) | def convert_fields_to_ids(datasets, field2map):
  function create_dependency_path_datasets (line 299) | def create_dependency_path_datasets():
  function preprocess (line 312) | def preprocess():
  function dump_to_file (line 367) | def dump_to_file(filename, obj):
  function load_from_dump (line 373) | def load_from_dump(filename):
  class DataLoader (line 379) | class DataLoader():
    method __init__ (line 380) | def __init__(self, dump_name, batch_size, pad_len, shuffle=True, subsa...
    method next_batch (line 395) | def next_batch(self):
    method get_residual (line 415) | def get_residual(self):
    method reset_pointer (line 429) | def reset_pointer(self):
    method corrupt_sentence (line 432) | def corrupt_sentence(self, tokens):
    method write_keys (line 441) | def write_keys(self, key_file, id2label=None, include_residual=False):
  function main (line 456) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency/emb_utils.py
  function _load_glove_vec (line 15) | def _load_glove_vec(fname, vocab, dim):
  function _load_bin_vec (line 30) | def _load_bin_vec(fname, vocab):
  function _add_random_vec (line 54) | def _add_random_vec(word_vecs, vocab, dim=300, scale=1.0):
  function prepare_pretrained_embedding (line 63) | def prepare_pretrained_embedding(fname, word2id, dim):
  function main (line 76) | def main():

FILE: code/Model/baselines/sdp-lstm/dependency/eval.py
  function evaluate (line 36) | def evaluate():
  function _write_prediction_file (line 107) | def _write_prediction_file(preds, confs, all_probs, id2label, pred_file,...
  function _get_model (line 120) | def _get_model(is_train):
  function main (line 128) | def main(_):

FILE: code/Model/baselines/sdp-lstm/dependency/scorer.py
  function score (line 6) | def score(key_file, pred_files, f_measure=1, verbose=False):

FILE: code/Model/baselines/sdp-lstm/dependency/sprnn_model.py
  function _variable_on_cpu (line 34) | def _variable_on_cpu(name, shape, initializer):
  function _get_lstm_graph_info (line 39) | def _get_lstm_graph_info():
  function _create_embedding_layer (line 54) | def _create_embedding_layer(name, vocab_size, dim, inputs, is_train):
  function _get_rnn_cell (line 61) | def _get_rnn_cell(hidden_size, num_layers, is_train, dropout):
  function max_over_time (line 71) | def max_over_time(inputs, index, seq_lens):
  function attention_over_time (line 84) | def attention_over_time(inputs, hidden, params, index, seq_lens):
  function _create_attention_layer (line 106) | def _create_attention_layer(rnn_outputs, rnn_final_hidden, seq_lens, dim...
  function _create_rnn_along_subpath (line 121) | def _create_rnn_along_subpath(subpath_sent_batch, seq_lens, dim, batch_s...
  class SPRNNModel (line 155) | class SPRNNModel(object):
    method __init__ (line 161) | def __init__(self, is_train=True):
    method build_graph (line 165) | def build_graph(self):
    method assign_lr (line 246) | def assign_lr(self, session, lr_value):
    method assign_embedding (line 249) | def assign_embedding(self, session, pretrained):

FILE: code/Model/baselines/sdp-lstm/dependency/train.py
  function train (line 54) | def train():
  function _get_training_info (line 293) | def _get_training_info():
  function _get_model (line 306) | def _get_model(is_train):
  function _summary_for_scalar (line 315) | def _summary_for_scalar(name, value):
  function _write_prediction_file (line 319) | def _write_prediction_file(preds, confs, id2label, pred_file):
  function main (line 327) | def main(argv=None):

FILE: code/Model/baselines/sdp-lstm/dependency/tree.py
  class Node (line 18) | class Node:  # a node in the tree
    method __init__ (line 19) | def __init__(self, idx):
  class Tree (line 27) | class Tree:
    method __init__ (line 28) | def __init__(self, conll_data):
    method parse_from_conll (line 53) | def parse_from_conll(self, conll_fields):
    method get_shortest_path_through_root (line 75) | def get_shortest_path_through_root(self):
    method get_shortest_path_through_ancestor (line 88) | def get_shortest_path_through_ancestor(self):
    method copy_fields_at_index (line 110) | def copy_fields_at_index(self, idx, target_fields):
    method __repr__ (line 117) | def __repr__(self):
  function assign_node_levels (line 126) | def assign_node_levels(node, level):
  function get_entity_head (line 132) | def get_entity_head(entity_idx_seq, idx2node):
  function get_path_to_root (line 153) | def get_path_to_root(node):
  function get_path_to_node (line 163) | def get_path_to_node(current_node, target_node_idx):
  function get_common_ancestor (line 178) | def get_common_ancestor(root_to_node1, root_to_node2):

FILE: code/Model/baselines/sdp-lstm/dependency/utils.py
  function _get_feed_dict_for_others (line 3) | def _get_feed_dict_for_others(model, x_batch, y_batch, x_lens, use_pos=T...
  function _get_feed_dict_for_sprnn (line 13) | def _get_feed_dict_for_sprnn(model, x_batch, y_batch, x_lens, use_pos=Tr...

FILE: code/Model/baselines/sentence-level-models/cotype2json.py
  class NLPParser (line 18) | class NLPParser(object):
    method __init__ (line 26) | def __init__(self):
    method get_ner (line 29) | def get_ner(self, tokens):
  function find_index (line 37) | def find_index(sen_split, word_split):
  function read (line 56) | def read(data, in_dir, out_dir):

FILE: code/Model/baselines/sentence-level-models/model.py
  class Model (line 17) | class Model(object):
    method __init__ (line 18) | def __init__(self, args, device, rel2id, word_emb=None):
    method update (line 44) | def update(self, batch):
    method predict (line 56) | def predict(self, batch):
    method eval (line 66) | def eval(self, dset, vocab=None, output_false_file=None):
    method save (line 109) | def save(self, filename, epoch):
    method load (line 122) | def load(self, filename):

FILE: code/Model/baselines/sentence-level-models/models/bgru.py
  class BGRU (line 10) | class BGRU(nn.Module):
    method __init__ (line 11) | def __init__(self, args, rel2id, word_emb=None):
    method forward (line 62) | def forward(self, inputs):

FILE: code/Model/baselines/sentence-level-models/models/cnn.py
  class CNN (line 10) | class CNN(nn.Module):
    method __init__ (line 11) | def __init__(self, args, rel2id, word_emb=None):
    method forward (line 58) | def forward(self, inputs):

FILE: code/Model/baselines/sentence-level-models/models/lstm.py
  class LSTM (line 10) | class LSTM(nn.Module):
    method __init__ (line 11) | def __init__(self, args, rel2id, word_emb=None):
    method forward (line 63) | def forward(self, inputs):

FILE: code/Model/baselines/sentence-level-models/models/pcnn.py
  class PCNN (line 10) | class PCNN(nn.Module):
    method __init__ (line 11) | def __init__(self, args, rel2id, word_emb=None):
    method masked_conv (line 57) | def masked_conv(self, input, mask):
    method forward (line 74) | def forward(self, inputs):

FILE: code/Model/baselines/sentence-level-models/models/position_aware_lstm.py
  class PositionAwareLSTM (line 10) | class PositionAwareLSTM(nn.Module):
    method __init__ (line 11) | def __init__(self, args, rel2id, word_emb=None):
    method forward (line 61) | def forward(self, inputs):

FILE: code/Model/baselines/sentence-level-models/tacred2json.py
  function read (line 12) | def read(data, in_dir, out_dir, need_dependency = False):

FILE: code/Model/baselines/sentence-level-models/utils.py
  function ensure_dir (line 33) | def ensure_dir(d, verbose=True):
  class Dataset (line 39) | class Dataset(object):
    method __init__ (line 40) | def __init__(self, filename, args, word2id, device, rel2id=None, shuff...
    method get_id_maps (line 109) | def get_id_maps(self, instances):
  function get_padded_tensor (line 121) | def get_padded_tensor(tokens_list, batch_size):
  class CVDataset (line 131) | class CVDataset(object):
    method __init__ (line 132) | def __init__(self, instances, args, word2id, device, rel2id, shuffle=F...
  function get_cv_dataset (line 201) | def get_cv_dataset(filename, args, word2id, device, rel2id, dev_ratio=0.1):
  function map_to_ids (line 220) | def map_to_ids(tokens, vocab):
  function get_positions (line 224) | def get_positions(start_idx, end_idx, length):
  function sort_all (line 229) | def sort_all(batch, lens):
  function eval (line 236) | def eval(pred, labels):

FILE: code/Model/baselines/sentence-level-models/vocab.py
  function build_embedding (line 12) | def build_embedding(wv_file, vocab, wv_dim):
  function load_glove_vocab (line 26) | def load_glove_vocab(file, wv_dim):
  function normalize_glove (line 38) | def normalize_glove(token):
  class Vocab (line 49) | class Vocab(object):
    method __init__ (line 50) | def __init__(self, filename, load=False, word_counter=None, threshold=0):
    method load (line 72) | def load(self, filename):
    method save (line 78) | def save(self, filename):
    method map (line 87) | def map(self, token_list):
    method unmap (line 93) | def unmap(self, idx_list):
    method get_embeddings (line 99) | def get_embeddings(self, word_vectors=None, dim=100):
  function parse_args (line 110) | def parse_args():
  function main (line 123) | def main():
  function load_tokens (line 173) | def load_tokens(filename):
  function build_vocab (line 182) | def build_vocab(tokens, glove_vocab, min_freq):
  function count_oov (line 195) | def count_oov(tokens, vocab):
  function entity_masks (line 201) | def entity_masks():

FILE: code/Model/dw/deepwalk-bipa.cpp
  type vocab_word (line 20) | struct vocab_word {
  type vocab_word (line 28) | struct vocab_word
  function InitUnigramTable (line 49) | void InitUnigramTable() {
  function GetWordHash (line 68) | int GetWordHash(char *word) {
  function SearchVocab (line 76) | int SearchVocab(char *word) {
  function LearnVocabFromTrainFile (line 86) | void LearnVocabFromTrainFile() {
  function InitNet (line 171) | void InitNet() {
  function ReadNet (line 189) | void ReadNet()
  function TrainModel (line 314) | void TrainModel() {
  function ArgPos (line 394) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 406) | int main(int argc, char **argv) {

FILE: code/Model/dw/deepwalk-hete-em.cpp
  type vocab_word (line 20) | struct vocab_word {
  type vocab_word (line 28) | struct vocab_word
  function InitUnigramTable (line 49) | void InitUnigramTable() {
  function GetWordHash (line 68) | int GetWordHash(char *word) {
  function SearchVocab (line 76) | int SearchVocab(char *word) {
  function LearnVocabFromTrainFile (line 86) | void LearnVocabFromTrainFile() {
  function InitNet (line 207) | void InitNet() {
  function ReadNet (line 225) | void ReadNet()
  function TrainModel (line 379) | void TrainModel() {
  function ArgPos (line 483) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 495) | int main(int argc, char **argv) {

FILE: code/Model/dw/deepwalk-hete.cpp
  type vocab_word (line 20) | struct vocab_word {
  type vocab_word (line 28) | struct vocab_word
  function InitUnigramTable (line 49) | void InitUnigramTable() {
  function GetWordHash (line 68) | int GetWordHash(char *word) {
  function SearchVocab (line 76) | int SearchVocab(char *word) {
  function LearnVocabFromTrainFile (line 86) | void LearnVocabFromTrainFile() {
  function InitNet (line 207) | void InitNet() {
  function ReadNet (line 225) | void ReadNet()
  function TrainModel (line 379) | void TrainModel() {
  function ArgPos (line 483) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 495) | int main(int argc, char **argv) {

FILE: code/Model/multir/src/cc/factorie/protobuf/DocumentProtos.java
  class DocumentProtos (line 6) | public final class DocumentProtos {
    method DocumentProtos (line 7) | private DocumentProtos() {}
    method registerAllExtensions (line 8) | public static void registerAllExtensions(
    class Document (line 11) | public static final class Document extends
      method Document (line 14) | private Document() {
      method Document (line 17) | private Document(boolean noInit) {}
      method getDefaultInstance (line 20) | public static Document getDefaultInstance() {
      method getDefaultInstanceForType (line 24) | public Document getDefaultInstanceForType() {
      method getDescriptor (line 28) | public static final com.google.protobuf.Descriptors.Descriptor
      method internalGetFieldAccessorTable (line 33) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
      class Sentence (line 38) | public static final class Sentence extends
        method Sentence (line 41) | private Sentence() {
        method Sentence (line 44) | private Sentence(boolean noInit) {}
        method getDefaultInstance (line 47) | public static Sentence getDefaultInstance() {
        method getDefaultInstanceForType (line 51) | public Sentence getDefaultInstanceForType() {
        method getDescriptor (line 55) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 60) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method getTokensList (line 69) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Document...
        method getTokensCount (line 72) | public int getTokensCount() { return tokens_.size(); }
        method getTokens (line 73) | public cc.factorie.protobuf.DocumentProtos.Document.Token getToken...
        method getMentionsList (line 81) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Document...
        method getMentionsCount (line 84) | public int getMentionsCount() { return mentions_.size(); }
        method getMentions (line 85) | public cc.factorie.protobuf.DocumentProtos.Document.Mention getMen...
        method hasDepTree (line 93) | public boolean hasDepTree() { return hasDepTree; }
        method getDepTree (line 94) | public cc.factorie.protobuf.DocumentProtos.Document.DepTree getDep...
        method initFields (line 96) | private void initFields() {
        method isInitialized (line 99) | public final boolean isInitialized() {
        method writeTo (line 112) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 128) | public int getSerializedSize() {
        method parseFrom (line 150) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 155) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 162) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 166) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 173) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 177) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseDelimitedFrom (line 184) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseDelimitedFrom (line 193) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 204) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method parseFrom (line 209) | public static cc.factorie.protobuf.DocumentProtos.Document.Sentenc...
        method newBuilder (line 217) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 218) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 219) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 222) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 224) | public static final class Builder extends
          method Builder (line 229) | private Builder() {}
          method create (line 231) | private static Builder create() {
          method internalGetResult (line 237) | protected cc.factorie.protobuf.DocumentProtos.Document.Sentence ...
          method clear (line 241) | public Builder clear() {
          method clone (line 250) | public Builder clone() {
          method getDescriptorForType (line 254) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 259) | public cc.factorie.protobuf.DocumentProtos.Document.Sentence get...
          method isInitialized (line 263) | public boolean isInitialized() {
          method build (line 266) | public cc.factorie.protobuf.DocumentProtos.Document.Sentence bui...
          method buildParsed (line 273) | private cc.factorie.protobuf.DocumentProtos.Document.Sentence bu...
          method buildPartial (line 282) | public cc.factorie.protobuf.DocumentProtos.Document.Sentence bui...
          method mergeFrom (line 300) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 309) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Doc...
          method mergeFrom (line 330) | public Builder mergeFrom(
          method getTokensList (line 378) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Docume...
          method getTokensCount (line 381) | public int getTokensCount() {
          method getTokens (line 384) | public cc.factorie.protobuf.DocumentProtos.Document.Token getTok...
          method setTokens (line 387) | public Builder setTokens(int index, cc.factorie.protobuf.Documen...
          method setTokens (line 394) | public Builder setTokens(int index, cc.factorie.protobuf.Documen...
          method addTokens (line 398) | public Builder addTokens(cc.factorie.protobuf.DocumentProtos.Doc...
          method addTokens (line 408) | public Builder addTokens(cc.factorie.protobuf.DocumentProtos.Doc...
          method addAllTokens (line 415) | public Builder addAllTokens(
          method clearTokens (line 423) | public Builder clearTokens() {
          method getMentionsList (line 429) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Docume...
          method getMentionsCount (line 432) | public int getMentionsCount() {
          method getMentions (line 435) | public cc.factorie.protobuf.DocumentProtos.Document.Mention getM...
          method setMentions (line 438) | public Builder setMentions(int index, cc.factorie.protobuf.Docum...
          method setMentions (line 445) | public Builder setMentions(int index, cc.factorie.protobuf.Docum...
          method addMentions (line 449) | public Builder addMentions(cc.factorie.protobuf.DocumentProtos.D...
          method addMentions (line 459) | public Builder addMentions(cc.factorie.protobuf.DocumentProtos.D...
          method addAllMentions (line 466) | public Builder addAllMentions(
          method clearMentions (line 474) | public Builder clearMentions() {
          method hasDepTree (line 480) | public boolean hasDepTree() {
          method getDepTree (line 483) | public cc.factorie.protobuf.DocumentProtos.Document.DepTree getD...
          method setDepTree (line 486) | public Builder setDepTree(cc.factorie.protobuf.DocumentProtos.Do...
          method setDepTree (line 494) | public Builder setDepTree(cc.factorie.protobuf.DocumentProtos.Do...
          method mergeDepTree (line 499) | public Builder mergeDepTree(cc.factorie.protobuf.DocumentProtos....
          method clearDepTree (line 510) | public Builder clearDepTree() {
      class Token (line 528) | public static final class Token extends
        method Token (line 531) | private Token() {
        method Token (line 534) | private Token(boolean noInit) {}
        method getDefaultInstance (line 537) | public static Token getDefaultInstance() {
        method getDefaultInstanceForType (line 541) | public Token getDefaultInstanceForType() {
        method getDescriptor (line 545) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 550) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasWord (line 559) | public boolean hasWord() { return hasWord; }
        method getWord (line 560) | public java.lang.String getWord() { return word_; }
        method hasTag (line 566) | public boolean hasTag() { return hasTag; }
        method getTag (line 567) | public java.lang.String getTag() { return tag_; }
        method hasNer (line 573) | public boolean hasNer() { return hasNer; }
        method getNer (line 574) | public java.lang.String getNer() { return ner_; }
        method initFields (line 576) | private void initFields() {
        method isInitialized (line 578) | public final boolean isInitialized() {
        method writeTo (line 583) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 599) | public int getSerializedSize() {
        method parseFrom (line 621) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 626) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 633) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 637) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 644) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 648) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseDelimitedFrom (line 655) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseDelimitedFrom (line 664) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 675) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method parseFrom (line 680) | public static cc.factorie.protobuf.DocumentProtos.Document.Token p...
        method newBuilder (line 688) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 689) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 690) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 693) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 695) | public static final class Builder extends
          method Builder (line 700) | private Builder() {}
          method create (line 702) | private static Builder create() {
          method internalGetResult (line 708) | protected cc.factorie.protobuf.DocumentProtos.Document.Token int...
          method clear (line 712) | public Builder clear() {
          method clone (line 721) | public Builder clone() {
          method getDescriptorForType (line 725) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 730) | public cc.factorie.protobuf.DocumentProtos.Document.Token getDef...
          method isInitialized (line 734) | public boolean isInitialized() {
          method build (line 737) | public cc.factorie.protobuf.DocumentProtos.Document.Token build() {
          method buildParsed (line 744) | private cc.factorie.protobuf.DocumentProtos.Document.Token build...
          method buildPartial (line 753) | public cc.factorie.protobuf.DocumentProtos.Document.Token buildP...
          method mergeFrom (line 763) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 772) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Doc...
          method mergeFrom (line 787) | public Builder mergeFrom(
          method hasWord (line 826) | public boolean hasWord() {
          method getWord (line 829) | public java.lang.String getWord() {
          method setWord (line 832) | public Builder setWord(java.lang.String value) {
          method clearWord (line 840) | public Builder clearWord() {
          method hasTag (line 847) | public boolean hasTag() {
          method getTag (line 850) | public java.lang.String getTag() {
          method setTag (line 853) | public Builder setTag(java.lang.String value) {
          method clearTag (line 861) | public Builder clearTag() {
          method hasNer (line 868) | public boolean hasNer() {
          method getNer (line 871) | public java.lang.String getNer() {
          method setNer (line 874) | public Builder setNer(java.lang.String value) {
          method clearNer (line 882) | public Builder clearNer() {
      class Mention (line 900) | public static final class Mention extends
        method Mention (line 903) | private Mention() {
        method Mention (line 906) | private Mention(boolean noInit) {}
        method getDefaultInstance (line 909) | public static Mention getDefaultInstance() {
        method getDefaultInstanceForType (line 913) | public Mention getDefaultInstanceForType() {
        method getDescriptor (line 917) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 922) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasId (line 931) | public boolean hasId() { return hasId; }
        method getId (line 932) | public int getId() { return id_; }
        method hasEntityGuid (line 938) | public boolean hasEntityGuid() { return hasEntityGuid; }
        method getEntityGuid (line 939) | public java.lang.String getEntityGuid() { return entityGuid_; }
        method hasFrom (line 945) | public boolean hasFrom() { return hasFrom; }
        method getFrom (line 946) | public int getFrom() { return from_; }
        method hasTo (line 952) | public boolean hasTo() { return hasTo; }
        method getTo (line 953) | public int getTo() { return to_; }
        method hasLabel (line 959) | public boolean hasLabel() { return hasLabel; }
        method getLabel (line 960) | public java.lang.String getLabel() { return label_; }
        method initFields (line 962) | private void initFields() {
        method isInitialized (line 964) | public final boolean isInitialized() {
        method writeTo (line 972) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 994) | public int getSerializedSize() {
        method parseFrom (line 1024) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1029) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1036) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1040) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1047) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1051) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseDelimitedFrom (line 1058) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseDelimitedFrom (line 1067) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1078) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method parseFrom (line 1083) | public static cc.factorie.protobuf.DocumentProtos.Document.Mention...
        method newBuilder (line 1091) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 1092) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 1093) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 1096) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 1098) | public static final class Builder extends
          method Builder (line 1103) | private Builder() {}
          method create (line 1105) | private static Builder create() {
          method internalGetResult (line 1111) | protected cc.factorie.protobuf.DocumentProtos.Document.Mention i...
          method clear (line 1115) | public Builder clear() {
          method clone (line 1124) | public Builder clone() {
          method getDescriptorForType (line 1128) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 1133) | public cc.factorie.protobuf.DocumentProtos.Document.Mention getD...
          method isInitialized (line 1137) | public boolean isInitialized() {
          method build (line 1140) | public cc.factorie.protobuf.DocumentProtos.Document.Mention buil...
          method buildParsed (line 1147) | private cc.factorie.protobuf.DocumentProtos.Document.Mention bui...
          method buildPartial (line 1156) | public cc.factorie.protobuf.DocumentProtos.Document.Mention buil...
          method mergeFrom (line 1166) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 1175) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Doc...
          method mergeFrom (line 1196) | public Builder mergeFrom(
          method hasId (line 1243) | public boolean hasId() {
          method getId (line 1246) | public int getId() {
          method setId (line 1249) | public Builder setId(int value) {
          method clearId (line 1254) | public Builder clearId() {
          method hasEntityGuid (line 1261) | public boolean hasEntityGuid() {
          method getEntityGuid (line 1264) | public java.lang.String getEntityGuid() {
          method setEntityGuid (line 1267) | public Builder setEntityGuid(java.lang.String value) {
          method clearEntityGuid (line 1275) | public Builder clearEntityGuid() {
          method hasFrom (line 1282) | public boolean hasFrom() {
          method getFrom (line 1285) | public int getFrom() {
          method setFrom (line 1288) | public Builder setFrom(int value) {
          method clearFrom (line 1293) | public Builder clearFrom() {
          method hasTo (line 1300) | public boolean hasTo() {
          method getTo (line 1303) | public int getTo() {
          method setTo (line 1306) | public Builder setTo(int value) {
          method clearTo (line 1311) | public Builder clearTo() {
          method hasLabel (line 1318) | public boolean hasLabel() {
          method getLabel (line 1321) | public java.lang.String getLabel() {
          method setLabel (line 1324) | public Builder setLabel(java.lang.String value) {
          method clearLabel (line 1332) | public Builder clearLabel() {
      class DepTree (line 1350) | public static final class DepTree extends
        method DepTree (line 1353) | private DepTree() {
        method DepTree (line 1356) | private DepTree(boolean noInit) {}
        method getDefaultInstance (line 1359) | public static DepTree getDefaultInstance() {
        method getDefaultInstanceForType (line 1363) | public DepTree getDefaultInstanceForType() {
        method getDescriptor (line 1367) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 1372) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasRoot (line 1381) | public boolean hasRoot() { return hasRoot; }
        method getRoot (line 1382) | public int getRoot() { return root_; }
        method getHeadList (line 1388) | public java.util.List<java.lang.Integer> getHeadList() {
        method getHeadCount (line 1391) | public int getHeadCount() { return head_.size(); }
        method getHead (line 1392) | public int getHead(int index) {
        method getRelTypeList (line 1400) | public java.util.List<java.lang.String> getRelTypeList() {
        method getRelTypeCount (line 1403) | public int getRelTypeCount() { return relType_.size(); }
        method getRelType (line 1404) | public java.lang.String getRelType(int index) {
        method initFields (line 1408) | private void initFields() {
        method isInitialized (line 1410) | public final boolean isInitialized() {
        method writeTo (line 1415) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 1431) | public int getSerializedSize() {
        method parseFrom (line 1463) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1468) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1475) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1479) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1486) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1490) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseDelimitedFrom (line 1497) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseDelimitedFrom (line 1506) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1517) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method parseFrom (line 1522) | public static cc.factorie.protobuf.DocumentProtos.Document.DepTree...
        method newBuilder (line 1530) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 1531) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 1532) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 1535) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 1537) | public static final class Builder extends
          method Builder (line 1542) | private Builder() {}
          method create (line 1544) | private static Builder create() {
          method internalGetResult (line 1550) | protected cc.factorie.protobuf.DocumentProtos.Document.DepTree i...
          method clear (line 1554) | public Builder clear() {
          method clone (line 1563) | public Builder clone() {
          method getDescriptorForType (line 1567) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 1572) | public cc.factorie.protobuf.DocumentProtos.Document.DepTree getD...
          method isInitialized (line 1576) | public boolean isInitialized() {
          method build (line 1579) | public cc.factorie.protobuf.DocumentProtos.Document.DepTree buil...
          method buildParsed (line 1586) | private cc.factorie.protobuf.DocumentProtos.Document.DepTree bui...
          method buildPartial (line 1595) | public cc.factorie.protobuf.DocumentProtos.Document.DepTree buil...
          method mergeFrom (line 1613) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 1622) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Doc...
          method mergeFrom (line 1643) | public Builder mergeFrom(
          method hasRoot (line 1691) | public boolean hasRoot() {
          method getRoot (line 1694) | public int getRoot() {
          method setRoot (line 1697) | public Builder setRoot(int value) {
          method clearRoot (line 1702) | public Builder clearRoot() {
          method getHeadList (line 1709) | public java.util.List<java.lang.Integer> getHeadList() {
          method getHeadCount (line 1712) | public int getHeadCount() {
          method getHead (line 1715) | public int getHead(int index) {
          method setHead (line 1718) | public Builder setHead(int index, int value) {
          method addHead (line 1722) | public Builder addHead(int value) {
          method addAllHead (line 1729) | public Builder addAllHead(
          method clearHead (line 1737) | public Builder clearHead() {
          method getRelTypeList (line 1743) | public java.util.List<java.lang.String> getRelTypeList() {
          method getRelTypeCount (line 1746) | public int getRelTypeCount() {
          method getRelType (line 1749) | public java.lang.String getRelType(int index) {
          method setRelType (line 1752) | public Builder setRelType(int index, java.lang.String value) {
          method addRelType (line 1759) | public Builder addRelType(java.lang.String value) {
          method addAllRelType (line 1769) | public Builder addAllRelType(
          method clearRelType (line 1777) | public Builder clearRelType() {
      class RelationMention (line 1794) | public static final class RelationMention extends
        method RelationMention (line 1797) | private RelationMention() {
        method RelationMention (line 1800) | private RelationMention(boolean noInit) {}
        method getDefaultInstance (line 1803) | public static RelationMention getDefaultInstance() {
        method getDefaultInstanceForType (line 1807) | public RelationMention getDefaultInstanceForType() {
        method getDescriptor (line 1811) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 1816) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasId (line 1825) | public boolean hasId() { return hasId; }
        method getId (line 1826) | public int getId() { return id_; }
        method hasSource (line 1832) | public boolean hasSource() { return hasSource; }
        method getSource (line 1833) | public int getSource() { return source_; }
        method hasDest (line 1839) | public boolean hasDest() { return hasDest; }
        method getDest (line 1840) | public int getDest() { return dest_; }
        method hasLabel (line 1846) | public boolean hasLabel() { return hasLabel; }
        method getLabel (line 1847) | public java.lang.String getLabel() { return label_; }
        method initFields (line 1849) | private void initFields() {
        method isInitialized (line 1851) | public final boolean isInitialized() {
        method writeTo (line 1859) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 1878) | public int getSerializedSize() {
        method parseFrom (line 1904) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1909) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1916) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1920) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1927) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1931) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseDelimitedFrom (line 1938) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseDelimitedFrom (line 1947) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1958) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method parseFrom (line 1963) | public static cc.factorie.protobuf.DocumentProtos.Document.Relatio...
        method newBuilder (line 1971) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 1972) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 1973) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 1976) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 1978) | public static final class Builder extends
          method Builder (line 1983) | private Builder() {}
          method create (line 1985) | private static Builder create() {
          method internalGetResult (line 1991) | protected cc.factorie.protobuf.DocumentProtos.Document.RelationM...
          method clear (line 1995) | public Builder clear() {
          method clone (line 2004) | public Builder clone() {
          method getDescriptorForType (line 2008) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 2013) | public cc.factorie.protobuf.DocumentProtos.Document.RelationMent...
          method isInitialized (line 2017) | public boolean isInitialized() {
          method build (line 2020) | public cc.factorie.protobuf.DocumentProtos.Document.RelationMent...
          method buildParsed (line 2027) | private cc.factorie.protobuf.DocumentProtos.Document.RelationMen...
          method buildPartial (line 2036) | public cc.factorie.protobuf.DocumentProtos.Document.RelationMent...
          method mergeFrom (line 2046) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 2055) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Doc...
          method mergeFrom (line 2073) | public Builder mergeFrom(
          method hasId (line 2116) | public boolean hasId() {
          method getId (line 2119) | public int getId() {
          method setId (line 2122) | public Builder setId(int value) {
          method clearId (line 2127) | public Builder clearId() {
          method hasSource (line 2134) | public boolean hasSource() {
          method getSource (line 2137) | public int getSource() {
          method setSource (line 2140) | public Builder setSource(int value) {
          method clearSource (line 2145) | public Builder clearSource() {
          method hasDest (line 2152) | public boolean hasDest() {
          method getDest (line 2155) | public int getDest() {
          method setDest (line 2158) | public Builder setDest(int value) {
          method clearDest (line 2163) | public Builder clearDest() {
          method hasLabel (line 2170) | public boolean hasLabel() {
          method getLabel (line 2173) | public java.lang.String getLabel() {
          method setLabel (line 2176) | public Builder setLabel(java.lang.String value) {
          method clearLabel (line 2184) | public Builder clearLabel() {
      method hasFilename (line 2206) | public boolean hasFilename() { return hasFilename; }
      method getFilename (line 2207) | public java.lang.String getFilename() { return filename_; }
      method getSentencesList (line 2213) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Document.S...
      method getSentencesCount (line 2216) | public int getSentencesCount() { return sentences_.size(); }
      method getSentences (line 2217) | public cc.factorie.protobuf.DocumentProtos.Document.Sentence getSent...
      method initFields (line 2221) | private void initFields() {
      method isInitialized (line 2223) | public final boolean isInitialized() {
      method writeTo (line 2231) | public void writeTo(com.google.protobuf.CodedOutputStream output)
      method getSerializedSize (line 2244) | public int getSerializedSize() {
      method parseFrom (line 2262) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method parseFrom (line 2267) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method parseFrom (line 2274) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom...
      method parseFrom (line 2278) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method parseFrom (line 2285) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom...
      method parseFrom (line 2289) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method parseDelimitedFrom (line 2296) | public static cc.factorie.protobuf.DocumentProtos.Document parseDeli...
      method parseDelimitedFrom (line 2305) | public static cc.factorie.protobuf.DocumentProtos.Document parseDeli...
      method parseFrom (line 2316) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method parseFrom (line 2321) | public static cc.factorie.protobuf.DocumentProtos.Document parseFrom(
      method newBuilder (line 2329) | public static Builder newBuilder() { return Builder.create(); }
      method newBuilderForType (line 2330) | public Builder newBuilderForType() { return newBuilder(); }
      method newBuilder (line 2331) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProtos...
      method toBuilder (line 2334) | public Builder toBuilder() { return newBuilder(this); }
      class Builder (line 2336) | public static final class Builder extends
        method Builder (line 2341) | private Builder() {}
        method create (line 2343) | private static Builder create() {
        method internalGetResult (line 2349) | protected cc.factorie.protobuf.DocumentProtos.Document internalGet...
        method clear (line 2353) | public Builder clear() {
        method clone (line 2362) | public Builder clone() {
        method getDescriptorForType (line 2366) | public com.google.protobuf.Descriptors.Descriptor
        method getDefaultInstanceForType (line 2371) | public cc.factorie.protobuf.DocumentProtos.Document getDefaultInst...
        method isInitialized (line 2375) | public boolean isInitialized() {
        method build (line 2378) | public cc.factorie.protobuf.DocumentProtos.Document build() {
        method buildParsed (line 2385) | private cc.factorie.protobuf.DocumentProtos.Document buildParsed()
        method buildPartial (line 2394) | public cc.factorie.protobuf.DocumentProtos.Document buildPartial() {
        method mergeFrom (line 2408) | public Builder mergeFrom(com.google.protobuf.Message other) {
        method mergeFrom (line 2417) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Docum...
        method mergeFrom (line 2432) | public Builder mergeFrom(
        method hasFilename (line 2469) | public boolean hasFilename() {
        method getFilename (line 2472) | public java.lang.String getFilename() {
        method setFilename (line 2475) | public Builder setFilename(java.lang.String value) {
        method clearFilename (line 2483) | public Builder clearFilename() {
        method getSentencesList (line 2490) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Document...
        method getSentencesCount (line 2493) | public int getSentencesCount() {
        method getSentences (line 2496) | public cc.factorie.protobuf.DocumentProtos.Document.Sentence getSe...
        method setSentences (line 2499) | public Builder setSentences(int index, cc.factorie.protobuf.Docume...
        method setSentences (line 2506) | public Builder setSentences(int index, cc.factorie.protobuf.Docume...
        method addSentences (line 2510) | public Builder addSentences(cc.factorie.protobuf.DocumentProtos.Do...
        method addSentences (line 2520) | public Builder addSentences(cc.factorie.protobuf.DocumentProtos.Do...
        method addAllSentences (line 2527) | public Builder addAllSentences(
        method clearSentences (line 2535) | public Builder clearSentences() {
    class Relation (line 2552) | public static final class Relation extends
      method Relation (line 2555) | private Relation() {
      method Relation (line 2558) | private Relation(boolean noInit) {}
      method getDefaultInstance (line 2561) | public static Relation getDefaultInstance() {
      method getDefaultInstanceForType (line 2565) | public Relation getDefaultInstanceForType() {
      method getDescriptor (line 2569) | public static final com.google.protobuf.Descriptors.Descriptor
      method internalGetFieldAccessorTable (line 2574) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
      class RelationMentionRef (line 2579) | public static final class RelationMentionRef extends
        method RelationMentionRef (line 2582) | private RelationMentionRef() {
        method RelationMentionRef (line 2585) | private RelationMentionRef(boolean noInit) {}
        method getDefaultInstance (line 2588) | public static RelationMentionRef getDefaultInstance() {
        method getDefaultInstanceForType (line 2592) | public RelationMentionRef getDefaultInstanceForType() {
        method getDescriptor (line 2596) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 2601) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasFilename (line 2610) | public boolean hasFilename() { return hasFilename; }
        method getFilename (line 2611) | public java.lang.String getFilename() { return filename_; }
        method hasSourceId (line 2617) | public boolean hasSourceId() { return hasSourceId; }
        method getSourceId (line 2618) | public int getSourceId() { return sourceId_; }
        method hasDestId (line 2624) | public boolean hasDestId() { return hasDestId; }
        method getDestId (line 2625) | public int getDestId() { return destId_; }
        method getFeatureList (line 2631) | public java.util.List<java.lang.String> getFeatureList() {
        method getFeatureCount (line 2634) | public int getFeatureCount() { return feature_.size(); }
        method getFeature (line 2635) | public java.lang.String getFeature(int index) {
        method hasSentence (line 2643) | public boolean hasSentence() { return hasSentence; }
        method getSentence (line 2644) | public java.lang.String getSentence() { return sentence_; }
        method initFields (line 2646) | private void initFields() {
        method isInitialized (line 2648) | public final boolean isInitialized() {
        method writeTo (line 2655) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 2677) | public int getSerializedSize() {
        method parseFrom (line 2712) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2717) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2724) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2728) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2735) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2739) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseDelimitedFrom (line 2746) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseDelimitedFrom (line 2755) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2766) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method parseFrom (line 2771) | public static cc.factorie.protobuf.DocumentProtos.Relation.Relatio...
        method newBuilder (line 2779) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 2780) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 2781) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 2784) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 2786) | public static final class Builder extends
          method Builder (line 2791) | private Builder() {}
          method create (line 2793) | private static Builder create() {
          method internalGetResult (line 2799) | protected cc.factorie.protobuf.DocumentProtos.Relation.RelationM...
          method clear (line 2803) | public Builder clear() {
          method clone (line 2812) | public Builder clone() {
          method getDescriptorForType (line 2816) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 2821) | public cc.factorie.protobuf.DocumentProtos.Relation.RelationMent...
          method isInitialized (line 2825) | public boolean isInitialized() {
          method build (line 2828) | public cc.factorie.protobuf.DocumentProtos.Relation.RelationMent...
          method buildParsed (line 2835) | private cc.factorie.protobuf.DocumentProtos.Relation.RelationMen...
          method buildPartial (line 2844) | public cc.factorie.protobuf.DocumentProtos.Relation.RelationMent...
          method mergeFrom (line 2858) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 2867) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Rel...
          method mergeFrom (line 2891) | public Builder mergeFrom(
          method hasFilename (line 2938) | public boolean hasFilename() {
          method getFilename (line 2941) | public java.lang.String getFilename() {
          method setFilename (line 2944) | public Builder setFilename(java.lang.String value) {
          method clearFilename (line 2952) | public Builder clearFilename() {
          method hasSourceId (line 2959) | public boolean hasSourceId() {
          method getSourceId (line 2962) | public int getSourceId() {
          method setSourceId (line 2965) | public Builder setSourceId(int value) {
          method clearSourceId (line 2970) | public Builder clearSourceId() {
          method hasDestId (line 2977) | public boolean hasDestId() {
          method getDestId (line 2980) | public int getDestId() {
          method setDestId (line 2983) | public Builder setDestId(int value) {
          method clearDestId (line 2988) | public Builder clearDestId() {
          method getFeatureList (line 2995) | public java.util.List<java.lang.String> getFeatureList() {
          method getFeatureCount (line 2998) | public int getFeatureCount() {
          method getFeature (line 3001) | public java.lang.String getFeature(int index) {
          method setFeature (line 3004) | public Builder setFeature(int index, java.lang.String value) {
          method addFeature (line 3011) | public Builder addFeature(java.lang.String value) {
          method addAllFeature (line 3021) | public Builder addAllFeature(
          method clearFeature (line 3029) | public Builder clearFeature() {
          method hasSentence (line 3035) | public boolean hasSentence() {
          method getSentence (line 3038) | public java.lang.String getSentence() {
          method setSentence (line 3041) | public Builder setSentence(java.lang.String value) {
          method clearSentence (line 3049) | public Builder clearSentence() {
      method hasSourceGuid (line 3071) | public boolean hasSourceGuid() { return hasSourceGuid; }
      method getSourceGuid (line 3072) | public java.lang.String getSourceGuid() { return sourceGuid_; }
      method hasDestGuid (line 3078) | public boolean hasDestGuid() { return hasDestGuid; }
      method getDestGuid (line 3079) | public java.lang.String getDestGuid() { return destGuid_; }
      method hasRelType (line 3085) | public boolean hasRelType() { return hasRelType; }
      method getRelType (line 3086) | public java.lang.String getRelType() { return relType_; }
      method getMentionList (line 3092) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Relation.R...
      method getMentionCount (line 3095) | public int getMentionCount() { return mention_.size(); }
      method getMention (line 3096) | public cc.factorie.protobuf.DocumentProtos.Relation.RelationMentionR...
      method initFields (line 3100) | private void initFields() {
      method isInitialized (line 3102) | public final boolean isInitialized() {
      method writeTo (line 3112) | public void writeTo(com.google.protobuf.CodedOutputStream output)
      method getSerializedSize (line 3131) | public int getSerializedSize() {
      method parseFrom (line 3157) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method parseFrom (line 3162) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method parseFrom (line 3169) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom...
      method parseFrom (line 3173) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method parseFrom (line 3180) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom...
      method parseFrom (line 3184) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method parseDelimitedFrom (line 3191) | public static cc.factorie.protobuf.DocumentProtos.Relation parseDeli...
      method parseDelimitedFrom (line 3200) | public static cc.factorie.protobuf.DocumentProtos.Relation parseDeli...
      method parseFrom (line 3211) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method parseFrom (line 3216) | public static cc.factorie.protobuf.DocumentProtos.Relation parseFrom(
      method newBuilder (line 3224) | public static Builder newBuilder() { return Builder.create(); }
      method newBuilderForType (line 3225) | public Builder newBuilderForType() { return newBuilder(); }
      method newBuilder (line 3226) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProtos...
      method toBuilder (line 3229) | public Builder toBuilder() { return newBuilder(this); }
      class Builder (line 3231) | public static final class Builder extends
        method Builder (line 3236) | private Builder() {}
        method create (line 3238) | private static Builder create() {
        method internalGetResult (line 3244) | protected cc.factorie.protobuf.DocumentProtos.Relation internalGet...
        method clear (line 3248) | public Builder clear() {
        method clone (line 3257) | public Builder clone() {
        method getDescriptorForType (line 3261) | public com.google.protobuf.Descriptors.Descriptor
        method getDefaultInstanceForType (line 3266) | public cc.factorie.protobuf.DocumentProtos.Relation getDefaultInst...
        method isInitialized (line 3270) | public boolean isInitialized() {
        method build (line 3273) | public cc.factorie.protobuf.DocumentProtos.Relation build() {
        method buildParsed (line 3280) | private cc.factorie.protobuf.DocumentProtos.Relation buildParsed()
        method buildPartial (line 3289) | public cc.factorie.protobuf.DocumentProtos.Relation buildPartial() {
        method mergeFrom (line 3303) | public Builder mergeFrom(com.google.protobuf.Message other) {
        method mergeFrom (line 3312) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Relat...
        method mergeFrom (line 3333) | public Builder mergeFrom(
        method hasSourceGuid (line 3378) | public boolean hasSourceGuid() {
        method getSourceGuid (line 3381) | public java.lang.String getSourceGuid() {
        method setSourceGuid (line 3384) | public Builder setSourceGuid(java.lang.String value) {
        method clearSourceGuid (line 3392) | public Builder clearSourceGuid() {
        method hasDestGuid (line 3399) | public boolean hasDestGuid() {
        method getDestGuid (line 3402) | public java.lang.String getDestGuid() {
        method setDestGuid (line 3405) | public Builder setDestGuid(java.lang.String value) {
        method clearDestGuid (line 3413) | public Builder clearDestGuid() {
        method hasRelType (line 3420) | public boolean hasRelType() {
        method getRelType (line 3423) | public java.lang.String getRelType() {
        method setRelType (line 3426) | public Builder setRelType(java.lang.String value) {
        method clearRelType (line 3434) | public Builder clearRelType() {
        method getMentionList (line 3441) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Relation...
        method getMentionCount (line 3444) | public int getMentionCount() {
        method getMention (line 3447) | public cc.factorie.protobuf.DocumentProtos.Relation.RelationMentio...
        method setMention (line 3450) | public Builder setMention(int index, cc.factorie.protobuf.Document...
        method setMention (line 3457) | public Builder setMention(int index, cc.factorie.protobuf.Document...
        method addMention (line 3461) | public Builder addMention(cc.factorie.protobuf.DocumentProtos.Rela...
        method addMention (line 3471) | public Builder addMention(cc.factorie.protobuf.DocumentProtos.Rela...
        method addAllMention (line 3478) | public Builder addAllMention(
        method clearMention (line 3486) | public Builder clearMention() {
    class Entity (line 3503) | public static final class Entity extends
      method Entity (line 3506) | private Entity() {
      method Entity (line 3509) | private Entity(boolean noInit) {}
      method getDefaultInstance (line 3512) | public static Entity getDefaultInstance() {
      method getDefaultInstanceForType (line 3516) | public Entity getDefaultInstanceForType() {
      method getDescriptor (line 3520) | public static final com.google.protobuf.Descriptors.Descriptor
      method internalGetFieldAccessorTable (line 3525) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
      class EntityMentionRef (line 3530) | public static final class EntityMentionRef extends
        method EntityMentionRef (line 3533) | private EntityMentionRef() {
        method EntityMentionRef (line 3536) | private EntityMentionRef(boolean noInit) {}
        method getDefaultInstance (line 3539) | public static EntityMentionRef getDefaultInstance() {
        method getDefaultInstanceForType (line 3543) | public EntityMentionRef getDefaultInstanceForType() {
        method getDescriptor (line 3547) | public static final com.google.protobuf.Descriptors.Descriptor
        method internalGetFieldAccessorTable (line 3552) | protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
        method hasFilename (line 3561) | public boolean hasFilename() { return hasFilename; }
        method getFilename (line 3562) | public java.lang.String getFilename() { return filename_; }
        method hasId (line 3568) | public boolean hasId() { return hasId; }
        method getId (line 3569) | public int getId() { return id_; }
        method getFeatureList (line 3575) | public java.util.List<java.lang.String> getFeatureList() {
        method getFeatureCount (line 3578) | public int getFeatureCount() { return feature_.size(); }
        method getFeature (line 3579) | public java.lang.String getFeature(int index) {
        method initFields (line 3583) | private void initFields() {
        method isInitialized (line 3585) | public final boolean isInitialized() {
        method writeTo (line 3591) | public void writeTo(com.google.protobuf.CodedOutputStream output)
        method getSerializedSize (line 3607) | public int getSerializedSize() {
        method parseFrom (line 3634) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3639) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3646) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3650) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3657) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3661) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseDelimitedFrom (line 3668) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseDelimitedFrom (line 3677) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3688) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method parseFrom (line 3693) | public static cc.factorie.protobuf.DocumentProtos.Entity.EntityMen...
        method newBuilder (line 3701) | public static Builder newBuilder() { return Builder.create(); }
        method newBuilderForType (line 3702) | public Builder newBuilderForType() { return newBuilder(); }
        method newBuilder (line 3703) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProt...
        method toBuilder (line 3706) | public Builder toBuilder() { return newBuilder(this); }
        class Builder (line 3708) | public static final class Builder extends
          method Builder (line 3713) | private Builder() {}
          method create (line 3715) | private static Builder create() {
          method internalGetResult (line 3721) | protected cc.factorie.protobuf.DocumentProtos.Entity.EntityMenti...
          method clear (line 3725) | public Builder clear() {
          method clone (line 3734) | public Builder clone() {
          method getDescriptorForType (line 3738) | public com.google.protobuf.Descriptors.Descriptor
          method getDefaultInstanceForType (line 3743) | public cc.factorie.protobuf.DocumentProtos.Entity.EntityMentionR...
          method isInitialized (line 3747) | public boolean isInitialized() {
          method build (line 3750) | public cc.factorie.protobuf.DocumentProtos.Entity.EntityMentionR...
          method buildParsed (line 3757) | private cc.factorie.protobuf.DocumentProtos.Entity.EntityMention...
          method buildPartial (line 3766) | public cc.factorie.protobuf.DocumentProtos.Entity.EntityMentionR...
          method mergeFrom (line 3780) | public Builder mergeFrom(com.google.protobuf.Message other) {
          method mergeFrom (line 3789) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Ent...
          method mergeFrom (line 3807) | public Builder mergeFrom(
          method hasFilename (line 3846) | public boolean hasFilename() {
          method getFilename (line 3849) | public java.lang.String getFilename() {
          method setFilename (line 3852) | public Builder setFilename(java.lang.String value) {
          method clearFilename (line 3860) | public Builder clearFilename() {
          method hasId (line 3867) | public boolean hasId() {
          method getId (line 3870) | public int getId() {
          method setId (line 3873) | public Builder setId(int value) {
          method clearId (line 3878) | public Builder clearId() {
          method getFeatureList (line 3885) | public java.util.List<java.lang.String> getFeatureList() {
          method getFeatureCount (line 3888) | public int getFeatureCount() {
          method getFeature (line 3891) | public java.lang.String getFeature(int index) {
          method setFeature (line 3894) | public Builder setFeature(int index, java.lang.String value) {
          method addFeature (line 3901) | public Builder addFeature(java.lang.String value) {
          method addAllFeature (line 3911) | public Builder addAllFeature(
          method clearFeature (line 3919) | public Builder clearFeature() {
      method hasGuid (line 3940) | public boolean hasGuid() { return hasGuid; }
      method getGuid (line 3941) | public java.lang.String getGuid() { return guid_; }
      method hasName (line 3947) | public boolean hasName() { return hasName; }
      method getName (line 3948) | public java.lang.String getName() { return name_; }
      method hasType (line 3954) | public boolean hasType() { return hasType; }
      method getType (line 3955) | public java.lang.String getType() { return type_; }
      method hasPred (line 3961) | public boolean hasPred() { return hasPred; }
      method getPred (line 3962) | public java.lang.String getPred() { return pred_; }
      method getMentionList (line 3968) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Entity.Ent...
      method getMentionCount (line 3971) | public int getMentionCount() { return mention_.size(); }
      method getMention (line 3972) | public cc.factorie.protobuf.DocumentProtos.Entity.EntityMentionRef g...
      method initFields (line 3976) | private void initFields() {
      method isInitialized (line 3978) | public final boolean isInitialized() {
      method writeTo (line 3986) | public void writeTo(com.google.protobuf.CodedOutputStream output)
      method getSerializedSize (line 4008) | public int getSerializedSize() {
      method parseFrom (line 4038) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method parseFrom (line 4043) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method parseFrom (line 4050) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(b...
      method parseFrom (line 4054) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method parseFrom (line 4061) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(j...
      method parseFrom (line 4065) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method parseDelimitedFrom (line 4072) | public static cc.factorie.protobuf.DocumentProtos.Entity parseDelimi...
      method parseDelimitedFrom (line 4081) | public static cc.factorie.protobuf.DocumentProtos.Entity parseDelimi...
      method parseFrom (line 4092) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method parseFrom (line 4097) | public static cc.factorie.protobuf.DocumentProtos.Entity parseFrom(
      method newBuilder (line 4105) | public static Builder newBuilder() { return Builder.create(); }
      method newBuilderForType (line 4106) | public Builder newBuilderForType() { return newBuilder(); }
      method newBuilder (line 4107) | public static Builder newBuilder(cc.factorie.protobuf.DocumentProtos...
      method toBuilder (line 4110) | public Builder toBuilder() { return newBuilder(this); }
      class Builder (line 4112) | public static final class Builder extends
        method Builder (line 4117) | private Builder() {}
        method create (line 4119) | private static Builder create() {
        method internalGetResult (line 4125) | protected cc.factorie.protobuf.DocumentProtos.Entity internalGetRe...
        method clear (line 4129) | public Builder clear() {
        method clone (line 4138) | public Builder clone() {
        method getDescriptorForType (line 4142) | public com.google.protobuf.Descriptors.Descriptor
        method getDefaultInstanceForType (line 4147) | public cc.factorie.protobuf.DocumentProtos.Entity getDefaultInstan...
        method isInitialized (line 4151) | public boolean isInitialized() {
        method build (line 4154) | public cc.factorie.protobuf.DocumentProtos.Entity build() {
        method buildParsed (line 4161) | private cc.factorie.protobuf.DocumentProtos.Entity buildParsed()
        method buildPartial (line 4170) | public cc.factorie.protobuf.DocumentProtos.Entity buildPartial() {
        method mergeFrom (line 4184) | public Builder mergeFrom(com.google.protobuf.Message other) {
        method mergeFrom (line 4193) | public Builder mergeFrom(cc.factorie.protobuf.DocumentProtos.Entit...
        method mergeFrom (line 4217) | public Builder mergeFrom(
        method hasGuid (line 4266) | public boolean hasGuid() {
        method getGuid (line 4269) | public java.lang.String getGuid() {
        method setGuid (line 4272) | public Builder setGuid(java.lang.String value) {
        method clearGuid (line 4280) | public Builder clearGuid() {
        method hasName (line 4287) | public boolean hasName() {
        method getName (line 4290) | public java.lang.String getName() {
        method setName (line 4293) | public Builder setName(java.lang.String value) {
        method clearName (line 4301) | public Builder clearName() {
        method hasType (line 4308) | public boolean hasType() {
        method getType (line 4311) | public java.lang.String getType() {
        method setType (line 4314) | public Builder setType(java.lang.String value) {
        method clearType (line 4322) | public Builder clearType() {
        method hasPred (line 4329) | public boolean hasPred() {
        method getPred (line 4332) | public java.lang.String getPred() {
        method setPred (line 4335) | public Builder setPred(java.lang.String value) {
        method clearPred (line 4343) | public Builder clearPred() {
        method getMentionList (line 4350) | public java.util.List<cc.factorie.protobuf.DocumentProtos.Entity.E...
        method getMentionCount (line 4353) | public int getMentionCount() {
        method getMention (line 4356) | public cc.factorie.protobuf.DocumentProtos.Entity.EntityMentionRef...
        method setMention (line 4359) | public Builder setMention(int index, cc.factorie.protobuf.Document...
        method setMention (line 4366) | public Builder setMention(int index, cc.factorie.protobuf.Document...
        method addMention (line 4370) | public Builder addMention(cc.factorie.protobuf.DocumentProtos.Enti...
        method addMention (line 4380) | public Builder addMention(cc.factorie.protobuf.DocumentProtos.Enti...
        method addAllMention (line 4387) | public Builder addAllMention(
        method clearMention (line 4395) | public Builder clearMention() {
    method getDescriptor (line 4463) | public static com.google.protobuf.Descriptors.FileDescriptor
    method assignDescriptors (line 4500) | public com.google.protobuf.ExtensionRegistry assignDescriptors(
    method internalForceInit (line 4592) | public static void internalForceInit() {}

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/AveragedPerceptron.java
  class AveragedPerceptron (line 10) | public class AveragedPerceptron {
    method AveragedPerceptron (line 21) | public AveragedPerceptron(Model model, Random random) {
    method train (line 36) | public Parameters train(Dataset trainingData) {
    method trainingIteration (line 64) | public void trainingIteration(int iteration, Dataset trainingData) {
    method YsAgree (line 94) | private boolean YsAgree(int[] y1, int[] y2) {
    method update (line 104) | public void update(Parse pred, Parse tru) {
    method updateRel (line 124) | private void updateRel(int toState, SparseBinaryVector features,
    method finalizeRel (line 145) | private void finalizeRel() {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/ConditionalInference.java
  class ConditionalInference (line 8) | public class ConditionalInference {
    method infer (line 10) | public static Parse infer(MILDocument doc,
    class Edge (line 132) | static class Edge {
      method Edge (line 136) | Edge(int m, int y, double score) {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/FullInference.java
  class FullInference (line 6) | public class FullInference {
    method infer (line 8) | public static Parse infer(MILDocument doc,

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Model.java
  class Model (line 11) | public class Model {
    method numFeatures (line 16) | public int numFeatures(int rel) {
    method read (line 22) | public void read(String file) throws IOException {
    method write (line 33) | public void write(String file) throws IOException {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Parameters.java
  class Parameters (line 13) | public class Parameters {
    method sum (line 19) | private DenseVector sum(DenseVector v1, DenseVector v2, float factor) {
    method sum (line 30) | public void sum(Parameters p, float factor) {
    method init (line 35) | public void init() {
    method reset (line 47) | public void reset() {
    method serialize (line 53) | public void serialize(OutputStream os)
    method deserialize (line 60) | public void deserialize(InputStream is)
    method serialize (line 68) | public void serialize(String file)
    method deserialize (line 75) | public void deserialize(String file)

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Parse.java
  class Parse (line 5) | public class Parse {
    method Parse (line 14) | public Parse() {}

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Scorer.java
  class Scorer (line 7) | public class Scorer {
    method Scorer (line 10) | public Scorer() {}
    method scoreMentionRelation (line 13) | public double scoreMentionRelation(MILDocument doc, int m, int rel) {
    method getMentionRelationFeatures (line 21) | public SparseBinaryVector getMentionRelationFeatures(MILDocument doc, ...
    method setParameters (line 25) | public void setParameters(Parameters params) {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Viterbi.java
  class Viterbi (line 5) | public class Viterbi {
    method Viterbi (line 10) | public Viterbi(Model model, Scorer parseScorer) {
    method parse (line 15) | public Parse parse(MILDocument doc, int mention) {
    class Parse (line 36) | public static class Parse {
      method Parse (line 44) | Parse(int state, double score) {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/data/Dataset.java
  type Dataset (line 5) | public interface Dataset {
    method numDocs (line 7) | public int numDocs();
    method shuffle (line 9) | public void shuffle(Random random);
    method next (line 11) | public MILDocument next();
    method next (line 13) | public boolean next(MILDocument doc);
    method reset (line 15) | public void reset();

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/data/MILDocument.java
  class MILDocument (line 16) | public class MILDocument {
    method MILDocument (line 32) | public MILDocument() {
    method clear (line 38) | public void clear() {
    method setCapacity (line 42) | public void setCapacity(int targetSize) {
    method read (line 57) | public boolean read(DataInputStream dis) throws IOException {
    method write (line 82) | public void write(DataOutputStream dos) throws IOException {

FILE: code/Model/multir/src/edu/uw/cs/multir/learning/data/MemoryDataset.java
  class MemoryDataset (line 11) | public class MemoryDataset implements Dataset {
    method MemoryDataset (line 16) | public MemoryDataset() {  }
    method MemoryDataset (line 18) | public MemoryDataset(String file)
    method numDocs (line 32) | public int numDocs() { return docs.length; }
    method shuffle (line 34) | public void shuffle(Random random) {
    method next (line 44) | public MILDocument next() {
    method next (line 50) | public boolean next(MILDocument doc) {
    method reset (line 65) | public void reset() {

FILE: code/Model/multir/src/edu/uw/cs/multir/main/AggregatePrecisionRecallCurve.java
  class AggregatePrecisionRecallCurve (line 20) | public class AggregatePrecisionRecallCurve {
    method run (line 22) | public static void run(String dir)
    method eval (line 37) | public static void eval(Dataset test, Parameters params,
    class Prediction (line 98) | static class Prediction {
      method Prediction (line 105) | Prediction(int rel, boolean trueRel, boolean predRel, double score,
    class PrecisionRecallTester (line 117) | static class PrecisionRecallTester {
      method handle (line 120) | public void handle(String[] tokens, boolean[] predictedLabels,
      method handle (line 136) | public void handle(int rel, boolean p, boolean t, double score) {
      method reset (line 146) | public void reset() { numCorrect = numPredictions = numRelations = 0; }
      method precision (line 147) | public double precision() {
      method recall (line 151) | public double recall() {

FILE: code/Model/multir/src/edu/uw/cs/multir/main/Main.java
  class Main (line 5) | public class Main {
    method main (line 7) | public static void main(String[] args) throws IOException {
    method printUsage (line 100) | private static void printUsage() {
    method printUsagePreprocess (line 107) | private static void printUsagePreprocess() {
    method printUsageTrain (line 112) | private static void printUsageTrain() {
    method printUsageTest (line 117) | private static void printUsageTest() {
    method printUsageResults (line 122) | private static void printUsageResults() {
    method printUsageAggPR (line 127) | private static void printUsageAggPR() {
    method printUsageSenPR (line 132) | private static void printUsageSenPR() {
    method printUsageSenRel (line 137) | private static void printUsageSenRel() {

FILE: code/Model/multir/src/edu/uw/cs/multir/main/Preprocess.java
  class Preprocess (line 10) | public class Preprocess {
    method preprocess (line 12) | public static void preprocess(String trainFile, String testFile, Strin...

FILE: code/Model/multir/src/edu/uw/cs/multir/main/ResultWriter.java
  class ResultWriter (line 19) | public class ResultWriter {
    method write (line 24) | public static void write(String dir) throws IOException {
    method eval (line 40) | public static void eval(String mappingFile, Dataset test, Parameters p...

FILE: code/Model/multir/src/edu/uw/cs/multir/main/SententialPrecisionRecallByRelation.java
  class SententialPrecisionRecallByRelation (line 14) | public class SententialPrecisionRecallByRelation {
    method run (line 19) | public static void run(String labelsFile, String resultsFile) throws I...

FILE: code/Model/multir/src/edu/uw/cs/multir/main/SententialPrecisionRecallCurve.java
  class SententialPrecisionRecallCurve (line 14) | public class SententialPrecisionRecallCurve {
    method run (line 21) | public static void run(String labelsFile, String resultsFile) throws I...
    class Example (line 140) | static class Example {
    class Label (line 149) | static class Label {

FILE: code/Model/multir/src/edu/uw/cs/multir/main/Test.java
  class Test (line 14) | public class Test {
    method test (line 16) | public static void test(String dir) throws IOException {

FILE: code/Model/multir/src/edu/uw/cs/multir/main/Train.java
  class Train (line 13) | public class Train {
    method train (line 15) | public static void train(String dir) throws IOException {

FILE: code/Model/multir/src/edu/uw/cs/multir/preprocess/ConvertProtobufToMILDocument.java
  class ConvertProtobufToMILDocument (line 19) | public class ConvertProtobufToMILDocument {
    method main (line 21) | public static void main(String[] args) throws IOException {
    method convert (line 32) | public static void convert(String input, String output, String mapping...

FILE: code/Model/multir/src/edu/uw/cs/multir/preprocess/Mappings.java
  class Mappings (line 17) | public class Mappings {
    method getRelationID (line 22) | public int getRelationID(String relation, boolean addNew) {
    method getRel2RelID (line 32) | public Map<String,Integer> getRel2RelID() {
    method getFeatureID (line 36) | public int getFeatureID(String feature, boolean addNew) {
    method numRelations (line 46) | public int numRelations() {
    method numFeatures (line 50) | public int numFeatures() {
    method write (line 54) | public void write(String file) throws IOException {
    method read (line 62) | public void read(String file) throws IOException {
    method writeMap (line 70) | private void writeMap(Map<String,Integer> m, BufferedWriter w)
    method readMap (line 82) | private void readMap(Map<String,Integer> m, BufferedReader r)

FILE: code/Model/multir/src/edu/uw/cs/multir/util/DenseVector.java
  class DenseVector (line 9) | public class DenseVector {
    method DenseVector (line 13) | public DenseVector(int length) {
    method dotProduct (line 17) | public double dotProduct(SparseBinaryVector v) {
    method reset (line 21) | public void reset() {
    method dotProduct (line 25) | public static double dotProduct(DenseVector v1, SparseBinaryVector v2) {
    method copy (line 33) | public DenseVector copy() {
    method scale (line 39) | public void scale(float factor) {
    method addSparse (line 44) | public void addSparse(SparseBinaryVector v, double factor) {
    method sum (line 49) | public static DenseVector sum(DenseVector v1, DenseVector v2, double f...
    method scale (line 56) | public static DenseVector scale(DenseVector v, float factor) {
    method serialize (line 62) | public void serialize(OutputStream os)
    method deserialize (line 71) | public void deserialize(InputStream is)
    method sum (line 81) | public DenseVector sum(DenseVector v, float factor) {

FILE: code/Model/multir/src/edu/uw/cs/multir/util/SparseBinaryVector.java
  class SparseBinaryVector (line 9) | public class SparseBinaryVector {
    method SparseBinaryVector (line 13) | public SparseBinaryVector() {
    method SparseBinaryVector (line 18) | public SparseBinaryVector(int[] ids, int num) {
    method reset (line 23) | public void reset() {
    method copy (line 27) | public SparseBinaryVector copy() {
    method dotProduct (line 33) | public double dotProduct(SparseBinaryVector v) {
    method serialize (line 50) | public void serialize(OutputStream os)
    method deserialize (line 59) | public void deserialize(InputStream is)
    method toString (line 69) | public String toString() {

FILE: code/Model/pte/line.cpp
  function TrainModel (line 53) | void TrainModel() {
  function ArgPos (line 86) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 98) | int main(int argc, char **argv) {

FILE: code/Model/pte/linelib.cpp
  function real (line 32) | real *line_node::get_vector()
  type struct_node (line 37) | struct struct_node
  type struct_node (line 70) | struct struct_node
  type struct_node (line 70) | struct struct_node

FILE: code/Model/pte/linelib.h
  type real (line 14) | typedef float real;
  type Eigen (line 16) | typedef Eigen::Matrix< real, Eigen::Dynamic,
  type Eigen (line 20) | typedef Eigen::Matrix< real, 1, Eigen::Dynamic,
  type struct_node (line 24) | struct struct_node {
  function class (line 31) | class line_node
  function class (line 54) | class line_link

FILE: code/Model/pte/pte-hete.cpp
  function TrainModel (line 57) | void TrainModel() {
  function ArgPos (line 105) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 117) | int main(int argc, char **argv) {

FILE: code/Model/pte/ransampl.c
  function ransampl_ws (line 23) | ransampl_ws* ransampl_alloc( integer n )
  function ransampl_set (line 39) | void ransampl_set( ransampl_ws *ws, double* p )
  function integer (line 107) | integer ransampl_draw( ransampl_ws *ws, double ran1, double ran2 )
  function ransampl_free (line 114) | void ransampl_free( ransampl_ws *ws )

FILE: code/Model/pte/ransampl.h
  type ransampl_ws (line 30) | typedef struct {

FILE: code/Model/retype/hplelib.cpp
  function real (line 3) | real sigmoid(real x)
  function real (line 40) | real *line_node::get_vector()
  type struct_node (line 45) | struct struct_node
  type struct_node (line 78) | struct struct_node
  type struct_node (line 78) | struct struct_node

FILE: code/Model/retype/hplelib.h
  type real (line 16) | typedef float real;
  type Eigen (line 18) | typedef Eigen::Matrix< real, Eigen::Dynamic,
  type Eigen (line 22) | typedef Eigen::Matrix< real, 1, Eigen::Dynamic,
  type struct_node (line 26) | struct struct_node {
  type struct_neighbor (line 31) | struct struct_neighbor {
  type triple (line 36) | struct triple
  function class (line 53) | class line_node
  function class (line 81) | class line_link
  function class (line 111) | class line_triple

FILE: code/Model/retype/ransampl.c
  function ransampl_ws (line 23) | ransampl_ws* ransampl_alloc( integer n )
  function ransampl_set (line 39) | void ransampl_set( ransampl_ws *ws, double* p )
  function integer (line 107) | integer ransampl_draw( ransampl_ws *ws, double ran1, double ran2 )
  function ransampl_free (line 114) | void ransampl_free( ransampl_ws *ws )

FILE: code/Model/retype/ransampl.h
  type ransampl_ws (line 30) | typedef struct {

FILE: code/Model/retype/retype-rm.cpp
  function func_rand_num (line 27) | double func_rand_num()
  function TrainModel (line 119) | void TrainModel() {
  function ArgPos (line 205) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 217) | int main(int argc, char **argv) {

FILE: code/Model/retype/retype.cpp
  function func_rand_num (line 34) | double func_rand_num()
  function TrainModel (line 175) | void TrainModel() {
  function ArgPos (line 307) | int ArgPos(char *str, int argc, char **argv) {
  function main (line 319) | int main(int argc, char **argv) {

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm.java
  class Cache (line 16) | class Cache {
    class head_t (line 19) | private final class head_t
    method Cache (line 28) | Cache(int l_, long size_)
    method lru_delete (line 41) | private void lru_delete(head_t h)
    method lru_insert (line 48) | private void lru_insert(head_t h)
    method get_data (line 61) | int get_data(int index, float[][] data, int len)
    method swap_index (line 92) | void swap_index(int i, int j)
  class QMatrix (line 130) | abstract class QMatrix {
    method get_Q (line 131) | abstract float[] get_Q(int column, int len);
    method get_QD (line 132) | abstract double[] get_QD();
    method swap_index (line 133) | abstract void swap_index(int i, int j);
  class Kernel (line 136) | abstract class Kernel extends QMatrix {
    method get_Q (line 146) | abstract float[] get_Q(int column, int len);
    method get_QD (line 147) | abstract double[] get_QD();
    method swap_index (line 149) | void swap_index(int i, int j)
    method powi (line 155) | private static double powi(double base, int times)
    method kernel_function (line 167) | double kernel_function(int i, int j)
    method Kernel (line 186) | Kernel(int l, svm_node[][] x_, svm_parameter param)
    method dot (line 204) | static double dot(svm_node[] x, svm_node[] y)
    method k_function (line 226) | static double k_function(svm_node[] x, svm_node[] y,
  class Solver (line 303) | class Solver {
    method get_C (line 324) | double get_C(int i)
    method update_alpha_status (line 328) | void update_alpha_status(int i)
    method is_upper_bound (line 336) | boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
    method is_lower_bound (line 337) | boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
    method is_free (line 338) | boolean is_free(int i) {  return alpha_status[i] == FREE; }
    class SolutionInfo (line 342) | static class SolutionInfo {
    method swap_index (line 350) | void swap_index(int i, int j)
    method reconstruct_gradient (line 362) | void reconstruct_gradient()
    method Solve (line 404) | void Solve(int l, QMatrix Q, double[] p_, byte[] y_,
    method select_working_set (line 673) | int select_working_set(int[] working_set)
    method be_shrunk (line 772) | private boolean be_shrunk(int i, double Gmax1, double Gmax2)
    method do_shrinking (line 792) | void do_shrinking()
    method calculate_rho (line 852) | double calculate_rho()
  class Solver_NU (line 897) | final class Solver_NU extends Solver
    method Solve (line 901) | void Solve(int l, QMatrix Q, double[] p, byte[] y,
    method select_working_set (line 910) | int select_working_set(int[] working_set)
    method be_shrunk (line 1022) | private boolean be_shrunk(int i, double Gmax1, double Gmax2, double Gm...
    method do_shrinking (line 1042) | void do_shrinking()
    method calculate_rho (line 1094) | double calculate_rho()
  class SVC_Q (line 1148) | class SVC_Q extends Kernel
    method SVC_Q (line 1154) | SVC_Q(svm_problem prob, svm_parameter param, byte[] y_)
    method get_Q (line 1164) | float[] get_Q(int i, int len)
    method get_QD (line 1176) | double[] get_QD()
    method swap_index (line 1181) | void swap_index(int i, int j)
  class ONE_CLASS_Q (line 1190) | class ONE_CLASS_Q extends Kernel
    method ONE_CLASS_Q (line 1195) | ONE_CLASS_Q(svm_problem prob, svm_parameter param)
    method get_Q (line 1204) | float[] get_Q(int i, int len)
    method get_QD (line 1216) | double[] get_QD()
    method swap_index (line 1221) | void swap_index(int i, int j)
  class SVR_Q (line 1229) | class SVR_Q extends Kernel
    method SVR_Q (line 1239) | SVR_Q(svm_problem prob, svm_parameter param)
    method swap_index (line 1260) | void swap_index(int i, int j)
    method get_Q (line 1267) | float[] get_Q(int i, int len)
    method get_QD (line 1286) | double[] get_QD()
  class svm (line 1292) | public class svm {
    method print (line 1301) | public void print(String s)
    method info (line 1310) | static void info(String s)
    method solve_c_svc (line 1315) | private static void solve_c_svc(svm_problem prob, svm_parameter param,
    method solve_nu_svc (line 1347) | private static void solve_nu_svc(svm_problem prob, svm_parameter param,
    method solve_one_class (line 1398) | private static void solve_one_class(svm_problem prob, svm_parameter pa...
    method solve_epsilon_svr (line 1426) | private static void solve_epsilon_svr(svm_problem prob, svm_parameter ...
    method solve_nu_svr (line 1459) | private static void solve_nu_svr(svm_problem prob, svm_parameter param,
    class decision_function (line 1495) | static class decision_function
    method svm_train_one (line 1501) | static decision_function svm_train_one(
    method sigmoid_train (line 1559) | private static void sigmoid_train(int l, double[] dec_values, double[]...
    method sigmoid_predict (line 1672) | private static double sigmoid_predict(double decision_value, double A,...
    method multiclass_probability (line 1682) | private static void multiclass_probability(int k, double[][] r, double...
    method svm_binary_svc_probability (line 1742) | private static void svm_binary_svc_probability(svm_problem prob, svm_p...
    method svm_svr_probability (line 1823) | private static double svm_svr_probability(svm_problem prob, svm_parame...
    method svm_group_classes (line 1854) | private static void svm_group_classes(svm_problem prob, int[] nr_class...
    method svm_train (line 1935) | public static svm_model svm_train(svm_problem prob, svm_parameter param)
    method svm_cross_validation (line 2172) | public static void svm_cross_validation(svm_problem prob, svm_paramete...
    method svm_get_svm_type (line 2282) | public static int svm_get_svm_type(svm_model model)
    method svm_get_nr_class (line 2287) | public static int svm_get_nr_class(svm_model model)
    method svm_get_labels (line 2292) | public static void svm_get_labels(svm_model model, int[] label)
    method svm_get_sv_indices (line 2299) | public static void svm_get_sv_indices(svm_model model, int[] indices)
    method svm_get_nr_sv (line 2306) | public static int svm_get_nr_sv(svm_model model)
    method svm_get_svr_probability (line 2311) | public static double svm_get_svr_probability(svm_model model)
    method svm_predict_values (line 2323) | public static double svm_predict_values(svm_model model, svm_node[] x,...
    method svm_predict (line 2396) | public static double svm_predict(svm_model model, svm_node[] x)
    method svm_predict_probability (line 2410) | public static double svm_predict_probability(svm_model model, svm_node...
    method svm_save_model (line 2453) | public static void svm_save_model(String model_file_name, svm_model mo...
    method atof (line 2538) | private static double atof(String s)
    method atoi (line 2543) | private static int atoi(String s)
    method read_model_header (line 2548) | private static boolean read_model_header(BufferedReader fp, svm_model ...
    method svm_load_model (line 2661) | public static svm_model svm_load_model(String model_file_name) throws ...
    method svm_load_model (line 2666) | public static svm_model svm_load_model(BufferedReader fp) throws IOExc...
    method svm_check_parameter (line 2711) | public static String svm_check_parameter(svm_problem prob, svm_paramet...
    method svm_check_probability_model (line 2831) | public static int svm_check_probability_model(svm_model model)
    method svm_set_print_string_function (line 2842) | public static void svm_set_print_string_function(svm_print_interface p...

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm_model.java
  class svm_model (line 5) | public class svm_model implements java.io.Serializable

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm_node.java
  class svm_node (line 2) | public class svm_node implements java.io.Serializable

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm_parameter.java
  class svm_parameter (line 2) | public class svm_parameter implements Cloneable,java.io.Serializable
    method clone (line 36) | public Object clone()

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm_print_interface.java
  type svm_print_interface (line 2) | public interface svm_print_interface
    method print (line 4) | public void print(String s);

FILE: code/Model/seq-kernel/libsvm/java/libsvm/svm_problem.java
  class svm_problem (line 2) | public class svm_problem implements java.io.Serializable

FILE: code/Model/seq-kernel/libsvm/java/svm_predict.java
  class svm_predict (line 5) | class svm_predict {
    method print (line 8) | public void print(String s) {}
    method print (line 13) | public void print(String s)
    method info (line 21) | static void info(String s)
    method atof (line 26) | private static double atof(String s)
    method atoi (line 31) | private static int atoi(String s)
    method predict (line 36) | private static void predict(BufferedReader input, DataOutputStream out...
    method exit_with_help (line 121) | private static void exit_with_help()
    method main (line 130) | public static void main(String argv[]) throws IOException

FILE: code/Model/seq-kernel/libsvm/java/svm_scale.java
  class svm_scale (line 6) | class svm_scale
    method exit_with_help (line 22) | private static void exit_with_help()
    method rewind (line 36) | private BufferedReader rewind(BufferedReader fp, String filename) thro...
    method output_target (line 42) | private void output_target(double value)
    method output (line 58) | private void output(int index, double value)
    method readline (line 80) | private String readline(BufferedReader fp) throws IOException
    method run (line 86) | private void run(String []argv) throws IOException
    method main (line 345) | public static void main(String argv[]) throws IOException

FILE: code/Model/seq-kernel/libsvm/java/svm_toy.java
  class svm_toy (line 8) | public class svm_toy extends Applet {
    class point (line 32) | class point {
      method point (line 33) | point(double x, double y, byte value)
    method init (line 46) | public void init()
    method draw_point (line 114) | void draw_point(point p)
    method clear_all (line 125) | void clear_all()
    method draw_all_points (line 136) | void draw_all_points()
    method button_change_clicked (line 143) | void button_change_clicked()
    method atof (line 149) | private static double atof(String s)
    method atoi (line 154) | private static int atoi(String s)
    method button_run_clicked (line 159) | void button_run_clicked(String args)
    method button_clear_clicked (line 366) | void button_clear_clicked()
    method button_save_clicked (line 371) | void button_save_clicked(String args)
    method button_load_clicked (line 409) | void button_load_clicked()
    method processMouseEvent (line 445) | protected void processMouseEvent(MouseEvent e)
    method paint (line 458) | public void paint(Graphics g)
    method getPreferredSize (line 470) | public Dimension getPreferredSize() { return new Dimension(XLEN,YLEN+5...
    method setSize (line 472) | public void setSize(Dimension d) { setSize(d.width,d.height); }
    method setSize (line 473) | public void setSize(int w,int h) {
    method main (line 480) | public static void main(String[] argv)
  class AppletFrame (line 486) | class AppletFrame extends Frame {
    method AppletFrame (line 487) | AppletFrame(String title, Applet applet, int width, int height)

FILE: code/Model/seq-kernel/libsvm/java/svm_train.java
  class svm_train (line 5) | class svm_train {
    method print (line 17) | public void print(String s) {}
    method exit_with_help (line 20) | private static void exit_with_help()
    method do_cross_validation (line 54) | private void do_cross_validation()
    method run (line 92) | private void run(String argv[]) throws IOException
    method main (line 115) | public static void main(String argv[]) throws IOException
    method atof (line 121) | private static double atof(String s)
    method atoi (line 132) | private static int atoi(String s)
    method parse_command_line (line 137) | private void parse_command_line(String argv[])
    method read_problem (line 262) | private void read_problem() throws IOException

FILE: code/Model/seq-kernel/libsvm/matlab/libsvmread.c
  type mwIndex (line 11) | typedef int mwIndex;
  function exit_with_help (line 21) | void exit_with_help()
  function fake_answer (line 28) | static void fake_answer(int nlhs, mxArray *plhs[])
  function read_problem (line 57) | void read_problem(const char *filename, int nlhs, mxArray *plhs[])
  function mexFunction (line 188) | void mexFunction( int nlhs, mxArray *plhs[],

FILE: code/Model/seq-kernel/libsvm/matlab/libsvmwrite.c
  type mwIndex (line 8) | typedef int mwIndex;
  function exit_with_help (line 12) | void exit_with_help()
  function fake_answer (line 19) | static void fake_answer(int nlhs, mxArray *plhs[])
  function libsvmwrite (line 26) | void libsvmwrite(const char *filename, const mxArray *label_vec, const m...
  function mexFunction (line 84) | void mexFunction( int nlhs, mxArray *plhs[],

FILE: code/Model/seq-kernel/libsvm/matlab/svm_model_matlab.c
  type mwIndex (line 9) | typedef int mwIndex;
  type svm_model (line 31) | struct svm_model
  type svm_model (line 211) | struct svm_model
  type svm_node (line 216) | struct svm_node
  type svm_model (line 217) | struct svm_model
  type svm_node (line 351) | struct svm_node
  type svm_node (line 351) | struct svm_node
  type svm_node (line 352) | struct svm_node
  type svm_node (line 352) | struct svm_node

FILE: code/Model/seq-kernel/libsvm/matlab/svm_model_matlab.h
  type svm_model (line 1) | struct svm_model
  type svm_model (line 2) | struct svm_model

FILE: code/Model/seq-kernel/libsvm/matlab/svmpredict.c
  type mwIndex (line 11) | typedef int mwIndex;
  function print_null (line 17) | int print_null(const char *s,...) {}
  function read_sparse_instance (line 20) | void read_sparse_instance(const mxArray *prhs, int index, struct svm_nod...
  function fake_answer (line 42) | static void fake_answer(int nlhs, mxArray *plhs[])
  function predict (line 49) | void predict(int nlhs, mxArray *plhs[], const mxArray *prhs[], struct sv...
  function exit_with_help (line 255) | void exit_with_help()
  function mexFunction (line 272) | void mexFunction( int nlhs, mxArray *plhs[],

FILE: code/Model/seq-kernel/libsvm/matlab/svmtrain.c
  type mwIndex (line 12) | typedef int mwIndex;
  function print_null (line 19) | void print_null(const char *s) {}
  function print_string_matlab (line 20) | void print_string_matlab(const char *s) {mexPrintf(s);}
  function exit_with_help (line 22) | void exit_with_help()
  type svm_parameter (line 56) | struct svm_parameter
  type svm_problem (line 57) | struct svm_problem
  type svm_model (line 58) | struct svm_model
  type svm_node (line 59) | struct svm_node
  function do_cross_validation (line 64) | double do_cross_validation()
  function parse_command_line (line 108) | int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file...
  function read_problem_dense (line 222) | int read_problem_dense(const mxArray *label_vec, const mxArray *instance...
  function read_problem_sparse (line 302) | int read_problem_sparse(const mxArray *label_vec, const mxArray *instanc...
  function fake_answer (line 375) | static void fake_answer(int nlhs, mxArray *plhs[])
  function mexFunction (line 384) | void mexFunction( int nlhs, mxArray *plhs[],

FILE: code/Model/seq-kernel/libsvm/python/svm.py
  function print_null (line 45) | def print_null(s):
  function genFields (line 48) | def genFields(names, types):
  function fillprototype (line 51) | def fillprototype(f, restype, argtypes):
  class svm_node (line 55) | class svm_node(Structure):
    method __str__ (line 60) | def __str__(self):
  function gen_svm_nodearray (line 63) | def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
  class svm_problem (line 90) | class svm_problem(Structure):
    method __init__ (line 95) | def __init__(self, y, x, isKernel=None):
  class svm_parameter (line 114) | class svm_parameter(Structure):
    method __init__ (line 123) | def __init__(self, options = None):
    method __str__ (line 128) | def __str__(self):
    method set_to_default_values (line 138) | def set_to_default_values(self):
    method parse_options (line 158) | def parse_options(self, options):
  class svm_model (line 232) | class svm_model(Structure):
    method __init__ (line 241) | def __init__(self):
    method __del__ (line 244) | def __del__(self):
    method get_svm_type (line 249) | def get_svm_type(self):
    method get_nr_class (line 252) | def get_nr_class(self):
    method get_svr_probability (line 255) | def get_svr_probability(self):
    method get_labels (line 258) | def get_labels(self):
    method get_sv_indices (line 264) | def get_sv_indices(self):
    method get_nr_sv (line 270) | def get_nr_sv(self):
    method is_probability_model (line 273) | def is_probability_model(self):
    method get_sv_coef (line 276) | def get_sv_coef(self):
    method get_SV (line 280) | def get_SV(self):
  function toPyModel (line 295) | def toPyModel(model_ptr):

FILE: code/Model/seq-kernel/libsvm/python/svmutil.py
  function svm_read_problem (line 14) | def svm_read_problem(data_file_name):
  function svm_load_model (line 36) | def svm_load_model(model_file_name):
  function svm_save_model (line 49) | def svm_save_model(model_file_name, model):
  function evaluations (line 57) | def evaluations(ty, pv):
  function svm_train (line 86) | def svm_train(arg1, arg2=None, arg3=None):
  function svm_predict (line 173) | def svm_predict(y, x, m, options=""):

FILE: code/Model/seq-kernel/libsvm/svm-predict.c
  function print_null (line 8) | int print_null(const char *s,...) {return 0;}
  type svm_node (line 12) | struct svm_node
  type svm_model (line 15) | struct svm_model
  function exit_input_error (line 39) | void exit_input_error(int line_num)
  function predict (line 45) | void predict(FILE *input, FILE *output)
  function exit_with_help (line 159) | void exit_with_help()
  function main (line 170) | int main(int argc, char **argv)

FILE: code/Model/seq-kernel/libsvm/svm-scale.c
  function exit_with_help (line 7) | void exit_with_help()
  function main (line 42) | int main(int argc,char **argv)
  function output_target (line 350) | void output_target(double value)
  function output (line 364) | void output(int index, double value)
  function clean_up (line 386) | int clean_up(FILE *fp_restore, FILE *fp, const char* msg)

FILE: code/Model/seq-kernel/libsvm/svm-toy/gtk/callbacks.cpp
  function svm_toy_initialize (line 42) | void svm_toy_initialize()
  function redraw_area (line 61) | void redraw_area(GtkWidget* widget, int x, int y, int w, int h)
  function draw_point (line 69) | void draw_point(const point& p)
  function draw_all_points (line 76) | void draw_all_points()
  function clear_all (line 82) | void clear_all()
  function on_button_change_clicked (line 90) | void
  function on_button_run_clicked (line 98) | void
  function on_button_clear_clicked (line 302) | void
  function on_window1_destroy (line 309) | void
  function gboolean (line 316) | gboolean
  function gboolean (line 327) | gboolean
  function show_fileselection (line 341) | void show_fileselection()
  function on_button_save_clicked (line 357) | void
  function on_button_load_clicked (line 366) | void
  function on_filesel_ok_clicked (line 374) | void
  function on_fileselection_destroy (line 437) | void
  function on_filesel_cancel_clicked (line 443) | void

FILE: code/Model/seq-kernel/libsvm/svm-toy/gtk/interface.c
  function GtkWidget (line 16) | GtkWidget*
  function GtkWidget (line 130) | GtkWidget*

FILE: code/Model/seq-kernel/libsvm/svm-toy/gtk/main.c
  function main (line 10) | int main (int argc, char *argv[])

FILE: code/Model/seq-kernel/libsvm/svm-toy/qt/svm-toy.cpp
  class SvmToyWindow (line 25) | class SvmToyWindow : public QWidget
    type point (line 49) | struct point {
    method QPixmap (line 55) | const QPixmap& choose_icon(int v)
    method clear_all (line 61) | void clear_all()
    method draw_point (line 67) | void draw_point(const point& p)
    method draw_all_points (line 73) | void draw_all_points()
    method button_change_icon_clicked (line 79) | void button_change_icon_clicked()
    method button_run_clicked (line 85) | void button_run_clicked()
    method button_clear_clicked (line 280) | void button_clear_clicked()
    method button_save_clicked (line 284) | void button_save_clicked()
    method button_load_clicked (line 313) | void button_load_clicked()
  function main (line 428) | int main( int argc, char* argv[] )

FILE: code/Model/seq-kernel/libsvm/svm-toy/windows/svm-toy.cpp
  type point (line 48) | struct point {
  function WinMain (line 58) | int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
  function getfilename (line 128) | int getfilename( HWND hWnd , char *filename, int len, int save)
  function clear_all (line 143) | void clear_all()
  function HBRUSH (line 150) | HBRUSH choose_brush(int v)
  function draw_point (line 157) | void draw_point(const point & p)
  function draw_all_points (line 168) | void draw_all_points()
  function button_run_clicked (line 174) | void button_run_clicked()
  function LRESULT (line 372) | LRESULT CALLBACK WndProc(HWND hwnd, UINT iMsg, WPARAM wParam, LPARAM lPa...

FILE: code/Model/seq-kernel/libsvm/svm-train.c
  function print_null (line 9) | void print_null(const char *s) {}
  function exit_with_help (line 11) | void exit_with_help()
  function exit_input_error (line 45) | void exit_input_error(int line_num)
  type svm_parameter (line 55) | struct svm_parameter
  type svm_problem (line 56) | struct svm_problem
  type svm_model (line 57) | struct svm_model
  type svm_node (line 58) | struct svm_node
  function main (line 83) | int main(int argc, char **argv)
  function do_cross_validation (line 122) | void do_cross_validation()
  function parse_command_line (line 161) | void parse_command_line(int argc, char **argv, char *input_file_name, ch...
  function read_problem (line 278) | void read_problem(const char *filename)

FILE: code/Model/seq-kernel/libsvm/svm.cpp
  function T (line 15) | static inline T min(T x,T y) { return (x<y)?x:y; }
  function T (line 18) | static inline T max(T x,T y) { return (x>y)?x:y; }
  function swap (line 20) | static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
  function clone (line 21) | static inline void clone(T*& dst, S* src, int n)
  function powi (line 26) | static inline double powi(double base, int times)
  function print_string_stdout (line 41) | static void print_string_stdout(const char *s)
  function info (line 48) | static void info(const char *fmt,...)
  function info (line 58) | static void info(const char *fmt,...) {}
  class Cache (line 67) | class Cache
    type head_t (line 81) | struct head_t
  class QMatrix (line 194) | class QMatrix {
  class Kernel (line 202) | class Kernel: public QMatrix {
    method swap_index (line 211) | virtual void swap_index(int i, int j) const	// no so const...
    method kernel_linear (line 231) | double kernel_linear(int i, int j) const
    method kernel_poly (line 235) | double kernel_poly(int i, int j) const
    method kernel_rbf (line 239) | double kernel_rbf(int i, int j) const
    method kernel_sigmoid (line 243) | double kernel_sigmoid(int i, int j) const
    method kernel_precomputed (line 247) | double kernel_precomputed(int i, int j) const
  class Solver (line 393) | class Solver {
    method Solver (line 395) | Solver() {}
    type SolutionInfo (line 398) | struct SolutionInfo {
    method get_C (line 426) | double get_C(int i)
    method update_alpha_status (line 430) | void update_alpha_status(int i)
    method is_upper_bound (line 438) | bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
    method is_lower_bound (line 439) | bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
    method is_free (line 440) | bool is_free(int i) { return alpha_status[i] == FREE; }
  class Solver_NU (line 1009) | class Solver_NU: public Solver
    method Solver_NU (line 1012) | Solver_NU() {}
    method Solve (line 1013) | void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
  class SVC_Q (line 1266) | class SVC_Q: public Kernel
    method SVC_Q (line 1269) | SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar...
    method Qfloat (line 1279) | Qfloat *get_Q(int i, int len) const
    method swap_index (line 1296) | void swap_index(int i, int j) const
  class ONE_CLASS_Q (line 1316) | class ONE_CLASS_Q: public Kernel
    method ONE_CLASS_Q (line 1319) | ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
    method Qfloat (line 1328) | Qfloat *get_Q(int i, int len) const
    method swap_index (line 1345) | void swap_index(int i, int j) const
  class SVR_Q (line 1362) | class SVR_Q: public Kernel
    method SVR_Q (line 1365) | SVR_Q(const svm_problem& prob, const svm_parameter& param)
    method swap_index (line 1387) | void swap_index(int i, int j) const
    method Qfloat (line 1394) | Qfloat *get_Q(int i, int len) const
  function solve_c_svc (line 1440) | static void solve_c_svc(
  function solve_nu_svc (line 1475) | static void solve_nu_svc(
  function solve_one_class (line 1530) | static void solve_one_class(
  function solve_epsilon_svr (line 1562) | static void solve_epsilon_svr(
  function solve_nu_svr (line 1600) | static void solve_nu_svr(
  type decision_function (line 1641) | struct decision_function
  function decision_function (line 1647) | static decision_function svm_train_one(
  function sigmoid_train (line 1705) | static void sigmoid_train(
  function sigmoid_predict (line 1818) | static double sigmoid_predict(double decision_value, double A, double B)
  function multiclass_probability (line 1829) | static void multiclass_probability(int k, double **r, double *p)
  function svm_binary_svc_probability (line 1893) | static void svm_binary_svc_probability(
  function svm_svr_probability (line 1980) | static double svm_svr_probability(
  function svm_group_classes (line 2014) | static void svm_group_classes(const svm_problem *prob, int *nr_class_ret...
  function svm_model (line 2092) | svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
  function svm_cross_validation (line 2339) | void svm_cross_validation(const svm_problem *prob, const svm_parameter *...
  function svm_get_svm_type (line 2460) | int svm_get_svm_type(const svm_model *model)
  function svm_get_nr_class (line 2465) | int svm_get_nr_class(const svm_model *model)
  function svm_get_labels (line 2470) | void svm_get_labels(const svm_model *model, int* label)
  function svm_get_sv_indices (line 2477) | void svm_get_sv_indices(const svm_model *model, int* indices)
  function svm_get_nr_sv (line 2484) | int svm_get_nr_sv(const svm_model *model)
  function svm_get_svr_probability (line 2489) | double svm_get_svr_probability(const svm_model *model)
  function svm_predict_values (line 2501) | double svm_predict_values(const svm_model *model, const svm_node *x, dou...
  function svm_predict (line 2577) | double svm_predict(const svm_model *model, const svm_node *x)
  function svm_predict_probability (line 2592) | double svm_predict_probability(
  function svm_save_model (line 2641) | int svm_save_model(const char *model_file_name, const svm_model *model)
  function read_model_header (line 2767) | bool read_model_header(FILE *fp, svm_model* model)
  function svm_model (line 2876) | svm_model *svm_load_model(const char *model_file_name)
  function svm_free_model_content (line 2984) | void svm_free_model_content(svm_model* model_ptr)
  function svm_free_and_destroy_model (line 3019) | void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
  function svm_destroy_param (line 3029) | void svm_destroy_param(svm_parameter* param)
  function svm_check_probability_model (line 3156) | int svm_check_probability_model(const svm_model *model)
  function svm_set_print_string_function (line 3164) | void svm_set_print_string_function(void (*print_func)(const char *))

FILE: code/Model/seq-kernel/libsvm/svm.h
  type svm_node (line 12) | struct svm_node
  type svm_problem (line 18) | struct svm_problem
  type svm_parameter (line 28) | struct svm_parameter
  type svm_model (line 52) | struct svm_model
  type svm_model (line 74) | struct svm_model
  type svm_problem (line 74) | struct svm_problem
  type svm_parameter (line 74) | struct svm_parameter
  type svm_problem (line 75) | struct svm_problem
  type svm_parameter (line 75) | struct svm_parameter
  type svm_model (line 77) | struct svm_model
  type svm_model (line 78) | struct svm_model
  type svm_model (line 80) | struct svm_model
  type svm_model (line 81) | struct svm_model
  type svm_model (line 82) | struct svm_model
  type svm_model (line 83) | struct svm_model
  type svm_model (line 84) | struct svm_model
  type svm_model (line 85) | struct svm_model
  type svm_model (line 87) | struct svm_model
  type svm_node (line 87) | struct svm_node
  type svm_model (line 88) | struct svm_model
  type svm_node (line 88) | struct svm_node
  type svm_model (line 89) | struct svm_model
  type svm_node (line 89) | struct svm_node
  type svm_model (line 91) | struct svm_model
  type svm_model (line 92) | struct svm_model
  type svm_parameter (line 93) | struct svm_parameter
  type svm_problem (line 95) | struct svm_problem
  type svm_parameter (line 95) | struct svm_parameter
  type svm_model (line 96) | struct svm_model

FILE: code/Model/seq-kernel/libsvm/tools/checkdata.py
  function err (line 18) | def err(line_no, msg):
  function my_float (line 22) | def my_float(x):
  function main (line 28) | def main():

FILE: code/Model/seq-kernel/libsvm/tools/grid.py
  class GridOption (line 17) | class GridOption:
    method __init__ (line 18) | def __init__(self, dataset_pathname, options):
    method parse_options (line 40) | def parse_options(self, options):
  function redraw (line 105) | def redraw(db,best_param,gnuplot,options,tofile=False):
  function calculate_jobs (line 159) | def calculate_jobs(options):
  class WorkerStopToken (line 241) | class WorkerStopToken:  # used to notify the worker to stop or if a work...
  class Worker (line 244) | class Worker(Thread):
    method __init__ (line 245) | def __init__(self,name,job_queue,result_queue,options):
    method run (line 252) | def run(self):
    method get_cmd (line 278) | def get_cmd(self,c,g):
  class LocalWorker (line 289) | class LocalWorker(Worker):
    method run_one (line 290) | def run_one(self,c,g):
  class SSHWorker (line 297) | class SSHWorker(Worker):
    method __init__ (line 298) | def __init__(self,name,job_queue,result_queue,host,options):
    method run_one (line 302) | def run_one(self,c,g):
  class TelnetWorker (line 310) | class TelnetWorker(Worker):
    method __init__ (line 311) | def __init__(self,name,job_queue,result_queue,host,username,password,o...
    method run (line 316) | def run(self):
    method run_one (line 331) | def run_one(self,c,g):
  function find_parameters (line 339) | def find_parameters(dataset_pathname, options=''):
  function exit_with_help (line 465) | def exit_with_help():

FILE: code/Model/seq-kernel/libsvm/tools/subset.py
  function exit_with_help (line 9) | def exit_with_help(argv):
  function process_options (line 25) | def process_options(argv):
  function random_selection (line 56) | def random_selection(dataset, subset_size):
  function stratified_selection (line 60) | def stratified_selection(dataset, subset_size):
  function main (line 89) | def main(argv=sys.argv):

FILE: code/Model/seq-kernel/ssk_core/libsvm/CustomKernel.java
  class CustomKernel (line 3) | public abstract class CustomKernel
    method kernel (line 5) | abstract public double kernel(svm_node[] x, svm_node[] y);
    method new_svm_node (line 6) | abstract public svm_node new_svm_node();

FILE: code/Model/seq-kernel/ssk_core/libsvm/svm.java
  class Cache (line 11) | class Cache {
    class head_t (line 14) | private final class head_t
    method Cache (line 23) | Cache(int l_, int size_)
    method lru_delete (line 35) | private void lru_delete(head_t h)
    method lru_insert (line 42) | private void lru_insert(head_t h)
    method get_data (line 55) | int get_data(int index, float[][] data, int len)
    method swap_index (line 86) | void swap_index(int i, int j)
  class QMatrix (line 124) | abstract class QMatrix {
    method get_Q (line 125) | abstract float[] get_Q(int column, int len);
    method swap_index (line 126) | abstract void swap_index(int i, int j);
  class Kernel (line 130) | abstract class Kernel extends QMatrix {
    method get_Q (line 140) | abstract float[] get_Q(int column, int len);
    method swap_index (line 142) | void swap_index(int i, int j)
    method tanh (line 148) | private static double tanh(double x)
    method kernel_function (line 154) | double kernel_function(int i, int j)
    method Kernel (line 173) | Kernel(int l, svm_node[][] x_, svm_parameter param)
    method dot (line 191) | static double dot(svm_node[] x, svm_node[] y)
    method k_function (line 213) | static double k_function(svm_node[] x, svm_node[] y,
  class Solver (line 292) | class Solver {
    method get_C (line 312) | double get_C(int i)
    method update_alpha_status (line 316) | void update_alpha_status(int i)
    method is_upper_bound (line 324) | boolean is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
    method is_lower_bound (line 325) | boolean is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
    method is_free (line 326) | boolean is_free(int i) {  return alpha_status[i] == FREE; }
    class SolutionInfo (line 330) | static class SolutionInfo {
    method swap_index (line 338) | void swap_index(int i, int j)
    method reconstruct_gradient (line 350) | void reconstruct_gradient()
    method Solve (line 370) | void Solve(int l, QMatrix Q, double[] b_, byte[] y_,
    method select_working_set (line 618) | int select_working_set(int[] working_set)
    method do_shrinking (line 680) | void do_shrinking()
    method calculate_rho (line 750) | double calculate_rho()
  class Solver_NU (line 795) | final class Solver_NU extends Solver
    method Solve (line 799) | void Solve(int l, QMatrix Q, double[] b, byte[] y,
    method select_working_set (line 807) | int select_working_set(int[] working_set)
    method do_shrinking (line 883) | void do_shrinking()
    method calculate_rho (line 974) | double calculate_rho()
  class SVC_Q (line 1028) | class SVC_Q extends Kernel
    method SVC_Q (line 1033) | SVC_Q(svm_problem prob, svm_parameter param, byte[] y_)
    method get_Q (line 1040) | float[] get_Q(int i, int len)
    method swap_index (line 1052) | void swap_index(int i, int j)
  class ONE_CLASS_Q (line 1060) | class ONE_CLASS_Q extends Kernel
    method ONE_CLASS_Q (line 1064) | ONE_CLASS_Q(svm_problem prob, svm_parameter param)
    method get_Q (line 1070) | float[] get_Q(int i, int len)
    method swap_index (line 1082) | void swap_index(int i, int j)
  class SVR_Q (line 1089) | class SVR_Q extends Kernel
    method SVR_Q (line 1098) | SVR_Q(svm_problem prob, svm_parameter param)
    method swap_index (line 1116) | void swap_index(int i, int j)
    method get_Q (line 1122) | float[] get_Q(int i, int len)
  class svm (line 1142) | public class svm {
    method setCustomKernel (line 1148) | public static void setCustomKernel(CustomKernel ck)
    method solve_c_svc (line 1156) | private static void solve_c_svc(svm_problem prob, svm_parameter param,
    method solve_nu_svc (line 1188) | private static void solve_nu_svc(svm_problem prob, svm_parameter param,
    method solve_one_class (line 1239) | private static void solve_one_class(svm_problem prob, svm_parameter pa...
    method solve_epsilon_svr (line 1266) | private static void solve_epsilon_svr(svm_problem prob, svm_parameter ...
    method solve_nu_svr (line 1299) | private static void solve_nu_svr(svm_problem prob, svm_parameter param,
    class decision_function (line 1335) | static class decision_function
    method svm_train_one (line 1341) | static decision_function svm_train_one(
    method sigmoid_train (line 1399) | private static void sigmoid_train(int l, double[] dec_values, double[]...
    method sigmoid_predict (line 1512) | private static double sigmoid_predict(double decision_value, double A,...
    method multiclass_probability (line 1522) | private static void multiclass_probability(int k, double[][] r, double...
    method svm_binary_svc_probability (line 1582) | private static void svm_binary_svc_probability(svm_problem prob, svm_p...
    method svm_svr_probability (line 1663) | private static double svm_svr_probability(svm_problem prob, svm_parame...
    method svm_group_classes (line 1694) | private static void svm_group_classes(svm_problem prob, int[] nr_class...
    method svm_train (line 1757) | public static svm_model svm_train(svm_problem prob, svm_parameter param)
    method svm_cross_validation (line 1991) | public static void svm_cross_validation(svm_problem prob, svm_paramete...
    method svm_get_svm_type (line 2100) | public static int svm_get_svm_type(svm_model model)
    method svm_get_nr_class (line 2105) | public static int svm_get_nr_class(svm_model model)
    method svm_get_labels (line 2110) | public static void svm_get_labels(svm_model model, int[] label)
    method svm_get_svr_probability (line 2117) | public static double svm_get_svr_probability(svm_model model)
    method svm_predict_values (line 2129) | public static void svm_predict_values(svm_model model, svm_node[] x, d...
    method svm_predict (line 2181) | public static double svm_predict(svm_model model, svm_node[] x)
    method svm_predict_probability (line 2223) | public static double svm_predict_probability(svm_model model, svm_node...
    method svm_save_model (line 2266) | public static void svm_save_model(String model_file_name, svm_model mo...
    method atof (line 2347) | public static double atof(String s)
    method atoi (line 2352) | public static int atoi(String s)
    method svm_load_model (line 2357) | public static svm_model svm_load_model(String model_file_name)
    method svm_check_parameter (line 2514) | public static String svm_check_parameter(svm_problem prob, svm_paramet...
    method svm_check_probability_model (line 2628) | public static int svm_check_probability_model(svm_model model)

FILE: code/Model/seq-kernel/ssk_core/libsvm/svm_model.java
  class svm_model (line 5) | public class svm_model implements java.io.Serializable

FILE: code/Model/seq-kernel/ssk_core/libsvm/svm_node.java
  class svm_node (line 4) | public class svm_node implements java.io.Serializable
    method read (line 9) | public void read(String line, double[][] coef, int m, int index)
    method write (line 13) | public void write(DataOutputStream fp) throws IOException

FILE: code/Model/seq-kernel/ssk_core/libsvm/svm_parameter.java
  class svm_parameter (line 2) | public class svm_parameter implements Cloneable,java.io.Serializable
    method clone (line 36) | public Object clone()

FILE: code/Model/seq-kernel/ssk_core/libsvm/svm_problem.java
  class svm_problem (line 2) | public class svm_problem implements java.io.Serializable

FILE: code/Model/seq-kernel/ssk_core/ssk/FeatureDictionary.java
  class FeatureDictionary (line 20) | public class FeatureDictionary {
    method FeatureDictionary (line 29) | public FeatureDictionary()
    method getAddFeature (line 38) | public String getAddFeature(int nType, String strFeature)

FILE: code/Model/seq-kernel/ssk_core/ssk/InstanceExample.java
  class InstanceExample (line 12) | public class InstanceExample implements java.io.Serializable {
    method InstanceExample (line 18) | public InstanceExample(String text, FeatureDictionary fd)
    method setLabel (line 48) | public void setLabel(int label)
    method getLabel (line 53) | public int getLabel()
    method toString (line 59) | public String toString()
    method write (line 74) | public void write(DataOutputStream fp) throws IOException

FILE: code/Model/seq-kernel/ssk_core/ssk/SubsequenceKernel.java
  class SubsequenceKernel (line 23) | public class SubsequenceKernel extends CustomKernel
    method SubsequenceKernel (line 41) | public SubsequenceKernel(int maxlen, double lambda,
    method SubsequenceKernel (line 54) | public SubsequenceKernel()
    method kernel (line 74) | public double kernel(InstanceExample ie1, InstanceExample ie2)
    method kernel (line 103) | public double kernel(svm_node[] x1, svm_node[] x2)
    method new_svm_node (line 112) | public svm_node new_svm_node()
    method selfKernel (line 118) | public double selfKernel(String[][] s)
    method singleKernel (line 135) | public double singleKernel(String[][] s1, String[][] s2)
    method stringKernel (line 163) | protected double[] stringKernel(String[][] s, String[][] t,
    method common (line 209) | protected int common(String[] s, String[] t)
    method main (line 221) | public static void main (String[] args)

FILE: code/Model/seq-kernel/ssk_core/ssk/intex_node.java
  class intex_node (line 14) | public class intex_node extends svm_node {
    method read (line 24) | public void read(String line, double[][] coef, int m, int index)
    method write (line 41) | public void write(DataOutputStream fp) throws IOException

FILE: code/Model/warp/warp.cpp
  function main (line 49) | int main(int argc, const char * argv[]) {
  function print_matrix (line 146) | void print_matrix(char * filename, double** array, int nrows, int ncolum...
  function load_data (line 163) | void load_data(char *filename, int** data, int* count){
  function print (line 189) | void print(char* message, double** array, int nrows, int ncolumns) {
  function count_lines (line 199) | int count_lines(char * filename) {
  function free_matrix_double (line 239) | void free_matrix_double(double** array, int nrows) {
  function compare (line 245) | int compare (const void * a, const void * b){
  function gradient (line 276) | double gradient(double ** A, double ** B, int* features, int* positive_t...
  function rank (line 360) | double rank(int k){
  function dot (line 369) | double dot(double*Ax , double* Bi, int len){

Copy disabled (too large) Download .json

Condensed preview — 292 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (61,808K chars).

[
  {
    "path": ".gitignore",
    "chars": 114,
    "preview": "**/eigen-3.2.5/\n*.pyc\n*.DS_Store\n*.o\n*.zip\nDataProcessor/stanford-corenlp-python/\nIntermediate/*\nResults/*\nData/*\n"
  },
  {
    "path": "LICENSE.txt",
    "chars": 1069,
    "preview": "MIT License\n\nCopyright (c) [year] [fullname]\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
  },
  {
    "path": "README.md",
    "chars": 9117,
    "preview": "# USC Distantly-supervised Relation Extraction System\nThis repository puts together recent models and data sets for **se"
  },
  {
    "path": "code/Classifier/CLPL.py",
    "chars": 5506,
    "preview": "__author__ = 'wenqihe'\n\n\nimport sys\nimport random\n\n\nclass CLPL:\n\n    def __init__(self, feature_size, label_size, type_h"
  },
  {
    "path": "code/Classifier/Classifier.py",
    "chars": 7021,
    "preview": "__author__ = 'xiang'\nimport sys\nreload(sys)\nsys.setdefaultencoding('utf8')\nimport time\nimport json\nfrom DataIO import *\n"
  },
  {
    "path": "code/Classifier/Classifier_em.py",
    "chars": 3752,
    "preview": "__author__ = 'xiang'\nimport sys\nreload(sys)\nsys.setdefaultencoding('utf8')\n\nimport json\nfrom DataIO import *\nfrom Percep"
  },
  {
    "path": "code/Classifier/DataIO.py",
    "chars": 3283,
    "preview": "__author__ = 'wenqihe'\nfrom collections import defaultdict\n\ndef load_as_list(filename):\n    \"\"\"\n    Load data as a list "
  },
  {
    "path": "code/Classifier/HierarchySVM.py",
    "chars": 3083,
    "preview": "__author__ = 'wenqihe'\n\nfrom MulticlassSVM import MulticlassSVM\n\n\nclass HierarchySVM:\n\n    def __init__(self, feature_si"
  },
  {
    "path": "code/Classifier/Logistic.py",
    "chars": 1526,
    "preview": "__author__ = 'xiang'\n\nimport sys\nfrom liblinearutil import *\n\nclass Logistic:\n\tdef __init__(self, feature_size, label_si"
  },
  {
    "path": "code/Classifier/MulticlassSVM.py",
    "chars": 3430,
    "preview": "__author__ = 'wenqihe'\n\nfrom SVM import SVM\n\n\nclass MulticlassSVM:\n\n    def __init__(self, feature_size, label_size, lam"
  },
  {
    "path": "code/Classifier/PLSVM.py",
    "chars": 4745,
    "preview": "from __future__ import division\n__author__ = 'wenqihe'\n\nimport sys\nimport random\nimport math\n\n\nclass PLSVM:\n\n    def __i"
  },
  {
    "path": "code/Classifier/Perceptron.py",
    "chars": 3132,
    "preview": "__author__ = 'wenqihe'\nimport sys\n\n\n\nclass MultilabelPerceptron:\n\n    def __init__(self, feature_size, label_size, weigh"
  },
  {
    "path": "code/Classifier/SVM.py",
    "chars": 2328,
    "preview": "from __future__ import division\n__author__ = 'wenqihe'\n\nimport random\n\nMIN_SCALING_FACTOR = 0.0000001\n\n\nclass SVM:\n    \""
  },
  {
    "path": "code/Classifier/TypeHierarchy.py",
    "chars": 2438,
    "preview": "__author__ = 'xiang'\n\nfrom collections import defaultdict\n\nclass TypeSet:\n    def __init__(self, file_name, number_of_ty"
  },
  {
    "path": "code/Classifier/liblinear.py",
    "chars": 10660,
    "preview": "#!/usr/bin/env python\n\nfrom ctypes import *\nfrom ctypes.util import find_library\nfrom os import path\nimport sys\n\n__all__"
  },
  {
    "path": "code/Classifier/liblinearutil.py",
    "chars": 8711,
    "preview": "#!/usr/bin/env python\n\nimport os, sys\nsys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path \nfrom liblinear"
  },
  {
    "path": "code/DataProcessor/Feature/__init__.py",
    "chars": 685,
    "preview": "__author__ = 'wenqihe'\n\nfrom token_feature import HeadFeature, EntityMentionTokenFeature, BetweenEntityMentionTokenFeatu"
  },
  {
    "path": "code/DataProcessor/Feature/abstract_feature.py",
    "chars": 172,
    "preview": "__author__ = 'wenqihe'\n\n\nclass AbstractFeature(object):\n    def apply(self, sentence, mention, features):\n        raise "
  },
  {
    "path": "code/DataProcessor/Feature/brown_feature.py",
    "chars": 873,
    "preview": "__author__ = 'wenqihe'\n\nfrom abstract_feature import AbstractFeature\nfrom token_feature import get_lemma\n\n\nclass BrownFe"
  },
  {
    "path": "code/DataProcessor/Feature/dependency_feature.py",
    "chars": 2158,
    "preview": "__author__ = 'wenqihe'\n\nfrom abstract_feature import AbstractFeature\nfrom token_feature import HeadFeature, get_lemma\n\n\n"
  },
  {
    "path": "code/DataProcessor/Feature/em_brown_feature.py",
    "chars": 883,
    "preview": "__author__ = 'wenqihe'\n\nfrom abstract_feature import AbstractFeature\nfrom em_token_feature import get_lemma\n\n\nclass EMBr"
  },
  {
    "path": "code/DataProcessor/Feature/em_dependency_feature.py",
    "chars": 2165,
    "preview": "__author__ = 'wenqihe'\n\nfrom abstract_feature import AbstractFeature\nfrom em_token_feature import EMHeadFeature, get_lem"
  },
  {
    "path": "code/DataProcessor/Feature/em_other_feature.py",
    "chars": 1514,
    "preview": "__author__ = 'wenqihe'\n\nimport re\nfrom abstract_feature import AbstractFeature\nfrom em_token_feature import EMHeadFeatur"
  },
  {
    "path": "code/DataProcessor/Feature/em_token_feature.py",
    "chars": 3402,
    "preview": "__author__ = 'wenqihe'\n\nimport re\nfrom nltk.stem.wordnet import WordNetLemmatizer\nfrom abstract_feature import AbstractF"
  },
  {
    "path": "code/DataProcessor/Feature/other_feature.py",
    "chars": 2133,
    "preview": "__author__ = 'wenqihe'\n\nimport re\nfrom abstract_feature import AbstractFeature\nfrom token_feature import HeadFeature\n\ncl"
  },
  {
    "path": "code/DataProcessor/Feature/token_feature.py",
    "chars": 5617,
    "preview": "__author__ = 'wenqihe'\n\nimport re\nfrom nltk.stem.wordnet import WordNetLemmatizer\nfrom abstract_feature import AbstractF"
  },
  {
    "path": "code/DataProcessor/__init__.py",
    "chars": 24,
    "preview": "__author__ = 'wenqihe'\n\n"
  },
  {
    "path": "code/DataProcessor/feature_generation.py",
    "chars": 4383,
    "preview": "__author__ = 'ZeqiuWu'\nimport sys\nimport os\nimport math\nfrom multiprocessing import Process, Lock\nfrom nlp_parse import "
  },
  {
    "path": "code/DataProcessor/liblinear_processor.py",
    "chars": 2310,
    "preview": "__author__ = 'xiang'\nimport sys\nreload(sys)\nsys.setdefaultencoding('utf8')\n\ndef load_as_list(filename):\n    \"\"\"\n    Load"
  },
  {
    "path": "code/DataProcessor/mention.py",
    "chars": 3015,
    "preview": "__author__ = 'ZeqiuWu'\n\n\nclass RelationMention(object):\n    \"\"\"\n    Wrap a relation mention. Each entity mention text of"
  },
  {
    "path": "code/DataProcessor/mention_reader.py",
    "chars": 2716,
    "preview": "__author__ = 'wenqihe'\n\nimport json\nfrom mention import RelationMention, EntityMention, Sentence\n\n\nclass MentionReader:\n"
  },
  {
    "path": "code/DataProcessor/ner_feature.py",
    "chars": 10604,
    "preview": "__author__ = 'wenqihe'\n\nfrom Feature import *\nimport sys\nfrom mention_reader import MentionReader\nreload(sys)\nsys.setdef"
  },
  {
    "path": "code/DataProcessor/nlp_parse.py",
    "chars": 8299,
    "preview": "__author__ = 'ZeqiuWu'\n\nimport ujson as json\nfrom stanza.nlp.corenlp import CoreNLPClient\nfrom tqdm import tqdm\nimport s"
  },
  {
    "path": "code/DataProcessor/pruning_heuristics.py",
    "chars": 5978,
    "preview": "__author__ = 'wenqihe'\n\nimport os\nimport operator\nimport sys\nfrom collections import defaultdict\nreload(sys)\nsys.setdefa"
  },
  {
    "path": "code/DataProcessor/statistic.py",
    "chars": 1608,
    "preview": "__author__ = 'wenqihe'\nimport json\nimport sys\nfrom collections import defaultdict\n\nreload(sys)\nsys.setdefaultencoding('u"
  },
  {
    "path": "code/Evaluation/convertPredictionToJson.py",
    "chars": 1984,
    "preview": "import sys\nimport json\n\ndata = sys.argv[1]\npredictionFile = 'data/results/'+data+'/rm/prediction_emb_retype_cosine.txt'\n"
  },
  {
    "path": "code/Evaluation/emb_prediction.py",
    "chars": 8560,
    "preview": "__author__ = 'xiang'\n\nimport sys\nreload(sys)\nsys.setdefaultencoding('utf8')\nsys.path.append('code/Classifier/')\n\nimport "
  },
  {
    "path": "code/Evaluation/emb_test.py",
    "chars": 1714,
    "preview": "# Script to predict and evaluate in a pipeline\n__author__ = 'xiang'\n\nimport sys\nfrom collections import  defaultdict\nfro"
  },
  {
    "path": "code/Evaluation/evaluation.py",
    "chars": 4716,
    "preview": "__author__ = 'xiang'\nimport sys\nfrom collections import  defaultdict\n\ndef find_none_index(file_name):\n    with open(file"
  },
  {
    "path": "code/Evaluation/tune_threshold.py",
    "chars": 3831,
    "preview": "__author__ = 'xiang'\n\nimport sys, os\nfrom collections import  defaultdict\nfrom emb_prediction import *\nfrom evaluation i"
  },
  {
    "path": "code/Model/FCM/README.md",
    "chars": 1022,
    "preview": "FCM_nips_workshop\n=================\nBasic version of FCT model for relation extraction.\nThe package has two executable f"
  },
  {
    "path": "code/Model/FCM/all.sen",
    "chars": 595491,
    "preview": "he was captured in baghdad late monday night . \nmostly they believe in those northern iraq areas brent sadler was just t"
  },
  {
    "path": "code/Model/FCM/code/BaseComponentModel.cpp",
    "chars": 358,
    "preview": "//\n//  BaseComponentModel.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All ri"
  },
  {
    "path": "code/Model/FCM/code/BaseComponentModel.h",
    "chars": 1770,
    "preview": "//\n//  BaseComponentModel.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All righ"
  },
  {
    "path": "code/Model/FCM/code/Commons.h",
    "chars": 466,
    "preview": "//\n//  Commons.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights reserved"
  },
  {
    "path": "code/Model/FCM/code/EmbeddingModel.cpp",
    "chars": 11798,
    "preview": "//\n//  EmbeddingModel.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights"
  },
  {
    "path": "code/Model/FCM/code/EmbeddingModel.h",
    "chars": 2880,
    "preview": "//\n//  EmbeddingModel.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights r"
  },
  {
    "path": "code/Model/FCM/code/FctCoarseModel.cpp",
    "chars": 3546,
    "preview": "//\n//  FctCoarseModel.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights"
  },
  {
    "path": "code/Model/FCM/code/FctCoarseModel.h",
    "chars": 1031,
    "preview": "//\n//  FctCoarseModel.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights r"
  },
  {
    "path": "code/Model/FCM/code/FctConvolutionModel.cpp",
    "chars": 6156,
    "preview": "//\n//  FctConvolutionModel.cpp\n//  fct_re_git\n//\n//  Created by gflfof gflfof on 14-9-7.\n//  Copyright (c) 2014年 hit. Al"
  },
  {
    "path": "code/Model/FCM/code/FctConvolutionModel.h",
    "chars": 1499,
    "preview": "//\n//  FctConvolutionModel.h\n//  fct_re_git\n//\n//  Created by gflfof gflfof on 14-9-7.\n//  Copyright (c) 2014年 hit. All "
  },
  {
    "path": "code/Model/FCM/code/FctDeepModel.cpp",
    "chars": 9124,
    "preview": "//\n//  FctDeepModel.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights r"
  },
  {
    "path": "code/Model/FCM/code/FctDeepModel.h",
    "chars": 1683,
    "preview": "//\n//  FctDeepModel.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights res"
  },
  {
    "path": "code/Model/FCM/code/FeatureModel.cpp",
    "chars": 2093,
    "preview": "//\n//  FeatureModel.cpp\n//  fct_re_git\n//\n//  Created by gflfof gflfof on 14-10-14.\n//  Copyright (c) 2014年 hit. All rig"
  },
  {
    "path": "code/Model/FCM/code/FeatureModel.h",
    "chars": 1125,
    "preview": "//\n//  FeatureModel.h\n//  fct_re_git\n//\n//  Created by gflfof gflfof on 14-10-14.\n//  Copyright (c) 2014年 hit. All right"
  },
  {
    "path": "code/Model/FCM/code/FullFctModel.cpp",
    "chars": 78166,
    "preview": "//\n//  FullFctModel.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights r"
  },
  {
    "path": "code/Model/FCM/code/FullFctModel.h",
    "chars": 4298,
    "preview": "//\n//  FullFctModel.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights res"
  },
  {
    "path": "code/Model/FCM/code/Instances.cpp",
    "chars": 150,
    "preview": "//\n//  Instances.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights rese"
  },
  {
    "path": "code/Model/FCM/code/Instances.h",
    "chars": 4446,
    "preview": "//\n//  Instances.h\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-30.\n//  Copyright (c) 2014年 hit. All rights reserv"
  },
  {
    "path": "code/Model/FCM/code/RE_FCT.cpp",
    "chars": 2143,
    "preview": "//\n//  RE_FCT.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-31.\n//  Copyright (c) 2014年 hit. All rights reserve"
  },
  {
    "path": "code/Model/FCM/code/RE_FCT_fixed.cpp",
    "chars": 2125,
    "preview": "//\n//  RE_FCT.cpp\n//  RE_FCT\n//\n//  Created by gflfof gflfof on 14-8-31.\n//  Copyright (c) 2014年 hit. All rights reserve"
  },
  {
    "path": "code/Model/FCM/code/makefile",
    "chars": 596,
    "preview": "CC = g++\n#The -Ofast might not work with older versions of gcc; in that case, use -O2\nCFLAGS = -lm -pthread -Ofast -marc"
  },
  {
    "path": "code/Model/FCM/code/predict.fea.fullnerpair.onlyne.txt",
    "chars": 69819,
    "preview": "8001\tMessage-Topic(e1,e2)\n8002\tProduct-Producer(e2,e1)\n8003\tInstrument-Agency(e2,e1)\n8004\tEntity-Destination(e1,e2)\n8005"
  },
  {
    "path": "code/Model/FCM/data/SemEval.test.fea.sst",
    "chars": 1455848,
    "preview": "Message-Topic(e1,e2)\t3\t3\taudits\t6\t6\twaste\nThe\tDT\t0\t0\t0\tmost\tRBS\tB-adv.all\t0\t0\tcommon\tJJ\tB-adj.all\t0\t0\taudits\tNNS\tB-noun."
  },
  {
    "path": "code/Model/FCM/data/SemEval.test.keys",
    "chars": 69323,
    "preview": "8001\tMessage-Topic(e1,e2)\n8002\tProduct-Producer(e2,e1)\n8003\tInstrument-Agency(e2,e1)\n8004\tEntity-Destination(e1,e2)\n8005"
  },
  {
    "path": "code/Model/FCM/data/SemEval.train.fea.sst",
    "chars": 4286822,
    "preview": "Component-Whole(e2,e1)\t12\t12\tconfiguration\t15\t15\telements\nThe\tDT\t0\t0\t0\tsystem\tNN\tB-noun.artifact\t0\t0\tas\tIN\t0\t0\t0\tdescrib"
  },
  {
    "path": "code/Model/FCM/data/semeval2010_task8_scorer-v1.2.pl",
    "chars": 15848,
    "preview": "#!/usr/bin/perl -w\n#\n#\n#  Author: Preslav Nakov\n#          nakov@comp.nus.edu.sg\n#          National University of Singa"
  },
  {
    "path": "code/Model/FCM/filter.py",
    "chars": 640,
    "preview": "import sys\nimport os\n\nlabel = {}\nfi = open(sys.argv[1], 'r')\nwhile True:\n\tl1 = fi.readline()\n\tl2 = fi.readline()\n\tl3 = f"
  },
  {
    "path": "code/Model/FCM/gen_fmt.py",
    "chars": 742,
    "preview": "import sys\nimport os\n\nsid2idx = {}\nfi = open(sys.argv[1], 'r')\nfor line in fi:\n\tsid = line.strip().split()[0]\n\tidx = lin"
  },
  {
    "path": "code/Model/FCM/gen_sen.py",
    "chars": 242,
    "preview": "import sys\nimport os\nimport json\n\nfi = open(sys.argv[1], 'r')\nfo = open(sys.argv[2], 'w')\n\nfor line in fi:\n\tdic = json.l"
  },
  {
    "path": "code/Model/FCM/predict.txt",
    "chars": 181419,
    "preview": "8001\tNone\n8002\tPART-WHOLE\n8003\tNone\n8004\tNone\n8005\tNone\n8006\tNone\n8007\tNone\n8008\tNone\n8009\tNone\n8010\tNone\n8011\tNone\n8012"
  },
  {
    "path": "code/Model/FCM/process.py",
    "chars": 1485,
    "preview": "import sys\nimport os\nimport json\n\nfi = open(sys.argv[1], 'r')\nfo = open(sys.argv[2], 'w')\nfoid = open(sys.argv[3], 'w')\n"
  },
  {
    "path": "code/Model/FCM/run.sh",
    "chars": 1047,
    "preview": "#!/bin/sh\n\nData=$1  # nyt_candidates, kbp_candidates\n\nIndir='data/intermediate/'$Data'/rm'\nOutdir='data/results/'$Data'/"
  },
  {
    "path": "code/Model/FCM/test.fmt",
    "chars": 10319094,
    "preview": "ORG-AFF\t1\t1\tshia\t0\t2\ta shia government\na\tDT\t0\t0\t0\tshia\tJJ\t0\t0\t0\tgovernment\tNN\t0\t0\t0\twill\tMD\t0\t0\t0\tallow\tVB\t0\t0\t0\tus\tPRP\t"
  },
  {
    "path": "code/Model/FCM/test.fmt.tmp",
    "chars": 10319094,
    "preview": "ORG-AFF\t1\t1\tshia\t0\t2\ta shia government\na\tDT\t0\t0\t0\tshia\tJJ\t0\t0\t0\tgovernment\tNN\t0\t0\t0\twill\tMD\t0\t0\t0\tallow\tVB\t0\t0\t0\tus\tPRP\t"
  },
  {
    "path": "code/Model/FCM/test.id",
    "chars": 763902,
    "preview": "unsoc.culture.iraq-20050211.0445_39_1_2_0_3\nunsoc.culture.iraq-20050211.0445_39_0_3_1_2\nunsoc.culture.iraq-20050211.0445"
  },
  {
    "path": "code/Model/FCM/test.sen",
    "chars": 134562,
    "preview": "a shia government will allow us an exit strategy . \nin my opinion , all we have to the average iraqi citizen is brought "
  },
  {
    "path": "code/Model/FCM/train.fmt",
    "chars": 8669729,
    "preview": "None\t4\t4\tbaghdad\t0\t0\the\nhe\tPRP\t0\t0\t0\twas\tVBD\t0\t0\t0\tcaptured\tVBN\t0\t0\t0\tin\tIN\t0\t0\t0\tbaghdad\tNN\t0\t0\t0\tlate\tJJ\t0\t0\t0\tmonday\t"
  },
  {
    "path": "code/Model/FCM/train.id",
    "chars": 717056,
    "preview": "bnCNN-ENG-20030416-160804.4_4_4_5_0_1\nbnCNN-ENG-20030416-160804.4_4_0_1_4_5\nbcCNN-IP-20030409.1600.02_14_8_10_4_17\nbcCNN"
  },
  {
    "path": "code/Model/FCM/train.sen",
    "chars": 460929,
    "preview": "he was captured in baghdad late monday night . \nmostly they believe in those northern iraq areas brent sadler was just t"
  },
  {
    "path": "code/Model/FCM/word2vec.cpp",
    "chars": 31011,
    "preview": "//  Copyright 2013 Google Inc. All Rights Reserved.\n//\n//  Licensed under the Apache License, Version 2.0 (the \"License\""
  },
  {
    "path": "code/Model/baselines/hypenet/README.md",
    "chars": 1145,
    "preview": "# HypeNet\nImproving Hypernymy Detection with an Integrated Path-based and Distributional Method, Vered Shwartz, Yoav Gol"
  },
  {
    "path": "code/Model/baselines/hypenet/data/README.md",
    "chars": 86,
    "preview": "download the GloVe file here.\n\ndownload KBP/NYT/TACRED data in corresponding folders.\n"
  },
  {
    "path": "code/Model/baselines/hypenet/evaluation.py",
    "chars": 8311,
    "preview": "import sys\nfrom collections import defaultdict\n\n\ndef find_none_index(file_name):\n    with open(file_name) as f:\n        "
  },
  {
    "path": "code/Model/baselines/hypenet/helper.py",
    "chars": 7717,
    "preview": "from collections import defaultdict\nimport json\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\n"
  },
  {
    "path": "code/Model/baselines/hypenet/lemmatize.py",
    "chars": 1220,
    "preview": "from pycorenlp import StanfordCoreNLP\nfrom pprint import pprint\nimport json\n\nFILE = \"data/sentences_50k\"\n\nnlp = Stanford"
  },
  {
    "path": "code/Model/baselines/hypenet/plot.py",
    "chars": 2770,
    "preview": "import json\nimport numpy as np\nfrom sklearn.metrics import precision_recall_curve\nfrom sklearn.metrics import average_pr"
  },
  {
    "path": "code/Model/baselines/hypenet/postprocess.py",
    "chars": 570,
    "preview": "import string\nimport json\n\nfin = open('result.txt')\nfout = open('pr.txt', 'w')\n\nA = dict()\n\ntot = 0\nfor line in fin:\n\tjs"
  },
  {
    "path": "code/Model/baselines/hypenet/preprocess.py",
    "chars": 3795,
    "preview": "import json\n\n\ndef is_overlap(a, b):\n    if set(a) & set(b):\n        return True\n    else:\n        return False\n\n\ndef pro"
  },
  {
    "path": "code/Model/baselines/hypenet/sdp.py",
    "chars": 11777,
    "preview": "import csv\nimport datetime\nimport json\nimport time\nfrom os.path import expanduser, exists\nfrom pprint import pprint\nfrom"
  },
  {
    "path": "code/Model/baselines/hypenet/sentence_normalize.py",
    "chars": 1710,
    "preview": "from pycorenlp import StanfordCoreNLP\nfrom pprint import pprint\nimport json\n\nFILE = \"data/test200\"\n\nnlp = StanfordCoreNL"
  },
  {
    "path": "code/Model/baselines/hypenet/sentence_tokens.py",
    "chars": 1026,
    "preview": "from pycorenlp import StanfordCoreNLP\nfrom pprint import pprint\nimport json\n\nnlp = StanfordCoreNLP('http://localhost:{0}"
  },
  {
    "path": "code/Model/baselines/hypenet/shortest_dep.py",
    "chars": 5044,
    "preview": "import networkx as nx\nfrom pycorenlp import StanfordCoreNLP\nfrom pprint import pprint\nimport json\n\nFILE = \"data/TACRED/d"
  },
  {
    "path": "code/Model/baselines/hypenet/split_baseline_data.py",
    "chars": 643,
    "preview": "import json\n\nfrom sklearn.model_selection import train_test_split\n\ndata = json.load(open('data/label5000.json', encoding"
  },
  {
    "path": "code/Model/baselines/hypenet/test_corenlp.py",
    "chars": 1165,
    "preview": "from pycorenlp import StanfordCoreNLP\nfrom pprint import pprint\nimport json\n\nnlp = StanfordCoreNLP('http://localhost:{0}"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/README.md",
    "chars": 655,
    "preview": "# SDP-LSTM model for TACRED classification\n\n#### Requirements\n\n- python 2.7\n- Tensorflow 1.8.0\n\n#### Directories\n\n- depe"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/analyze.py",
    "chars": 2377,
    "preview": "import time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n\nimport data_utils\n\ntf.app.fla"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/data_utils.py",
    "chars": 18564,
    "preview": "import os\nimport sys\nimport numpy as np\nimport cPickle as pickle\nfrom collections import Counter\nfrom collections import"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/emb_utils.py",
    "chars": 3347,
    "preview": "import os\nimport sys\nimport numpy as np\nimport cPickle as pickle\nfrom collections import Counter\nimport argparse\n\nimport"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/eval.py",
    "chars": 6092,
    "preview": "from datetime import datetime\nimport time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/scorer.py",
    "chars": 4141,
    "preview": "#!/usr/bin/env python\n\nimport sys\nfrom collections import Counter\n\ndef score(key_file, pred_files, f_measure=1, verbose="
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/sprnn_model.py",
    "chars": 11814,
    "preview": "import sys\nimport tensorflow as tf\n\nimport data_utils\n\ntf.app.flags.DEFINE_integer('num_layers', 2, 'Number of cell laye"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/train.py",
    "chars": 15783,
    "preview": "from datetime import datetime\nimport time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/tree.py",
    "chars": 7636,
    "preview": "# A tree structure that can read from conll-style data.\nfrom collections import Counter\nfrom collections import OrderedD"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency/utils.py",
    "chars": 2585,
    "preview": "import data_utils\n\ndef _get_feed_dict_for_others(model, x_batch, y_batch, x_lens, use_pos=True, use_ner=True, use_deprel"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/analyze.py",
    "chars": 2377,
    "preview": "import time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n\nimport data_utils\n\ntf.app.fla"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/data_utils.py",
    "chars": 16513,
    "preview": "import os\nimport sys\nimport numpy as np\nimport cPickle as pickle\nfrom collections import Counter\nfrom collections import"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/emb_utils.py",
    "chars": 3443,
    "preview": "import os\nimport sys\nimport numpy as np\nimport cPickle as pickle\nfrom collections import Counter\nimport argparse\n\nimport"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/eval.py",
    "chars": 6092,
    "preview": "from datetime import datetime\nimport time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/scorer.py",
    "chars": 4141,
    "preview": "#!/usr/bin/env python\n\nimport sys\nfrom collections import Counter\n\ndef score(key_file, pred_files, f_measure=1, verbose="
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/sprnn_model.py",
    "chars": 11868,
    "preview": "import sys\nimport tensorflow as tf\n\nimport data_utils\n\ntf.app.flags.DEFINE_integer('num_layers', 2, 'Number of cell laye"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/train-cv.py",
    "chars": 12972,
    "preview": "from datetime import datetime\nimport time\nimport os\nimport sys\nimport random\nimport tensorflow as tf\nimport numpy as np\n"
  },
  {
    "path": "code/Model/baselines/sdp-lstm/dependency-kbp/utils.py",
    "chars": 2565,
    "preview": "import data_utils\n\ndef _get_feed_dict_for_others(model, x_batch, y_batch, x_lens, use_pos=True, use_ner=False, use_depre"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/README.md",
    "chars": 1189,
    "preview": "# Sentence-level Models\n\n#### Requirements\n\n- Python 3.6.4\n- Pytorch 0.4.0\n\n#### Input files\n\n- ./data/json/train.json\n-"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/cotype2json.py",
    "chars": 2794,
    "preview": "'''\nConvert CoType data into json format\n'''\n__author__ = 'Maosen'\nfrom tqdm import tqdm\nimport json\nimport argparse\nimp"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/model.py",
    "chars": 3736,
    "preview": "'''\nModel wrapper for Relation Extraction\n'''\n__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nfrom tqdm import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/models/bgru.py",
    "chars": 3309,
    "preview": "__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport utils\nfrom utils import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/models/cnn.py",
    "chars": 3130,
    "preview": "__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport utils\nfrom utils import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/models/lstm.py",
    "chars": 3277,
    "preview": "__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport utils\nfrom utils import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/models/pcnn.py",
    "chars": 3729,
    "preview": "__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport utils\nfrom utils import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/models/position_aware_lstm.py",
    "chars": 3916,
    "preview": "__author__ = 'Maosen'\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport utils\nfrom utils import "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/tacred2json.py",
    "chars": 3013,
    "preview": "'''\nConvert TACRED data into json format\n'''\n__author__ = 'Maosen'\nfrom tqdm import tqdm\nimport json\nimport argparse\nrel"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/train-cv.py",
    "chars": 5989,
    "preview": "'''\nTraining script with ramdom splitting dev set\n'''\n__author__ = 'Maosen'\nimport torch\nfrom model import Model\nimport "
  },
  {
    "path": "code/Model/baselines/sentence-level-models/train.py",
    "chars": 5988,
    "preview": "__author__ = 'Maosen'\nimport torch\nfrom model import Model\nimport utils\nfrom utils import Dataset, CVDataset, get_cv_dat"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/utils.py",
    "chars": 10665,
    "preview": "'''\nData Loader for Position-Aware LSTM for Relation Extraction\n'''\n__author__ = 'Maosen'\nimport torch\nimport torch.util"
  },
  {
    "path": "code/Model/baselines/sentence-level-models/vocab.py",
    "chars": 8090,
    "preview": "'''\nPrepare vocabulary and initial word vectors\n'''\nimport json\nimport os\nimport pickle\nimport argparse\nimport numpy as "
  },
  {
    "path": "code/Model/dw/deepwalk-bipa.cpp",
    "chars": 14223,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <vector>\n#include <algorithm>\n#inc"
  },
  {
    "path": "code/Model/dw/deepwalk-hete-em.cpp",
    "chars": 16788,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <vector>\n#include <algorithm>\n#inc"
  },
  {
    "path": "code/Model/dw/deepwalk-hete.cpp",
    "chars": 16788,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <vector>\n#include <algorithm>\n#inc"
  },
  {
    "path": "code/Model/dw/makefile",
    "chars": 707,
    "preview": "CC = g++\nCFLAGS = -lm -pthread -Ofast -march=native -Wall -funroll-loops -Wno-unused-result -lgsl -lm -lgslcblas\nLIBS = "
  },
  {
    "path": "code/Model/multir/.classpath",
    "chars": 360,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<classpath>\n\t<classpathentry kind=\"src\" path=\"src\"/>\n\t<classpathentry kind=\"con\" "
  },
  {
    "path": "code/Model/multir/.project",
    "chars": 373,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<projectDescription>\n\t<name>multir-release</name>\n\t<comment></comment>\n\t<projects"
  },
  {
    "path": "code/Model/multir/.settings/org.eclipse.jdt.core.prefs",
    "chars": 617,
    "preview": "#Fri Apr 22 20:40:19 PDT 2011\neclipse.preferences.version=1\norg.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enab"
  },
  {
    "path": "code/Model/multir/LICENSE.txt",
    "chars": 2749,
    "preview": "MultiR License Agreement\n                                                                               |\nAll of the doc"
  },
  {
    "path": "code/Model/multir/README.txt",
    "chars": 1758,
    "preview": "This distribution contains the source code for the experiments presented in\nthe following research publication:\n\n    Rap"
  },
  {
    "path": "code/Model/multir/annotations/sentential-byrelation.txt",
    "chars": 800973,
    "preview": "/guid/9202a8c04000641f800000000000f47a\t/guid/9202a8c04000641f800000000000fd84\t0\t/sports/sports_team/location\tn\t\"Mr. Gray"
  },
  {
    "path": "code/Model/multir/annotations/sentential.txt",
    "chars": 581625,
    "preview": "/guid/9202a8c04000641f8000000000061982\t/guid/9202a8c04000641f80000000001987ec\t3\t/people/person/place_lived\tn\t\"Even when "
  },
  {
    "path": "code/Model/multir/run.sh",
    "chars": 371,
    "preview": "Data=$1  # nyt_candidates, kbp_candidates\n\nOutdir='data/results/'$Data'/rm'\noutput_file=$Outdir'/prediction_multir_null_"
  },
  {
    "path": "code/Model/multir/src/cc/factorie/protobuf/DocumentProtos.java",
    "chars": 177913,
    "preview": "// Generated by the protocol buffer compiler.  DO NOT EDIT!\n// source: Document.proto\n\npackage cc.factorie.protobuf;\n\npu"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/AveragedPerceptron.java",
    "chars": 4782,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport java.util.Random;\n\nimport edu.uw.cs.multir.learning.data.Dataset;\ni"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/ConditionalInference.java",
    "chars": 3843,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport java.util.Arrays;\nimport java.util.Comparator;\n\nimport edu.uw.cs.mu"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/FullInference.java",
    "chars": 1523,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport edu.uw.cs.multir.learning.data.MILDocument;\n\n\npublic class FullInfe"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Model.java",
    "chars": 1162,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport java.io.BufferedReader;\nimport java.io.BufferedWriter;\nimport java."
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Parameters.java",
    "chars": 2037,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport java.io.BufferedInputStream;\nimport java.io.BufferedOutputStream;\ni"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Parse.java",
    "chars": 300,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport edu.uw.cs.multir.learning.data.MILDocument;\n\npublic class Parse {\n\n"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Scorer.java",
    "chars": 777,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport edu.uw.cs.multir.learning.data.MILDocument;\nimport edu.uw.cs.multir"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/algorithm/Viterbi.java",
    "chars": 1042,
    "preview": "package edu.uw.cs.multir.learning.algorithm;\n\nimport edu.uw.cs.multir.learning.data.MILDocument;\n\npublic class Viterbi {"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/data/Dataset.java",
    "chars": 255,
    "preview": "package edu.uw.cs.multir.learning.data;\n\nimport java.util.Random;\n\npublic interface Dataset {\n\t\n\tpublic int numDocs();\n\t"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/data/MILDocument.java",
    "chars": 2571,
    "preview": "package edu.uw.cs.multir.learning.data;\n\nimport java.io.DataInputStream;\nimport java.io.DataOutputStream;\nimport java.io"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/learning/data/MemoryDataset.java",
    "chars": 1531,
    "preview": "package edu.uw.cs.multir.learning.data;\n\nimport java.io.BufferedInputStream;\nimport java.io.DataInputStream;\nimport java"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/AggregatePrecisionRecallCurve.java",
    "chars": 4286,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.io.PrintStream;\nimport java"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/Main.java",
    "chars": 4111,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.IOException;\n\npublic class Main {\n\n\tpublic static void main(String[] args"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/Preprocess.java",
    "chars": 1151,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.File;\nimport java.io.IOException;\n\nimport edu.uw.cs.multir.learning.algor"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/ResultWriter.java",
    "chars": 2795,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.io.PrintStream;\nimport java"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/SententialPrecisionRecallByRelation.java",
    "chars": 4054,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.BufferedReader;\nimport java.io.FileInputStream;\nimport java.io.IOExceptio"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/SententialPrecisionRecallCurve.java",
    "chars": 4097,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.BufferedReader;\nimport java.io.FileInputStream;\nimport java.io.IOExceptio"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/Test.java",
    "chars": 1149,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.File;\nimport java.io.IOException;\n\nimport edu.uw.cs.multir.learning.algor"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/main/Train.java",
    "chars": 1026,
    "preview": "package edu.uw.cs.multir.main;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.util.Random;\n\nimport edu.uw"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/preprocess/ConvertProtobufToMILDocument.java",
    "chars": 3793,
    "preview": "package edu.uw.cs.multir.preprocess;\n\nimport java.io.BufferedInputStream;\nimport java.io.BufferedOutputStream;\nimport ja"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/preprocess/Mappings.java",
    "chars": 2437,
    "preview": "package edu.uw.cs.multir.preprocess;\n\nimport java.io.BufferedReader;\nimport java.io.BufferedWriter;\nimport java.io.FileI"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/util/DenseVector.java",
    "chars": 1982,
    "preview": "package edu.uw.cs.multir.util;\n\nimport java.io.DataInputStream;\nimport java.io.DataOutputStream;\nimport java.io.IOExcept"
  },
  {
    "path": "code/Model/multir/src/edu/uw/cs/multir/util/SparseBinaryVector.java",
    "chars": 1592,
    "preview": "package edu.uw.cs.multir.util;\n\nimport java.io.DataInputStream;\nimport java.io.DataOutputStream;\nimport java.io.IOExcept"
  },
  {
    "path": "code/Model/pte/line.cpp",
    "chars": 4670,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <time.h>\n#include <pthread.h>\n#inc"
  },
  {
    "path": "code/Model/pte/linelib.cpp",
    "chars": 10132,
    "preview": "#include \"linelib.h\"\n\nline_node::line_node() : vec(NULL, 0, 0)\n{\n    node = NULL;\n    node_size = 0;\n    vector_size = 0"
  },
  {
    "path": "code/Model/pte/linelib.h",
    "chars": 1902,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <algorithm>\n#include <Eigen/Dense>"
  },
  {
    "path": "code/Model/pte/makefile",
    "chars": 884,
    "preview": "CC = g++\nCFLAGS = -lm -pthread -Ofast -march=native -Wall -funroll-loops -Wno-unused-result -lgsl -lm -lgslcblas\nINCLUDE"
  },
  {
    "path": "code/Model/pte/pte-hete.cpp",
    "chars": 5789,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <time.h>\n#include <pthread.h>\n#inc"
  },
  {
    "path": "code/Model/pte/ransampl.c",
    "chars": 3096,
    "preview": "/*\n * Library:   ransampl (random number sampling)\n *\n * File:      ransampl.c\n *\n * Contents:  Random-number sampling u"
  },
  {
    "path": "code/Model/pte/ransampl.h",
    "chars": 927,
    "preview": "/*\n * Library:   ransampl (random number sampling)\n *\n * File:      ransampl.h\n *\n * Contents:  Random-number sampling u"
  },
  {
    "path": "code/Model/retype/hplelib.cpp",
    "chars": 30252,
    "preview": "#include \"hplelib.h\"\n\nreal sigmoid(real x)\n{\n    return 1.0 / (1.0 + exp(-x));\n}\n\nline_node::line_node() : vec(NULL, 0, "
  },
  {
    "path": "code/Model/retype/hplelib.h",
    "chars": 3847,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <algorithm>\n#include <vector>\n#inc"
  },
  {
    "path": "code/Model/retype/makefile",
    "chars": 916,
    "preview": "CC = g++\nCFLAGS = -lm -pthread -O2 -march=native -Wall -funroll-loops -Wno-unused-result -lgsl -lm -lgslcblas\nINCLUDES ="
  },
  {
    "path": "code/Model/retype/ransampl.c",
    "chars": 3096,
    "preview": "/*\n * Library:   ransampl (random number sampling)\n *\n * File:      ransampl.c\n *\n * Contents:  Random-number sampling u"
  },
  {
    "path": "code/Model/retype/ransampl.h",
    "chars": 927,
    "preview": "/*\n * Library:   ransampl (random number sampling)\n *\n * File:      ransampl.h\n *\n * Contents:  Random-number sampling u"
  },
  {
    "path": "code/Model/retype/retype-rm.cpp",
    "chars": 9461,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <time.h>\n#include <pthread.h>\n#inc"
  },
  {
    "path": "code/Model/retype/retype.cpp",
    "chars": 14924,
    "preview": "#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <math.h>\n#include <time.h>\n#include <pthread.h>\n#inc"
  },
  {
    "path": "code/Model/seq-kernel/gen_data.py",
    "chars": 488,
    "preview": "import sys\nimport os\n\nfil = open(sys.argv[1], 'r')\nfiv = open(sys.argv[2], 'r')\nfo = open(sys.argv[3], 'w')\n\nline = fiv."
  },
  {
    "path": "code/Model/seq-kernel/gen_fmt.py",
    "chars": 636,
    "preview": "import sys\nimport os\n\ns2i = {}\nfi = open(sys.argv[1], 'r')\nfor line in fi:\n\ts = line.strip().split()[0]\n\ti = line.strip("
  },
  {
    "path": "code/Model/seq-kernel/gen_lb.py",
    "chars": 348,
    "preview": "import sys\nimport os\n\ns2l = {}\nfi = open(sys.argv[3], 'r')\nfor line in fi:\n\ts = line.split('\\t')[0]\n\tl = line.split('\\t'"
  },
  {
    "path": "code/Model/seq-kernel/gen_sen.py",
    "chars": 160,
    "preview": "import sys\nimport os\n\nfi = open(sys.argv[1], 'r')\nfo = open(sys.argv[2], 'w')\n\nfor line in fi:\n\tlst = line.split('::')\n\t"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/COPYRIGHT",
    "chars": 1497,
    "preview": "\nCopyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin\nAll rights reserved.\n\nRedistribution and use in source and bi"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/FAQ.html",
    "chars": 83087,
    "preview": "\n\n<html>\n<head>\n<title>LIBSVM FAQ</title>\n</head>\n<body bgcolor=\"#ffffcc\">\n\n<a name=\"_TOP\"><b><h1><a\nhref=http://www.csi"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/Makefile",
    "chars": 732,
    "preview": "CXX ?= g++\nCFLAGS = -Wall -Wconversion -O3 -fPIC\nSHVER = 2\nOS = $(shell uname)\n\nall: svm-train svm-predict svm-scale\n\nli"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/Makefile.win",
    "chars": 1136,
    "preview": "#You must ensure nmake.exe, cl.exe, link.exe are in system path.\n#VCVARS64.bat\n#Under dosbox prompt\n#nmake -f Makefile.w"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/README",
    "chars": 28679,
    "preview": "Libsvm is a simple, easy-to-use, and efficient software for SVM\nclassification and regression. It solves C-SVM classific"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/heart_scale",
    "chars": 27670,
    "preview": "+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 \n-1 1:0.583333 2:-1 3:0."
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/Makefile",
    "chars": 624,
    "preview": ".SUFFIXES: .class .java\nFILES = libsvm/svm.class libsvm/svm_model.class libsvm/svm_node.class \\\n\t\tlibsvm/svm_parameter.c"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm.java",
    "chars": 63839,
    "preview": "\n\n\n\n\npackage libsvm;\nimport java.io.*;\nimport java.util.*;\n\n//\n// Kernel Cache\n//\n// l is the number of total data items"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm.m4",
    "chars": 63132,
    "preview": "define(`swap',`do {$1 _=$2; $2=$3; $3=_;} while(false)')\ndefine(`Qfloat',`float')\ndefine(`SIZE_OF_QFLOAT',4)\ndefine(`TAU"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm_model.java",
    "chars": 868,
    "preview": "//\n// svm_model\n//\npackage libsvm;\npublic class svm_model implements java.io.Serializable\n{\n\tpublic svm_parameter param;"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm_node.java",
    "chars": 115,
    "preview": "package libsvm;\npublic class svm_node implements java.io.Serializable\n{\n\tpublic int index;\n\tpublic double value;\n}\n"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm_parameter.java",
    "chars": 1288,
    "preview": "package libsvm;\npublic class svm_parameter implements Cloneable,java.io.Serializable\n{\n\t/* svm_type */\n\tpublic static fi"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm_print_interface.java",
    "chars": 87,
    "preview": "package libsvm;\npublic interface svm_print_interface\n{\n\tpublic void print(String s);\n}\n"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/libsvm/svm_problem.java",
    "chars": 136,
    "preview": "package libsvm;\npublic class svm_problem implements java.io.Serializable\n{\n\tpublic int l;\n\tpublic double[] y;\n\tpublic sv"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/svm_predict.java",
    "chars": 4950,
    "preview": "import libsvm.*;\nimport java.io.*;\nimport java.util.*;\n\nclass svm_predict {\n\tprivate static svm_print_interface svm_prin"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/svm_scale.java",
    "chars": 8944,
    "preview": "import libsvm.*;\nimport java.io.*;\nimport java.util.*;\nimport java.text.DecimalFormat;\n\nclass svm_scale\n{\n\tprivate Strin"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/svm_toy.java",
    "chars": 12269,
    "preview": "import libsvm.*;\nimport java.applet.*;\nimport java.awt.*;\nimport java.util.*;\nimport java.awt.event.*;\nimport java.io.*;"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/svm_train.java",
    "chars": 8355,
    "preview": "import libsvm.*;\nimport java.io.*;\nimport java.util.*;\n\nclass svm_train {\n\tprivate svm_parameter param;\t\t// set by parse"
  },
  {
    "path": "code/Model/seq-kernel/libsvm/java/test_applet.html",
    "chars": 81,
    "preview": "<APPLET code=\"svm_toy.class\" archive=\"libsvm.jar\" width=300 height=350></APPLET>\n"
  }
]

// ... and 92 more files (download for full content)

About this extraction

This page contains the full source code of the INK-USC/DS-RelationExtraction GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 292 files (66.5 MB), approximately 12.6M tokens, and a symbol index with 1906 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo