Full Code of Eniac-Xie/faster-rcnn-resnet for AI

master aba743e8404b cached
217 files
3.4 MB
911.6k tokens
258 symbols
1 requests
Download .txt
Showing preview only (3,644K chars total). Download the full file or copy to clipboard to get everything.
Repository: Eniac-Xie/faster-rcnn-resnet
Branch: master
Commit: aba743e8404b
Files: 217
Total size: 3.4 MB

Directory structure:
gitextract_z_8jtbwf/

├── .gitignore
├── LICENSE
├── README.md
├── data/
│   ├── .gitignore
│   ├── README.md
│   ├── pylintrc
│   └── scripts/
│       ├── fetch_faster_rcnn_models.sh
│       ├── fetch_imagenet_models.sh
│       └── fetch_selective_search_data.sh
├── experiments/
│   ├── README.md
│   ├── cfgs/
│   │   ├── faster_rcnn_alt_opt.yml
│   │   ├── faster_rcnn_end2end.yml
│   │   └── faster_rcnn_end2end_ohem.yml
│   ├── logs/
│   │   └── .gitignore
│   └── scripts/
│       ├── fast_rcnn.sh
│       ├── faster_rcnn_alt_opt.sh
│       ├── faster_rcnn_end2end.sh
│       ├── test_resnet101_bn_scale_merged_0712_end2end.sh
│       ├── test_resnet101_bn_scale_merged_0712_end2end_ohem.sh
│       ├── train_resnet101_bn_scale_merged_0712_end2end.sh
│       └── train_resnet101_bn_scale_merged_0712_end2end_ohem.sh
├── lib/
│   ├── Makefile
│   ├── datasets/
│   │   ├── VOCdevkit-matlab-wrapper/
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── tools/
│   │   │   └── mcg_munge.py
│   │   └── voc_eval.py
│   ├── fast_rcnn/
│   │   ├── __init__.py
│   │   ├── bbox_transform.py
│   │   ├── config.py
│   │   ├── nms_wrapper.py
│   │   ├── test.py
│   │   └── train.py
│   ├── nms/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── pycocotools/
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer/
│   │   ├── __init__.py
│   │   ├── layer.py
│   │   ├── minibatch.py
│   │   └── roidb.py
│   ├── rpn/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── anchor_target_layer.py
│   │   ├── generate.py
│   │   ├── generate_anchors.py
│   │   ├── proposal_layer.py
│   │   └── proposal_target_layer.py
│   ├── setup.py
│   ├── transform/
│   │   ├── __init__.py
│   │   └── torch_image_transform_layer.py
│   └── utils/
│       ├── .gitignore
│       ├── __init__.py
│       ├── bbox.pyx
│       ├── blob.py
│       └── timer.py
├── models/
│   ├── README.md
│   ├── coco/
│   │   ├── VGG16/
│   │   │   ├── fast_rcnn/
│   │   │   │   ├── solver.prototxt
│   │   │   │   ├── test.prototxt
│   │   │   │   └── train.prototxt
│   │   │   └── faster_rcnn_end2end/
│   │   │       ├── solver.prototxt
│   │   │       ├── test.prototxt
│   │   │       └── train.prototxt
│   │   └── VGG_CNN_M_1024/
│   │       ├── fast_rcnn/
│   │       │   ├── solver.prototxt
│   │       │   ├── test.prototxt
│   │       │   └── train.prototxt
│   │       └── faster_rcnn_end2end/
│   │           ├── solver.prototxt
│   │           ├── test.prototxt
│   │           └── train.prototxt
│   └── pascal_voc/
│       ├── ResNet-50/
│       │   ├── faster_rcnn_BN_SCALE_Merged/
│       │   │   ├── faster_rcnn_alt_opt/
│       │   │   │   ├── faster_rcnn_test.pt
│       │   │   │   ├── rpn_test.pt
│       │   │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   │   ├── stage1_rpn_train.pt
│       │   │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   │   └── stage2_rpn_train.pt
│       │   │   └── faster_rcnn_end2end/
│       │   │       ├── solver.prototxt
│       │   │       ├── test.prototxt
│       │   │       └── train.prototxt
│       │   ├── faster_rcnn_BN_SCALE_Merged_OHEM/
│       │   │   └── faster_rcnn_end2end/
│       │   │       ├── solver.prototxt
│       │   │       ├── test.prototxt
│       │   │       └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   ├── rfcn_alt_opt_5step_ohem/
│       │   │   ├── rfcn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_rfcn_ohem_solver80k120k.pt
│       │   │   ├── stage1_rfcn_ohem_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_rfcn_ohem_solver80k120k.pt
│       │   │   ├── stage2_rfcn_ohem_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   ├── stage2_rpn_train.pt
│       │   │   ├── stage3_rpn_solver60k80k.pt
│       │   │   └── stage3_rpn_train.pt
│       │   └── rfcn_end2end/
│       │       ├── class-aware/
│       │       │   ├── test.prototxt
│       │       │   └── train_ohem.prototxt
│       │       ├── solver.prototxt
│       │       ├── solver_ohem.prototxt
│       │       ├── test_agnostic.prototxt
│       │       ├── train_agnostic.prototxt
│       │       └── train_agnostic_ohem.prototxt
│       ├── ResNet101/
│       │   └── faster_rcnn_alt_opt/
│       │       ├── faster_rcnn_test.pt
│       │       ├── rpn_test.pt
│       │       ├── stage1_fast_rcnn_solver30k40k.pt
│       │       ├── stage1_fast_rcnn_train.pt
│       │       ├── stage1_rpn_solver60k80k.pt
│       │       ├── stage1_rpn_train.pt
│       │       ├── stage2_fast_rcnn_solver30k40k.pt
│       │       ├── stage2_fast_rcnn_train.pt
│       │       ├── stage2_rpn_solver60k80k.pt
│       │       └── stage2_rpn_train.pt
│       ├── ResNet101_BN_SCALE_Merged/
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       ├── ResNet101_BN_SCALE_Merged_OHEM/
│       │   └── faster_rcnn_end2end_ohem/
│       │       ├── solver.prototxt
│       │       └── train.prototxt
│       ├── VGG16/
│       │   ├── fast_rcnn/
│       │   │   ├── solver.prototxt
│       │   │   ├── test.prototxt
│       │   │   └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       ├── VGG_CNN_M_1024/
│       │   ├── fast_rcnn/
│       │   │   ├── solver.prototxt
│       │   │   ├── test.prototxt
│       │   │   └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       └── ZF/
│           ├── fast_rcnn/
│           │   ├── solver.prototxt
│           │   ├── test.prototxt
│           │   └── train.prototxt
│           ├── faster_rcnn_alt_opt/
│           │   ├── faster_rcnn_test.pt
│           │   ├── rpn_test.pt
│           │   ├── stage1_fast_rcnn_solver30k40k.pt
│           │   ├── stage1_fast_rcnn_train.pt
│           │   ├── stage1_rpn_solver60k80k.pt
│           │   ├── stage1_rpn_train.pt
│           │   ├── stage2_fast_rcnn_solver30k40k.pt
│           │   ├── stage2_fast_rcnn_train.pt
│           │   ├── stage2_rpn_solver60k80k.pt
│           │   └── stage2_rpn_train.pt
│           └── faster_rcnn_end2end/
│               ├── solver.prototxt
│               ├── test.prototxt
│               └── train.prototxt
└── tools/
    ├── README.md
    ├── _init_paths.py
    ├── compress_net.py
    ├── demo.py
    ├── eval_recall.py
    ├── merge_bn_scale.py
    ├── reval.py
    ├── rpn_generate.py
    ├── test_net.py
    ├── train_faster_rcnn_alt_opt.py
    ├── train_net.py
    └── train_svms.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
.ipynb_checkpoints
lib/build
lib/pycocotools/_mask.c
lib/pycocotools/_mask.so
output
.idea


================================================
FILE: LICENSE
================================================
The MIT License (MIT)

Copyright (c) 2017 Chen-Wei Xie

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# Faster-RCNN-ResNet

This code extends [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by adding ResNet implementation
and Online Hard Example Mining.


This is a ResNet Implementation for Faster-RCNN.
The faster rcnn code is based on [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn).
The ohem code is based on [ohem](https://github.com/abhi2610/ohem).
To reduce memory usage, we use the batchnorm layer from [Microsoft's caffe](https://github.com/Microsoft/caffe).

# Modification
1. The [caffe-fast-rcnn](https://github.com/Eniac-Xie/caffe-fast-rcnn.git) we use is a little different from the one [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) uses:
   it uses the batchnorm layer from [Microsoft's caffe](https://github.com/Microsoft/caffe) to reduce the memory usage.
2. Using the in-place eltwise sum within the [PR](https://github.com/BVLC/caffe/pull/3708)
3. To reduce the memory usage, we also release a pretrained ResNet-101 model in which the batchnorm layers' parameters are
   merged into the scale layers'; see tools/merge_bn_scale.py for more detail.
4. Use Online-Hard-Example-Mining while training.

# Installation
The usage is similar to [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn).

1. Clone this repository
  ```Shell
  git clone https://github.com/Eniac-Xie/faster-rcnn-resnet.git
  ```
  We'll call the directory that you cloned faster-rcnn-resnet `ROOT`

2. Clone the modified caffe-fast-rcnn

  ```Shell
  cd $ROOT/
  git clone https://github.com/Eniac-Xie/caffe-fast-rcnn.git
  ```

3. Build Cython module

  ```Shell
   cd $ROOT/lib/
   make
  ```

4. Build Caffe

  ```Shell
   cd $ROOT/caffe-fast-rcnn
   make all -j8
   make pycaffe
  ```
# Result

|                        | training data       | test data             |   ohem |    mAP@0.5    |
|------------------------|:-------------------:|:---------------------:|:------:|:-------------:|
|Faster-RCNN, ResNet-101  | VOC 07+12 trainval  | VOC 07 test           |  False |   78.78%      |           
|Faster-RCNN, ResNet-101 | VOC 07+12 trainval  | VOC 07 test           |  True  |   79.44%      |     


# Testing
Download faster-rcnn-resnet weights from:

[faster-rcnn-resnet without ohem (BaiduYun)](http://pan.baidu.com/s/1kUKXgVH)

[faster-rcnn-resnet without ohem (OneDrive)](https://1drv.ms/u/s!AgkRygoHQVTXigHNLWT6gRbTHo2f)

[faster-rcnn-resnet with ohem (BaiduYun)](http://pan.baidu.com/s/1o8CtJwI)

[faster-rcnn-resnet with ohem (OneDrive)](https://1drv.ms/u/s!AgkRygoHQVTXigInqoym2V6z4CNA)

Then you can proceed as follows:

  ```Shell
   cd $ROOT/
   sh experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end.sh
   make
  ```
or

  ```Shell
   cd $ROOT/
   sh experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end_ohem.sh
   make
  ```

# Training
Download the pretrained ResNet-101 model. Note that we use a modified version in which the batchnorm layers' parameters are
merged into the scale layers'; you can download the model from [Baidu Yun](http://pan.baidu.com/s/1qX7VFjA) or [OneDrive](https://1drv.ms/u/s!AgkRygoHQVTXigBCR-5cnmAkfGfy).

Then you can proceed as follows:
  ```Shell
   cd $ROOT/
   sh experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end.sh
  ```
or
  ```Shell
   cd $ROOT/
   sh experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end_ohem.sh
  ```


================================================
FILE: data/.gitignore
================================================
selective_search*
imagenet_models*
fast_rcnn_models*
VOCdevkit*
cache


================================================
FILE: data/README.md
================================================
This directory holds (*after you download them*):
- Caffe models pre-trained on ImageNet
- Faster R-CNN models
- Symlinks to datasets

To download Caffe models (ZF, VGG16) pre-trained on ImageNet, run:

```
./data/scripts/fetch_imagenet_models.sh
```

This script will populate `data/imagenet_models`.

To download Faster R-CNN models trained on VOC 2007, run:

```
./data/scripts/fetch_faster_rcnn_models.sh
```

This script will populate `data/faster_rcnn_models`.

In order to train and test with PASCAL VOC, you will need to establish symlinks.
From the `data` directory (`cd data`):

```
# For VOC 2007
ln -s /your/path/to/VOC2007/VOCdevkit VOCdevkit2007

# For VOC 2012
ln -s /your/path/to/VOC2012/VOCdevkit VOCdevkit2012
```

Install the MS COCO dataset at /path/to/coco

```
ln -s /path/to/coco coco
```

For COCO with Fast R-CNN, place object proposals under `coco_proposals` (inside
the `data` directory). You can obtain proposals on COCO from Jan Hosang at
https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/object-recognition-and-scene-understanding/how-good-are-detection-proposals-really/.
For COCO, using MCG is recommended over selective search. MCG boxes can be downloaded
from http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/.
Use the tool `lib/datasets/tools/mcg_munge.py` to convert the downloaded MCG data
into the same file layout as those from Jan Hosang.

Since you'll likely be experimenting with multiple installs of Fast/er R-CNN in
parallel, you'll probably want to keep all of this data in a shared place and
use symlinks. On my system I create the following symlinks inside `data`:

Annotations for the 5k image 'minival' subset of COCO val2014 that I like to use
can be found at http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_minival2014.json.zip.
Annotations for COCO val2014 (set) minus minival (~35k images) can be found at
http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/instances_valminusminival2014.json.zip.

```
# data/cache holds various outputs created by the datasets package
ln -s /data/fast_rcnn_shared/cache

# move the imagenet_models to shared location and symlink to them
ln -s /data/fast_rcnn_shared/imagenet_models

# move the selective search data to a shared location and symlink to them
# (only applicable to Fast R-CNN training)
ln -s /data/fast_rcnn_shared/selective_search_data

ln -s /data/VOC2007/VOCdevkit VOCdevkit2007
ln -s /data/VOC2012/VOCdevkit VOCdevkit2012
```


================================================
FILE: data/pylintrc
================================================
[TYPECHECK]

ignored-modules = numpy, numpy.random, cv2


================================================
FILE: data/scripts/fetch_faster_rcnn_models.sh
================================================
#!/bin/bash
# Download and unpack the Faster R-CNN demo models into the directory that
# sits one level above this script.
#
# If the archive is already present and its MD5 matches CHECKSUM, the script
# exits without downloading. Otherwise the archive is fetched (again) and
# unpacked in place.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
# Quote the path (it may contain spaces) and stop if it cannot be entered,
# so nothing is downloaded into an unexpected directory.
cd "$DIR" || exit 1

FILE=faster_rcnn_models.tgz
URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
CHECKSUM=ac116844f66aefe29587214272054668

if [ -f "$FILE" ]; then
  echo "File already exists. Checking md5..."
  os=`uname -s`
  # Stays empty on platforms other than Linux/Darwin, which forces a re-download.
  checksum=""
  if [ "$os" = "Linux" ]; then
    checksum=`md5sum "$FILE" | awk '{ print $1 }'`
  elif [ "$os" = "Darwin" ]; then
    checksum=`cat "$FILE" | md5`
  fi
  if [ "$checksum" = "$CHECKSUM" ]; then
    echo "Checksum is correct. No need to download."
    exit 0
  else
    echo "Checksum is incorrect. Need to download again."
  fi
fi

echo "Downloading Faster R-CNN demo models (695M)..."

# Abort on a failed download instead of trying to unpack a partial archive.
wget "$URL" -O "$FILE" || { echo "Download failed: $URL" >&2; exit 1; }

echo "Unzipping..."

tar zxvf "$FILE" || { echo "Failed to unpack $FILE" >&2; exit 1; }

echo "Done. Please run this command again to verify that checksum = $CHECKSUM."


================================================
FILE: data/scripts/fetch_imagenet_models.sh
================================================
#!/bin/bash
# Download and unpack the ImageNet pre-trained Caffe models into the
# directory that sits one level above this script.
#
# If the archive is already present and its MD5 matches CHECKSUM, the script
# exits without downloading. Otherwise the archive is fetched (again) and
# unpacked in place.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
# Quote the path (it may contain spaces) and stop if it cannot be entered,
# so nothing is downloaded into an unexpected directory.
cd "$DIR" || exit 1

FILE=imagenet_models.tgz
URL=http://www.cs.berkeley.edu/~rbg/faster-rcnn-data/$FILE
CHECKSUM=ed34ca912d6782edfb673a8c3a0bda6d

if [ -f "$FILE" ]; then
  echo "File already exists. Checking md5..."
  os=`uname -s`
  # Stays empty on platforms other than Linux/Darwin, which forces a re-download.
  checksum=""
  if [ "$os" = "Linux" ]; then
    checksum=`md5sum "$FILE" | awk '{ print $1 }'`
  elif [ "$os" = "Darwin" ]; then
    checksum=`cat "$FILE" | md5`
  fi
  if [ "$checksum" = "$CHECKSUM" ]; then
    echo "Checksum is correct. No need to download."
    exit 0
  else
    echo "Checksum is incorrect. Need to download again."
  fi
fi

echo "Downloading pretrained ImageNet models (1G)..."

# Abort on a failed download instead of trying to unpack a partial archive.
wget "$URL" -O "$FILE" || { echo "Download failed: $URL" >&2; exit 1; }

echo "Unzipping..."

tar zxvf "$FILE" || { echo "Failed to unpack $FILE" >&2; exit 1; }

echo "Done. Please run this command again to verify that checksum = $CHECKSUM."


================================================
FILE: data/scripts/fetch_selective_search_data.sh
================================================
#!/bin/bash
# Download and unpack the precomputed selective search boxes into the
# directory that sits one level above this script.
#
# If the archive is already present and its MD5 matches CHECKSUM, the script
# exits without downloading. Otherwise the archive is fetched (again) and
# unpacked in place.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../" && pwd )"
# Quote the path (it may contain spaces) and stop if it cannot be entered,
# so nothing is downloaded into an unexpected directory.
cd "$DIR" || exit 1

FILE=selective_search_data.tgz
URL=http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/$FILE
CHECKSUM=7078c1db87a7851b31966b96774cd9b9

if [ -f "$FILE" ]; then
  echo "File already exists. Checking md5..."
  os=`uname -s`
  # Stays empty on platforms other than Linux/Darwin, which forces a re-download.
  checksum=""
  if [ "$os" = "Linux" ]; then
    checksum=`md5sum "$FILE" | awk '{ print $1 }'`
  elif [ "$os" = "Darwin" ]; then
    checksum=`cat "$FILE" | md5`
  fi
  if [ "$checksum" = "$CHECKSUM" ]; then
    echo "Checksum is correct. No need to download."
    exit 0
  else
    echo "Checksum is incorrect. Need to download again."
  fi
fi

echo "Downloading precomputed selective search boxes (0.5G)..."

# Abort on a failed download instead of trying to unpack a partial archive.
wget "$URL" -O "$FILE" || { echo "Download failed: $URL" >&2; exit 1; }

echo "Unzipping..."

tar zxvf "$FILE" || { echo "Failed to unpack $FILE" >&2; exit 1; }

echo "Done. Please run this command again to verify that checksum = $CHECKSUM."


================================================
FILE: experiments/README.md
================================================
Scripts are under `experiments/scripts`.

Each script saves a log file under `experiments/logs`.

Configuration override files used in the experiments are stored in `experiments/cfgs`.


================================================
FILE: experiments/cfgs/faster_rcnn_alt_opt.yml
================================================
EXP_DIR: faster_rcnn_alt_opt
TRAIN:
  BG_THRESH_LO: 0.0
TEST:
  HAS_RPN: True


================================================
FILE: experiments/cfgs/faster_rcnn_end2end.yml
================================================
EXP_DIR: faster_rcnn_end2end
TRAIN:
  HAS_RPN: True
  IMS_PER_BATCH: 1
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
TEST:
  HAS_RPN: True


================================================
FILE: experiments/cfgs/faster_rcnn_end2end_ohem.yml
================================================
EXP_DIR: faster_rcnn_end2end_ohem
TRAIN:
  HAS_RPN: True
  IMS_PER_BATCH: 1
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
  BATCH_SIZE: 64
  RPN_POST_NMS_TOP_N: 700
  USE_OHEM: True
  ASPECT_GROUPING: False
TEST:
  HAS_RPN: True


================================================
FILE: experiments/logs/.gitignore
================================================
*.txt*


================================================
FILE: experiments/scripts/fast_rcnn.sh
================================================
#!/bin/bash
# Train a Fast R-CNN model and then test the final snapshot.
#
# Usage:
# ./experiments/scripts/fast_rcnn.sh GPU NET DATASET [options args to {train,test}_net.py]
# DATASET is either pascal_voc or coco.
#
# Example:
# ./experiments/scripts/fast_rcnn.sh 0 VGG_CNN_M_1024 pascal_voc \
#   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"
#
# All output is appended to a timestamped log file under experiments/logs.
# The path of the final snapshot is recovered from that log.

set -x
set -e

export PYTHONUNBUFFERED="True"

GPU_ID=$1
NET=$2
NET_lc=${NET,,}
DATASET=$3

# Everything after the first three positional arguments is forwarded to
# {train,test}_net.py. Keep it as an array so that an argument containing
# spaces (such as "[400, 500, 600, 700]" in the example above) is passed
# through as a single word instead of being re-split.
EXTRA_ARGS=( "${@:4}" )
# Log-file slug: the extra arguments joined by spaces, with spaces -> "_".
EXTRA_ARGS_SLUG="${EXTRA_ARGS[*]}"
EXTRA_ARGS_SLUG=${EXTRA_ARGS_SLUG// /_}

case $DATASET in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    PT_DIR="pascal_voc"
    ITERS=40000
    ;;
  coco)
    TRAIN_IMDB="coco_2014_train"
    TEST_IMDB="coco_2014_minival"
    PT_DIR="coco"
    ITERS=280000
    ;;
  *)
    echo "No dataset given"
    # Report failure to the caller; a bare `exit` would return echo's status 0.
    exit 1
    ;;
esac

LOG="experiments/logs/fast_rcnn_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

time ./tools/train_net.py --gpu ${GPU_ID} \
  --solver models/${PT_DIR}/${NET}/fast_rcnn/solver.prototxt \
  --weights data/imagenet_models/${NET}.v2.caffemodel \
  --imdb ${TRAIN_IMDB} \
  --iters ${ITERS} \
  "${EXTRA_ARGS[@]}"

# Tracing is switched off while the log is parsed so the traced grep command
# is not itself written to the log that is being searched.
set +x
NET_FINAL=`grep -B 1 "done solving" "${LOG}" | grep "Wrote snapshot" | awk '{print $4}'`
set -x

time ./tools/test_net.py --gpu ${GPU_ID} \
  --def models/${PT_DIR}/${NET}/fast_rcnn/test.prototxt \
  --net "${NET_FINAL}" \
  --imdb ${TEST_IMDB} \
  "${EXTRA_ARGS[@]}"


================================================
FILE: experiments/scripts/faster_rcnn_alt_opt.sh
================================================
#!/bin/bash
# Train Faster R-CNN with the alternating optimization schedule and then test
# the final model.
#
# Usage:
# ./experiments/scripts/faster_rcnn_alt_opt.sh GPU NET DATASET [options args to {train,test}_net.py]
# DATASET is only pascal_voc for now
#
# Example:
# ./experiments/scripts/faster_rcnn_alt_opt.sh 0 VGG_CNN_M_1024 pascal_voc \
#   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"
#
# All output is appended to a timestamped log file under experiments/logs.
# The path of the final model is recovered from that log.

set -x
set -e

export PYTHONUNBUFFERED="True"

GPU_ID=$1
NET=$2
NET_lc=${NET,,}
DATASET=$3

# Everything after the first three positional arguments is forwarded to the
# training/testing tools. Keep it as an array so that an argument containing
# spaces (such as "[400, 500, 600, 700]" in the example above) is passed
# through as a single word instead of being re-split.
EXTRA_ARGS=( "${@:4}" )
# Log-file slug: the extra arguments joined by spaces, with spaces -> "_".
EXTRA_ARGS_SLUG="${EXTRA_ARGS[*]}"
EXTRA_ARGS_SLUG=${EXTRA_ARGS_SLUG// /_}

case $DATASET in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    PT_DIR="pascal_voc"
    ITERS=40000
    ;;
  coco)
    echo "Not implemented: use experiments/scripts/faster_rcnn_end2end.sh for coco"
    # Report failure to the caller; a bare `exit` would return echo's status 0.
    exit 1
    ;;
  *)
    echo "No dataset given"
    exit 1
    ;;
esac

LOG="experiments/logs/faster_rcnn_alt_opt_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

time ./tools/train_faster_rcnn_alt_opt.py --gpu ${GPU_ID} \
  --net_name ${NET} \
  --weights data/imagenet_models/${NET}.v2.caffemodel \
  --imdb ${TRAIN_IMDB} \
  --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
  "${EXTRA_ARGS[@]}"

# Tracing is switched off while the log is parsed so the traced grep command
# is not itself written to the log that is being searched.
set +x
NET_FINAL=`grep "Final model:" "${LOG}" | awk '{print $3}'`
set -x

time ./tools/test_net.py --gpu ${GPU_ID} \
  --def models/${PT_DIR}/${NET}/faster_rcnn_alt_opt/faster_rcnn_test.pt \
  --net "${NET_FINAL}" \
  --imdb ${TEST_IMDB} \
  --cfg experiments/cfgs/faster_rcnn_alt_opt.yml \
  "${EXTRA_ARGS[@]}"


================================================
FILE: experiments/scripts/faster_rcnn_end2end.sh
================================================
#!/bin/bash
# Train Faster R-CNN end-to-end and then test the final snapshot.
#
# Usage:
# ./experiments/scripts/faster_rcnn_end2end.sh GPU NET DATASET [options args to {train,test}_net.py]
# DATASET is either pascal_voc or coco.
#
# Example:
# ./experiments/scripts/faster_rcnn_end2end.sh 0 VGG_CNN_M_1024 pascal_voc \
#   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"
#
# All output is appended to a timestamped log file under experiments/logs.
# The path of the final snapshot is recovered from that log.

set -x
set -e

export PYTHONUNBUFFERED="True"

GPU_ID=$1
NET=$2
NET_lc=${NET,,}
DATASET=$3

# Everything after the first three positional arguments is forwarded to
# {train,test}_net.py. Keep it as an array so that an argument containing
# spaces (such as "[400, 500, 600, 700]" in the example above) is passed
# through as a single word instead of being re-split.
EXTRA_ARGS=( "${@:4}" )
# Log-file slug: the extra arguments joined by spaces, with spaces -> "_".
EXTRA_ARGS_SLUG="${EXTRA_ARGS[*]}"
EXTRA_ARGS_SLUG=${EXTRA_ARGS_SLUG// /_}

case $DATASET in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    PT_DIR="pascal_voc"
    ITERS=70000
    ;;
  coco)
    # This is a very long and slow training schedule
    # You can probably use fewer iterations and reduce the
    # time to the LR drop (set in the solver to 350,000 iterations).
    TRAIN_IMDB="coco_2014_train"
    TEST_IMDB="coco_2014_minival"
    PT_DIR="coco"
    ITERS=490000
    ;;
  *)
    echo "No dataset given"
    # Report failure to the caller; a bare `exit` would return echo's status 0.
    exit 1
    ;;
esac

LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

time ./tools/train_net.py --gpu ${GPU_ID} \
  --solver models/${PT_DIR}/${NET}/faster_rcnn_end2end/solver.prototxt \
  --weights data/imagenet_models/${NET}.v2.caffemodel \
  --imdb ${TRAIN_IMDB} \
  --iters ${ITERS} \
  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
  "${EXTRA_ARGS[@]}"

# Tracing is switched off while the log is parsed so the traced grep command
# is not itself written to the log that is being searched.
set +x
NET_FINAL=`grep -B 1 "done solving" "${LOG}" | grep "Wrote snapshot" | awk '{print $4}'`
set -x

time ./tools/test_net.py --gpu ${GPU_ID} \
  --def models/${PT_DIR}/${NET}/faster_rcnn_end2end/test.prototxt \
  --net "${NET_FINAL}" \
  --imdb ${TEST_IMDB} \
  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
  "${EXTRA_ARGS[@]}"


================================================
FILE: experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end.sh
================================================
# Evaluate the 70000-iteration ResNet-101 (BN/scale merged) end2end snapshot
# on voc_0712_test, using GPU 1.
# All paths are relative, so run this from the directory that contains
# tools/, models/, output/ and experiments/.
# The final option line deliberately has no trailing "\": a dangling line
# continuation would glue any line appended later onto this command.
./tools/test_net.py --gpu 1 \
  --def models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt \
  --net output/faster_rcnn_end2end/voc_0712_trainval/resnet101_faster_rcnn_bn_scale_merged_end2end_iter_70000.caffemodel \
  --imdb voc_0712_test \
  --cfg experiments/cfgs/faster_rcnn_end2end.yml


================================================
FILE: experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end_ohem.sh
================================================
# Evaluate the 70000-iteration ResNet-101 (BN/scale merged) end2end OHEM
# snapshot on voc_0712_test, using GPU 1.
# All paths are relative, so run this from the directory that contains
# tools/, models/, output/ and experiments/.
# The final option line deliberately has no trailing "\": a dangling line
# continuation would glue any line appended later onto this command.
./tools/test_net.py --gpu 1 \
  --def models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt \
  --net output/faster_rcnn_end2end_ohem/voc_0712_trainval/resnet101_faster_rcnn_bn_scale_merged_end2end_ohem_iter_70000.caffemodel \
  --imdb voc_0712_test \
  --cfg experiments/cfgs/faster_rcnn_end2end.yml



================================================
FILE: experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end.sh
================================================
# Train the ResNet-101 (BN/scale merged) end2end model on voc_0712_trainval
# for 70000 iterations, using GPU 1.
# All paths are relative to the directory the script is launched from.
SOLVER=models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/solver.prototxt
WEIGHTS=ResNet-101-BN-SCALE-Merged/ResNet101_BN_SCALE_Merged.caffemodel
CFG=experiments/cfgs/faster_rcnn_end2end.yml

./tools/train_net.py --gpu 1 --solver "$SOLVER" --weights "$WEIGHTS" \
  --imdb voc_0712_trainval --iters 70000 --cfg "$CFG"

================================================
FILE: experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end_ohem.sh
================================================
# Train the ResNet-101 (BN/scale merged) end2end model with the OHEM solver
# and config on voc_0712_trainval for 70000 iterations, using GPU 1.
# All paths are relative to the directory the script is launched from.
SOLVER=models/pascal_voc/ResNet101_BN_SCALE_Merged_OHEM/faster_rcnn_end2end_ohem/solver.prototxt
WEIGHTS=ResNet-101-BN-SCALE-Merged/ResNet101_BN_SCALE_Merged.caffemodel
CFG=experiments/cfgs/faster_rcnn_end2end_ohem.yml

./tools/train_net.py --gpu 1 --solver "$SOLVER" --weights "$WEIGHTS" \
  --imdb voc_0712_trainval --iters 70000 --cfg "$CFG"

================================================
FILE: lib/Makefile
================================================
# `all` names an action, not a file: declare it phony so the build still runs
# when a file or directory called `all` exists next to this Makefile.
.PHONY: all

# Build the extension modules in place, then remove the temporary build tree.
all:
	python setup.py build_ext --inplace
	rm -rf build


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m
================================================
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Load the VOC devkit options found under PATH.
%
%   VOCopts = get_voc_opts(path) temporarily changes into PATH, puts the
%   'VOCcode' sub-directory on the MATLAB path and runs VOCinit. VOCopts is
%   never assigned in this function, so VOCinit is expected to create it in
%   this function's workspace.
%
%   The original working directory and MATLAB path are restored on both the
%   success and the failure path. Any error raised inside the try block is
%   reported as "VOCcode directory not found under <path>".

% Remember where we started so we can return there afterwards.
tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo the path/directory changes before re-raising as a single message.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
% Success path: undo the same path/directory changes.
rmpath('VOCcode');
cd(tmp);


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m
================================================
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every class and print an AP summary.
%
%   res = voc_eval(path, comp_id, test_set, output_dir) loads the devkit
%   options found under PATH, sets their test set to TEST_SET, and calls
%   voc_eval_cls once per entry of VOCopts.classes. The per-class results
%   are returned as a struct array. Each class AP (times 100) is printed on
%   its own line, followed by the mean AP (times 100).

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

num_classes = length(VOCopts.classes);
for k = 1:num_classes
  res(k) = voc_eval_cls(VOCopts.classes{k}, VOCopts, comp_id, output_dir);
end

% Gather the per-class APs into a column vector for printing.
aps = reshape([res.ap], [], 1);

fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate detections for a single class.
%
%   res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) returns a struct
%   with fields recall, prec, ap and ap_auc. When evaluation is skipped
%   (see do_eval below) these keep their initial values: empty recall/prec
%   and zero ap/ap_auc. The result is also saved to
%   <output_dir>/<cls>_pr.mat; when evaluation runs, the current figure is
%   written to <output_dir>/<cls>_pr.jpg.

test_set = VOCopts.testset;
% Characters from position 4 onwards of the dataset name.
% NOTE(review): presumably the name looks like 'VOC2007', making this the
% year - confirm against VOCinit.
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but not used anywhere in this function.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate when the year is 2007 or earlier, or when the set is not 'test'.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  % Write the current figure as a JPEG next to the other outputs.
  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

% save() looks the variables up by name, so these locals must keep their names.
save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m
================================================
function ap = xVOCap(rec,prec)
% XVOCAP  Area under a precision/recall curve.
%   From the PASCAL VOC 2011 devkit
%
%   REC and PREC are padded with sentinel end points, precision is made
%   non-increasing from left to right, and the area is summed over the
%   positions where recall changes value.

padded_rec = [0 ; rec ; 1];
padded_prec = [0 ; prec ; 0];

% Sweep from right to left so each entry becomes the maximum precision
% found at or after its position.
for k = numel(padded_prec)-1:-1:1
    if padded_prec(k+1) > padded_prec(k)
        padded_prec(k) = padded_prec(k+1);
    end
end

% Indices (into the padded vectors) where recall differs from its predecessor.
changed = padded_rec(2:end) ~= padded_rec(1:end-1);
idx = find(changed) + 1;

widths = padded_rec(idx) - padded_rec(idx-1);
ap = sum(widths .* padded_prec(idx));


================================================
FILE: lib/datasets/__init__.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------


================================================
FILE: lib/datasets/coco.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
from fast_rcnn.config import cfg
import os.path as osp
import sys
import os
import numpy as np
import scipy.sparse
import scipy.io as sio
import cPickle
import json
import uuid
# COCO API
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as COCOmask

def _filter_crowd_proposals(roidb, crowd_thresh):
    """
    Finds proposals that are inside crowd regions and marks them with
    overlap = -1 (for all gt rois), which means they will be excluded from
    training.
    """
    for ix, entry in enumerate(roidb):
        overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(overlaps.max(axis=1) == -1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        iscrowd = [int(True) for _ in xrange(len(crowd_inds))]
        crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        overlaps[non_gt_inds[bad_inds], :] = -1
        roidb[ix]['gt_overlaps'] = scipy.sparse.csr_matrix(overlaps)
    return roidb

class coco(imdb):
    def __init__(self, image_set, year):
        imdb.__init__(self, 'coco_' + year + '_' + image_set)
        # COCO specific config options
        self.config = {'top_k' : 2000,
                       'use_salt' : True,
                       'cleanup' : True,
                       'crowd_thresh' : 0.7,
                       'min_size' : 2}
        # name, paths
        self._year = year
        self._image_set = image_set
        self._data_path = osp.join(cfg.DATA_DIR, 'coco')
        # load COCO API, classes, class <-> id mappings
        self._COCO = COCO(self._get_ann_file())
        cats = self._COCO.loadCats(self._COCO.getCatIds())
        self._classes = tuple(['__background__'] + [c['name'] for c in cats])
        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
        self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
                                              self._COCO.getCatIds()))
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self.set_proposal_method('selective_search')
        self.competition_mode(False)

        # Some image sets are "views" (i.e. subsets) into others.
        # For example, minival2014 is a random 5000 image subset of val2014.
        # This mapping tells us where the view's images and proposals come from.
        self._view_map = {
            'minival2014' : 'val2014',          # 5k val2014 subset
            'valminusminival2014' : 'val2014',  # val2014 \setminus minival2014
        }
        coco_name = image_set + year  # e.g., "val2014"
        self._data_name = (self._view_map[coco_name]
                           if self._view_map.has_key(coco_name)
                           else coco_name)
        # Dataset splits that have ground-truth annotations (test splits
        # do not have gt annotations)
        self._gt_splits = ('train', 'val', 'minival')

    def _get_ann_file(self):
        prefix = 'instances' if self._image_set.find('test') == -1 \
                             else 'image_info'
        return osp.join(self._data_path, 'annotations',
                        prefix + '_' + self._image_set + self._year + '.json')

    def _load_image_set_index(self):
        """
        Load image ids.
        """
        image_ids = self._COCO.getImgIds()
        return image_ids

    def _get_widths(self):
        anns = self._COCO.loadImgs(self._image_index)
        widths = [ann['width'] for ann in anns]
        return widths

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        # Example image path for index=119993:
        #   images/train2014/COCO_train2014_000000119993.jpg
        file_name = ('COCO_' + self._data_name + '_' +
                     str(index).zfill(12) + '.jpg')
        image_path = osp.join(self._data_path, 'images',
                              self._data_name, file_name)
        assert osp.exists(image_path), \
                'Path does not exist: {}'.format(image_path)
        return image_path

    def selective_search_roidb(self):
        return self._roidb_from_proposals('selective_search')

    def edge_boxes_roidb(self):
        return self._roidb_from_proposals('edge_boxes_AR')

    def mcg_roidb(self):
        return self._roidb_from_proposals('MCG')

    def _roidb_from_proposals(self, method):
        """
        Creates a roidb from pre-computed proposals of a particular methods.
        """
        top_k = self.config['top_k']
        cache_file = osp.join(self.cache_path, self.name +
                              '_{:s}_top{:d}'.format(method, top_k) +
                              '_roidb.pkl')

        if osp.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print '{:s} {:s} roidb loaded from {:s}'.format(self.name, method,
                                                            cache_file)
            return roidb

        if self._image_set in self._gt_splits:
            gt_roidb = self.gt_roidb()
            method_roidb = self._load_proposals(method, gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, method_roidb)
            # Make sure we don't use proposals that are contained in crowds
            roidb = _filter_crowd_proposals(roidb, self.config['crowd_thresh'])
        else:
            roidb = self._load_proposals(method, None)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote {:s} roidb to {:s}'.format(method, cache_file)
        return roidb

    def _load_proposals(self, method, gt_roidb):
        """
        Load pre-computed proposals in the format provided by Jan Hosang:
        http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
          computing/research/object-recognition-and-scene-understanding/how-
          good-are-detection-proposals-really/
        For MCG, use boxes from http://www.eecs.berkeley.edu/Research/Projects/
          CS/vision/grouping/mcg/ and convert the file layout using
        lib/datasets/tools/mcg_munge.py.
        """
        box_list = []
        top_k = self.config['top_k']
        valid_methods = [
            'MCG',
            'selective_search',
            'edge_boxes_AR',
            'edge_boxes_70']
        assert method in valid_methods

        print 'Loading {} boxes'.format(method)
        for i, index in enumerate(self._image_index):
            if i % 1000 == 0:
                print '{:d} / {:d}'.format(i + 1, len(self._image_index))

            box_file = osp.join(
                cfg.DATA_DIR, 'coco_proposals', method, 'mat',
                self._get_box_file(index))

            raw_data = sio.loadmat(box_file)['boxes']
            boxes = np.maximum(raw_data - 1, 0).astype(np.uint16)
            if method == 'MCG':
                # Boxes from the MCG website are in (y1, x1, y2, x2) order
                boxes = boxes[:, (1, 0, 3, 2)]
            # Remove duplicate boxes and very small boxes and then take top k
            keep = ds_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
            boxes = boxes[keep, :]
            boxes = boxes[:top_k, :]
            box_list.append(boxes)
            # Sanity check
            im_ann = self._COCO.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            ds_utils.validate_boxes(boxes, width=width, height=height)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.
        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if osp.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print '{} gt roidb loaded from {}'.format(self.name, cache_file)
            return roidb

        gt_roidb = [self._load_coco_annotation(index)
                    for index in self._image_index]

        with open(cache_file, 'wb') as fid:
            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print 'wrote gt roidb to {}'.format(cache_file)
        return gt_roidb

    def _load_coco_annotation(self, index):
        """
        Loads COCO bounding-box instance annotations. Crowd instances are
        handled by marking their overlaps (with all categories) to -1. This
        overlap value means that crowd "instances" are excluded from training.
        """
        im_ann = self._COCO.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
        objs = self._COCO.loadAnns(annIds)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
        objs = valid_objs
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Lookup table to map from COCO category ids to our internal class
        # indices
        coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[cls],
                                          self._class_to_ind[cls])
                                         for cls in self._classes[1:]])

        for ix, obj in enumerate(objs):
            cls = coco_cat_id_to_class_ind[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                overlaps[ix, :] = -1.0
            else:
                overlaps[ix, cls] = 1.0

        ds_utils.validate_boxes(boxes, width=width, height=height)
        overlaps = scipy.sparse.csr_matrix(overlaps)
        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : seg_areas}

    def _get_box_file(self, index):
        # first 14 chars / first 22 chars / all chars + .mat
        # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
        file_name = ('COCO_' + self._data_name +
                     '_' + str(index).zfill(12) + '.mat')
        return osp.join(file_name[:14], file_name[:22], file_name)

    def _print_detection_eval_metrics(self, coco_eval):
        IoU_lo_thresh = 0.5
        IoU_hi_thresh = 0.95
        def _get_thr_ind(coco_eval, thr):
            ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                           (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
            iou_thr = coco_eval.params.iouThrs[ind]
            assert np.isclose(iou_thr, thr)
            return ind

        ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
        ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
        # precision has dims (iou, recall, cls, area range, max dets)
        # area range index 0: all area ranges
        # max dets index 2: 100 per image
        precision = \
            coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
        ap_default = np.mean(precision[precision > -1])
        print ('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
               '~~~~').format(IoU_lo_thresh, IoU_hi_thresh)
        print '{:.1f}'.format(100 * ap_default)
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            # minus 1 because of __background__
            precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
            ap = np.mean(precision[precision > -1])
            print '{:.1f}'.format(100 * ap)

        print '~~~~ Summary metrics ~~~~'
        coco_eval.summarize()

    def _do_detection_eval(self, res_file, output_dir):
        ann_type = 'bbox'
        coco_dt = self._COCO.loadRes(res_file)
        coco_eval = COCOeval(self._COCO, coco_dt)
        coco_eval.params.useSegm = (ann_type == 'segm')
        coco_eval.evaluate()
        coco_eval.accumulate()
        self._print_detection_eval_metrics(coco_eval)
        eval_file = osp.join(output_dir, 'detection_results.pkl')
        with open(eval_file, 'wb') as fid:
            cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL)
        print 'Wrote COCO eval results to: {}'.format(eval_file)

    def _coco_results_one_category(self, boxes, cat_id):
        results = []
        for im_ind, index in enumerate(self.image_index):
            dets = boxes[im_ind].astype(np.float)
            if dets == []:
                continue
            scores = dets[:, -1]
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            results.extend(
              [{'image_id' : index,
                'category_id' : cat_id,
                'bbox' : [xs[k], ys[k], ws[k], hs[k]],
                'score' : scores[k]} for k in xrange(dets.shape[0])])
        return results

    def _write_coco_results_file(self, all_boxes, res_file):
        # [{"image_id": 42,
        #   "category_id": 18,
        #   "bbox": [258.15,41.29,348.26,243.78],
        #   "score": 0.236}, ...]
        results = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print 'Collecting {} results ({:d}/{:d})'.format(cls, cls_ind,
                                                          self.num_classes - 1)
            coco_cat_id = self._class_to_coco_cat_id[cls]
            results.extend(self._coco_results_one_category(all_boxes[cls_ind],
                                                           coco_cat_id))
        print 'Writing results json to {}'.format(res_file)
        with open(res_file, 'w') as fid:
            json.dump(results, fid)

    def evaluate_detections(self, all_boxes, output_dir):
        res_file = osp.join(output_dir, ('detections_' +
                                         self._image_set +
                                         self._year +
                                         '_results'))
        if self.config['use_salt']:
            res_file += '_{}'.format(str(uuid.uuid4()))
        res_file += '.json'
        self._write_coco_results_file(all_boxes, res_file)
        # Only do evaluation on non-test sets
        if self._image_set.find('test') == -1:
            self._do_detection_eval(res_file, output_dir)
        # Optionally cleanup results json file
        if self.config['cleanup']:
            os.remove(res_file)

    def competition_mode(self, on):
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True


================================================
FILE: lib/datasets/ds_utils.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np

def unique_boxes(boxes, scale=1.0):
    """
    Return the row indices of the first occurrence of each distinct box,
    in ascending order.

    Boxes are multiplied by scale and rounded, then each row is collapsed
    into a single number by weighting its four columns with
    1, 1e3, 1e6 and 1e9; rows with equal numbers count as duplicates.
    """
    weights = np.array([1, 1e3, 1e6, 1e9])
    row_keys = np.dot(np.round(boxes * scale), weights)
    first_seen = np.unique(row_keys, return_index=True)[1]
    # np.unique orders by key value; callers want original row order.
    return np.sort(first_seen)

def xywh_to_xyxy(boxes):
    """Convert [x y w h] box format to [x1 y1 x2 y2] format."""
    top_left = boxes[:, 0:2]
    # Inclusive pixel coordinates: the far corner is origin + size - 1.
    bottom_right = top_left + boxes[:, 2:4] - 1
    return np.hstack((top_left, bottom_right))

def xyxy_to_xywh(boxes):
    """Convert [x1 y1 x2 y2] box format to [x y w h] format."""
    top_left = boxes[:, 0:2]
    # Inclusive pixel coordinates: size is far corner - origin + 1.
    size = boxes[:, 2:4] - top_left + 1
    return np.hstack((top_left, size))

def validate_boxes(boxes, width=0, height=0):
    """
    Assert that every [x1 y1 x2 y2] row of boxes is well formed.

    Raises AssertionError unless all boxes have non-negative x1/y1,
    x2 >= x1, y2 >= y1, x2 < width and y2 < height.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]
    # Origin must not be negative.
    assert np.all(left >= 0)
    assert np.all(top >= 0)
    # Corners must be ordered.
    assert np.all(right >= left)
    assert np.all(bottom >= top)
    # Far corner must lie strictly inside the image extent.
    assert np.all(right < width)
    assert np.all(bottom < height)

def filter_small_boxes(boxes, min_size):
    """
    Return the indices of boxes that are at least min_size in both
    dimensions.

    boxes    -- array of [x1 y1 x2 y2] rows
    min_size -- minimum allowed value of x2 - x1 and of y2 - y1

    Width and height are measured as plain coordinate differences (no +1).
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Both dimensions use the same inclusive bound; the height test used a
    # strict '>' and so dropped boxes exactly min_size tall.
    keep = np.where((w >= min_size) & (h >= min_size))[0]
    return keep


================================================
FILE: lib/datasets/factory.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Factory method for easily getting imdbs by name."""

__sets = {}

from datasets.pascal_voc import pascal_voc
from datasets.coco import coco
import numpy as np

# Each registry entry is a zero-argument factory. split and year are bound
# as lambda defaults so every entry keeps the values of its own iteration
# rather than the loop variables' final values.

# PASCAL VOC: voc_<year>_<split>, using selective search "fast" mode
for year in ('2007', '2012', '0712'):
    for split in ('train', 'val', 'trainval', 'test'):
        name = 'voc_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

# MS COCO: coco_2014_<split> first, then coco_2015_<split>
for year, splits in (('2014', ('train', 'val', 'minival', 'valminusminival')),
                     ('2015', ('test', 'test-dev'))):
    for split in splits:
        name = 'coco_{}_{}'.format(year, split)
        __sets[name] = (lambda split=split, year=year: coco(split, year))

def get_imdb(name):
    """
    Get an imdb (image database) by name.

    name -- registry key such as 'voc_2007_trainval'

    Returns the object built by the factory registered under name.
    Raises KeyError when no such dataset is registered.
    """
    # The 'in' operator replaces the deprecated dict.has_key().
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()

def list_imdbs():
    """Return the names of all registered imdbs as a list."""
    registered = list(__sets)
    return registered


================================================
FILE: lib/datasets/imdb.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
import os.path as osp
import PIL
from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from fast_rcnn.config import cfg

class imdb(object):
    """
    Image database.

    Base class holding the class list, the image index and a lazily built
    roidb. Subclasses are expected to fill in _classes and _image_index and
    to implement image_path_at, a roidb handler and evaluate_detections.
    """

    def __init__(self, name):
        """Create an empty database called name."""
        self._name = name
        self._num_classes = 0
        self._classes = []
        self._image_index = []
        self._obj_proposer = 'selective_search'
        self._roidb = None
        self._roidb_handler = self.default_roidb
        # Use this dict for storing dataset specific config options
        self.config = {}

    @property
    def name(self):
        return self._name

    @property
    def num_classes(self):
        return len(self._classes)

    @property
    def classes(self):
        return self._classes

    @property
    def image_index(self):
        return self._image_index

    @property
    def roidb_handler(self):
        return self._roidb_handler

    @roidb_handler.setter
    def roidb_handler(self, val):
        self._roidb_handler = val

    def set_proposal_method(self, method):
        """
        Use the method named '<method>_roidb' on this object as the roidb
        handler. Raises AttributeError when no such method exists.
        """
        # getattr does the same attribute lookup as the former
        # eval('self.<method>_roidb') without evaluating text as code.
        self.roidb_handler = getattr(self, method + '_roidb')

    @property
    def roidb(self):
        # A roidb is a list of dictionaries, each with the following keys:
        #   boxes
        #   gt_overlaps
        #   gt_classes
        #   flipped
        # It is built by the roidb handler on first access and then reused.
        if self._roidb is not None:
            return self._roidb
        self._roidb = self.roidb_handler()
        return self._roidb

    @property
    def cache_path(self):
        """Absolute path of the cache directory; created when missing."""
        cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def num_images(self):
        return len(self.image_index)

    def image_path_at(self, i):
        raise NotImplementedError

    def default_roidb(self):
        raise NotImplementedError

    def evaluate_detections(self, all_boxes, output_dir=None):
        """
        all_boxes is a list of length number-of-classes.
        Each list element is a list of length number-of-images.
        Each of those list elements is either an empty list []
        or a numpy array of detection.

        all_boxes[class][image] = [] or np.array of shape #dets x 5
        """
        raise NotImplementedError

    def _get_widths(self):
        """Return the width of every image, read from the image files."""
        # A bare 'import PIL' does not load the Image submodule, so import
        # it explicitly where it is used.
        import PIL.Image
        return [PIL.Image.open(self.image_path_at(i)).size[0]
                for i in range(self.num_images)]

    def append_flipped_images(self):
        """
        Append a horizontally flipped copy of every roidb entry and double
        the image index accordingly.

        Only the x coordinates of 'boxes' change; 'gt_overlaps',
        'gt_classes' and, when present, 'seg_areas' are shared with the
        original entry.
        """
        num_images = self.num_images
        widths = self._get_widths()
        for i in range(num_images):
            source = self.roidb[i]
            boxes = source['boxes'].copy()
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            # Mirror around the vertical centre line: the old right edge
            # becomes the new left edge and vice versa.
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            entry = {'boxes' : boxes,
                     'gt_overlaps' : source['gt_overlaps'],
                     'gt_classes' : source['gt_classes'],
                     'flipped' : True}
            # evaluate_recall reads 'seg_areas' from every entry; flipping
            # does not change areas, so carry them over when available.
            if 'seg_areas' in source:
                entry['seg_areas'] = source['seg_areas']
            self.roidb.append(entry)
        self._image_index = self._image_index * 2

    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional per-image list of box arrays; when
                None, the non-ground-truth boxes of the roidb are used
            thresholds: IoU thresholds; defaults to 0.5 to 0.95 in steps
                of 0.05
            area: name of the ground-truth area range to evaluate
            limit: optional maximum number of boxes used per image

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                  '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [ [0**2, 1e5**2],    # all
                        [0**2, 32**2],     # small
                        [32**2, 96**2],    # medium
                        [96**2, 1e5**2],   # large
                        [96**2, 128**2],   # 96-128
                        [128**2, 256**2],  # 128-256
                        [256**2, 512**2],  # 256-512
                        [512**2, 1e5**2],  # 512-inf
                      ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            overlaps = bbox_overlaps(boxes.astype(float),
                                     gt_boxes.astype(float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Each round pairs one proposal with one gt box, so at most
            # min(#proposals, #gt) rounds are possible. Going further would
            # find only used (-1) cells and trip the assertion below; gt
            # boxes left unmatched keep an overlap of 0.
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """
        Build a roidb from one array of proposal boxes per image.

        box_list -- list with one box array per image
        gt_roidb -- ground-truth roidb used to fill in overlaps, or None

        Every returned entry has 'gt_classes' and 'seg_areas' set to zeros.
        When ground truth is available, each box's overlap row holds its
        maximum IoU in the column of the class of the best matching gt box.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(float),
                                            gt_boxes.astype(float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Only boxes that overlap some gt box get an entry.
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

    @staticmethod
    def merge_roidbs(a, b):
        """
        Append the rows of every entry of b to the matching entry of a.

        a is modified in place and returned; both lists must have the same
        length.
        """
        assert len(a) == len(b)
        for i in range(len(a)):
            a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
            a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                            b[i]['gt_classes']))
            a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                       b[i]['gt_overlaps']])
            a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                           b[i]['seg_areas']))
        return a

    def competition_mode(self, on):
        """Turn competition mode on or off."""
        pass


================================================
FILE: lib/datasets/pascal_voc.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
import utils.cython_bbox
import cPickle
import subprocess
import uuid
from voc_eval import voc_eval
from fast_rcnn.config import cfg

class pascal_voc(imdb):
    """imdb wrapper for the PASCAL VOC object detection datasets.

    An instance is identified by an image set name (e.g. 'trainval', 'test')
    and a year string (e.g. '2007'). It locates images, annotations and
    proposal files under the VOCdevkit directory, builds roidbs from them and
    writes / evaluates detection results in the VOC text format.
    """

    def __init__(self, image_set, year, devkit_path=None):
        """Set up paths, the class list and the PASCAL specific options.

        image_set: name of the image set file (without '.txt') under
            ImageSets/Main.
        year: dataset year as a string, e.g. '2007'.
        devkit_path: VOCdevkit directory; defaults to
            <cfg.DATA_DIR>/VOCdevkit<year>.

        Asserts that both the devkit and the VOC<year> data directory exist.
        """
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set
        self._devkit_path = self._get_default_path() if devkit_path is None \
                            else devkit_path
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
        self._classes = ('__background__', # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
        self._image_ext = '.jpg'
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self._roidb_handler = self.selective_search_roidb
        # Random salt appended to the competition id so that result files of
        # concurrent runs do not collide (see _get_comp_id).
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {'cleanup'     : True,
                       'use_salt'    : True,
                       'use_diff'    : False,
                       'matlab_eval' : False,
                       'rpn_file'    : None,
                       'min_size'    : 2}

        assert os.path.exists(self._devkit_path), \
                'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
                'Path does not exist: {}'.format(self._data_path)

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.

        Asserts that the resulting file exists.
        """
        image_path = os.path.join(self._data_path, 'JPEGImages',
                                  index + self._image_ext)
        assert os.path.exists(image_path), \
                'Path does not exist: {}'.format(image_path)
        return image_path

    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.

        Returns a list with one stripped line of the file per entry.
        """
        # Example path to image set file:
        # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
        image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                      self._image_set + '.txt')
        assert os.path.exists(image_set_file), \
                'Path does not exist: {}'.format(image_set_file)
        with open(image_set_file) as f:
            image_index = [x.strip() for x in f.readlines()]
        return image_index

    def _get_default_path(self):
        """
        Return the default path where PASCAL VOC is expected to be installed.
        """
        return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year)

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = [self._load_pascal_annotation(index)
                    for index in self.image_index]
        with open(cache_file, 'wb') as fid:
            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))

        return gt_roidb

    def selective_search_roidb(self):
        """
        Return the database of selective search regions of interest.
        Ground-truth ROIs are also included.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_selective_search_roidb.pkl')

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        # Ground truth is merged in for every set except non-2007 'test'
        # sets (presumably because their annotations are not available).
        if int(self._year) == 2007 or self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            ss_roidb = self._load_selective_search_roidb(gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)
        else:
            roidb = self._load_selective_search_roidb(None)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))

        return roidb

    def rpn_roidb(self):
        """
        Return the roidb built from the RPN proposals in config['rpn_file'].

        Ground-truth ROIs are merged in under the same condition as in
        selective_search_roidb. Nothing is cached by this method.
        """
        if int(self._year) == 2007 or self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            rpn_roidb = self._load_rpn_roidb(gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb)
        else:
            roidb = self._load_rpn_roidb(None)

        return roidb

    def _load_rpn_roidb(self, gt_roidb):
        """
        Unpickle the box list stored at config['rpn_file'] and turn it into a
        roidb via create_roidb_from_box_list.
        """
        filename = self.config['rpn_file']
        print('loading {}'.format(filename))
        assert os.path.exists(filename), \
               'rpn data not found at: {}'.format(filename)
        with open(filename, 'rb') as f:
            box_list = cPickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def _load_selective_search_roidb(self, gt_roidb):
        """
        Load <cfg.DATA_DIR>/selective_search_data/<name>.mat and turn its
        per-image boxes into a roidb via create_roidb_from_box_list.

        Duplicate boxes and boxes smaller than config['min_size'] are dropped
        using the ds_utils helpers.
        """
        filename = os.path.abspath(os.path.join(cfg.DATA_DIR,
                                                'selective_search_data',
                                                self.name + '.mat'))
        assert os.path.exists(filename), \
               'Selective search data not found at: {}'.format(filename)
        raw_data = sio.loadmat(filename)['boxes'].ravel()

        box_list = []
        for i in xrange(raw_data.shape[0]):
            # Swap column pairs and subtract 1 (presumably converting 1-based
            # (y1, x1, y2, x2) MATLAB boxes to 0-based (x1, y1, x2, y2)).
            boxes = raw_data[i][:, (1, 0, 3, 2)] - 1
            keep = ds_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
            boxes = boxes[keep, :]
            box_list.append(boxes)

        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.

        Returns a roidb entry dict with 'boxes' (uint16, 0-based),
        'gt_classes', 'gt_overlaps' (sparse, 1.0 at the ground-truth class),
        'flipped' (always False) and 'seg_areas' (the box areas). Objects
        marked difficult are skipped unless config['use_diff'] is set.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        tree = ET.parse(filename)
        objs = tree.findall('object')
        if not self.config['use_diff']:
            # Exclude the samples labeled as difficult
            objs = [
                obj for obj in objs if int(obj.find('difficult').text) == 0]
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based. Clamp at 0: `boxes` is unsigned, so
            # an annotation whose coordinate is already 0 would otherwise
            # become -1 and wrap around to 65535 when stored.
            x1, y1, x2, y2 = [
                max(float(bbox.find(tag).text) - 1, 0)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : seg_areas}

    def _get_comp_id(self):
        """Return the competition id, salted when config['use_salt'] is set."""
        comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']
            else self._comp_id)
        return comp_id

    def _get_voc_results_file_template(self):
        """
        Return the results file path with a '{:s}' placeholder for the class
        name.
        """
        # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
        filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
        path = os.path.join(
            self._devkit_path,
            'results',
            'VOC' + self._year,
            'Main',
            filename)
        return path

    def _write_voc_results_file(self, all_boxes):
        """
        Write one detection results file per non-background class.

        all_boxes[cls_ind][im_ind] is either an empty list or an array whose
        rows are (x1, y1, x2, y2, ..., score) with 0-based coordinates; the
        score is read from the last column.
        """
        # The devkit does not necessarily ship with the results directory.
        results_dir = os.path.dirname(self._get_voc_results_file_template())
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)

        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print('Writing {} VOC results file'.format(cls))
            filename = self._get_voc_results_file_template().format(cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_index):
                    dets = all_boxes[cls_ind][im_ind]
                    # len() works for both the empty-list placeholder and an
                    # ndarray; `dets == []` is an elementwise comparison on
                    # arrays and is not a reliable emptiness test.
                    if len(dets) == 0:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in xrange(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],
                                       dets[k, 0] + 1, dets[k, 1] + 1,
                                       dets[k, 2] + 1, dets[k, 3] + 1))

    def _do_python_eval(self, output_dir = 'output'):
        """
        Evaluate the written results files with the Python voc_eval code.

        Prints the per-class AP and the mean AP, and stores one
        '<class>_pr.pkl' pickle with 'rec', 'prec' and 'ap' per class in
        output_dir (created if missing).
        """
        annopath = os.path.join(
            self._devkit_path,
            'VOC' + self._year,
            'Annotations',
            '{:s}.xml')
        imagesetfile = os.path.join(
            self._devkit_path,
            'VOC' + self._year,
            'ImageSets',
            'Main',
            self._image_set + '.txt')
        cachedir = os.path.join(self._devkit_path, 'annotations_cache')
        aps = []
        # The PASCAL VOC metric changed in 2010
        use_07_metric = int(self._year) < 2010
        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
        if not os.path.isdir(output_dir):
            # makedirs so that a nested output directory also works
            os.makedirs(output_dir)
        for i, cls in enumerate(self._classes):
            if cls == '__background__':
                continue
            filename = self._get_voc_results_file_template().format(cls)
            rec, prec, ap = voc_eval(
                filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
                use_07_metric=use_07_metric)
            aps += [ap]
            print('AP for {} = {:.4f}'.format(cls, ap))
            # pickles must be written in binary mode to be portable
            with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
                cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
        print('Mean AP = {:.4f}'.format(np.mean(aps)))
        print('~~~~~~~~')
        print('Results:')
        for ap in aps:
            print('{:.3f}'.format(ap))
        print('{:.3f}'.format(np.mean(aps)))
        print('~~~~~~~~')
        print('')
        print('--------------------------------------------------------------')
        print('Results computed with the **unofficial** Python eval code.')
        print('Results should be very close to the official MATLAB eval code.')
        print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
        print('-- Thanks, The Management')
        print('--------------------------------------------------------------')

    def _do_matlab_eval(self, output_dir='output'):
        """
        Run the MATLAB voc_eval wrapper in a subprocess.

        The command is executed through the shell from the
        VOCdevkit-matlab-wrapper directory using the executable named by
        cfg.MATLAB. A non-zero exit status is reported but not raised.
        """
        print('-----------------------------------------------------')
        print('Computing results with the official MATLAB eval code.')
        print('-----------------------------------------------------')
        path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets',
                            'VOCdevkit-matlab-wrapper')
        # NOTE: the paths below are interpolated into a shell command line
        # unescaped; they come from local configuration, not untrusted input.
        cmd = 'cd {} && '.format(path)
        cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
        cmd += '-r "dbstop if error; '
        cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
               .format(self._devkit_path, self._get_comp_id(),
                       self._image_set, output_dir)
        print('Running:\n{}'.format(cmd))
        status = subprocess.call(cmd, shell=True)
        if status != 0:
            print('MATLAB evaluation exited with status {}'.format(status))

    def evaluate_detections(self, all_boxes, output_dir):
        """
        Write the VOC results files, evaluate them and optionally clean up.

        The Python evaluation always runs; the MATLAB evaluation runs only
        when config['matlab_eval'] is set. When config['cleanup'] is set the
        per-class results files are removed afterwards.
        """
        self._write_voc_results_file(all_boxes)
        self._do_python_eval(output_dir)
        if self.config['matlab_eval']:
            self._do_matlab_eval(output_dir)
        if self.config['cleanup']:
            for cls in self._classes:
                if cls == '__background__':
                    continue
                filename = self._get_voc_results_file_template().format(cls)
                os.remove(filename)

    def competition_mode(self, on):
        """
        Turn competition mode on or off.

        In competition mode the results files are neither salted nor removed
        after evaluation.
        """
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True

if __name__ == '__main__':
    # Ad-hoc manual check: build the VOC 2007 'trainval' imdb, read its roidb
    # attribute and then drop into an interactive IPython shell, where `d`
    # and `res` can be inspected.
    # The class is re-imported through the `datasets` package rather than
    # used from __main__ (presumably so that pickled roidbs reference the
    # package path -- confirm before changing).
    from datasets.pascal_voc import pascal_voc
    d = pascal_voc('trainval', '2007')
    res = d.roidb
    from IPython import embed; embed()


================================================
FILE: lib/datasets/tools/mcg_munge.py
================================================
import os
import sys

"""Hacky tool to convert file system layout of MCG boxes downloaded from
http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
so that it's consistent with those computed by Jan Hosang (see:
http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
  computing/research/object-recognition-and-scene-understanding/how-
  good-are-detection-proposals-really/)

NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
"""

def munge(src_dir):
    """Move every file of src_dir into a nested MCG/mat/ directory tree.

    A file is moved from
        <src_dir>/COCO_val2014_000000193401.mat
    to
        ./MCG/mat/COCO_val2014_0/COCO_val2014_000000193/COCO_val2014_000000193401.mat
    i.e. the two directory levels are the first 14 and the first 22
    characters of the file name without its extension. The destination is
    relative to the current working directory; missing directories are
    created and each move is logged to stdout.
    """
    for fn in os.listdir(src_dir):
        stem = os.path.splitext(fn)[0]
        # first 14 chars / first 22 chars / original file name
        dst_dir = os.path.join('MCG', 'mat', stem[:14], stem[:22])
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)

if __name__ == '__main__':
    # Usage: mcg_munge.py <src_dir>
    # where src_dir looks something like 'MCG-COCO-val2014-boxes'
    munge(sys.argv[1])


================================================
FILE: lib/datasets/voc_eval.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

import xml.etree.ElementTree as ET
import os
import cPickle
import numpy as np

def parse_rec(filename):
    """Parse a PASCAL VOC xml file.

    filename: path of the annotation file.

    Returns a list with one dict per <object> element, holding 'name' and
    'pose' (strings), 'truncated' and 'difficult' (ints) and 'bbox'
    ([xmin, ymin, xmax, ymax] as ints, exactly as stored, i.e. not shifted
    to 0-based).

    Coordinates written as floating point text (e.g. '45.7') are accepted
    and truncated towards zero instead of raising ValueError; the dataset
    loader reads the same fields with float().
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            # go through float() first so that '45.0' style values parse
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })

    return objects

def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall arrays.

    With use_07_metric the VOC07 11-point method is used: the maximum
    precision at recall >= t is averaged over t = 0, 0.1, ..., 1.0.
    Otherwise (the default) AP is the area under the precision/recall curve
    after precision has been made monotonically non-increasing.
    """
    if not use_07_metric:
        # pad both curves with sentinel values
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # precision envelope: running maximum taken from the right
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]

        # positions where recall changes value
        steps = np.where(mrec[1:] != mrec[:-1])[0]

        # sum (\Delta recall) * prec over those positions
        return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])

    # 11 point metric
    ap = 0.
    for t in np.arange(0., 1.1, 0.1):
        reached = rec >= t
        p = np.max(prec[reached]) if np.sum(reached) != 0 else 0
        ap = ap + p / 11.
    return ap

def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                cachedir,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
        Each line is "<image id> <score> <xmin> <ymin> <xmax> <ymax>",
        separated by single spaces.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations (created if missing);
        the parsed annotations are pickled to <cachedir>/annots.pkl.
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)

    Returns the cumulative recall and precision arrays (one entry per
    detection, in order of decreasing score) and the resulting AP. An empty
    detections file yields empty arrays.
    """
    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        imagenames = [x.strip() for x in f.readlines()]

    recs = None
    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)
        # The cache file name does not depend on the image set, so it may
        # have been written for a different one; rebuild it in that case
        # instead of failing with a KeyError further down.
        if any(imagename not in recs for imagename in imagenames):
            recs = None

    if recs is None:
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save (pickles are binary data)
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # plain `bool`: the `np.bool` alias no longer exists in recent NumPy
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # difficult objects do not count as positives
        npos = npos + int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    # skip blank lines so that a trailing newline cannot break the parsing
    splitlines = [x.strip().split(' ') for x in lines if x.strip()]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    # reshape keeps BB two-dimensional even when there are no detections
    BB = np.array([[float(z) for z in x[2:]]
                   for x in splitlines]).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match with a difficult object is neither a TP nor a FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # this ground truth was already claimed by a
                    # higher-scoring detection
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap


================================================
FILE: lib/fast_rcnn/__init__.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------


================================================
FILE: lib/fast_rcnn/bbox_transform.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    """Compute the regression targets that map ex_rois onto gt_rois.

    Both arguments are arrays whose first four columns are
    (x1, y1, x2, y2); row i of ex_rois is paired with row i of gt_rois.
    Box widths and heights are measured as x2 - x1 + 1 and y2 - y1 + 1.

    Returns an array with one row (dx, dy, dw, dh) per box pair: the centre
    offsets divided by the ex box size, and the log of the size ratios.
    """
    def _size_and_center(rois):
        # width, height and centre coordinates of every box
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    ex_w, ex_h, ex_cx, ex_cy = _size_and_center(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_center(gt_rois)

    per_coordinate = (
        (gt_cx - ex_cx) / ex_w,
        (gt_cy - ex_cy) / ex_h,
        np.log(gt_w / ex_w),
        np.log(gt_h / ex_h),
    )
    # stack as rows, then flip to one row per box
    return np.vstack(per_coordinate).transpose()

def bbox_transform_inv(boxes, deltas):
    """Apply regression deltas to boxes and return the predicted boxes.

    boxes: array whose first four columns are (x1, y1, x2, y2).
    deltas: array with the same number of rows and a multiple of four
        columns, laid out as repeated (dx, dy, dw, dh) groups; every group
        is applied to the box of the same row.

    Returns an array of deltas' shape and dtype holding (x1, y1, x2, y2) for
    every group. With zero boxes an empty (0, deltas.shape[1]) array is
    returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # size and centre of the input boxes (widths measured as x2 - x1 + 1)
    w = boxes[:, 2] - boxes[:, 0] + 1.0
    h = boxes[:, 3] - boxes[:, 1] + 1.0
    cx = boxes[:, 0] + 0.5 * w
    cy = boxes[:, 1] + 0.5 * h

    # column vectors so they broadcast over all delta groups of a row
    w_col = w[:, np.newaxis]
    h_col = h[:, np.newaxis]

    new_cx = deltas[:, 0::4] * w_col + cx[:, np.newaxis]
    new_cy = deltas[:, 1::4] * h_col + cy[:, np.newaxis]
    half_w = 0.5 * (np.exp(deltas[:, 2::4]) * w_col)
    half_h = 0.5 * (np.exp(deltas[:, 3::4]) * h_col)

    out = np.zeros(deltas.shape, dtype=deltas.dtype)
    out[:, 0::4] = new_cx - half_w  # x1
    out[:, 1::4] = new_cy - half_h  # y1
    out[:, 2::4] = new_cx + half_w  # x2
    out[:, 3::4] = new_cy + half_h  # y2

    return out

def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    boxes holds repeated (x1, y1, x2, y2) column groups; im_shape is indexed
    as (height, width, ...). x coordinates are limited to [0, width - 1] and
    y coordinates to [0, height - 1]. The array is modified in place and
    also returned.
    """
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # column offsets 0..3 within each group are x1, y1, x2, y2
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        coords = boxes[:, offset::4]
        boxes[:, offset::4] = np.maximum(np.minimum(coords, upper), 0)
    return boxes


================================================
FILE: lib/fast_rcnn/config.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Fast R-CNN config system.

This file specifies default config options for Fast R-CNN. You should not
change values in this file. Instead, you should write a config file (in yaml)
and use cfg_from_file(yaml_file) to load it and override the default options.

Most tools in $ROOT/tools take a --cfg option to specify an override file.
    - See tools/{train,test}_net.py for example code that uses cfg_from_file()
    - See experiments/cfgs/*.yml for example YAML config override files
"""

import os
import os.path as osp
import numpy as np
# `pip install easydict` if you don't have it
from easydict import EasyDict as edict

# Module-level default configuration. Every option below is stored on the
# single EasyDict `__C`, which is also exported under the public name `cfg`.
__C = edict()
# Consumers can get config by:
#   from fast_rcnn.config import cfg
cfg = __C

#
# Training options
#

__C.TRAIN = edict()

# Scales to use during training (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TRAIN.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
__C.TRAIN.MAX_SIZE = 1000

# Images to use per minibatch
__C.TRAIN.IMS_PER_BATCH = 2

# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.1

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# Iterations between snapshots
__C.TRAIN.SNAPSHOT_ITERS = 10000

# solver.prototxt specifies the snapshot path prefix, this adds an optional
# infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
__C.TRAIN.SNAPSHOT_INFIX = ''

# Use a prefetch thread in roi_data_layer.layer
# So far I haven't found this useful; likely more engineering work is required
__C.TRAIN.USE_PREFETCH = False

# Normalize the targets (subtract empirical mean, divide by empirical stddev)
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
# Deprecated (inside weights)
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Normalize the targets using "precomputed" (or made up) means and stdevs
# (BBOX_NORMALIZE_TARGETS must also be True)
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)

# Train using these proposals
__C.TRAIN.PROPOSAL_METHOD = 'selective_search'

# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide) in order to avoid wasting computation
# on zero-padding.
__C.TRAIN.ASPECT_GROUPING = True

# Use RPN to detect objects
__C.TRAIN.HAS_RPN = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and the negative condition, set it
# to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 16
# Deprecated (outside weights)
# NOTE(review): the comment says "outside" but the key is named
# RPN_BBOX_INSIDE_WEIGHTS -- confirm which one is meant.
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0

# Parameters for "Online Hard-example Mining Algorithm"
__C.TRAIN.USE_OHEM = False
# For diversity and de-duplication
__C.TRAIN.OHEM_USE_NMS = True
__C.TRAIN.OHEM_NMS_THRESH = 0.7

#
# Testing options
#

__C.TEST = edict()

# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
__C.TEST.MAX_SIZE = 1000

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Experimental: treat the (K+1) units in the cls_score layer as linear
# predictors (trained, eg, with one-vs-rest SVMs).
__C.TEST.SVM = False

# Test using bounding-box regressors
__C.TEST.BBOX_REG = True

# Propose boxes
__C.TEST.HAS_RPN = False

# Test using these proposals
__C.TEST.PROPOSAL_METHOD = 'selective_search'

## NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 16


#
# MISC
#

# The mapping from image coordinates to feature map coordinates might cause
# some boxes that are distinct in image space to become identical in feature
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
# for identifying duplicate boxes.
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
__C.DEDUP_BOXES = 1./16.

# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

# For reproducibility
__C.RNG_SEED = 3

# A small number that's used many times
__C.EPS = 1e-14

# Root directory of project (two levels above the directory of this file)
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))

# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))

# Model directory
__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))

# Name (or path to) the matlab executable
__C.MATLAB = 'matlab'

# Place outputs under an experiments directory
__C.EXP_DIR = 'default'

# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True

# Default GPU device id
__C.GPU_ID = 0


def get_output_dir(imdb, net=None):
    """Return the directory where experimental artifacts are placed.

    The directory is created if it does not exist yet.

    A canonical path is built as ROOT_DIR/output/EXP_DIR/<imdb.name>, with
    <net.name> appended when a network is given (i.e. net is not None).

    Arguments:
        imdb: object exposing a ``name`` attribute
        net: optional object exposing a ``name`` attribute

    Returns:
        outdir (str): path of the output directory, which exists on return

    Raises:
        OSError: if the directory cannot be created
    """
    import errno

    outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
    if net is not None:
        outdir = osp.join(outdir, net.name)
    # Create unconditionally and tolerate "already exists": a separate
    # exists() check followed by makedirs() races when several jobs start
    # at the same time and target the same output directory.
    try:
        os.makedirs(outdir)
    except OSError as e:
        if e.errno != errno.EEXIST or not osp.isdir(outdir):
            raise
    return outdir

def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.

    Arguments:
        a (edict): overriding options; anything that is not an edict is
            silently ignored
        b (edict): options to update in place

    Raises:
        KeyError: if a contains a key that is not present in b
        ValueError: if a value's type differs from the type of the existing
            value in b (unless the existing value is an ndarray, in which
            case the new value is converted to an array of the same dtype)
    """
    if type(a) is not edict:
        return

    # `items()` and the `in` operator work on both Python 2 and Python 3,
    # unlike the deprecated `iteritems()` / `has_key()`.
    for k, v in a.items():
        # a must specify keys that are in b
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))

        # the types must match, too
        old_type = type(b[k])
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            else:
                raise ValueError(('Type mismatch ({} vs. {}) '
                                'for config key: {}').format(type(b[k]),
                                                            type(v), k))

        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except Exception:
                # Report the offending parent key, then let the error travel on
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v

def cfg_from_file(filename):
    """Load a config file and merge it into the default options.

    Arguments:
        filename (str): path to a YAML file whose keys must already exist
            in the default config

    Raises:
        KeyError / ValueError: propagated from _merge_a_into_b for unknown
            keys or mismatching value types
    """
    import yaml
    with open(filename, 'r') as f:
        # safe_load only builds plain YAML types, so a config file cannot
        # trigger arbitrary object construction. It also keeps working on
        # PyYAML >= 6, where yaml.load() requires an explicit Loader.
        yaml_cfg = edict(yaml.safe_load(f))

    _merge_a_into_b(yaml_cfg, __C)

def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line).

    Arguments:
        cfg_list (list): flat list of alternating dotted keys and values,
            e.g. ['TEST.NMS', '0.4', 'EXP_DIR', 'foo']. Each value is parsed
            with ast.literal_eval; values that do not parse are kept as the
            raw string.

    Raises:
        AssertionError: if the list has odd length, a key does not exist in
            the config, or the parsed value's type differs from the type of
            the current value
    """
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0, 'expected alternating keys and values'
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        # Walk down to the dict that owns the final key
        for subkey in key_list[:-1]:
            assert subkey in d, '{} is not a valid config key'.format(k)
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d, '{} is not a valid config key'.format(k)
        try:
            value = literal_eval(v)
        except Exception:
            # handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(
            type(value), type(d[subkey]))
        d[subkey] = value


================================================
FILE: lib/fast_rcnn/nms_wrapper.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

from fast_rcnn.config import cfg
from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms

def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on the CPU or the GPU.

    An empty `dets` yields an empty list without calling either backend.
    The GPU backend is used only when cfg.USE_GPU_NMS is set and
    `force_cpu` is false; otherwise the CPU backend is used.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)


================================================
FILE: lib/fast_rcnn/test.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Test a Fast R-CNN network on an imdb (image database)."""

from fast_rcnn.config import cfg, get_output_dir
from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
import argparse
from utils.timer import Timer
import numpy as np
import cv2
import caffe
from fast_rcnn.nms_wrapper import nms
import cPickle
from utils.blob import im_list_to_blob
import os

def _get_image_blob(im):
    """Build the network input blob for one image.

    The image is converted to float32, cfg.PIXEL_MEANS is subtracted, and
    one resized copy is produced per entry of cfg.TEST.SCALES.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used
            in the image pyramid
    """
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    short_side = np.min(centered.shape[0:2])
    long_side = np.max(centered.shape[0:2])

    scales = []
    pyramid = []
    for target_size in cfg.TEST.SCALES:
        # Scale so the shorter side matches the target size...
        scale = float(target_size) / float(short_side)
        # ...unless that would push the longer side past MAX_SIZE
        if np.round(scale * long_side) > cfg.TEST.MAX_SIZE:
            scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(centered, None, None, fx=scale, fy=scale,
                             interpolation=cv2.INTER_LINEAR)
        scales.append(scale)
        pyramid.append(resized)

    # Pack all pyramid levels into a single blob
    return im_list_to_blob(pyramid), np.array(scales)

def _get_rois_blob(im_rois, im_scale_factors):
    """Build the RoI input blob for the network.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        im_scale_factors (ndarray): scale factors as returned by
            _get_image_blob

    Returns:
        blob (ndarray): R x 5 float32 matrix; column 0 is the pyramid level
            and columns 1-4 are the projected RoI coordinates
    """
    projected, pyramid_levels = _project_im_rois(im_rois, im_scale_factors)
    stacked = np.hstack((pyramid_levels, projected))
    return stacked.astype(np.float32, copy=False)

def _project_im_rois(im_rois, scales):
    """Project image RoIs into the image pyramid built by _get_image_blob.

    Arguments:
        im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates
        scales (list): scale factors as returned by _get_image_blob

    Returns:
        rois (ndarray): R x 4 matrix of projected RoI coordinates
        levels (list): image pyramid levels used by each projected RoI
    """
    im_rois = im_rois.astype(np.float, copy=False)

    if len(scales) > 1:
        widths = im_rois[:, 2] - im_rois[:, 0] + 1
        heights = im_rois[:, 3] - im_rois[:, 1] + 1

        areas = widths * heights
        scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2)
        diff_areas = np.abs(scaled_areas - 224 * 224)
        levels = diff_areas.argmin(axis=1)[:, np.newaxis]
    else:
        levels = np.zeros((im_rois.shape[0], 1), dtype=np.int)

    rois = im_rois * scales[levels]

    return rois, levels

def _get_blobs(im, rois):
    """Turn an image and its RoIs into the dict of network input blobs.

    The 'rois' entry stays None when cfg.TEST.HAS_RPN is set. Returns the
    blob dict together with the image scale factors.
    """
    data_blob, im_scale_factors = _get_image_blob(im)
    blobs = {'data' : data_blob, 'rois' : None}
    if not cfg.TEST.HAS_RPN:
        blobs['rois'] = _get_rois_blob(rois, im_scale_factors)
    return blobs, im_scale_factors

def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    With cfg.TEST.HAS_RPN set, `boxes` is ignored and the proposals are read
    back from the network's 'rois' blob after the forward pass.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Collapse each rounded 5-element RoI row into one scalar hash by
        # weighting the columns with widely separated powers of ten.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        # index picks one representative per hash; inv_index maps every
        # original RoI back to its representative (used at the end).
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        # im_info row: (blob height, blob width, image scale)
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        # Proposals produced inside the network; column 0 is skipped and
        # columns 1-4 hold the box coordinates.
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # Keep the predicted boxes inside the original image
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes

def vis_detections(im, class_name, dets, thresh=0.3):
    """Show, one figure at a time, the detections scoring above `thresh`.

    Only the first ten rows of `dets` are examined. The first four columns
    of a row are the box corners and the last column is the score.
    """
    import matplotlib.pyplot as plt
    # Reverse the channel order of the BGR input before plotting
    im = im[:, :, (2, 1, 0)]
    num_shown = np.minimum(10, dets.shape[0])
    for row in xrange(num_shown):
        score = dets[row, -1]
        if not score > thresh:
            continue
        bbox = dets[row, :4]
        box_w = bbox[2] - bbox[0]
        box_h = bbox[3] - bbox[1]
        plt.cla()
        plt.imshow(im)
        outline = plt.Rectangle((bbox[0], bbox[1]), box_w, box_h,
                                fill=False, edgecolor='g', linewidth=3)
        plt.gca().add_patch(outline)
        plt.title('{}  {:.3f}'.format(class_name, score))
        plt.show()

def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Arguments:
        all_boxes (list): all_boxes[cls][image] is either an empty list or
            an N x 5 array of detections
        thresh (float): NMS overlap threshold

    Returns:
        nms_boxes (list): same layout as all_boxes; entries without any
            surviving detection are empty lists
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # Use the length rather than `dets == []`: comparing an ndarray
            # with a list is an elementwise operation whose shape mismatch
            # is an error on recent NumPy versions.
            if len(dets) == 0:
                continue
            # CPU NMS is much faster than GPU NMS when the number of boxes
            # is relative small (e.g., < 10k)
            # TODO(rbg): autotune NMS dispatch
            keep = nms(dets, thresh, force_cpu=True)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes

def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database.

    Runs im_detect on every image, applies per-class score thresholding and
    NMS, optionally caps the number of detections per image, pickles all
    detections to 'detections.pkl' in the output directory, and finally
    calls imdb.evaluate_detections.

    Arguments:
        net (caffe.Net): network to test
        imdb: image database providing image_index, num_classes, classes,
            image_path_at, roidb (when not using RPN) and
            evaluate_detections
        max_per_image (int): maximum detections kept per image over all
            classes; values <= 0 disable the cap
        thresh (float): only detections scoring strictly above this value
            are kept
        vis (bool): if True, show detections with vis_detections
    """
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    # Precomputed proposals are only needed when the network has no RPN
    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            # Class j owns columns [4*j, 4*j + 4) of the box matrix
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Score of the max_per_image-th best detection; the >= test
                # below keeps every detection that ties with it.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    # Persist the raw detections before evaluating them
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)


================================================
FILE: lib/fast_rcnn/train.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Train a Fast R-CNN network."""

import caffe
from fast_rcnn.config import cfg
import roi_data_layer.roidb as rdl_roidb
from utils.timer import Timer
import numpy as np
import os

from caffe.proto import caffe_pb2
import google.protobuf as pb2

class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.
    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Arguments:
            solver_prototxt (str): path to the Caffe solver definition
            roidb (list): training roidb handed to the net's first layer
            output_dir (str): directory that receives the snapshots
            pretrained_model (str): optional weights file to initialize from
        """
        # Import the submodule explicitly: `import google.protobuf` alone
        # does not guarantee that `google.protobuf.text_format` is loaded.
        from google.protobuf import text_format

        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                    rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print(('Loading pretrained model '
                   'weights from {:s}').format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        # Parse the solver prototxt to read snapshot_prefix / display later
        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        The in-memory weights are restored afterwards, even if saving fails.

        Returns:
            filename (str): path of the written .caffemodel file
        """
        net = self.solver.net

        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                 if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
        filename = (self.solver_param.snapshot_prefix + infix +
                    '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
        filename = os.path.join(self.output_dir, filename)

        if scale_bbox_params:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                    (net.params['bbox_pred'][0].data *
                     self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                    (net.params['bbox_pred'][1].data *
                     self.bbox_stds + self.bbox_means)

        try:
            net.save(str(filename))
            print('Wrote snapshot to: {:s}'.format(filename))
        finally:
            if scale_bbox_params:
                # restore net to original state so training continues with
                # the normalized weights, whether or not the save succeeded
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1
        return filename

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until `max_iters` is
        reached, snapshotting every cfg.TRAIN.SNAPSHOT_ITERS iterations and
        once more at the end if the last iteration was not snapshotted.

        Returns:
            model_paths (list): paths of all snapshots that were written
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        # `display` is 0 when the solver prototxt does not set it; skip the
        # speed report in that case instead of taking a modulo by zero.
        display = self.solver_param.display
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if display > 0 and self.solver.iter % (10 * display) == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths

def get_training_roidb(imdb):
    """Prepare `imdb` for training and return its roidb.

    Horizontally flipped copies are appended first when
    cfg.TRAIN.USE_FLIPPED is set; the roidb is then prepared via
    roi_data_layer.roidb.prepare_roidb.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    training_roidb = imdb.roidb
    return training_roidb

def filter_roidb(roidb):
    """Drop roidb entries that contain neither a foreground nor a
    background RoI, report how many were dropped, and return the rest.
    """

    def _has_usable_rois(entry):
        # An entry is usable when at least one RoI is foreground
        # (overlap >= FG_THRESH) or background (overlap within
        # [BG_THRESH_LO, BG_THRESH_HI)).
        max_ov = entry['max_overlaps']
        fg = np.where(max_ov >= cfg.TRAIN.FG_THRESH)[0]
        in_bg_range = ((max_ov < cfg.TRAIN.BG_THRESH_HI) &
                       (max_ov >= cfg.TRAIN.BG_THRESH_LO))
        bg = np.where(in_bg_range)[0]
        return len(fg) > 0 or len(bg) > 0

    kept = []
    for entry in roidb:
        if _has_usable_rois(entry):
            kept.append(entry)

    total = len(roidb)
    remaining = len(kept)
    print('Filtered {} roidb entries: {} -> {}'.format(total - remaining,
                                                       total, remaining))
    return kept

def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network and return the snapshot paths.

    The roidb is filtered with filter_roidb before a SolverWrapper is
    built and run for `max_iters` iterations.
    """
    usable_roidb = filter_roidb(roidb)
    wrapper = SolverWrapper(solver_prototxt, usable_roidb, output_dir,
                            pretrained_model=pretrained_model)

    print('Solving...')
    snapshot_paths = wrapper.train_model(max_iters)
    print('done solving')
    return snapshot_paths


================================================
FILE: lib/nms/.gitignore
================================================
*.c
*.cpp
*.so


================================================
FILE: lib/nms/__init__.py
================================================


================================================
FILE: lib/nms/cpu_nms.pyx
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

# float32 helpers; they shadow the Python builtins inside this module
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, double thresh):
    """Greedy non-maximum suppression on the CPU.

    Arguments:
        dets (ndarray): N x 5 float32 array of (x1, y1, x2, y2, score) rows
        thresh (float): a box is suppressed when its IoU with a kept,
            higher-scoring box is >= thresh

    Returns:
        keep (list): indices into dets of the kept boxes, by descending score
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort returns intp indices; typing the buffer as intp_t (instead of
    # int_t, a C long) avoids a buffer dtype mismatch where long is 32-bit.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.intp replaces the `np.int` alias that was removed from NumPy
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only lower-scoring boxes can be suppressed by box i
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep


================================================
FILE: lib/nms/gpu_nms.hpp
================================================
// GPU non-maximum suppression entry point (defined in nms_kernel.cu).
//   keep_out           - output: indices of the kept boxes
//   num_out            - output: number of indices written to keep_out
//   boxes_host         - host array of boxes_num * boxes_dim floats
//   nms_overlap_thresh - IoU above which a box is suppressed
//   device_id          - CUDA device to run on
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);


================================================
FILE: lib/nms/gpu_nms.pyx
================================================
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
cimport numpy as np

# keep (int32) is handed to C as an int*, so both must have the same size
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, double thresh,
            np.int32_t device_id=0):
    """Non-maximum suppression on the GPU.

    Arguments:
        dets (ndarray): N x 5 float32 array whose column 4 is the score
        thresh (float): overlap threshold forwarded to the CUDA code
        device_id (int): CUDA device to run on

    Returns:
        list of indices into dets of the kept boxes, by descending score
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out = 0
    cdef np.ndarray[np.int32_t, ndim=1] keep
    cdef np.ndarray[np.float32_t, ndim=1] scores
    # argsort returns intp indices; intp_t (rather than int_t, a C long)
    # avoids a buffer dtype mismatch where long is 32-bit.
    cdef np.ndarray[np.intp_t, ndim=1] order
    cdef np.ndarray[np.float32_t, ndim=2] sorted_dets

    # Nothing to suppress; also avoids taking &keep[0] of an empty array
    if boxes_num == 0:
        return []

    keep = np.zeros(boxes_num, dtype=np.int32)
    scores = dets[:, 4]
    order = scores.argsort()[::-1]
    # The CUDA code receives the boxes already sorted by descending score
    sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim,
         <float>thresh, device_id)
    keep = keep[:num_out]
    # keep indexes the sorted boxes; map back to positions in dets
    return list(order[keep])


================================================
FILE: lib/nms/nms_kernel.cu
================================================
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

#include "gpu_nms.hpp"
#include <vector>
#include <iostream>

// Evaluate a CUDA runtime call and print its error string on failure.
// The error is only reported; execution continues afterwards.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division of m by n, rounded up
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long mask word
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes stored as (x1, y1, x2, y2, ...),
// using the "+ 1" pixel-inclusive convention for widths and heights.
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float ix1 = max(a[0], b[0]);
  const float ix2 = min(a[2], b[2]);
  const float iy1 = max(a[1], b[1]);
  const float iy2 = min(a[3], b[3]);
  // Clamp to zero when the boxes do not overlap along an axis
  const float iw = max(ix2 - ix1 + 1, 0.f);
  const float ih = max(iy2 - iy1 + 1, 0.f);
  const float inter_area = iw * ih;
  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter_area / (area_a + area_b - inter_area);
}

// Compute the pairwise suppression bit mask.
// Each thread block handles one (row block, column block) pair of
// threadsPerBlock boxes. For row box r, bit i of
// dev_mask[r * col_blocks + col_start] is set when the IoU between r and
// the i-th box of the column block exceeds nms_overlap_thresh.
// dev_boxes is read with a fixed stride of 5 floats per box.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last block along each axis may be only partially filled
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column block's boxes in shared memory, one box per thread
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All loads must finish before any thread reads block_boxes
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal, compare only against boxes that come after this one
    // (which also skips the box itself)
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Make device_id the active CUDA device unless it already is.
void _set_device(int device_id) {
  int active_device;
  CUDA_CHECK(cudaGetDevice(&active_device));
  if (active_device != device_id) {
    // The call to cudaSetDevice must come before any calls to Get, which
    // may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
  }
}

// Run NMS on the GPU for boxes that are already sorted by descending score.
//   keep_out   - output: indices (into the sorted boxes) of the kept boxes;
//                must have room for boxes_num entries
//   num_out    - output: number of entries written to keep_out
//   boxes_host - host array of boxes_num * boxes_dim floats
//   device_id  - CUDA device to use
// NOTE(review): nms_kernel indexes boxes with a fixed stride of 5 floats,
// so boxes_dim is expected to be 5 - confirm against callers.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // Nothing to do for an empty input. This also avoids a zero-sized kernel
  // launch and taking the address of element 0 of empty vectors below.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
  // One mask word per (box, column block) pair
  const size_t mask_len = static_cast<size_t>(boxes_num) * col_blocks;

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        mask_len * sizeof(unsigned long long)));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Report launch failures, which would otherwise go unnoticed
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(mask_len);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * mask_len,
                        cudaMemcpyDeviceToHost));

  // Bit set of boxes removed so far; the vector constructor zero-fills it
  std::vector<unsigned long long> remv(col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      // Box i survives: keep it and remove everything it overlaps
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}


================================================
FILE: lib/nms/py_cpu_nms.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np

def py_cpu_nms(dets, thresh):
    """Reference NMS written with plain NumPy.

    `dets` holds (x1, y1, x2, y2, score) rows. Boxes are visited by
    descending score; a box is dropped when its IoU with an already kept
    box is greater than `thresh`. Returns the kept row indices.
    """
    left, top = dets[:, 0], dets[:, 1]
    right, bottom = dets[:, 2], dets[:, 3]
    confidence = dets[:, 4]

    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = confidence.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        selected.append(best)

        # Intersection of the best box with every other remaining box
        inter_x1 = np.maximum(left[best], left[others])
        inter_y1 = np.maximum(top[best], top[others])
        inter_x2 = np.minimum(right[best], right[others])
        inter_y2 = np.minimum(bottom[best], bottom[others])

        inter_w = np.maximum(0.0, inter_x2 - inter_x1 + 1)
        inter_h = np.maximum(0.0, inter_y2 - inter_y1 + 1)
        inter_area = inter_w * inter_h
        iou = inter_area / (box_area[best] + box_area[others] - inter_area)

        # Carry on with the boxes that were not suppressed
        survivors = np.where(iou <= thresh)[0]
        remaining = others[survivors]

    return selected


================================================
FILE: lib/pycocotools/UPSTREAM_REV
================================================
https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574


================================================
FILE: lib/pycocotools/__init__.py
================================================
__author__ = 'tylin'


================================================
FILE: lib/pycocotools/_mask.pyx
================================================
# distutils: language = c
# distutils: sources = ../MatlabAPI/private/maskApi.c

#**************************************************************************
# Microsoft COCO Toolbox.      version 2.0
# Data, paper, and tutorials available at:  http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
#**************************************************************************

__author__ = 'tsungyi'

# import both Python-level and C-level symbols of Numpy
# the API uses Numpy to interface C and Python
import numpy as np
cimport numpy as np
from libc.stdlib cimport malloc, free

# initialize Numpy. must do.
np.import_array()

# import numpy C function
# we use PyArray_ENABLEFLAGS to make the Numpy ndarray responsible for memory management
cdef extern from "numpy/arrayobject.h":
    void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)

# Declare the prototypes of the C functions in maskApi.h.
# These are declarations only; the implementation is compiled from the
# maskApi.c file named in the "distutils: sources" directive at the top of
# this file.
cdef extern from "maskApi.h":
    ctypedef unsigned int uint
    ctypedef unsigned long siz
    ctypedef unsigned char byte
    # a bounding-box array is passed around as a plain pointer to doubles
    ctypedef double* BB
    # One run-length encoded mask. In this module h and w are reported as the
    # 'size' of an encoded object, and frUncompressedRLE fills m with the
    # number of counts and cnts with the counts buffer.
    ctypedef struct RLE:
        siz h,
        siz w,
        siz m,
        uint* cnts,
    void rlesInit( RLE **R, siz n )
    void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n )
    void rleDecode( const RLE *R, byte *mask, siz n )
    void rleMerge( const RLE *R, RLE *M, siz n, bint intersect )
    void rleArea( const RLE *R, siz n, uint *a )
    void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o )
    void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o )
    void rleToBbox( const RLE *R, BB bb, siz n )
    void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n )
    void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w )
    # the returned string is released with free() by its caller (_toString)
    char* rleToString( const RLE *R )
    void rleFrString( RLE *R, char *s, siz h, siz w )

# python class to wrap RLE array in C
# the class handles the memory allocation and deallocation
cdef class RLEs:
    # _R points at the C array set up by rlesInit; _n is the number of
    # entries that were requested for it.
    cdef RLE *_R
    cdef siz _n

    def __cinit__(self, siz n =0):
        # rlesInit stores the address of the new array in self._R
        # (presumably n zero-initialised entries -- TODO confirm in maskApi.c)
        rlesInit(&self._R, n)
        self._n = n

    # free the RLE array here
    def __dealloc__(self):
        if self._R is not NULL:
            for i in range(self._n):
                # release each entry's counts buffer before the array itself
                free(self._R[i].cnts)
            free(self._R)
    def __getattr__(self, key):
        # expose the entry count to Python code as attribute `n`;
        # any other unknown attribute raises AttributeError
        if key == 'n':
            return self._n
        raise AttributeError(key)

# python class to wrap Mask array in C
# the class handles the memory allocation and deallocation
cdef class Masks:
    # _mask is a malloc'ed buffer of h*w*n bytes; _h, _w and _n record the
    # three dimensions it was allocated for.
    cdef byte *_mask
    cdef siz _h
    cdef siz _w
    cdef siz _n

    def __cinit__(self, h, w, n):
        # one byte per mask element; the buffer is not initialised here
        self._mask = <byte*> malloc(h*w*n* sizeof(byte))
        self._h = h
        self._w = w
        self._n = n
    # def __dealloc__(self):
        # the memory management of _mask has been passed to np.ndarray
        # it doesn't need to be freed here

    # called when passing into np.array() and return an np.ndarray in column-major order
    def __array__(self):
        cdef np.npy_intp shape[1]
        shape[0] = <np.npy_intp> self._h*self._w*self._n
        # Create a 1D array, and reshape it to fortran/Matlab column-major array
        ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F')
        # The _mask allocated by Masks is now handled by ndarray
        # NOTE(review): every call wraps the same buffer and marks the result
        # as its owner, and there is no __dealloc__; presumably this must be
        # called exactly once per instance -- confirm.
        PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA)
        return ndarray

# internal conversion from Python RLEs object to compressed RLE format
def _toString(RLEs Rs):
    # Turn every RLE held by Rs into a {'size': [h, w], 'counts': bytes} dict
    # and return the dicts as a list.
    cdef siz total = Rs.n
    cdef bytes counts_py
    cdef char* counts_c
    encoded = []
    for k in range(total):
        counts_c = rleToString( <RLE*> &Rs._R[k] )
        # assigning the char* to a bytes variable copies the characters into
        # a Python object, so the C string can be released straight away
        counts_py = counts_c
        free(counts_c)
        encoded.append({
            'size': [Rs._R[k].h, Rs._R[k].w],
            'counts': counts_py
        })
    return encoded

# internal conversion from compressed RLE format to Python RLEs object
def _frString(rleObjs):
    # Build an RLEs object from a sequence of dicts that carry a 'counts'
    # string and a 'size' pair; entry k of the result is parsed from rleObjs[k].
    cdef siz total = len(rleObjs)
    cdef RLEs parsed = RLEs(total)
    cdef bytes counts_py
    cdef char* counts_c
    for k, entry in enumerate(rleObjs):
        counts_py = str(entry['counts'])
        # counts_c borrows the buffer of counts_py, which stays alive for the call
        counts_c = counts_py
        size = entry['size']
        rleFrString( <RLE*> &parsed._R[k], <char*> counts_c, size[0], size[1] )
    return parsed

# encode mask to RLEs objects
# list of RLE string can be generated by RLEs member function
def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
    # Run-length encode a column-major uint8 array; the third axis indexes
    # the individual masks. Returns the list of dicts produced by _toString.
    rows = mask.shape[0]
    cols = mask.shape[1]
    count = mask.shape[2]
    cdef RLEs Rs = RLEs(count)
    rleEncode(Rs._R, <byte*> mask.data, rows, cols, count)
    return _toString(Rs)

# decode mask from compressed list of RLE string or RLEs object
def decode(rleObjs):
    # Decode a list of compressed RLE dicts into one ndarray built from a
    # Masks buffer. The output height and width are taken from the first RLE.
    cdef RLEs Rs = _frString(rleObjs)
    cdef siz count = Rs._n
    cdef Masks out = Masks(Rs._R[0].h, Rs._R[0].w, count)
    rleDecode( <RLE*> Rs._R, out._mask, count )
    # np.array() triggers Masks.__array__, which hands the buffer to NumPy
    return np.array(out)

def merge(rleObjs, bint intersect=0):
    # Combine all given RLEs into a single one via rleMerge and return it as
    # an encoded dict; `intersect` is forwarded to rleMerge unchanged.
    cdef RLEs inputs = _frString(rleObjs)
    cdef RLEs merged = RLEs(1)
    rleMerge(<RLE*> inputs._R, <RLE*> merged._R, <siz> inputs._n, intersect)
    return _toString(merged)[0]

def area(rleObjs):
    # Return the value rleArea computes for every RLE in rleObjs as a 1-D
    # uint32 ndarray with one entry per input.
    cdef RLEs Rs = _frString(rleObjs)
    cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint))
    rleArea(Rs._R, Rs._n, _a)
    cdef np.npy_intp shape[1]
    shape[0] = <np.npy_intp> Rs._n
    # Wrap the malloc'ed buffer without copying and make the ndarray
    # responsible for freeing it. (A throw-away uint8 array that used to be
    # built here first has been removed: it was never used, and squeezing the
    # element count into uint8 can overflow for more than 255 RLEs.)
    a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
    PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
    return a

# iou computation. support function overload (RLEs-RLEs and bbox-bbox).
# iou computation. support function overload (RLEs-RLEs and bbox-bbox).
#   dt, gt    : both lists of RLE dicts, or both boxes given as an Nx4
#               ndarray / list of 4-element boxes (a single flat 4-element
#               ndarray is treated as one box)
#   pyiscrowd : sequence converted to a uint8 array and handed to the C
#               routine together with gt
#   returns   : [] when either side is empty, otherwise an (m, n) double
#               ndarray with m = number of dt and n = number of gt entries
def iou( dt, gt, pyiscrowd ):
    def _preproc(objs):
        # normalise the input to either an RLEs object or an Nx4 double ndarray
        if len(objs) == 0:
            return objs
        if type(objs) == np.ndarray:
            if len(objs.shape) == 1:
                # A flat array is one box and becomes a single row, exactly
                # like the list branch below. (The shape used to be built
                # from objs[0], i.e. from a coordinate value, which could
                # never satisfy the Nx4 check that follows.)
                objs = objs.reshape((1, objs.shape[0]))
            # check if it's Nx4 bbox
            if not len(objs.shape) == 2 or not objs.shape[1] == 4:
                raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
            objs = objs.astype(np.double)
        elif type(objs) == list:
            # check if list is in box format and convert it to np.ndarray
            isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
            isrle = np.all(np.array([type(obj) == dict for obj in objs]))
            if isbox:
                objs = np.array(objs, dtype=np.double)
                if len(objs.shape) == 1:
                    objs = objs.reshape((1,objs.shape[0]))
            elif isrle:
                objs = _frString(objs)
            else:
                raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
        else:
            raise Exception('unrecognized type.  The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
        return objs
    def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t,  ndim=1] _iou):
        rleIou( <RLE*> dt._R, <RLE*> gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
    def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
        bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*>_iou.data )
    def _len(obj):
        # number of entries of a preprocessed input; 0 for anything empty
        cdef siz N = 0
        if type(obj) == RLEs:
            N = obj.n
        elif len(obj)==0:
            pass
        elif type(obj) == np.ndarray:
            N = obj.shape[0]
        return N
    # convert iscrowd to numpy array
    cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
    # simple type checking
    cdef siz m, n
    dt = _preproc(dt)
    gt = _preproc(gt)
    m = _len(dt)
    n = _len(gt)
    if m == 0 or n == 0:
        return []
    if not type(dt) == type(gt):
        raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')

    # define local variables
    cdef double* _iou = <double*> 0
    cdef np.npy_intp shape[1]
    # check type and assign iou function
    if type(dt) == RLEs:
        _iouFun = _rleIou
    elif type(dt) == np.ndarray:
        _iouFun = _bbIou
    else:
        raise Exception('input data type not allowed.')
    _iou = <double*> malloc(m*n* sizeof(double))
    shape[0] = <np.npy_intp> m*n
    # wrap the malloc'ed buffer without copying; the flat ndarray owns it and
    # the reshaped result returned below keeps that array alive
    iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
    PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
    _iouFun(dt, gt, iscrowd, m, n, iou)
    return iou.reshape((m,n), order='F')

def toBbox( rleObjs ):
    # Return an (n, 4) double ndarray holding the four values rleToBbox
    # writes for each of the n RLEs in rleObjs.
    cdef RLEs Rs = _frString(rleObjs)
    cdef siz n = Rs.n
    cdef BB _bb = <BB> malloc(4*n* sizeof(double))
    rleToBbox( <const RLE*> Rs._R, _bb, n )
    cdef np.npy_intp shape[1]
    shape[0] = <np.npy_intp> 4*n
    # Wrap the malloc'ed buffer without copying. Ownership is given to the
    # flat array that actually wraps the buffer (the same order iou() uses);
    # the reshaped result keeps that array alive.
    flat = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb)
    PyArray_ENABLEFLAGS(flat, np.NPY_OWNDATA)
    return flat.reshape((n, 4))

def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ):
    # Encode every row of bb as an RLE via rleFrBbox (h and w are forwarded
    # to it) and return the encoded dicts.
    cdef siz count = bb.shape[0]
    cdef RLEs boxes = RLEs(count)
    rleFrBbox( <RLE*> boxes._R, <const BB> bb.data, h, w, count )
    return _toString(boxes)

def frPoly( poly, siz h, siz w ):
    # Encode each polygon in poly (a flat sequence of coordinates) as one RLE
    # via rleFrPoly and return the encoded dicts.
    cdef np.ndarray[np.double_t, ndim=1] coords
    cdef RLEs Rs = RLEs(len(poly))
    for k, points in enumerate(poly):
        coords = np.array(points, dtype=np.double, order='F')
        # half the number of coordinates is passed as the point count
        rleFrPoly( <RLE*>&Rs._R[k], <const double*> coords.data, len(coords)/2, h, w )
    return _toString(Rs)

def frUncompressedRLE(ucRles, siz h, siz w):
    # Convert uncompressed RLE dicts ('counts' as a sequence of integers plus
    # a 'size' pair) into compressed encoded dicts. The size comes from each
    # dict; the h and w arguments are not read.
    cdef np.ndarray[np.uint32_t, ndim=1] counts
    cdef RLE rle
    cdef uint *buf
    cdef RLEs holder
    encoded = []
    for idx in range(len(ucRles)):
        item = ucRles[idx]
        holder = RLEs(1)
        counts = np.array(item['counts'], dtype=np.uint32)
        # time for malloc can be saved here but it's fine
        buf = <uint*> malloc(len(counts)* sizeof(uint))
        for j in range(len(counts)):
            buf[j] = <uint> counts[j]
        rle = RLE(item['size'][0], item['size'][1], len(counts), <uint*> buf)
        # holder takes over buf: RLEs.__dealloc__ frees each entry's cnts
        holder._R[0] = rle
        encoded.append(_toString(holder)[0])
    return encoded

def frPyObjects(pyobj, siz h, w):
    # Encode Python objects as RLE dicts, dispatching on the input type:
    #   np.ndarray                         -> boxes (frBbox)
    #   list of dicts                      -> uncompressed RLEs (frUncompressedRLE)
    #   list of 4-element entries          -> boxes (frBbox)
    #   list of entries longer than 4      -> polygons (frPoly)
    # Anything else raises Exception('input type is not supported.').
    if type(pyobj) == np.ndarray:
        objs = frBbox(pyobj, h, w )
    elif type(pyobj) == list and type(pyobj[0]) == dict:
        # dicts are tested before the length checks so that an RLE dict with
        # four or more keys is not mistaken for a box or a polygon
        objs = frUncompressedRLE(pyobj, h, w)
    elif type(pyobj) == list and len(pyobj[0]) == 4:
        # frBbox only accepts a 2-D double ndarray; handing it the raw list
        # raised a TypeError, so convert first
        objs = frBbox(np.array(pyobj, dtype=np.double), h, w )
    elif type(pyobj) == list and len(pyobj[0]) > 4:
        objs = frPoly(pyobj, h, w )
    else:
        raise Exception('input type is not supported.')
    return objs


================================================
FILE: lib/pycocotools/coco.py
================================================
__author__ = 'tylin'
__version__ = '1.0.1'
# Interface for accessing the Microsoft COCO dataset.

# Microsoft COCO is a large image dataset designed for object detection,
# segmentation, and caption generation. pycocotools is a Python API that
# assists in loading, parsing and visualizing the annotations in COCO.
# Please visit http://mscoco.org/ for more information on COCO, including
# for the data, paper, and tutorials. The exact format of the annotations
# is also described on the COCO website. For example usage of the pycocotools
# please see pycocotools_demo.ipynb. In addition to this API, please download both
# the COCO images and annotations in order to run the demo.

# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
# Using the API provides additional utility functions. Note that this API
# supports both *instance* and *caption* annotations. In the case of
# captions not all functions are defined (e.g. categories are undefined).

# The following API functions are defined:
#  COCO       - COCO api class that loads COCO annotation file and prepare data structures.
#  decodeMask - Decode binary mask M encoded via run-length encoding.
#  encodeMask - Encode binary mask M using run-length encoding.
#  getAnnIds  - Get ann ids that satisfy given filter conditions.
#  getCatIds  - Get cat ids that satisfy given filter conditions.
#  getImgIds  - Get img ids that satisfy given filter conditions.
#  loadAnns   - Load anns with the specified ids.
#  loadCats   - Load cats with the specified ids.
#  loadImgs   - Load imgs with the specified ids.
#  segToMask  - Convert polygon segmentation to binary mask.
#  showAnns   - Display the specified annotations.
#  loadRes    - Load algorithm results and create API for accessing them.
#  download   - Download COCO images from mscoco.org server.
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# Help on each function can be accessed by: "help COCO>function".

# See also COCO>decodeMask,
# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
# COCO>loadImgs, COCO>segToMask, COCO>showAnns

# Microsoft COCO Toolbox.      version 2.0
# Data, paper, and tutorials available at:  http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
# Licensed under the Simplified BSD License [see bsd.txt]

import json
import datetime
import time
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
import numpy as np
from skimage.draw import polygon
import urllib
import copy
import itertools
import mask
import os

class COCO:
    def __init__(self, annotation_file=None):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file; when None an
                                      empty object is created and no index is built
        :return:
        """
        # load dataset
        self.dataset = {}
        # a dict, like the index createIndex() builds
        self.anns = {}
        self.imgToAnns = {}
        self.catToImgs = {}
        self.imgs = {}
        self.cats = {}
        if annotation_file is not None:
            print('loading annotations into memory...')
            tic = time.time()
            # close the file as soon as it has been parsed
            with open(annotation_file, 'r') as f:
                dataset = json.load(f)
            print('Done (t=%0.2fs)'%(time.time()- tic))
            self.dataset = dataset
            self.createIndex()

    def createIndex(self):
        """
        Build the lookup tables (anns, imgToAnns, catToImgs, imgs, cats) from
        self.dataset. Sections missing from the dataset leave their table empty.
        :return:
        """
        # create index
        print('creating index...')
        anns = {}
        imgToAnns = {}
        catToImgs = {}
        cats = {}
        imgs = {}
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                # only images that have at least one annotation get an entry
                imgToAnns.setdefault(ann['image_id'], []).append(ann)
                anns[ann['id']] = ann

        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img

        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat
            # every category gets an entry, even without annotations
            catToImgs = {cat['id']: [] for cat in self.dataset['categories']}
            if 'annotations' in self.dataset:
                for ann in self.dataset['annotations']:
                    catToImgs[ann['category_id']] += [ann['image_id']]

        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

    def info(self):
        """
        Print information about the annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('%s: %s'%(key, value))

    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds  (int array)     : get anns for given imgs
               catIds  (int array)     : get anns for given cats
               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
               iscrowd (boolean)       : get anns for given crowd label (False or True)
        :return: ids (int array)       : integer array of ann ids
        """
        imgIds = imgIds if type(imgIds) == list else [imgIds]
        catIds = catIds if type(catIds) == list else [catIds]

        if len(imgIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(imgIds) == 0:
                # this can be changed by defaultdict
                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            anns = anns if len(catIds)  == 0 else [ann for ann in anns if ann['category_id'] in catIds]
            # both ends of the area range are exclusive
            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
        if iscrowd is not None:
            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        else:
            ids = [ann['id'] for ann in anns]
        return ids

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """
        filtering parameters. default skips that filter.
        :param catNms (str array)  : get cats for given cat names
        :param supNms (str array)  : get cats for given supercategory names
        :param catIds (int array)  : get cats for given cat ids
        :return: ids (int array)   : integer array of cat ids
        """
        catNms = catNms if type(catNms) == list else [catNms]
        supNms = supNms if type(supNms) == list else [supNms]
        catIds = catIds if type(catIds) == list else [catIds]

        # an empty filter list leaves the candidates untouched
        cats = self.dataset['categories']
        if len(catNms) != 0:
            cats = [cat for cat in cats if cat['name'] in catNms]
        if len(supNms) != 0:
            cats = [cat for cat in cats if cat['supercategory'] in supNms]
        if len(catIds) != 0:
            cats = [cat for cat in cats if cat['id'] in catIds]
        ids = [cat['id'] for cat in cats]
        return ids

    def getImgIds(self, imgIds=[], catIds=[]):
        '''
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (int array) : get imgs with all given cats
        :return: ids (int array)  : integer array of img ids
        '''
        imgIds = imgIds if type(imgIds) == list else [imgIds]
        catIds = catIds if type(catIds) == list else [catIds]

        if len(imgIds) == len(catIds) == 0:
            ids = self.imgs.keys()
        else:
            ids = set(imgIds)
            for i, catId in enumerate(catIds):
                # with no explicit image ids the first category seeds the set;
                # every later category narrows it
                if i == 0 and len(ids) == 0:
                    ids = set(self.catToImgs[catId])
                else:
                    ids &= set(self.catToImgs[catId])
        return list(ids)

    def loadAnns(self, ids=[]):
        """
        Load anns with the specified ids.
        :param ids (int array)       : integer ids specifying anns
        :return: anns (object array) : loaded ann objects
        """
        if type(ids) == list:
            return [self.anns[id] for id in ids]
        elif type(ids) == int:
            return [self.anns[ids]]

    def loadCats(self, ids=[]):
        """
        Load cats with the specified ids.
        :param ids (int array)       : integer ids specifying cats
        :return: cats (object array) : loaded cat objects
        """
        if type(ids) == list:
            return [self.cats[id] for id in ids]
        elif type(ids) == int:
            return [self.cats[ids]]

    def loadImgs(self, ids=[]):
        """
        Load imgs with the specified ids.
        :param ids (int array)       : integer ids specifying img
        :return: imgs (object array) : loaded img objects
        """
        if type(ids) == list:
            return [self.imgs[id] for id in ids]
        elif type(ids) == int:
            return [self.imgs[ids]]

    def showAnns(self, anns):
        """
        Display the specified annotations.
        :param anns (array of object): annotations to display
        :return: 0 when anns is empty, otherwise None
        :raises ValueError: when the first annotation has neither a
                            'segmentation' nor a 'caption' field
        """
        if len(anns) == 0:
            return 0
        if 'segmentation' in anns[0]:
            datasetType = 'instances'
        elif 'caption' in anns[0]:
            datasetType = 'captions'
        else:
            # previously fell through to an unbound-variable error
            raise ValueError('annotations must contain a "segmentation" or a "caption" field')
        if datasetType == 'instances':
            ax = plt.gca()
            polygons = []
            color = []
            for ann in anns:
                c = np.random.random((1, 3)).tolist()[0]
                if type(ann['segmentation']) == list:
                    # polygon
                    for seg in ann['segmentation']:
                        # floor division keeps the row count an integer
                        poly = np.array(seg).reshape((len(seg)//2, 2))
                        polygons.append(Polygon(poly, True,alpha=0.4))
                        color.append(c)
                else:
                    # mask
                    t = self.imgs[ann['image_id']]
                    if type(ann['segmentation']['counts']) == list:
                        rle = mask.frPyObjects([ann['segmentation']], t['height'], t['width'])
                    else:
                        rle = [ann['segmentation']]
                    m = mask.decode(rle)
                    img = np.ones( (m.shape[0], m.shape[1], 3) )
                    # crowd regions get a fixed colour, everything else a
                    # random one, so color_mask is always defined and never
                    # carried over from the previous annotation
                    if ann['iscrowd'] == 1:
                        color_mask = np.array([2.0,166.0,101.0])/255
                    else:
                        color_mask = np.random.random((1, 3)).tolist()[0]
                    for i in range(3):
                        img[:,:,i] = color_mask[i]
                    ax.imshow(np.dstack( (img, m*0.5) ))
            p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4)
            ax.add_collection(p)
        elif datasetType == 'captions':
            for ann in anns:
                print(ann['caption'])

    def loadRes(self, resFile):
        """
        Load result file and return a result api object.
        :param   resFile (str)     : file name of result file
        :return: res (obj)         : result api object
        """
        res = COCO()
        res.dataset['images'] = [img for img in self.dataset['images']]
        # res.dataset['info'] = copy.deepcopy(self.dataset['info'])
        # res.dataset['licenses'] = copy.deepcopy(self.dataset['licenses'])

        print('Loading and preparing results...     ')
        tic = time.time()
        # close the file as soon as it has been parsed
        with open(resFile) as f:
            anns = json.load(f)
        assert type(anns) == list, 'results in not an array of objects'
        annsImgIds = [ann['image_id'] for ann in anns]
        assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
               'Results do not correspond to current coco set'
        if 'caption' in anns[0]:
            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
            for idx, ann in enumerate(anns):
                ann['id'] = idx+1
        elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for idx, ann in enumerate(anns):
                bb = ann['bbox']
                # bbox is [x, y, width, height]
                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
                if not 'segmentation' in ann:
                    # use the box outline as the segmentation polygon
                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                ann['area'] = bb[2]*bb[3]
                ann['id'] = idx+1
                ann['iscrowd'] = 0
        elif 'segmentation' in anns[0]:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for idx, ann in enumerate(anns):
                # now only support compressed RLE format as segmentation results
                ann['area'] = mask.area([ann['segmentation']])[0]
                if not 'bbox' in ann:
                    ann['bbox'] = mask.toBbox([ann['segmentation']])[0]
                ann['id'] = idx+1
                ann['iscrowd'] = 0
        print('DONE (t=%0.2fs)'%(time.time()- tic))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res

    def download( self, tarDir = None, imgIds = [] ):
        '''
        Download COCO images to tarDir, fetching each image from its 'coco_url'.
        Files that already exist in tarDir are not fetched again.
        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded; empty means all images
        :return: -1 when tarDir is None, otherwise None
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if len(imgIds) == 0:
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            if not os.path.exists(fname):
                urllib.urlretrieve(img['coco_url'], fname)
            print('downloaded %d/%d images (t=%.1fs)'%(i, N, time.time()- tic))


================================================
FILE: lib/pycocotools/cocoeval.py
================================================
__author__ = 'tsungyi'

import numpy as np
import datetime
import time
from collections import defaultdict
import mask
import copy

class COCOeval:
    # Interface for evaluating detection on the Microsoft COCO dataset.
    #
    # The usage for CocoEval is as follows:
    #  cocoGt=..., cocoDt=...       # load dataset and results
    #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
    #  E.params.recThrs = ...;      # set parameters as desired
    #  E.evaluate();                # run per image evaluation
    #  E.accumulate();              # accumulate per image results
    #  E.summarize();               # display summary metrics of results
    # For example usage see evalDemo.m and http://mscoco.org/.
    #
    # The evaluation parameters are as follows (defaults in brackets):
    #  imgIds     - [all] N img ids to use for evaluation
    #  catIds     - [all] K cat ids to use for evaluation
    #  iouThrs    - [.5:.05:.95] T=10 IoU thresholds for evaluation
    #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
    #  areaRng    - [...] A=4 object area ranges for evaluation
    #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
    #  useSegm    - [1] if true evaluate against ground-truth segments
    #  useCats    - [1] if true use category labels for evaluation
    # Note: if useSegm=0 the evaluation is run on bounding boxes.
    # Note: if useCats=0 category labels are ignored as in proposal scoring.
    # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
    #
    # evaluate(): evaluates detections on every image and every category and
    # concats the results into the "evalImgs" with fields:
    #  dtIds      - [1xD] id for each of the D detections (dt)
    #  gtIds      - [1xG] id for each of the G ground truths (gt)
    #  dtMatches  - [TxD] matching gt id at each IoU or 0
    #  gtMatches  - [TxG] matching dt id at each IoU or 0
    #  dtScores   - [1xD] confidence of each dt
    #  gtIgnore   - [1xG] ignore flag for each gt
    #  dtIgnore   - [TxD] ignore flag for each dt at each IoU
    #
    # accumulate(): accumulates the per-image, per-category evaluation
    # results in "evalImgs" into the dictionary "eval" with fields:
    #  params     - parameters used for evaluation
    #  date       - date evaluation was performed
    #  counts     - [T,R,K,A,M] parameter dimensions (see above)
    #  precision  - [TxRxKxAxM] precision for every evaluation setting
    #  recall     - [TxKxAxM] max recall for every evaluation setting
    # Note: precision and recall==-1 for settings with no gt objects.
    #
    # See also coco, mask, pycocoDemo, pycocoEvalDemo
    #
    # Microsoft COCO Toolbox.      version 2.0
    # Data, paper, and tutorials available at:  http://mscoco.org/
    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
    # Licensed under the Simplified BSD License [see coco/license.txt]
    def __init__(self, cocoGt=None, cocoDt=None):
        '''
        Initialize CocoEval using coco APIs for gt and dt
        :param cocoGt: coco object with ground truth annotations
        :param cocoDt: coco object with detection results
        :return: None
        '''
        self.cocoGt   = cocoGt              # ground truth COCO API
        self.cocoDt   = cocoDt              # detections COCO API
        self.params   = Params()            # evaluation parameters
        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
        self.eval     = {}                  # accumulated evaluation results
        self._gts = defaultdict(list)       # gt for evaluation
        self._dts = defaultdict(list)       # dt for evaluation
        self._paramsEval = {}               # parameters for evaluation (set by evaluate())
        self.stats = []                     # result summarization
        self.ious = {}                      # ious between all gts and dts
        if cocoGt is not None:
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())


    def _prepare(self):
        '''
        Prepare ._gts and ._dts for evaluation based on params
        :return: None
        :raises Exception: if useSegm is set and an annotation carries a
                           segmentation that is neither a polygon list, an
                           uncompressed RLE (list counts) nor a compressed RLE
                           (string counts)
        '''
        # str/unicode on Python 2, bytes/str on Python 3
        string_types = (type(b''), type(u''))

        def _toMask(objs, coco):
            # modify segmentation by reference
            for obj in objs:
                t = coco.imgs[obj['image_id']]
                segm = obj['segmentation']
                if type(segm) == list:
                    rles = mask.frPyObjects(segm, t['height'], t['width'])
                    if len(rles) == 1:
                        obj['segmentation'] = rles[0]
                    else:
                        # an object can have multiple polygon regions
                        # merge them into one RLE mask
                        obj['segmentation'] = mask.merge(rles)
                elif type(segm) == dict and type(segm['counts']) == list:
                    # uncompressed RLE -> compressed RLE
                    obj['segmentation'] = mask.frPyObjects([segm], t['height'], t['width'])[0]
                elif type(segm) == dict and isinstance(segm['counts'], string_types):
                    # already a compressed RLE, nothing to convert
                    pass
                else:
                    raise Exception('segmentation format not supported.')
        p = self.params
        if p.useCats:
            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        else:
            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))

        if p.useSegm:
            _toMask(gts, self.cocoGt)
            _toMask(dts, self.cocoDt)
        self._gts = defaultdict(list)       # gt for evaluation
        self._dts = defaultdict(list)       # dt for evaluation
        for gt in gts:
            self._gts[gt['image_id'], gt['category_id']].append(gt)
        for dt in dts:
            self._dts[dt['image_id'], dt['category_id']].append(dt)
        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results
        self.eval     = {}                  # accumulated evaluation results

    def evaluate(self):
        '''
        Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
        :return: None
        '''
        tic = time.time()
        print('Running per image evaluation...      ')
        p = self.params
        p.imgIds = list(np.unique(p.imgIds))
        if p.useCats:
            p.catIds = list(np.unique(p.catIds))
        p.maxDets = sorted(p.maxDets)
        self.params=p

        self._prepare()
        # loop through images, area range, max detection number
        catIds = p.catIds if p.useCats else [-1]

        computeIoU = self.computeIoU
        self.ious = {(imgId, catId): computeIoU(imgId, catId) \
                        for imgId in p.imgIds
                        for catId in catIds}

        evaluateImg = self.evaluateImg
        maxDet = p.maxDets[-1]
        # ordering (category, area range, image) is relied upon by accumulate()
        self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
                 for catId in catIds
                 for areaRng in p.areaRng
                 for imgId in p.imgIds
             ]
        self._paramsEval = copy.deepcopy(self.params)
        toc = time.time()
        print('DONE (t=%0.2fs).'%(toc-tic))

    def computeIoU(self, imgId, catId):
        '''
        Compute the IoU between every detection and every ground truth of one image/category
        :param imgId: image id
        :param catId: category id (ignored when params.useCats is false)
        :return: [] if there is neither gt nor dt, otherwise the result of mask.iou
                 for the (at most maxDets[-1]) highest scoring detections
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId,catId]
            dt = self._dts[imgId,catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
        if len(gt) == 0 and len(dt) ==0:
            return []
        dt = sorted(dt, key=lambda x: -x['score'])
        if len(dt) > p.maxDets[-1]:
            dt=dt[0:p.maxDets[-1]]

        if p.useSegm:
            g = [g['segmentation'] for g in gt]
            d = [d['segmentation'] for d in dt]
        else:
            g = [g['bbox'] for g in gt]
            d = [d['bbox'] for d in dt]

        # compute iou between each dt and gt region
        iscrowd = [int(o['iscrowd']) for o in gt]
        ious = mask.iou(d,g,iscrowd)
        return ious

    def evaluateImg(self, imgId, catId, aRng, maxDet):
        '''
        perform evaluation for single category and image
        :param imgId: image id
        :param catId: category id (ignored when params.useCats is false)
        :param aRng: [min, max] object area range; gt outside it is ignored
        :param maxDet: maximum number of (highest scoring) detections considered
        :return: dict (single image results), or None if there is neither gt nor dt
        '''
        #
        p = self.params
        if p.useCats:
            gt = self._gts[imgId,catId]
            dt = self._dts[imgId,catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
        if len(gt) == 0 and len(dt) ==0:
            return None

        for g in gt:
            if 'ignore' not in g:
                g['ignore'] = 0
            if g['iscrowd'] == 1 or g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

        # sort dt highest score first, sort gt ignore last
        # (stable sort on the indices keeps the original order within each group)
        gtind = sorted(range(len(gt)), key=lambda ind: gt[ind]['_ignore'])

        gt = [gt[ind] for ind in gtind]
        dt = sorted(dt, key=lambda x: -x['score'])[0:maxDet]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious
        N_iou = len(self.ious[imgId, catId])
        ious = self.ious[imgId, catId][0:maxDet, np.array(gtind)] if N_iou >0 else self.ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm  = np.zeros((T,G))
        dtm  = np.zeros((T,D))
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T,D))
        if not len(ious)==0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t,1-1e-10])
                    m   = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind,gind]>0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind,gind] < iou:
                            continue
                        # match successful and best so far, store appropriately
                        iou=ious[dind,gind]
                        m=gind
                    # if match made store id of match for both dt and gt
                    if m ==-1:
                        continue
                    dtIg[tind,dind] = gtIg[m]
                    dtm[tind,dind]  = gt[m]['id']
                    gtm[tind,m]     = d['id']
        # set unmatched detections outside of area range to ignore
        a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
        # store results for given image and category
        return {
                'image_id':     imgId,
                'category_id':  catId,
                'aRng':         aRng,
                'maxDet':       maxDet,
                'dtIds':        [d['id'] for d in dt],
                'gtIds':        [g['id'] for g in gt],
                'dtMatches':    dtm,
                'gtMatches':    gtm,
                'dtScores':     [d['score'] for d in dt],
                'gtIgnore':     gtIg,
                'dtIgnore':     dtIg,
            }

    def accumulate(self, p = None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        '''
        print('Accumulating evaluation results...   ')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
            if not self._paramsEval:
                # evaluate() never ran, so there are no evaluation params to
                # index with; stop here instead of failing on the empty dict
                return
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T           = len(p.iouThrs)
        R           = len(p.recThrs)
        K           = len(p.catIds) if p.useCats else 1
        A           = len(p.areaRng)
        M           = len(p.maxDets)
        precision   = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
        recall      = -np.ones((T,K,A,M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds)  if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
        i_list = [n for n, i in enumerate(p.imgIds)  if i in setI]
        # K0 = len(_pe.catIds)
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0*A0*I0
            for a, a0 in enumerate(a_list):
                Na = a0*I0
                for m, maxDet in enumerate(m_list):
                    # drop images that had neither gt nor dt (evaluateImg returned None)
                    E = [e for e in (self.evalImgs[Nk+Na+i] for i in i_list) if e]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])

                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')

                    dtm  = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
                    dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet]  for e in E], axis=1)[:,inds]
                    gtIg = np.concatenate([e['gtIgnore']  for e in E])
                    npig = len([ig for ig in gtIg if ig == 0])
                    if npig == 0:
                        continue
                    tps = np.logical_and(               dtm,  np.logical_not(dtIg) )
                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp+tp+np.spacing(1))
                        q  = np.zeros((R,))

                        if nd:
                            recall[t,k,a,m] = rc[-1]
                        else:
                            recall[t,k,a,m] = 0

                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist(); q = q.tolist()

                        # make precision monotonically non-increasing in recall
                        for i in range(nd-1, 0, -1):
                            if pr[i] > pr[i-1]:
                                pr[i-1] = pr[i]

                        rec_inds = np.searchsorted(rc, p.recThrs)
                        for ri, pi in enumerate(rec_inds):
                            # recall threshold never reached: this and all
                            # higher thresholds keep precision 0
                            if pi >= nd:
                                break
                            q[ri] = pr[pi]
                        precision[t,:,k,a,m] = np.array(q)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'precision': precision,
            'recall':   recall,
        }
        toc = time.time()
        print('DONE (t=%0.2fs).'%( toc-tic ))

    def summarize(self):
        '''
        Compute and display summary metrics for evaluation results.
        Note this function can *only* be applied on the default parameter setting
        :raises Exception: if accumulate() has not produced self.eval yet
        '''
        def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
            p = self.params
            iStr        = ' {:<18} {} @[ IoU={:<9} | area={:>6} | maxDets={:>3} ] = {}'
            titleStr    = 'Average Precision' if ap == 1 else 'Average Recall'
            typeStr     = '(AP)' if ap==1 else '(AR)'
            iouStr      = '%0.2f:%0.2f'%(p.iouThrs[0], p.iouThrs[-1]) if iouThr is None else '%0.2f'%(iouThr)
            areaStr     = areaRng
            maxDetsStr  = '%d'%(maxDets)

            aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng == areaRng]
            mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets]
            if ap == 1:
                # dimension of precision: [TxRxKxAxM]
                s = self.eval['precision']
                # IoU
                if iouThr is not None:
                    t = np.where(iouThr == p.iouThrs)[0]
                    s = s[t]
                # areaRng
                s = s[:,:,:,aind,mind]
            else:
                # dimension of recall: [TxKxAxM]
                s = self.eval['recall']
                s = s[:,:,aind,mind]
            # entries equal to -1 mark settings without gt and are excluded from the mean
            if len(s[s>-1])==0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s>-1])
            print(iStr.format(titleStr, typeStr, iouStr, areaStr, maxDetsStr, '%.3f'%(float(mean_s))))
            return mean_s

        if not self.eval:
            raise Exception('Please run accumulate() first')
        self.stats = np.zeros((12,))
        self.stats[0] = _summarize(1)
        self.stats[1] = _summarize(1,iouThr=.5)
        self.stats[2] = _summarize(1,iouThr=.75)
        self.stats[3] = _summarize(1,areaRng='small')
        self.stats[4] = _summarize(1,areaRng='medium')
        self.stats[5] = _summarize(1,areaRng='large')
        self.stats[6] = _summarize(0,maxDets=1)
        self.stats[7] = _summarize(0,maxDets=10)
        self.stats[8] = _summarize(0,maxDets=100)
        self.stats[9]  = _summarize(0,areaRng='small')
        self.stats[10] = _summarize(0,areaRng='medium')
        self.stats[11] = _summarize(0,areaRng='large')

    def __str__(self):
        '''
        Print the summary table (as summarize() does) and return the twelve
        summary statistics as a one-line string, so that str()/print work.
        '''
        self.summarize()
        return 'COCOeval stats: ' + ', '.join('%.3f' % s for s in self.stats)

class Params:
    '''
    Params for coco evaluation api
    '''
    def __init__(self):
        self.imgIds = []                    # image ids to evaluate
        self.catIds = []                    # category ids to evaluate
        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
        # np.linspace requires an integer sample count, np.round returns a float
        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95-.5)/.05))+1, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00-.0)/.01))+1, endpoint=True)
        self.maxDets = [1,10,100]           # thresholds on max detections per image
        # object area ranges: all, small, medium, large
        self.areaRng = [ [0**2,1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2] ]
        self.useSegm = 0                    # 0: evaluate bounding boxes, 1: evaluate segmentations
        self.useCats = 1                    # 1: use category labels, 0: ignore them

================================================
FILE: lib/pycocotools/license.txt
================================================
Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met: 

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer. 
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies, 
either expressed or implied, of the FreeBSD Project.


================================================
FILE: lib/pycocotools/mask.py
================================================
__author__ = 'tsungyi'

import pycocotools._mask as _mask

# Interface for manipulating masks stored in RLE format.
#
# RLE is a simple yet efficient format for storing binary masks. RLE
# first divides a vector (or vectorized image) into a series of piecewise
# constant regions and then for each piece simply stores the length of
# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
# (note that the odd counts are always the numbers of zeros). Instead of
# storing the counts directly, additional compression is achieved with a
# variable bitrate representation based on a common scheme called LEB128.
#
# Compression is greatest given large piecewise constant regions.
# Specifically, the size of the RLE is proportional to the number of
# *boundaries* in M (or for an image the number of boundaries in the y
# direction). Assuming fairly simple shapes, the RLE representation is
# O(sqrt(n)) where n is number of pixels in the object. Hence space usage
# is substantially lower, especially for large simple objects (large n).
#
# Many common operations on masks can be computed directly using the RLE
# (without need for decoding). This includes computations such as area,
# union, intersection, etc. All of these operations are linear in the
# size of the RLE, in other words they are O(sqrt(n)) where n is the area
# of the object. Computing these operations on the original mask is O(n).
# Thus, using the RLE can result in substantial computational savings.
#
# The following API functions are defined:
#  encode         - Encode binary masks using RLE.
#  decode         - Decode binary masks encoded via RLE.
#  merge          - Compute union or intersection of encoded masks.
#  iou            - Compute intersection over union between masks.
#  area           - Compute area of encoded masks.
#  toBbox         - Get bounding boxes surrounding encoded masks.
#  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
#
# Usage:
#  Rs     = encode( masks )
#  masks  = decode( Rs )
#  R      = merge( Rs, intersect=false )
#  o      = iou( dt, gt, iscrowd )
#  a      = area( Rs )
#  bbs    = toBbox( Rs )
#  Rs     = frPyObjects( [pyObjects], h, w )
#
# In the API the following formats are used:
#  Rs      - [dict] Run-length encoding of binary masks
#  R       - dict Run-length encoding of binary mask
#  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
#  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
#  bbs     - [nx4] Bounding box(es) stored as [x y w h]
#  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
#  dt,gt   - May be either bounding boxes or encoded masks
# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
#
# Finally, a note about the intersection over union (iou) computation.
# The standard iou of a ground truth (gt) and detected (dt) object is
#  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
# For "crowd" regions, we use a modified criterion. If a gt object is
# marked as "iscrowd", we allow a dt to match any subregion of the gt.
# Choosing gt' in the crowd gt that best matches the dt can be done using
# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
#  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
# For crowd gt regions we use the modified criterion above for the iou.
#
# To compile run "python setup.py build_ext --inplace"
# Please do not contact us for help with compiling.
#
# Microsoft COCO Toolbox.      version 2.0
# Data, paper, and tutorials available at:  http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]

# Public API: plain aliases onto the compiled _mask extension, listed in the
# same order as the API summary in the header comment.
encode = _mask.encode            # binary masks -> RLE
decode = _mask.decode            # RLE -> binary masks
merge = _mask.merge              # union / intersection of RLEs
iou = _mask.iou                  # intersection over union
area = _mask.area                # area of encoded masks
toBbox = _mask.toBbox            # bounding boxes around encoded masks
frPyObjects = _mask.frPyObjects  # polygon / bbox / uncompressed RLE -> RLE

================================================
FILE: lib/pycocotools/maskApi.c
================================================
/**************************************************************************
* Microsoft COCO Toolbox.      version 2.0
* Data, paper, and tutorials available at:  http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#include "maskApi.h"
#include <math.h>
#include <stdlib.h>

/* Return the smaller of two unsigned values (b when they are equal). */
uint umin( uint a, uint b ) {
  if( a<b ) return a;
  return b;
}
/* Return the larger of two unsigned values (b when they are equal). */
uint umax( uint a, uint b ) {
  if( a>b ) return a;
  return b;
}

/* Initialise R as an h x w mask with m run counts.
 * A buffer for m counts is allocated (none when m is 0); when cnts is
 * non-NULL its first m entries are copied into that buffer. */
void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
  siz idx;
  R->h = h;
  R->w = w;
  R->m = m;
  if( m==0 ) R->cnts = 0;
  else R->cnts = malloc(sizeof(uint)*m);
  if( !cnts ) return;
  for( idx=0; idx<m; idx++ ) R->cnts[idx] = cnts[idx];
}

/* Release the run-count buffer of R and clear the pointer. */
void rleFree( RLE *R ) {
  uint *counts = R->cnts;
  R->cnts = 0;
  free(counts);
}

/* Allocate an array of n RLEs in *R and initialise each one as empty. */
void rlesInit( RLE **R, siz n ) {
  RLE *arr = (RLE*) malloc(sizeof(RLE)*n);
  siz idx;
  *R = arr;
  for( idx=0; idx<n; idx++ ) rleInit(arr+idx,0,0,0,0);
}

/* Free the n RLEs stored in *R, then the array itself, and set *R to NULL. */
void rlesFree( RLE **R, siz n ) {
  siz idx = 0;
  while( idx<n ) {
    rleFree((*R)+idx);
    idx++;
  }
  free(*R);
  *R = 0;
}

/* Run-length encode n consecutive h*w byte masks stored in M into R[0..n-1].
 * The first count of every encoding is the length of the leading run of
 * zeros (0 if the mask starts with a non-zero value). */
void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
  const siz area = w*h;
  /* a mask of `area` pixels yields at most area+1 runs */
  uint *runs = malloc(sizeof(uint)*(area+1));
  siz img;
  for( img=0; img<n; img++ ) {
    const byte *pix = M + area*img;
    siz nruns = 0, pos;
    uint len = 0;
    byte cur = 0;
    for( pos=0; pos<area; pos++ ) {
      if( pix[pos]!=cur ) {
        /* value changed: close the current run and start a new one */
        runs[nruns++] = len;
        len = 0;
        cur = pix[pos];
      }
      len++;
    }
    runs[nruns++] = len;
    rleInit(R+img, h, w, nruns, runs);
  }
  free(runs);
}

/* Expand n RLEs into the byte buffer M, one mask after the other.
 * Runs alternate between 0 and 1, starting with 0. */
void rleDecode( const RLE *R, byte *M, siz n ) {
  siz i, j, k;
  for( i=0; i<n; i++ ) {
    byte v = 0;
    for( j=0; j<R[i].m; j++ ) {
      const uint len = R[i].cnts[j];
      for( k=0; k<len; k++ ) *(M++) = v;
      v = !v;
    }
  }
}

/* Merge n RLEs into M: their intersection if `intersect` is set, otherwise
 * their union.
 *   n==0 -> M is initialised as an empty RLE.
 *   n==1 -> M is a copy of R[0].
 * If any R[i] differs in height or width from R[0], merging stops and M is
 * initialised with h=w=m=0. */
void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
  uint *cnts, c, ca, cb, cc, ct; bool v, va, vb, vp;
  siz i, a, b, h, w, m; RLE A, B;
  /* check n before touching R[0]: with n==0 there is no element to read */
  if(n==0) { rleInit(M,0,0,0,0); return; }
  h=R[0].h; w=R[0].w; m=R[0].m;
  if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
  cnts = malloc(sizeof(uint)*(h*w+1));
  for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
  for( i=1; i<n; i++ ) {
    B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
    /* A is a private copy of the result so far; cnts is rewritten below */
    rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
    v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
    while( ct>0 ) {
      /* advance both encodings by the shorter of the two current runs */
      c=umin(ca,cb); cc+=c; ct=0;
      ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
      cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
      vp=v; if(intersect) v=va&&vb; else v=va||vb;
      /* emit a run whenever the merged value flips or the input is exhausted */
      if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
    }
    rleFree(&A);
  }
  rleInit(M,h,w,m,cnts); free(cnts);
}

/* Store in a[i] the area of R[i], i.e. the sum of its counts at odd
 * (0-based) positions. */
void rleArea( const RLE *R, siz n, uint *a ) {
  siz i, j;
  for( i=0; i<n; i++ ) {
    uint total = 0;
    for( j=1; j<R[i].m; j+=2 ) total += R[i].cnts[j];
    a[i] = total;
  }
}

/* Compute the intersection over union between m detection RLEs (dt) and n
 * ground-truth RLEs (gt). The result for pair (d,g) is written to o[g*m+d].
 * iscrowd may be NULL; where iscrowd[g] is set, the denominator is the area
 * of the detection instead of the union. Pairs whose masks differ in height
 * or width get -1. */
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  siz g, d; BB db, gb; bool crowd;
  /* bounding-box IoU first: pairs with non-positive box IoU are left as is */
  db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
  gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
  bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
  for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
    crowd=iscrowd!=NULL && iscrowd[g];
    if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
    /* ca/cb: remaining length of the current run of dt/gt; va/vb: its value;
     * i/u: intersection and union pixel counts accumulated so far */
    siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb;
    ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
    cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
    while( ct>0 ) {
      /* advance both encodings by the shorter of the two current runs */
      c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
      ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
      cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
    }
    /* u=1 avoids 0/0 when nothing intersects; for crowd gt divide by dt area */
    if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
    o[g*m+d] = (double)i/(double)u;
  }
}

/* Intersection over union between m detection boxes (dt) and n ground-truth
 * boxes (gt), each stored as four doubles [x y w h]. The result for pair
 * (d,g) is written to o[g*m+d]; non-overlapping pairs get 0. iscrowd may be
 * NULL; where iscrowd[g] is set, the denominator is the detection area
 * instead of the union. */
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
  siz g, d;
  for( g=0; g<n; g++ ) {
    BB G = gt + g*4;
    const double gArea = G[2]*G[3];
    const bool crowd = iscrowd!=NULL && iscrowd[g];
    for( d=0; d<m; d++ ) {
      BB D = dt + d*4;
      const double dArea = D[2]*D[3];
      double ow, oh, inter, uni;
      o[g*m+d] = 0;
      /* overlap width, then height; stop early when the boxes are disjoint */
      ow = fmin(D[2]+D[0],G[2]+G[0]) - fmax(D[0],G[0]);
      if( ow<=0 ) continue;
      oh = fmin(D[3]+D[1],G[3]+G[1]) - fmax(D[1],G[1]);
      if( oh<=0 ) continue;
      inter = ow*oh;
      uni = crowd ? dArea : dArea+gArea-inter;
      o[g*m+d] = inter/uni;
    }
  }
}

/* Compute the bounding box [x y w h] of each of the n RLEs in R and store
 * it in bb[4*i .. 4*i+3]. An RLE with fewer than two counts (no foreground
 * run) yields an all-zero box. */
void rleToBbox( const RLE *R, BB bb, siz n ) {
  for( siz i=0; i<n; i++ ) {
    uint h, w, x, y, xs, ys, xe, ye, xp=0, cc, t; siz j, m;
    h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
    /* only complete (background, foreground) count pairs are considered */
    m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
    if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
    for( j=0; j<m; j++ ) {
      /* even j: first pixel of a foreground run; odd j: its last pixel */
      cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
      /* a run that ends in a later column than it started in wraps around
       * the column boundary, so it touches both the top and the bottom row;
       * the two end points alone would under-estimate the y extent */
      if(j%2==0) xp=x; else if(xp<x) { ys=0; ye=h-1; }
      xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
    }
    bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
    bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
  }
}

/* Convert n boxes [x y w h] into RLE masks of size h x w by rasterising
 * each box as a four-corner polygon with rleFrPoly. */
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
  siz i;
  for( i=0; i<n; i++ ) {
    const double *box = bb + 4*i;
    const double left = box[0], right = left + box[2];
    const double top = box[1], bottom = top + box[3];
    double xy[8];
    xy[0] = left;  xy[1] = top;
    xy[2] = left;  xy[3] = bottom;
    xy[4] = right; xy[5] = bottom;
    xy[6] = right; xy[7] = top;
    rleFrPoly( R+i, xy, 4, h, w );
  }
}

int uintCompare(const void *a, const void *b) {
  uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
}

/* Rasterise a closed polygon into an RLE mask of size h x w.
 * xy holds k vertices as x0,y0,x1,y1,...; the polygon is closed implicitly
 * by connecting the last vertex back to the first. */
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
  // upsample and get discrete points densely along entire boundary
  siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
  x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
  // vertices scaled and rounded to integers; entry k repeats vertex 0
  for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
  for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
  // count the boundary points needed: one per step along the longer axis of each edge
  for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
  u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
  for( j=0; j<k; j++ ) {
    int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t;
    bool flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
    // walk each edge in increasing direction of its longer axis; 'flip'
    // remembers that the end points were swapped so the emitted order is kept
    flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
    if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
    s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
    if(dx>=dy) for( int d=0; d<=dx; d++ ) {
      t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
    } else for( int d=0; d<=dy; d++ ) {
      t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
    }
  }
  // get points along y-boundary and downsample
  free(x); free(y); k=m; m=0; double xd, yd;
  x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
  for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
    // keep only x-crossings that fall on an integer column inside [0, w-1]
    xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
    if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
    // the y coordinate is clamped to [0, h] and rounded up
    yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
    if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
    x[m]=(int) xd; y[m]=(int) yd; m++;
  }
  // compute rle encoding given y-boundary points
  k=m; a=malloc(sizeof(uint)*(k+1));
  // linear (column-major) index of every boundary point, plus the end of the mask
  for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
  a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
  // sorted positions -> differences between consecutive positions = run lengths
  qsort(a,k,sizeof(uint),uintCompare); uint p=0;
  for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
  // drop zero-length runs by merging their neighbours
  b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
  while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
    j++; if(j<k) b[m-1]+=a[j++]; }
  rleInit(R,h,w,m,b); free(a); free(b);
}

/* Encode the run counts of R as a NUL-terminated ASCII string.
 * From the fourth count on, the difference to the count two positions
 * earlier is stored instead of the count itself. Every character carries
 * 5 payload bits plus a continuation bit. The caller owns the returned
 * buffer and must free() it. */
char* rleToString( const RLE *R ) {
  // Similar to LEB128 but using 6 bits/char and ascii chars 48-111.
  siz i, m=R->m, p=0; long x; bool more;
  // A difference of two 32-bit counts needs 33 bits plus sign, i.e. up to
  // 7 groups of 5 bits; one extra byte holds the terminating NUL (this also
  // covers m==0, where the terminator is the only byte written).
  char *s=malloc(sizeof(char)*(m*7+1));
  for( i=0; i<m; i++ ) {
    x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
    while( more ) {
      // emit 5 bits; stop once the rest is pure sign extension
      char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
      if(more) c |= 0x20; c+=48; s[p++]=c;
    }
  }
  s[p]=0; return s;
}

void rleFrString( RLE *R, char *s, siz h, siz w ) {
  // Inverse of rleToString: decode a NUL-terminated string of 6-bit chars
  // (ascii 48-111) into run counts and initialise R with them via rleInit.
  // Each char holds 5 payload bits (low groups first); bit 0x20 means another
  // group follows; bit 0x10 of the last group is the sign. From the fourth
  // count on the decoded value is a delta to the count two places earlier.
  siz m=0, p=0, k; long x; bool more; uint *cnts;
  const siz nbits=sizeof(long)*8;
  // The number of chars is an upper bound on the number of counts.
  while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
  while( s[p] ) {
    x=0; k=0; more=1;
    // Also stop at the terminator: a string that ends on a continuation char
    // must not make us read past the end of the buffer.
    while( more && s[p] ) {
      char c=s[p]-48;
      // Shift in unsigned long: the old int-typed shift overflowed from the
      // 7th group on (shift >= 30), and shifts >= the type width are undefined.
      if( 5*k<nbits ) x |= (long)((unsigned long)(c & 0x1f) << 5*k);
      more = c & 0x20; p++; k++;
      // Sign-extend a negative value; ~0UL<<n avoids left-shifting -1 (UB).
      if( !more && (c & 0x10) && 5*k<nbits ) x |= (long)(~0UL << 5*k);
    }
    if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
  }
  rleInit(R,h,w,m,cnts); free(cnts);
}


================================================
FILE: lib/pycocotools/maskApi.h
================================================
/**************************************************************************
* Microsoft COCO Toolbox.      version 2.0
* Data, paper, and tutorials available at:  http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#pragma once
#include <stdbool.h>

// Short aliases for the scalar types used by every function below.
typedef unsigned int uint;
typedef unsigned long siz;
typedef unsigned char byte;
// Bounding-box buffer passed as a raw pointer to doubles.
typedef double* BB;
// Run-length encoded mask: h and w are the mask dimensions (the encoding
// covers h*w pixels), m is the number of entries in cnts.
typedef struct { siz h, w, m; uint *cnts; } RLE;

// Initialize/destroy RLE.
void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
void rleFree( RLE *R );

// Initialize/destroy RLE array.
void rlesInit( RLE **R, siz n );
void rlesFree( RLE **R, siz n );

// Encode binary masks using RLE.
void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );

// Decode binary masks encoded via RLE.
void rleDecode( const RLE *R, byte *mask, siz n );

// Compute union or intersection of encoded masks.
void rleMerge( const RLE *R, RLE *M, siz n, bool intersect );

// Compute area of encoded masks.
void rleArea( const RLE *R, siz n, uint *a );

// Compute intersection over union between masks.
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );

// Compute intersection over union between bounding boxes.
void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );

// Get bounding boxes surrounding encoded masks.
void rleToBbox( const RLE *R, BB bb, siz n );

// Convert bounding boxes to encoded masks.
void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );

// Convert polygon to encoded mask.
// The result is stored in R through rleInit using the given h and w.
// NOTE(review): xy presumably holds k (x,y) vertex pairs - confirm in maskApi.c.
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );

// Get compressed string representation of encoded mask.
// The returned buffer is allocated with malloc and is NUL-terminated; nothing
// in rleToString releases it, so the caller presumably has to free() it.
char* rleToString( const RLE *R );

// Convert from compressed string representation of encoded mask.
// s is scanned up to its terminating NUL; R is filled in through rleInit
// with the supplied h and w.
void rleFrString( RLE *R, char *s, siz h, siz w );


================================================
FILE: lib/roi_data_layer/__init__.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------


================================================
FILE: lib/roi_data_layer/layer.py
================================================
# --------------------------------------------------------
# Fast R-CNN with OHEM
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Abhinav Shrivastava
# --------------------------------------------------------

"""
The data layer used during training to train a Fast R-CNN network.
RoIDataLayer implements a Caffe Python layer.
"""

import caffe
from fast_rcnn.config import cfg
from roi_data_layer.minibatch import get_minibatch, get_allrois_minibatch, get_ohem_minibatch
import numpy as np
import yaml
from multiprocessing import Process, Queue

class RoIDataLayer(caffe.Layer):
    """Fast R-CNN data layer used for training.

    The layer ignores its bottoms. On every forward pass it obtains one
    minibatch (a dict mapping blob names to arrays) and copies each array
    into the top blob registered under the same name in setup().
    """

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb and rewind the cursor.

        With cfg.TRAIN.ASPECT_GROUPING the permutation keeps images of
        similar orientation next to each other: "wide" (width >= height) and
        "tall" indices are shuffled separately, concatenated, cut into
        consecutive pairs, and the pairs are then shuffled.
        """
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            # Only an even number of indices can be cut into pairs; reshaping
            # an odd-length array to (-1, 2) raises ValueError. Pair up what
            # fits and append the single leftover index (if any) at the end.
            num_paired = (inds.shape[0] // 2) * 2
            pairs = np.reshape(inds[:num_paired], (-1, 2))
            row_perm = np.random.permutation(np.arange(pairs.shape[0]))
            self._perm = np.hstack((
                np.reshape(pairs[row_perm, :], (-1,)),
                inds[num_paired:]))
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch.

        Reshuffles first when fewer than IMS_PER_BATCH + 1 indices remain.
        NOTE: because the test is `>=`, the last full minibatch of each
        permutation is skipped; this is kept so the sampling sequence stays
        the same as before.
        """
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def _get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch.

        If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
        separate process and made available through self._blob_queue.
        Otherwise they are built here: with cfg.TRAIN.USE_OHEM through
        get_allrois_minibatch, else through get_minibatch.
        """
        if cfg.TRAIN.USE_PREFETCH:
            return self._blob_queue.get()

        db_inds = self._get_next_minibatch_inds()
        minibatch_db = [self._roidb[i] for i in db_inds]
        if cfg.TRAIN.USE_OHEM:
            return get_allrois_minibatch(minibatch_db, self._num_classes)
        return get_minibatch(minibatch_db, self._num_classes)

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training.

        With cfg.TRAIN.USE_PREFETCH this also starts a BlobFetcher process
        feeding a bounded queue. That path reads self._num_classes, which is
        assigned in setup(), so setup() has to run first.
        """
        self._roidb = roidb
        self._shuffle_roidb_inds()
        if cfg.TRAIN.USE_PREFETCH:
            self._blob_queue = Queue(10)
            self._prefetch_process = BlobFetcher(self._blob_queue,
                                                 self._roidb,
                                                 self._num_classes)
            self._prefetch_process.start()
            # Terminate the child process when the parent exits
            def cleanup():
                print('Terminating BlobFetcher')
                self._prefetch_process.terminate()
                self._prefetch_process.join()
            import atexit
            atexit.register(cleanup)

    def setup(self, bottom, top):
        """Setup the RoIDataLayer.

        Reads 'num_classes' from the layer's YAML parameter string, gives
        every top blob a placeholder shape and records the blob-name ->
        top-index mapping used by forward().
        """

        # parse the layer parameter string, which must be valid YAML.
        # safe_load only builds plain Python objects; the bare yaml.load call
        # can construct arbitrary objects and needs an explicit Loader on
        # recent PyYAML releases.
        layer_params = yaml.safe_load(self.param_str_)

        self._num_classes = layer_params['num_classes']

        self._name_to_top_map = {}

        # data blob: holds a batch of N images, each with 3 channels
        idx = 0
        top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
            max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
        self._name_to_top_map['data'] = idx
        idx += 1

        if cfg.TRAIN.HAS_RPN:
            top[idx].reshape(1, 3)
            self._name_to_top_map['im_info'] = idx
            idx += 1

            top[idx].reshape(1, 4)
            self._name_to_top_map['gt_boxes'] = idx
            idx += 1
        else: # not using RPN
            # rois blob: holds R regions of interest, each is a 5-tuple
            # (n, x1, y1, x2, y2) specifying an image batch index n and a
            # rectangle (x1, y1, x2, y2)
            top[idx].reshape(1, 5)
            self._name_to_top_map['rois'] = idx
            idx += 1

            # labels blob: R categorical labels in [0, ..., K] for K foreground
            # classes plus background
            top[idx].reshape(1)
            self._name_to_top_map['labels'] = idx
            idx += 1

            if cfg.TRAIN.BBOX_REG:
                # bbox_targets blob: R bounding-box regression targets with 4
                # targets per class
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_targets'] = idx
                idx += 1

                # bbox_inside_weights blob: At most 4 targets per roi are active;
                # this binary vector specifies the subset of active targets
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_inside_weights'] = idx
                idx += 1

                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_outside_weights'] = idx
                idx += 1

        print('RoiDataLayer: name_to_top: {}'.format(self._name_to_top_map))
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        """Get blobs and copy them into this layer's top blob vector."""
        blobs = self._get_next_minibatch()

        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

class OHEMDataLayer(caffe.Layer):
    """Online Hard-example Mining Layer.

    Scores every RoI with its classification loss (plus the smooth-L1 box
    regression loss when cfg.TRAIN.BBOX_REG is set) and forwards only the
    RoIs chosen by get_ohem_minibatch to its top blobs.
    """
    def setup(self, bottom, top):
        """Setup the OHEMDataLayer.

        Reads 'num_classes' from the YAML parameter string, gives every top
        blob a placeholder shape and records the name -> index maps.
        """

        # parse the layer parameter string, which must be valid YAML.
        # safe_load only builds plain Python objects; the bare yaml.load call
        # can construct arbitrary objects and needs an explicit Loader on
        # recent PyYAML releases.
        layer_params = yaml.safe_load(self.param_str_)

        self._num_classes = layer_params['num_classes']

        # Order in which forward() reads its bottoms.
        self._name_to_bottom_map = {
            'cls_prob_readonly': 0,
            'bbox_pred_readonly': 1,
            'rois': 2,
            'labels': 3}

        if cfg.TRAIN.BBOX_REG:
            # forward() reads three regression bottoms (4, 5 and 6); the map
            # used to list a single 'bbox_loss_weights' entry at index 5.
            self._name_to_bottom_map['bbox_targets'] = 4
            self._name_to_bottom_map['bbox_inside_weights'] = 5
            self._name_to_bottom_map['bbox_outside_weights'] = 6

        self._name_to_top_map = {}

        # assert cfg.TRAIN.HAS_RPN == False
        idx = 0
        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[idx].reshape(1, 5)
        self._name_to_top_map['rois_hard'] = idx
        idx += 1

        # labels blob: R categorical labels in [0, ..., K] for K foreground
        # classes plus background
        top[idx].reshape(1)
        self._name_to_top_map['labels_hard'] = idx
        idx += 1

        if cfg.TRAIN.BBOX_REG:
            # bbox_targets blob: R bounding-box regression targets with 4
            # targets per class
            top[idx].reshape(1, self._num_classes * 4)
            self._name_to_top_map['bbox_targets_hard'] = idx
            idx += 1

            # bbox_inside_weights blob: At most 4 targets per roi are active;
            # this binary vector specifies the subset of active targets
            top[idx].reshape(1, self._num_classes * 4)
            self._name_to_top_map['bbox_inside_weights_hard'] = idx
            idx += 1

            top[idx].reshape(1, self._num_classes * 4)
            self._name_to_top_map['bbox_outside_weights_hard'] = idx
            idx += 1

        print('OHEMDataLayer: name_to_top: {}'.format(self._name_to_top_map))
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        """Compute loss, select RoIs using OHEM. Use RoIs to get blobs and copy them into this layer's top blob vector.

        Bottoms: 0 class probabilities, 1 box predictions, 2 rois, 3 labels
        and, with cfg.TRAIN.BBOX_REG, 4 box targets, 5 inside weights,
        6 outside weights.
        """

        cls_prob = bottom[0].data
        bbox_pred = bottom[1].data
        rois = bottom[2].data
        labels = bottom[3].data
        if cfg.TRAIN.BBOX_REG:
            bbox_target = bottom[4].data
            bbox_inside_weights = bottom[5].data
            bbox_outside_weights = bottom[6].data
        else:
            bbox_target = None
            bbox_inside_weights = None
            bbox_outside_weights = None

        flt_min = np.finfo(float).eps
        # Blob data is floating point; array indices must be integers.
        label_inds = labels.astype(np.int64)
        # classification loss: -log(p[label]), with p clamped away from zero.
        # Kept as an ndarray so that adding the box loss below is an
        # element-wise sum (on a Python list `+=` would append instead,
        # doubling the length of the loss vector).
        loss = np.array(
            [-1 * np.log(max(cls_prob[i, label], flt_min))
             for i, label in enumerate(label_inds)],
            dtype=np.float64)

        if cfg.TRAIN.BBOX_REG:
            # bounding-box regression loss
            # d := w * (b0 - b1)
            # smoothL1(x) = 0.5 * x^2    if |x| < 1
            #               |x| - 0.5    otherwise
            def smoothL1(x):
                if abs(x) < 1:
                    return 0.5 * x * x
                else:
                    return abs(x) - 0.5

            bbox_loss = np.zeros(labels.shape[0])
            # Only foreground RoIs (label > 0) contribute a regression loss.
            for i in np.where(labels > 0)[0]:
                indices = np.where(bbox_inside_weights[i, :] != 0)[0]
                diffs = bbox_inside_weights[i, indices] * \
                    (bbox_pred[i, indices] - bbox_target[i, indices])
                bbox_loss[i] = sum(bbox_outside_weights[i, indices] *
                                   [smoothL1(x) for x in diffs])
            loss += bbox_loss

        blobs = get_ohem_minibatch(loss, rois, labels, bbox_target,
                                   bbox_inside_weights, bbox_outside_weights)

        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

class BlobFetcher(Process):
    """Experimental worker process that builds minibatch blobs ahead of time
    and hands them over through a queue."""

    def __init__(self, queue, roidb, num_classes):
        super(BlobFetcher, self).__init__()
        self._queue, self._roidb, self._num_classes = queue, roidb, num_classes
        self._perm = None
        self._cur = 0
        # The first permutation is drawn before the generator is re-seeded.
        self._shuffle_roidb_inds()
        # fix the random seed for reproducibility
        np.random.seed(cfg.RNG_SEED)

    def _shuffle_roidb_inds(self):
        """Draw a fresh random order over the roidb and rewind the cursor."""
        # TODO(rbg): remove duplicated code
        num_entries = len(self._roidb)
        self._perm = np.random.permutation(np.arange(num_entries))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices that make up the next minibatch."""
        # TODO(rbg): remove duplicated code
        step = cfg.TRAIN.IMS_PER_BATCH
        if self._cur + step >= len(self._roidb):
            self._shuffle_roidb_inds()

        start = self._cur
        self._cur = start + step
        return self._perm[start:start + step]

    def run(self):
        """Process body: build minibatches forever and push them on the queue."""
        print('BlobFetcher started')
        while True:
            chosen = [self._roidb[i] for i in self._get_next_minibatch_inds()]
            # OHEM training needs every RoI, otherwise a sampled subset.
            build = get_allrois_minibatch if cfg.TRAIN.USE_OHEM else get_minibatch
            self._queue.put(build(chosen, self._num_classes))


================================================
FILE: lib/roi_data_layer/minibatch.py
================================================
# --------------------------------------------------------
# Fast R-CNN with OHEM
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Abhinav Shrivastava
# --------------------------------------------------------

"""Compute minibatch blobs for training a Fast R-CNN network."""

import numpy as np
import numpy.random as npr
import cv2
from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob
from fast_rcnn.nms_wrapper import nms

def get_minibatch(roidb, num_classes):
    """Build the blob dict for one training minibatch from `roidb`.

    Always contains 'data'. With cfg.TRAIN.HAS_RPN it additionally holds
    'gt_boxes' and 'im_info' (single image only); otherwise 'rois' and
    'labels' sampled per image, plus the three bbox blobs when
    cfg.TRAIN.BBOX_REG is set.
    """
    num_images = len(roidb)
    # One randomly chosen training scale per image
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    evenly_split = (cfg.TRAIN.BATCH_SIZE % num_images == 0)
    assert evenly_split or cfg.TRAIN.USE_OHEM, \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)
    if cfg.TRAIN.USE_OHEM:
        rois_per_image = np.inf
    else:
        rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    # Image blob in caffe layout together with the scale applied to each image
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    if cfg.TRAIN.HAS_RPN:
        assert len(im_scales) == 1, "Single batch only"
        assert len(roidb) == 1, "Single batch only"
        entry = roidb[0]
        # rows are (x1, y1, x2, y2, cls) for every non-background box
        gt_inds = np.where(entry['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * im_scales[0]
        gt_boxes[:, 4] = entry['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
        return blobs

    # Not using RPN: gather the per-image pieces, then stack them once.
    # Each list starts with an empty float32 array of the right width so the
    # stacked result has the same shape and dtype as incremental stacking.
    roi_parts = [np.zeros((0, 5), dtype=np.float32)]
    label_parts = [np.zeros((0), dtype=np.float32)]
    target_parts = [np.zeros((0, 4 * num_classes), dtype=np.float32)]
    inside_parts = [np.zeros((0, 4 * num_classes), dtype=np.float32)]
    for im_i, entry in enumerate(roidb):
        labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
            = _sample_rois(entry, fg_rois_per_image, rois_per_image,
                           num_classes)

        # Prefix every projected RoI with the index of its image
        rois = _project_im_rois(im_rois, im_scales[im_i])
        batch_ind = im_i * np.ones((rois.shape[0], 1))
        roi_parts.append(np.hstack((batch_ind, rois)))

        label_parts.append(labels)
        target_parts.append(bbox_targets)
        inside_parts.append(bbox_inside_weights)

    blobs['rois'] = np.vstack(roi_parts)
    blobs['labels'] = np.hstack(label_parts)

    if cfg.TRAIN.BBOX_REG:
        bbox_inside_blob = np.vstack(inside_parts)
        blobs['bbox_targets'] = np.vstack(target_parts)
        blobs['bbox_inside_weights'] = bbox_inside_blob
        blobs['bbox_outside_weights'] = \
            (bbox_inside_blob > 0).astype(np.float32)

    return blobs

def get_allrois_minibatch(roidb, num_classes):
    """Build the blob dict for a minibatch that keeps every RoI of each image.

    Mirrors get_minibatch, but the per-image RoIs come from _all_rois
    instead of being sampled.
    """
    num_images = len(roidb)
    # One randomly chosen training scale per image
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)

    # Image blob in caffe layout together with the scale applied to each image
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    if cfg.TRAIN.HAS_RPN:
        assert len(im_scales) == 1, "Single batch only"
        assert len(roidb) == 1, "Single batch only"
        only = roidb[0]
        scale = im_scales[0]
        # rows are (x1, y1, x2, y2, cls) for every non-background box
        gt_inds = np.where(only['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = only['boxes'][gt_inds, :] * scale
        gt_boxes[:, 4] = only['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], scale]],
            dtype=np.float32)
        return blobs

    # Not using RPN: collect per-image arrays under their blob name. Every
    # list is seeded with an empty float32 array of the right width so the
    # final stack matches what incremental stacking would have produced.
    width = 4 * num_classes
    pieces = {
        'rois': [np.zeros((0, 5), dtype=np.float32)],
        'labels': [np.zeros((0), dtype=np.float32)],
        'bbox_targets': [np.zeros((0, width), dtype=np.float32)],
        'bbox_inside_weights': [np.zeros((0, width), dtype=np.float32)],
    }
    for im_i, entry in enumerate(roidb):
        labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
            = _all_rois(entry, num_classes)

        # Prefix every projected RoI with the index of its image
        rois = _project_im_rois(im_rois, im_scales[im_i])
        batch_ind = im_i * np.ones((rois.shape[0], 1))
        pieces['rois'].append(np.hstack((batch_ind, rois)))

        pieces['labels'].append(labels)
        pieces['bbox_targets'].append(bbox_targets)
        pieces['bbox_inside_weights'].append(bbox_inside_weights)

    blobs['rois'] = np.vstack(pieces['rois'])
    blobs['labels'] = np.hstack(pieces['labels'])

    if cfg.TRAIN.BBOX_REG:
        inside = np.vstack(pieces['bbox_inside_weights'])
        blobs['bbox_targets'] = np.vstack(pieces['bbox_targets'])
        blobs['bbox_inside_weights'] = inside
        blobs['bbox_outside_weights'] = (inside > 0).astype(np.float32)

    return blobs


def get_ohem_minibatch(loss, rois, labels, bbox_targets=None,
                       bbox_inside_weights=None, bbox_outside_weights=None):
    """Pick the hard RoIs according to `loss` and return their blobs.

    Returns a dict with 'rois_hard' and 'labels_hard' and, when
    `bbox_targets` is given, the three matching '*_hard' bbox blobs.
    """
    loss = np.array(loss)

    if not cfg.TRAIN.OHEM_USE_NMS:
        hard_inds = select_hard_examples(loss)
    else:
        # Run NMS per (image, label) group with the loss as the score, so
        # near-duplicate RoIs do not crowd the selection.
        nms_thresh = cfg.TRAIN.OHEM_NMS_THRESH
        source_img_ids = [roi[0] for roi in rois]
        keep_inds = []
        for img_id in np.unique(source_img_ids):
            for label in np.unique(labels):
                in_group = np.logical_and(labels == label,
                                          source_img_ids == img_id)
                sel_indx = np.where(in_group)[0]
                if len(sel_indx) == 0:
                    continue
                # columns: x1, y1, x2, y2, loss
                scores = loss[sel_indx].reshape(-1, 1)
                boxes = np.hstack((rois[sel_indx, 1:], scores))
                boxes = boxes.astype(np.float32)
                survivors = nms(boxes, nms_thresh)
                keep_inds.extend(sel_indx[survivors])

        # Indices returned here are relative to keep_inds; map them back.
        hard_keep_inds = select_hard_examples(loss[keep_inds])
        hard_inds = np.array(keep_inds)[hard_keep_inds]

    blobs = {}
    blobs['rois_hard'] = rois[hard_inds, :].copy()
    blobs['labels_hard'] = labels[hard_inds].copy()
    if bbox_targets is None:
        return blobs

    assert cfg.TRAIN.BBOX_REG
    blobs['bbox_targets_hard'] = bbox_targets[hard_inds, :].copy()
    blobs['bbox_inside_weights_hard'] = bbox_inside_weights[hard_inds, :].copy()
    blobs['bbox_outside_weights_hard'] = bbox_outside_weights[hard_inds, :].copy()
    return blobs

def select_hard_examples(loss):
    """Return the indices of the highest-loss entries, hardest first.

    At most cfg.TRAIN.BATCH_SIZE indices are returned.
    """
    num_keep = np.minimum(len(loss), cfg.TRAIN.BATCH_SIZE)
    # Ascending argsort reversed gives a descending-loss ordering.
    by_loss_desc = np.argsort(loss)[::-1]
    # (explore more ways of selecting examples in this function; e.g., sampling)
    return by_loss_desc[0:num_keep]

def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # label = class RoI has max overlap with
    labels = roidb['max_classes']
    overlaps = roidb['max_overlaps']
    rois = roidb['boxes']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(
                fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of backgrou
Download .txt
gitextract_z_8jtbwf/

├── .gitignore
├── LICENSE
├── README.md
├── data/
│   ├── .gitignore
│   ├── README.md
│   ├── pylintrc
│   └── scripts/
│       ├── fetch_faster_rcnn_models.sh
│       ├── fetch_imagenet_models.sh
│       └── fetch_selective_search_data.sh
├── experiments/
│   ├── README.md
│   ├── cfgs/
│   │   ├── faster_rcnn_alt_opt.yml
│   │   ├── faster_rcnn_end2end.yml
│   │   └── faster_rcnn_end2end_ohem.yml
│   ├── logs/
│   │   └── .gitignore
│   └── scripts/
│       ├── fast_rcnn.sh
│       ├── faster_rcnn_alt_opt.sh
│       ├── faster_rcnn_end2end.sh
│       ├── test_resnet101_bn_scale_merged_0712_end2end.sh
│       ├── test_resnet101_bn_scale_merged_0712_end2end_ohem.sh
│       ├── train_resnet101_bn_scale_merged_0712_end2end.sh
│       └── train_resnet101_bn_scale_merged_0712_end2end_ohem.sh
├── lib/
│   ├── Makefile
│   ├── datasets/
│   │   ├── VOCdevkit-matlab-wrapper/
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── tools/
│   │   │   └── mcg_munge.py
│   │   └── voc_eval.py
│   ├── fast_rcnn/
│   │   ├── __init__.py
│   │   ├── bbox_transform.py
│   │   ├── config.py
│   │   ├── nms_wrapper.py
│   │   ├── test.py
│   │   └── train.py
│   ├── nms/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── pycocotools/
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer/
│   │   ├── __init__.py
│   │   ├── layer.py
│   │   ├── minibatch.py
│   │   └── roidb.py
│   ├── rpn/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── anchor_target_layer.py
│   │   ├── generate.py
│   │   ├── generate_anchors.py
│   │   ├── proposal_layer.py
│   │   └── proposal_target_layer.py
│   ├── setup.py
│   ├── transform/
│   │   ├── __init__.py
│   │   └── torch_image_transform_layer.py
│   └── utils/
│       ├── .gitignore
│       ├── __init__.py
│       ├── bbox.pyx
│       ├── blob.py
│       └── timer.py
├── models/
│   ├── README.md
│   ├── coco/
│   │   ├── VGG16/
│   │   │   ├── fast_rcnn/
│   │   │   │   ├── solver.prototxt
│   │   │   │   ├── test.prototxt
│   │   │   │   └── train.prototxt
│   │   │   └── faster_rcnn_end2end/
│   │   │       ├── solver.prototxt
│   │   │       ├── test.prototxt
│   │   │       └── train.prototxt
│   │   └── VGG_CNN_M_1024/
│   │       ├── fast_rcnn/
│   │       │   ├── solver.prototxt
│   │       │   ├── test.prototxt
│   │       │   └── train.prototxt
│   │       └── faster_rcnn_end2end/
│   │           ├── solver.prototxt
│   │           ├── test.prototxt
│   │           └── train.prototxt
│   └── pascal_voc/
│       ├── ResNet-50/
│       │   ├── faster_rcnn_BN_SCALE_Merged/
│       │   │   ├── faster_rcnn_alt_opt/
│       │   │   │   ├── faster_rcnn_test.pt
│       │   │   │   ├── rpn_test.pt
│       │   │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   │   ├── stage1_rpn_train.pt
│       │   │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   │   └── stage2_rpn_train.pt
│       │   │   └── faster_rcnn_end2end/
│       │   │       ├── solver.prototxt
│       │   │       ├── test.prototxt
│       │   │       └── train.prototxt
│       │   ├── faster_rcnn_BN_SCALE_Merged_OHEM/
│       │   │   └── faster_rcnn_end2end/
│       │   │       ├── solver.prototxt
│       │   │       ├── test.prototxt
│       │   │       └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   ├── rfcn_alt_opt_5step_ohem/
│       │   │   ├── rfcn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_rfcn_ohem_solver80k120k.pt
│       │   │   ├── stage1_rfcn_ohem_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_rfcn_ohem_solver80k120k.pt
│       │   │   ├── stage2_rfcn_ohem_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   ├── stage2_rpn_train.pt
│       │   │   ├── stage3_rpn_solver60k80k.pt
│       │   │   └── stage3_rpn_train.pt
│       │   └── rfcn_end2end/
│       │       ├── class-aware/
│       │       │   ├── test.prototxt
│       │       │   └── train_ohem.prototxt
│       │       ├── solver.prototxt
│       │       ├── solver_ohem.prototxt
│       │       ├── test_agnostic.prototxt
│       │       ├── train_agnostic.prototxt
│       │       └── train_agnostic_ohem.prototxt
│       ├── ResNet101/
│       │   └── faster_rcnn_alt_opt/
│       │       ├── faster_rcnn_test.pt
│       │       ├── rpn_test.pt
│       │       ├── stage1_fast_rcnn_solver30k40k.pt
│       │       ├── stage1_fast_rcnn_train.pt
│       │       ├── stage1_rpn_solver60k80k.pt
│       │       ├── stage1_rpn_train.pt
│       │       ├── stage2_fast_rcnn_solver30k40k.pt
│       │       ├── stage2_fast_rcnn_train.pt
│       │       ├── stage2_rpn_solver60k80k.pt
│       │       └── stage2_rpn_train.pt
│       ├── ResNet101_BN_SCALE_Merged/
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       ├── ResNet101_BN_SCALE_Merged_OHEM/
│       │   └── faster_rcnn_end2end_ohem/
│       │       ├── solver.prototxt
│       │       └── train.prototxt
│       ├── VGG16/
│       │   ├── fast_rcnn/
│       │   │   ├── solver.prototxt
│       │   │   ├── test.prototxt
│       │   │   └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       ├── VGG_CNN_M_1024/
│       │   ├── fast_rcnn/
│       │   │   ├── solver.prototxt
│       │   │   ├── test.prototxt
│       │   │   └── train.prototxt
│       │   ├── faster_rcnn_alt_opt/
│       │   │   ├── faster_rcnn_test.pt
│       │   │   ├── rpn_test.pt
│       │   │   ├── stage1_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage1_fast_rcnn_train.pt
│       │   │   ├── stage1_rpn_solver60k80k.pt
│       │   │   ├── stage1_rpn_train.pt
│       │   │   ├── stage2_fast_rcnn_solver30k40k.pt
│       │   │   ├── stage2_fast_rcnn_train.pt
│       │   │   ├── stage2_rpn_solver60k80k.pt
│       │   │   └── stage2_rpn_train.pt
│       │   └── faster_rcnn_end2end/
│       │       ├── solver.prototxt
│       │       ├── test.prototxt
│       │       └── train.prototxt
│       └── ZF/
│           ├── fast_rcnn/
│           │   ├── solver.prototxt
│           │   ├── test.prototxt
│           │   └── train.prototxt
│           ├── faster_rcnn_alt_opt/
│           │   ├── faster_rcnn_test.pt
│           │   ├── rpn_test.pt
│           │   ├── stage1_fast_rcnn_solver30k40k.pt
│           │   ├── stage1_fast_rcnn_train.pt
│           │   ├── stage1_rpn_solver60k80k.pt
│           │   ├── stage1_rpn_train.pt
│           │   ├── stage2_fast_rcnn_solver30k40k.pt
│           │   ├── stage2_fast_rcnn_train.pt
│           │   ├── stage2_rpn_solver60k80k.pt
│           │   └── stage2_rpn_train.pt
│           └── faster_rcnn_end2end/
│               ├── solver.prototxt
│               ├── test.prototxt
│               └── train.prototxt
└── tools/
    ├── README.md
    ├── _init_paths.py
    ├── compress_net.py
    ├── demo.py
    ├── eval_recall.py
    ├── merge_bn_scale.py
    ├── reval.py
    ├── rpn_generate.py
    ├── test_net.py
    ├── train_faster_rcnn_alt_opt.py
    ├── train_net.py
    └── train_svms.py
Download .txt
SYMBOL INDEX (258 symbols across 39 files)

FILE: lib/datasets/coco.py
  function _filter_crowd_proposals (line 24) | def _filter_crowd_proposals(roidb, crowd_thresh):
  class coco (line 45) | class coco(imdb):
    method __init__ (line 46) | def __init__(self, image_set, year):
    method _get_ann_file (line 85) | def _get_ann_file(self):
    method _load_image_set_index (line 91) | def _load_image_set_index(self):
    method _get_widths (line 98) | def _get_widths(self):
    method image_path_at (line 103) | def image_path_at(self, i):
    method image_path_from_index (line 109) | def image_path_from_index(self, index):
    method selective_search_roidb (line 123) | def selective_search_roidb(self):
    method edge_boxes_roidb (line 126) | def edge_boxes_roidb(self):
    method mcg_roidb (line 129) | def mcg_roidb(self):
    method _roidb_from_proposals (line 132) | def _roidb_from_proposals(self, method):
    method _load_proposals (line 161) | def _load_proposals(self, method, gt_roidb):
    method gt_roidb (line 208) | def gt_roidb(self):
    method _load_coco_annotation (line 228) | def _load_coco_annotation(self, index):
    method _get_box_file (line 284) | def _get_box_file(self, index):
    method _print_detection_eval_metrics (line 291) | def _print_detection_eval_metrics(self, coco_eval):
    method _do_detection_eval (line 323) | def _do_detection_eval(self, res_file, output_dir):
    method _coco_results_one_category (line 336) | def _coco_results_one_category(self, boxes, cat_id):
    method _write_coco_results_file (line 354) | def _write_coco_results_file(self, all_boxes, res_file):
    method evaluate_detections (line 372) | def evaluate_detections(self, all_boxes, output_dir):
    method competition_mode (line 388) | def competition_mode(self, on):

FILE: lib/datasets/ds_utils.py
  function unique_boxes (line 9) | def unique_boxes(boxes, scale=1.0):
  function xywh_to_xyxy (line 16) | def xywh_to_xyxy(boxes):
  function xyxy_to_xywh (line 20) | def xyxy_to_xywh(boxes):
  function validate_boxes (line 24) | def validate_boxes(boxes, width=0, height=0):
  function filter_small_boxes (line 37) | def filter_small_boxes(boxes, min_size):

FILE: lib/datasets/factory.py
  function get_imdb (line 34) | def get_imdb(name):
  function list_imdbs (line 40) | def list_imdbs():

FILE: lib/datasets/imdb.py
  class imdb (line 16) | class imdb(object):
    method __init__ (line 19) | def __init__(self, name):
    method name (line 31) | def name(self):
    method num_classes (line 35) | def num_classes(self):
    method classes (line 39) | def classes(self):
    method image_index (line 43) | def image_index(self):
    method roidb_handler (line 47) | def roidb_handler(self):
    method roidb_handler (line 51) | def roidb_handler(self, val):
    method set_proposal_method (line 54) | def set_proposal_method(self, method):
    method roidb (line 59) | def roidb(self):
    method cache_path (line 71) | def cache_path(self):
    method num_images (line 78) | def num_images(self):
    method image_path_at (line 81) | def image_path_at(self, i):
    method default_roidb (line 84) | def default_roidb(self):
    method evaluate_detections (line 87) | def evaluate_detections(self, all_boxes, output_dir=None):
    method _get_widths (line 98) | def _get_widths(self):
    method append_flipped_images (line 102) | def append_flipped_images(self):
    method evaluate_recall (line 119) | def evaluate_recall(self, candidate_boxes=None, thresholds=None,
    method create_roidb_from_box_list (line 209) | def create_roidb_from_box_list(self, box_list, gt_roidb):
    method merge_roidbs (line 239) | def merge_roidbs(a, b):
    method competition_mode (line 251) | def competition_mode(self, on):

FILE: lib/datasets/pascal_voc.py
  class pascal_voc (line 22) | class pascal_voc(imdb):
    method __init__ (line 23) | def __init__(self, image_set, year, devkit_path=None):
    method image_path_at (line 57) | def image_path_at(self, i):
    method image_path_from_index (line 63) | def image_path_from_index(self, index):
    method _load_image_set_index (line 73) | def _load_image_set_index(self):
    method _get_default_path (line 87) | def _get_default_path(self):
    method gt_roidb (line 93) | def gt_roidb(self):
    method selective_search_roidb (line 114) | def selective_search_roidb(self):
    method rpn_roidb (line 142) | def rpn_roidb(self):
    method _load_rpn_roidb (line 152) | def _load_rpn_roidb(self, gt_roidb):
    method _load_selective_search_roidb (line 161) | def _load_selective_search_roidb(self, gt_roidb):
    method _load_pascal_annotation (line 180) | def _load_pascal_annotation(self, index):
    method _get_comp_id (line 226) | def _get_comp_id(self):
    method _get_voc_results_file_template (line 231) | def _get_voc_results_file_template(self):
    method _write_voc_results_file (line 242) | def _write_voc_results_file(self, all_boxes):
    method _do_python_eval (line 260) | def _do_python_eval(self, output_dir = 'output'):
    method _do_matlab_eval (line 305) | def _do_matlab_eval(self, output_dir='output'):
    method evaluate_detections (line 320) | def evaluate_detections(self, all_boxes, output_dir):
    method competition_mode (line 332) | def competition_mode(self, on):

FILE: lib/datasets/tools/mcg_munge.py
  function munge (line 15) | def munge(src_dir):

FILE: lib/datasets/voc_eval.py
  function parse_rec (line 12) | def parse_rec(filename):
  function voc_ap (line 31) | def voc_ap(rec, prec, use_07_metric=False):
  function voc_eval (line 64) | def voc_eval(detpath,

FILE: lib/fast_rcnn/bbox_transform.py
  function bbox_transform (line 10) | def bbox_transform(ex_rois, gt_rois):
  function bbox_transform_inv (line 30) | def bbox_transform_inv(boxes, deltas):
  function clip_boxes (line 63) | def clip_boxes(boxes, im_shape):

FILE: lib/fast_rcnn/config.py
  function get_output_dir (line 215) | def get_output_dir(imdb, net=None):
  function _merge_a_into_b (line 229) | def _merge_a_into_b(a, b):
  function cfg_from_file (line 261) | def cfg_from_file(filename):
  function cfg_from_list (line 269) | def cfg_from_list(cfg_list):

FILE: lib/fast_rcnn/nms_wrapper.py
  function nms (line 12) | def nms(dets, thresh, force_cpu=False):

FILE: lib/fast_rcnn/test.py
  function _get_image_blob (line 22) | def _get_image_blob(im):
  function _get_rois_blob (line 58) | def _get_rois_blob(im_rois, im_scale_factors):
  function _project_im_rois (line 72) | def _project_im_rois(im_rois, scales):
  function _get_blobs (line 100) | def _get_blobs(im, rois):
  function im_detect (line 108) | def im_detect(net, im, boxes=None):
  function vis_detections (line 186) | def vis_detections(im, class_name, dets, thresh=0.3):
  function apply_nms (line 205) | def apply_nms(all_boxes, thresh):
  function test_net (line 227) | def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):

FILE: lib/fast_rcnn/train.py
  class SolverWrapper (line 20) | class SolverWrapper(object):
    method __init__ (line 26) | def __init__(self, solver_prototxt, roidb, output_dir,
    method snapshot (line 55) | def snapshot(self):
    method train_model (line 93) | def train_model(self, max_iters):
  function get_training_roidb (line 114) | def get_training_roidb(imdb):
  function filter_roidb (line 127) | def filter_roidb(roidb):
  function train_net (line 151) | def train_net(solver_prototxt, roidb, output_dir,

FILE: lib/nms/py_cpu_nms.py
  function py_cpu_nms (line 10) | def py_cpu_nms(dets, thresh):

FILE: lib/pycocotools/coco.py
  class COCO (line 61) | class COCO:
    method __init__ (line 62) | def __init__(self, annotation_file=None):
    method createIndex (line 84) | def createIndex(self):
    method info (line 122) | def info(self):
    method getAnnIds (line 130) | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
    method getCatIds (line 159) | def getCatIds(self, catNms=[], supNms=[], catIds=[]):
    method getImgIds (line 181) | def getImgIds(self, imgIds=[], catIds=[]):
    method loadAnns (line 202) | def loadAnns(self, ids=[]):
    method loadCats (line 213) | def loadCats(self, ids=[]):
    method loadImgs (line 224) | def loadImgs(self, ids=[]):
    method showAnns (line 235) | def showAnns(self, anns):
    method loadRes (line 281) | def loadRes(self, resFile):
    method download (line 329) | def download( self, tarDir = None, imgIds = [] ):

FILE: lib/pycocotools/cocoeval.py
  class COCOeval (line 10) | class COCOeval:
    method __init__ (line 59) | def __init__(self, cocoGt=None, cocoDt=None):
    method _prepare (line 82) | def _prepare(self):
    method evaluate (line 129) | def evaluate(self):
    method computeIoU (line 163) | def computeIoU(self, imgId, catId):
    method evaluateImg (line 189) | def evaluateImg(self, imgId, catId, aRng, maxDet):
    method accumulate (line 274) | def accumulate(self, p = None):
    method summarize (line 376) | def summarize(self):
    method __str__ (line 428) | def __str__(self):
  class Params (line 431) | class Params:
    method __init__ (line 435) | def __init__(self):

FILE: lib/pycocotools/maskApi.c
  function uint (line 11) | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
  function uint (line 12) | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
  function rleInit (line 14) | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
  function rleFree (line 19) | void rleFree( RLE *R ) {
  function rlesInit (line 23) | void rlesInit( RLE **R, siz n ) {
  function rlesFree (line 28) | void rlesFree( RLE **R, siz n ) {
  function rleEncode (line 32) | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
  function rleDecode (line 43) | void rleDecode( const RLE *R, byte *M, siz n ) {
  function rleMerge (line 49) | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
  function rleArea (line 72) | void rleArea( const RLE *R, siz n, uint *a ) {
  function rleIou (line 77) | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  function bbIou (line 98) | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
  function rleToBbox (line 111) | void rleToBbox( const RLE *R, BB bb, siz n ) {
  function rleFrBbox (line 126) | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
  function uintCompare (line 135) | int uintCompare(const void *a, const void *b) {
  function rleFrPoly (line 139) | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
  function rleFrString (line 195) | void rleFrString( RLE *R, char *s, siz h, siz w ) {

FILE: lib/pycocotools/maskApi.h
  type uint (line 10) | typedef unsigned int uint;
  type siz (line 11) | typedef unsigned long siz;
  type byte (line 12) | typedef unsigned char byte;
  type RLE (line 14) | typedef struct { siz h, w, m; uint *cnts; } RLE;

FILE: lib/roi_data_layer/layer.py
  class RoIDataLayer (line 19) | class RoIDataLayer(caffe.Layer):
    method _shuffle_roidb_inds (line 22) | def _shuffle_roidb_inds(self):
    method _get_next_minibatch_inds (line 42) | def _get_next_minibatch_inds(self):
    method _get_next_minibatch (line 51) | def _get_next_minibatch(self):
    method set_roidb (line 69) | def set_roidb(self, roidb):
    method setup (line 87) | def setup(self, bottom, top):
    method forward (line 146) | def forward(self, bottom, top):
    method backward (line 157) | def backward(self, top, propagate_down, bottom):
    method reshape (line 161) | def reshape(self, bottom, top):
  class OHEMDataLayer (line 165) | class OHEMDataLayer(caffe.Layer):
    method setup (line 167) | def setup(self, bottom, top):
    method forward (line 223) | def forward(self, bottom, top):
    method backward (line 272) | def backward(self, top, propagate_down, bottom):
    method reshape (line 276) | def reshape(self, bottom, top):
  class BlobFetcher (line 280) | class BlobFetcher(Process):
    method __init__ (line 282) | def __init__(self, queue, roidb, num_classes):
    method _shuffle_roidb_inds (line 293) | def _shuffle_roidb_inds(self):
    method _get_next_minibatch_inds (line 299) | def _get_next_minibatch_inds(self):
    method run (line 309) | def run(self):

FILE: lib/roi_data_layer/minibatch.py
  function get_minibatch (line 16) | def get_minibatch(roidb, num_classes):
  function get_allrois_minibatch (line 83) | def get_allrois_minibatch(roidb, num_classes):
  function get_ohem_minibatch (line 146) | def get_ohem_minibatch(loss, rois, labels, bbox_targets=None,
  function select_hard_examples (line 181) | def select_hard_examples(loss):
  function _sample_rois (line 190) | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
  function _all_rois (line 235) | def _all_rois(roidb, num_classes):
  function _get_image_blob (line 267) | def _get_image_blob(roidb, scale_inds):
  function _project_im_rois (line 289) | def _project_im_rois(im_rois, im_scale_factor):
  function _get_bbox_regression_labels (line 294) | def _get_bbox_regression_labels(bbox_target_data, num_classes):
  function _vis_minibatch (line 318) | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):

FILE: lib/roi_data_layer/roidb.py
  function prepare_roidb (line 16) | def prepare_roidb(imdb):
  function add_bbox_regression_targets (line 46) | def add_bbox_regression_targets(roidb):
  function _compute_targets (line 109) | def _compute_targets(rois, overlaps, labels):

FILE: lib/rpn/anchor_target_layer.py
  class AnchorTargetLayer (line 20) | class AnchorTargetLayer(caffe.Layer):
    method setup (line 26) | def setup(self, bottom, top):
    method forward (line 65) | def forward(self, bottom, top):
    method backward (line 251) | def backward(self, top, propagate_down, bottom):
    method reshape (line 255) | def reshape(self, bottom, top):
  function _unmap (line 260) | def _unmap(data, count, inds, fill=0):
  function _compute_targets (line 274) | def _compute_targets(ex_rois, gt_rois):

FILE: lib/rpn/generate.py
  function _vis_proposals (line 14) | def _vis_proposals(im, dets, thresh=0.5):
  function _get_image_blob (line 47) | def _get_image_blob(im):
  function im_proposals (line 84) | def im_proposals(net, im):
  function imdb_proposals (line 99) | def imdb_proposals(net, imdb):

FILE: lib/rpn/generate_anchors.py
  function generate_anchors (line 37) | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
  function _whctrs (line 50) | def _whctrs(anchor):
  function _mkanchors (line 61) | def _mkanchors(ws, hs, x_ctr, y_ctr):
  function _ratio_enum (line 75) | def _ratio_enum(anchor, ratios):
  function _scale_enum (line 88) | def _scale_enum(anchor, scales):

FILE: lib/rpn/proposal_layer.py
  class ProposalLayer (line 18) | class ProposalLayer(caffe.Layer):
    method setup (line 24) | def setup(self, bottom, top):
    method forward (line 47) | def forward(self, bottom, top):
    method backward (line 163) | def backward(self, top, propagate_down, bottom):
    method reshape (line 167) | def reshape(self, bottom, top):
  function _filter_boxes (line 171) | def _filter_boxes(boxes, min_size):

FILE: lib/rpn/proposal_target_layer.py
  class ProposalTargetLayer (line 18) | class ProposalTargetLayer(caffe.Layer):
    method setup (line 24) | def setup(self, bottom, top):
    method forward (line 39) | def forward(self, bottom, top):
    method backward (line 98) | def backward(self, top, propagate_down, bottom):
    method reshape (line 102) | def reshape(self, bottom, top):
  function _get_bbox_regression_labels (line 107) | def _get_bbox_regression_labels(bbox_target_data, num_classes):
  function _compute_targets (line 132) | def _compute_targets(ex_rois, gt_rois, labels):
  function _sample_rois (line 147) | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, ...

FILE: lib/setup.py
  function find_in_path (line 16) | def find_in_path(name, path):
  function locate_cuda (line 27) | def locate_cuda():
  function customize_compiler_for_nvcc (line 67) | def customize_compiler_for_nvcc(self):
  class custom_build_ext (line 106) | class custom_build_ext(build_ext):
    method build_extensions (line 107) | def build_extensions(self):

FILE: lib/transform/torch_image_transform_layer.py
  class TorchImageTransformLayer (line 27) | class TorchImageTransformLayer(caffe.Layer):
    method setup (line 28) | def setup(self, bottom, top):
    method forward (line 45) | def forward(self, bottom, top):
    method backward (line 58) | def backward(self, top, propagate_down, bottom):
    method reshape (line 62) | def reshape(self, bottom, top):

FILE: lib/utils/blob.py
  function im_list_to_blob (line 13) | def im_list_to_blob(ims):
  function prep_im_for_blob (line 31) | def prep_im_for_blob(im, pixel_means, target_size, max_size):

FILE: lib/utils/timer.py
  class Timer (line 10) | class Timer(object):
    method __init__ (line 12) | def __init__(self):
    method tic (line 19) | def tic(self):
    method toc (line 24) | def toc(self, average=True):

FILE: tools/_init_paths.py
  function add_path (line 13) | def add_path(path):

FILE: tools/compress_net.py
  function parse_args (line 18) | def parse_args():
  function compress_weights (line 38) | def compress_weights(W, l):
  function main (line 61) | def main():

FILE: tools/demo.py
  function vis_detections (line 40) | def vis_detections(im, class_name, dets, thresh=0.5):
  function demo (line 72) | def demo(net, image_name):
  function parse_args (line 100) | def parse_args():

FILE: tools/eval_recall.py
  function parse_args (line 10) | def parse_args():
  function recall_at (line 54) | def recall_at(t):

FILE: tools/reval.py
  function parse_args (line 20) | def parse_args():
  function from_dets (line 45) | def from_dets(imdb_name, output_dir, args):

FILE: tools/rpn_generate.py
  function parse_args (line 23) | def parse_args():

FILE: tools/test_net.py
  function parse_args (line 21) | def parse_args():

FILE: tools/train_faster_rcnn_alt_opt.py
  function parse_args (line 29) | def parse_args():
  function get_roidb (line 60) | def get_roidb(imdb_name, rpn_file=None):
  function get_solvers (line 70) | def get_solvers(net_name):
  function _init_caffe (line 93) | def _init_caffe(cfg):
  function train_rpn (line 105) | def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None,
  function rpn_generate (line 137) | def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None,
  function train_fast_rcnn (line 173) | def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=...

FILE: tools/train_net.py
  function parse_args (line 23) | def parse_args():
  function combined_roidb (line 60) | def combined_roidb(imdb_names):

FILE: tools/train_svms.py
  class SVMTrainer (line 29) | class SVMTrainer(object):
    method __init__ (line 35) | def __init__(self, net, imdb):
    method _get_feature_scale (line 49) | def _get_feature_scale(self, num_images=100):
    method _get_pos_counts (line 72) | def _get_pos_counts(self):
    method get_pos_examples (line 86) | def get_pos_examples(self):
    method initialize_net (line 114) | def initialize_net(self):
    method update_net (line 131) | def update_net(self, cls_ind, w, b):
    method train_with_hard_negatives (line 135) | def train_with_hard_negatives(self):
    method train (line 164) | def train(self):
  class SVMClassTrainer (line 192) | class SVMClassTrainer(object):
    method __init__ (line 195) | def __init__(self, cls, dim, feature_scale=1.0,
    method alloc_pos (line 215) | def alloc_pos(self, count):
    method append_pos (line 219) | def append_pos(self, feat):
    method train (line 224) | def train(self):
    method append_neg_and_retrain (line 259) | def append_neg_and_retrain(self, feat=None, force=False):
  function parse_args (line 282) | def parse_args():
Condensed preview — 217 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,940K chars).
[
  {
    "path": ".gitignore",
    "chars": 97,
    "preview": "*.pyc\n.ipynb_checkpoints\nlib/build\nlib/pycocotools/_mask.c\nlib/pycocotools/_mask.so\noutput\n.idea\n"
  },
  {
    "path": "LICENSE",
    "chars": 1079,
    "preview": "The MIT License (MIT)\n\nCopyright (c) 2017 Chen-Wei Xie\n\nPermission is hereby granted, free of charge, to any person obta"
  },
  {
    "path": "README.md",
    "chars": 3338,
    "preview": "# Faster-RCNN-ResNet\n\nThis code extends [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by adding ResNet "
  },
  {
    "path": "data/.gitignore",
    "chars": 70,
    "preview": "selective_search*\nimagenet_models*\nfast_rcnn_models*\nVOCdevkit*\ncache\n"
  },
  {
    "path": "data/README.md",
    "chars": 2500,
    "preview": "This directory holds (*after you download them*):\n- Caffe models pre-trained on ImageNet\n- Faster R-CNN models\n- Symlink"
  },
  {
    "path": "data/pylintrc",
    "chars": 56,
    "preview": "[TYPECHECK]\n\nignored-modules = numpy, numpy.random, cv2\n"
  },
  {
    "path": "data/scripts/fetch_faster_rcnn_models.sh",
    "chars": 817,
    "preview": "#!/bin/bash\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )/../\" && pwd )\"\ncd $DIR\n\nFILE=faster_rcnn_models.tgz\nURL=http:/"
  },
  {
    "path": "data/scripts/fetch_imagenet_models.sh",
    "chars": 814,
    "preview": "#!/bin/bash\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )/../\" && pwd )\"\ncd $DIR\n\nFILE=imagenet_models.tgz\nURL=http://ww"
  },
  {
    "path": "data/scripts/fetch_selective_search_data.sh",
    "chars": 828,
    "preview": "#!/bin/bash\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )/../\" && pwd )\"\ncd $DIR\n\nFILE=selective_search_data.tgz\nURL=htt"
  },
  {
    "path": "experiments/README.md",
    "chars": 185,
    "preview": "Scripts are under `experiments/scripts`.\n\nEach script saves a log file under `experiments/logs`.\n\nConfiguration override"
  },
  {
    "path": "experiments/cfgs/faster_rcnn_alt_opt.yml",
    "chars": 78,
    "preview": "EXP_DIR: faster_rcnn_alt_opt\nTRAIN:\n  BG_THRESH_LO: 0.0\nTEST:\n  HAS_RPN: True\n"
  },
  {
    "path": "experiments/cfgs/faster_rcnn_end2end.yml",
    "chars": 227,
    "preview": "EXP_DIR: faster_rcnn_end2end\nTRAIN:\n  HAS_RPN: True\n  IMS_PER_BATCH: 1\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n  RPN_"
  },
  {
    "path": "experiments/cfgs/faster_rcnn_end2end_ohem.yml",
    "chars": 317,
    "preview": "EXP_DIR: faster_rcnn_end2end_ohem\nTRAIN:\n  HAS_RPN: True\n  IMS_PER_BATCH: 1\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n "
  },
  {
    "path": "experiments/logs/.gitignore",
    "chars": 7,
    "preview": "*.txt*\n"
  },
  {
    "path": "experiments/scripts/fast_rcnn.sh",
    "chars": 1448,
    "preview": "#!/bin/bash\n# Usage:\n# ./experiments/scripts/fast_rcnn.sh GPU NET DATASET [options args to {train,test}_net.py]\n# DATASE"
  },
  {
    "path": "experiments/scripts/faster_rcnn_alt_opt.sh",
    "chars": 1509,
    "preview": "#!/bin/bash\n# Usage:\n# ./experiments/scripts/faster_rcnn_alt_opt.sh GPU NET DATASET [options args to {train,test}_net.py"
  },
  {
    "path": "experiments/scripts/faster_rcnn_end2end.sh",
    "chars": 1781,
    "preview": "#!/bin/bash\n# Usage:\n# ./experiments/scripts/faster_rcnn_end2end.sh GPU NET DATASET [options args to {train,test}_net.py"
  },
  {
    "path": "experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end.sh",
    "chars": 317,
    "preview": "./tools/test_net.py --gpu 1 \\\n  --def models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt \\\n  "
  },
  {
    "path": "experiments/scripts/test_resnet101_bn_scale_merged_0712_end2end_ohem.sh",
    "chars": 328,
    "preview": "./tools/test_net.py --gpu 1 \\\n  --def models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt \\\n  "
  },
  {
    "path": "experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end.sh",
    "chars": 297,
    "preview": "./tools/train_net.py --gpu 1 \\\n  --solver models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/solver.prototx"
  },
  {
    "path": "experiments/scripts/train_resnet101_bn_scale_merged_0712_end2end_ohem.sh",
    "chars": 312,
    "preview": "./tools/train_net.py --gpu 1 \\\n  --solver models/pascal_voc/ResNet101_BN_SCALE_Merged_OHEM/faster_rcnn_end2end_ohem/solv"
  },
  {
    "path": "lib/Makefile",
    "chars": 56,
    "preview": "all:\n\tpython setup.py build_ext --inplace\n\trm -rf build\n"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m",
    "chars": 231,
    "preview": "function VOCopts = get_voc_opts(path)\n\ntmp = pwd;\ncd(path);\ntry\n  addpath('VOCcode');\n  VOCinit;\ncatch\n  rmpath('VOCcode"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m",
    "chars": 1332,
    "preview": "function res = voc_eval(path, comp_id, test_set, output_dir)\n\nVOCopts = get_voc_opts(path);\nVOCopts.testset = test_set;\n"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m",
    "chars": 258,
    "preview": "function ap = xVOCap(rec,prec)\r\n% From the PASCAL VOC 2011 devkit\r\n\r\nmrec=[0 ; rec ; 1];\r\nmpre=[0 ; prec ; 0];\r\nfor i=nu"
  },
  {
    "path": "lib/datasets/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/coco.py",
    "chars": 16560,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/datasets/ds_utils.py",
    "chars": 1336,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/datasets/factory.py",
    "chars": 1411,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/imdb.py",
    "chars": 9811,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/pascal_voc.py",
    "chars": 14217,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/tools/mcg_munge.py",
    "chars": 1451,
    "preview": "import os\nimport sys\n\n\"\"\"Hacky tool to convert file system layout of MCG boxes downloaded from\nhttp://www.eecs.berkeley."
  },
  {
    "path": "lib/datasets/voc_eval.py",
    "chars": 6938,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/fast_rcnn/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/fast_rcnn/bbox_transform.py",
    "chars": 2540,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/fast_rcnn/config.py",
    "chars": 9362,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Licensed under The MIT License [see LICENSE fo"
  },
  {
    "path": "lib/fast_rcnn/nms_wrapper.py",
    "chars": 642,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/fast_rcnn/test.py",
    "chars": 11120,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/fast_rcnn/train.py",
    "chars": 6076,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/nms/.gitignore",
    "chars": 15,
    "preview": "*.c\n*.cpp\n*.so\n"
  },
  {
    "path": "lib/nms/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/nms/cpu_nms.pyx",
    "chars": 2241,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/nms/gpu_nms.hpp",
    "chars": 146,
    "preview": "void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,\n          int boxes_dim, float nms_overla"
  },
  {
    "path": "lib/nms/gpu_nms.pyx",
    "chars": 1110,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/nms/nms_kernel.cu",
    "chars": 5064,
    "preview": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n//"
  },
  {
    "path": "lib/nms/py_cpu_nms.py",
    "chars": 1051,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/pycocotools/UPSTREAM_REV",
    "chars": 80,
    "preview": "https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574\n"
  },
  {
    "path": "lib/pycocotools/__init__.py",
    "chars": 21,
    "preview": "__author__ = 'tylin'\n"
  },
  {
    "path": "lib/pycocotools/_mask.pyx",
    "chars": 10709,
    "preview": "# distutils: language = c\n# distutils: sources = ../MatlabAPI/private/maskApi.c\n\n#**************************************"
  },
  {
    "path": "lib/pycocotools/coco.py",
    "chars": 14881,
    "preview": "__author__ = 'tylin'\n__version__ = '1.0.1'\n# Interface for accessing the Microsoft COCO dataset.\n\n# Microsoft COCO is a "
  },
  {
    "path": "lib/pycocotools/cocoeval.py",
    "chars": 19735,
    "preview": "__author__ = 'tsungyi'\n\nimport numpy as np\nimport datetime\nimport time\nfrom collections import defaultdict\nimport mask\ni"
  },
  {
    "path": "lib/pycocotools/license.txt",
    "chars": 1533,
    "preview": "Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin\nAll rights reserved.\n\nRedistribution and use in source and binary form"
  },
  {
    "path": "lib/pycocotools/mask.py",
    "chars": 4058,
    "preview": "__author__ = 'tsungyi'\n\nimport pycocotools._mask as _mask\n\n# Interface for manipulating masks stored in RLE format.\n#\n# "
  },
  {
    "path": "lib/pycocotools/maskApi.c",
    "chars": 7704,
    "preview": "/**************************************************************************\n* Microsoft COCO Toolbox.      version 2.0\n*"
  },
  {
    "path": "lib/pycocotools/maskApi.h",
    "chars": 1928,
    "preview": "/**************************************************************************\n* Microsoft COCO Toolbox.      version 2.0\n*"
  },
  {
    "path": "lib/roi_data_layer/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/roi_data_layer/layer.py",
    "chars": 12133,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN with OHEM\n# Licensed under The MIT License [see "
  },
  {
    "path": "lib/roi_data_layer/minibatch.py",
    "chars": 13966,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN with OHEM\n# Licensed under The MIT License [see "
  },
  {
    "path": "lib/roi_data_layer/roidb.py",
    "chars": 5611,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/rpn/README.md",
    "chars": 780,
    "preview": "### `rpn` module overview\n\n##### `generate_anchors.py`\n\nGenerates a regular grid of multi-scale, multi-aspect anchor box"
  },
  {
    "path": "lib/rpn/__init__.py",
    "chars": 262,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/rpn/anchor_target_layer.py",
    "chars": 11344,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/rpn/generate.py",
    "chars": 3894,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/rpn/generate_anchors.py",
    "chars": 3110,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/rpn/proposal_layer.py",
    "chars": 6803,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/rpn/proposal_target_layer.py",
    "chars": 7531,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "lib/setup.py",
    "chars": 5665,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/transform/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/transform/torch_image_transform_layer.py",
    "chars": 2000,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/utils/.gitignore",
    "chars": 9,
    "preview": "*.c\n*.so\n"
  },
  {
    "path": "lib/utils/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/utils/bbox.pyx",
    "chars": 1756,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/utils/blob.py",
    "chars": 1625,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/utils/timer.py",
    "chars": 948,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "models/README.md",
    "chars": 2500,
    "preview": "## Model Zoo\n\n### COCO Faster R-CNN VGG-16 trained using end-to-end\n\nModel URL: www.cs.berkeley.edu/~rbg/faster-rcnn-dat"
  },
  {
    "path": "models/coco/VGG16/fast_rcnn/solver.prototxt",
    "chars": 395,
    "preview": "train_net: \"models/coco/VGG16/fast_rcnn/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstepsize: 200000\ndis"
  },
  {
    "path": "models/coco/VGG16/fast_rcnn/test.prototxt",
    "chars": 6774,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"rois\"\ninput"
  },
  {
    "path": "models/coco/VGG16/fast_rcnn/train.prototxt",
    "chars": 6625,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: "
  },
  {
    "path": "models/coco/VGG16/faster_rcnn_end2end/solver.prototxt",
    "chars": 387,
    "preview": "train_net: \"models/coco/VGG16/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstepsize: "
  },
  {
    "path": "models/coco/VGG16/faster_rcnn_end2end/test.prototxt",
    "chars": 8754,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\nin"
  },
  {
    "path": "models/coco/VGG16/faster_rcnn_end2end/train.prototxt",
    "chars": 9840,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_box"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/fast_rcnn/solver.prototxt",
    "chars": 398,
    "preview": "train_net: \"models/coco/VGG_CNN_M_1024/fast_rcnn/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstepsize: 2"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/fast_rcnn/test.prototxt",
    "chars": 4037,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"rois\"\ninput_shape {"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/fast_rcnn/train.prototxt",
    "chars": 4051,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt",
    "chars": 392,
    "preview": "train_net: \"models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\ns"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt",
    "chars": 6973,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"im_info\"\ninput_shap"
  },
  {
    "path": "models/coco/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt",
    "chars": 7282,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  "
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 56446,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 48289,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 430,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nl"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 56367,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  t"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 418,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_poli"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 56279,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'g"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 430,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nl"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 56367,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  t"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 418,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_poli"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 56283,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'g"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_end2end/solver.prototxt",
    "chars": 467,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nl"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt",
    "chars": 58005,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'g"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged/faster_rcnn_end2end/train.prototxt",
    "chars": 58251,
    "preview": "name: \"ResNet50_BN_SCALE_Merged\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged_OHEM/faster_rcnn_end2end/solver.prototxt",
    "chars": 480,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged_OHEM/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0."
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged_OHEM/faster_rcnn_end2end/test.prototxt",
    "chars": 70134,
    "preview": "name: \"ResNet-50\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_BN_SCALE_Merged_OHEM/faster_rcnn_end2end/train.prototxt",
    "chars": 70535,
    "preview": "name: \"ResNet-50\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 60054,
    "preview": "name: \"ResNet50\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 48272,
    "preview": "name: \"ResNet50\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 398,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\""
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 59973,
    "preview": "name: \"ResNet50\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_target"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 386,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 59886,
    "preview": "name: \"ResNet50\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 398,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\""
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 59973,
    "preview": "name: \"ResNet50\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_target"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 386,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma"
  },
  {
    "path": "models/pascal_voc/ResNet-50/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 59886,
    "preview": "name: \"ResNet50\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/rfcn_test.pt",
    "chars": 60191,
    "preview": "name: \"ResNet-50\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"rois\"\ninput_shape {\n  "
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/rpn_test.pt",
    "chars": 48249,
    "preview": "name: \"ZF\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n  dim:"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_solver80k120k.pt",
    "chars": 413,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"s"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_train.pt",
    "chars": 62075,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targe"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rpn_solver60k80k.pt",
    "chars": 389,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ng"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage1_rpn_train.pt",
    "chars": 48658,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rfcn_ohem_solver80k120k.pt",
    "chars": 413,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rfcn_ohem_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"s"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rfcn_ohem_train.pt",
    "chars": 62075,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targe"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rpn_solver60k80k.pt",
    "chars": 389,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ng"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage2_rpn_train.pt",
    "chars": 48658,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage3_rpn_solver60k80k.pt",
    "chars": 389,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage3_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ng"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_alt_opt_5step_ohem/stage3_rpn_train.pt",
    "chars": 48658,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/class-aware/test.prototxt",
    "chars": 62100,
    "preview": "name: \"ResNet50\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/class-aware/train_ohem.prototxt",
    "chars": 65353,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/solver.prototxt",
    "chars": 396,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_end2end/train_agnostic.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0."
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/solver_ohem.prototxt",
    "chars": 406,
    "preview": "train_net: \"models/pascal_voc/ResNet-50/rfcn_end2end/train_agnostic_ohem.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamm"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt",
    "chars": 62093,
    "preview": "name: \"ResNet50\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/train_agnostic.prototxt",
    "chars": 63907,
    "preview": "name: \"ResNet-50\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet-50/rfcn_end2end/train_agnostic_ohem.prototxt",
    "chars": 65350,
    "preview": "name: \"ResNet-50\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pytho"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 116773,
    "preview": "name: \"ResNet101\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 104988,
    "preview": "name: \"ResNet101\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 398,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\""
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 116691,
    "preview": "name: \"ResNet101\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targe"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 386,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 116610,
    "preview": "name: \"ResNet101\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 398,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\""
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 116691,
    "preview": "name: \"ResNet101\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targe"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 386,
    "preview": "train_net: \"models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma"
  },
  {
    "path": "models/pascal_voc/ResNet101/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 116610,
    "preview": "name: \"ResNet101\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 78739,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_inf"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 70580,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_inf"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 430,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nl"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 78660,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  "
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 418,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_poli"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 78578,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: '"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 430,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nl"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 78660,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  "
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 418,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_poli"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 78582,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: '"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/solver.prototxt",
    "chars": 455,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"s"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/test.prototxt",
    "chars": 78739,
    "preview": "name: \"ResNet101_BN_SCALE_Merged\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_inf"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged/faster_rcnn_end2end/train.prototxt",
    "chars": 80528,
    "preview": "name: \"ResNet101\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged_OHEM/faster_rcnn_end2end_ohem/solver.prototxt",
    "chars": 473,
    "preview": "train_net: \"models/pascal_voc/ResNet101_BN_SCALE_Merged_OHEM/faster_rcnn_end2end_ohem/train.prototxt\"\nbase_lr: 0.001\nlr_"
  },
  {
    "path": "models/pascal_voc/ResNet101_BN_SCALE_Merged_OHEM/faster_rcnn_end2end_ohem/train.prototxt",
    "chars": 92754,
    "preview": "name: \"ResNet101\"\n\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  pyth"
  },
  {
    "path": "models/pascal_voc/VGG16/fast_rcnn/solver.prototxt",
    "chars": 400,
    "preview": "train_net: \"models/pascal_voc/VGG16/fast_rcnn/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstepsize: 3000"
  },
  {
    "path": "models/pascal_voc/VGG16/fast_rcnn/test.prototxt",
    "chars": 7009,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"rois\"\ninput"
  },
  {
    "path": "models/pascal_voc/VGG16/fast_rcnn/train.prototxt",
    "chars": 6860,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: "
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 6263,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\nin"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 5305,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\nin"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 390,
    "preview": "train_net: \"models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngam"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 8241,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: "
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 378,
    "preview": "train_net: \"models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0."
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 8062,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_box"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 390,
    "preview": "train_net: \"models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngam"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 8337,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: "
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 378,
    "preview": "train_net: \"models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0."
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 8126,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_box"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_end2end/solver.prototxt",
    "chars": 407,
    "preview": "train_net: \"models/pascal_voc/VGG16/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstep"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt",
    "chars": 8945,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\nin"
  },
  {
    "path": "models/pascal_voc/VGG16/faster_rcnn_end2end/train.prototxt",
    "chars": 10209,
    "preview": "name: \"VGG_ILSVRC_16_layers\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_box"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/solver.prototxt",
    "chars": 403,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nsteps"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/test.prototxt",
    "chars": 4272,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"rois\"\ninput_shape {"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/fast_rcnn/train.prototxt",
    "chars": 4286,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 4343,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"im_info\"\ninput_shap"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 3387,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"im_info\"\ninput_shap"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 408,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \""
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 5687,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 396,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\n"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 5143,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  "
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 408,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \""
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 5799,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_solver60k80k.pt",
    "chars": 396,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\n"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_alt_opt/stage2_rpn_train.pt",
    "chars": 5255,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  "
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt",
    "chars": 397,
    "preview": "train_net: \"models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt\"\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma:"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt",
    "chars": 7165,
    "preview": "name: \"VGG_CNN_M_1024\"\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\ninput: \"im_info\"\ninput_shap"
  },
  {
    "path": "models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt",
    "chars": 7651,
    "preview": "name: \"VGG_CNN_M_1024\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  "
  },
  {
    "path": "models/pascal_voc/ZF/fast_rcnn/solver.prototxt",
    "chars": 395,
    "preview": "train_net: \"models/pascal_voc/ZF/fast_rcnn/train.prototxt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\nstepsize: 30000\n"
  },
  {
    "path": "models/pascal_voc/ZF/fast_rcnn/test.prototxt",
    "chars": 3372,
    "preview": "name: \"ZF\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"rois\"\ninput_shape {\n  dim: 1 "
  },
  {
    "path": "models/pascal_voc/ZF/fast_rcnn/train.prototxt",
    "chars": 4427,
    "preview": "name: \"ZF\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targets'\n  t"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt",
    "chars": 4739,
    "preview": "name: \"ZF\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n  dim:"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/rpn_test.pt",
    "chars": 3492,
    "preview": "name: \"ZF\"\n\ninput: \"data\"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 224\n  dim: 224\n}\n\ninput: \"im_info\"\ninput_shape {\n  dim:"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_solver30k40k.pt",
    "chars": 384,
    "preview": "train_net: \"models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma:"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt",
    "chars": 5874,
    "preview": "name: \"ZF\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targets'\n  t"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt",
    "chars": 372,
    "preview": "train_net: \"models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma: 0.1\ns"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage1_rpn_train.pt",
    "chars": 5326,
    "preview": "name: \"ZF\"\nlayer {\n  name: 'input-data'\n  type: 'Python'\n  top: 'data'\n  top: 'im_info'\n  top: 'gt_boxes'\n  python_param"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_solver30k40k.pt",
    "chars": 384,
    "preview": "train_net: \"models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt\"\n\nbase_lr: 0.001\nlr_policy: \"step\"\ngamma:"
  },
  {
    "path": "models/pascal_voc/ZF/faster_rcnn_alt_opt/stage2_fast_rcnn_train.pt",
    "chars": 6004,
    "preview": "name: \"ZF\"\nlayer {\n  name: 'data'\n  type: 'Python'\n  top: 'data'\n  top: 'rois'\n  top: 'labels'\n  top: 'bbox_targets'\n  t"
  }
]

// ... and 17 more files (download for full content)

About this extraction

This page contains the full source code of the Eniac-Xie/faster-rcnn-resnet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 217 files (3.4 MB), approximately 911.6k tokens, and a symbol index with 258 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!