Full Code of jwyang/faster-rcnn.pytorch for AI

master f9d984d27b48 cached
122 files
444.5 KB
126.7k tokens
355 symbols
1 requests
Download .txt
Showing preview only (476K chars total). Download the full file or copy to clipboard to get everything.
Repository: jwyang/faster-rcnn.pytorch
Branch: master
Commit: f9d984d27b48
Files: 122
Total size: 444.5 KB

Directory structure:
gitextract_3vzfjfkd/

├── .gitignore
├── LICENSE
├── README.md
├── _init_paths.py
├── cfgs/
│   ├── res101.yml
│   ├── res101_ls.yml
│   ├── res50.yml
│   └── vgg16.yml
├── demo.py
├── lib/
│   ├── datasets/
│   │   ├── VOCdevkit-matlab-wrapper/
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imagenet.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc_rbg.py
│   │   ├── tools/
│   │   │   └── mcg_munge.py
│   │   ├── vg.py
│   │   ├── vg_eval.py
│   │   └── voc_eval.py
│   ├── make.sh
│   ├── model/
│   │   ├── __init__.py
│   │   ├── faster_rcnn/
│   │   │   ├── __init__.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── resnet.py
│   │   │   └── vgg16.py
│   │   ├── nms/
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── nms/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── make.sh
│   │   │   ├── nms_cpu.py
│   │   │   ├── nms_gpu.py
│   │   │   ├── nms_kernel.cu
│   │   │   ├── nms_wrapper.py
│   │   │   └── src/
│   │   │       ├── nms_cuda.h
│   │   │       ├── nms_cuda_kernel.cu
│   │   │       └── nms_cuda_kernel.h
│   │   ├── roi_align/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   ├── make.sh
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   └── src/
│   │   │       ├── roi_align.c
│   │   │       ├── roi_align.h
│   │   │       ├── roi_align_cuda.c
│   │   │       ├── roi_align_cuda.h
│   │   │       ├── roi_align_kernel.cu
│   │   │       └── roi_align_kernel.h
│   │   ├── roi_crop/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize/
│   │   │   │   │   └── __init__.py
│   │   │   │   └── roi_crop/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   ├── make.sh
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   └── src/
│   │   │       ├── roi_crop.c
│   │   │       ├── roi_crop.h
│   │   │       ├── roi_crop_cuda.c
│   │   │       ├── roi_crop_cuda.h
│   │   │       ├── roi_crop_cuda_kernel.cu
│   │   │       └── roi_crop_cuda_kernel.h
│   │   ├── roi_pooling/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pooling/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   └── src/
│   │   │       ├── roi_pooling.c
│   │   │       ├── roi_pooling.h
│   │   │       ├── roi_pooling_cuda.c
│   │   │       ├── roi_pooling_cuda.h
│   │   │       ├── roi_pooling_kernel.cu
│   │   │       └── roi_pooling_kernel.h
│   │   ├── rpn/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_target_layer.py
│   │   │   ├── bbox_transform.py
│   │   │   ├── generate_anchors.py
│   │   │   ├── proposal_layer.py
│   │   │   ├── proposal_target_layer_cascade.py
│   │   │   └── rpn.py
│   │   └── utils/
│   │       ├── .gitignore
│   │       ├── __init__.py
│   │       ├── bbox.pyx
│   │       ├── blob.py
│   │       ├── config.py
│   │       ├── logger.py
│   │       └── net_utils.py
│   ├── pycocotools/
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer/
│   │   ├── __init__.py
│   │   ├── minibatch.py
│   │   ├── roibatchLoader.py
│   │   └── roidb.py
│   └── setup.py
├── requirements.txt
├── test_net.py
└── trainval_net.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
data/*

# READ THIS BEFORE YOU REFACTOR ME
#
# setup.py uses the list of patterns in this file to decide
# what to delete, but it's not 100% sound.  So, for example,
# if you delete aten/build/ because it's redundant with build/,
# aten/build/ will stop being cleaned.  So be careful when
# refactoring this file!

## PyTorch

.mypy_cache
*.pyc
*/*.pyc
*/*.so*
*/**/__pycache__
*/**/*.dylib*
*/**/*.pyc
*/**/*.pyd
*/**/*.so*
*/**/**/*.pyc
*/**/**/**/*.pyc
*/**/**/**/**/*.pyc
aten/build/
aten/src/ATen/Config.h
aten/src/ATen/cuda/CUDAConfig.h
build/
dist/
docs/src/**/*
test/.coverage
test/cpp/api/mnist
test/data/gpu_tensors.pt
test/data/legacy_modules.t7
test/data/legacy_serialized.pt
test/data/linear.pt
test/htmlcov
third_party/build/
tools/shared/_utils_internal.py
torch.egg-info/
torch/csrc/autograd/generated/*
torch/csrc/cudnn/cuDNN.cpp
torch/csrc/generated
torch/csrc/generic/TensorMethods.cpp
torch/csrc/jit/generated/*
torch/csrc/nn/THCUNN.cpp
torch/csrc/nn/THCUNN.cwrap
torch/csrc/nn/THNN_generic.cpp
torch/csrc/nn/THNN_generic.cwrap
torch/csrc/nn/THNN_generic.h
torch/csrc/nn/THNN.cpp
torch/csrc/nn/THNN.cwrap
torch/lib/*.a*
torch/lib/*.dll*
torch/lib/*.dylib*
torch/lib/*.h
torch/lib/*.lib
torch/lib/*.so*
torch/lib/build
torch/lib/cmake
torch/lib/include
torch/lib/pkgconfig
torch/lib/protoc
torch/lib/tmp_install
torch/lib/torch_shm_manager
torch/version.py

# IPython notebook checkpoints
.ipynb_checkpoints

# Editor temporaries
*.swn
*.swo
*.swp
*.swm
*~

# macOS dir files
.DS_Store

# Symbolic files
tools/shared/cwrap_common.py

# Ninja files
.ninja_deps
.ninja_log
compile_commands.json
*.egg-info/
docs/source/scripts/activation_images/

## General

# Compiled Object files
*.slo
*.lo
*.o
*.cuo
*.obj

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Compiled protocol buffers
*.pb.h
*.pb.cc
*_pb2.py

# Compiled python
*.pyc
*.pyd

# Compiled MATLAB
*.mex*

# IPython notebook checkpoints
.ipynb_checkpoints

# Editor temporaries
*.swn
*.swo
*.swp
*~

# Sublime Text settings
*.sublime-workspace
*.sublime-project

# Eclipse Project settings
*.*project
.settings

# QtCreator files
*.user

# PyCharm files
.idea

# Visual Studio Code files
.vscode
.vs

# OSX dir files
.DS_Store

## Caffe2

# build, distribute, and bins (+ python proto bindings)
build
build_host_protoc
build_android
build_ios
/build_*
.build_debug/*
.build_release/*
distribute/*
*.testbin
*.bin
cmake_build
.cmake_build
gen
.setuptools-cmake-build
.pytest_cache
aten/build/*

# Bram
plsdontbreak

# Generated documentation
docs/_site
docs/gathered
_site
doxygen
docs/dev

# LevelDB files
*.sst
*.ldb
LOCK
LOG*
CURRENT
MANIFEST-*

# generated version file
caffe2/version.py

# setup.py intermediates
.eggs
caffe2.egg-info

# Atom/Watchman required file
.watchmanconfig

# cython generated files
lib/model/utils/bbox.c
lib/pycocotools/_mask.c

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2017 Jianwei Yang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# A *Faster* Pytorch Implementation of Faster R-CNN

## Write at the beginning

[05/29/2020] This repo was initiated about two years ago and was developed as the first open-source object detection code supporting multi-GPU training. It has integrated tremendous efforts from many people. However, we have seen many high-quality repos emerge in the last few years, such as:

* [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark)
* [detectron2](https://github.com/facebookresearch/detectron2)
* [mmdetection](https://github.com/open-mmlab/mmdetection)

**At this point, I think this repo is out-of-date in terms of the pipeline and coding style, and it will not be actively maintained. Though you can still use this repo as a playground, I highly recommend you move to the above repos to delve into the west world of object detection!**

## Introduction

### :boom: Good news! This repo supports pytorch-1.0 now!!! We borrowed some code and techniques from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). Just go to pytorch-1.0 branch!

This project is a *faster* pytorch implementation of faster R-CNN, aimed at accelerating the training of faster R-CNN object detection models. Recently, there have been a number of good implementations:

* [rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn), developed based on Pycaffe + Numpy

* [longcw/faster_rcnn_pytorch](https://github.com/longcw/faster_rcnn_pytorch), developed based on Pytorch + Numpy

* [endernewton/tf-faster-rcnn](https://github.com/endernewton/tf-faster-rcnn), developed based on TensorFlow + Numpy

* [ruotianluo/pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn), developed based on Pytorch + TensorFlow + Numpy

During our implementation, we referred to the above implementations, especially [longcw/faster_rcnn_pytorch](https://github.com/longcw/faster_rcnn_pytorch). However, our implementation has several unique and new features compared with the above implementations:

* **It is pure Pytorch code**. We convert all the numpy implementations to pytorch!

* **It supports multi-image batch training**. We revise all the layers, including dataloader, rpn, roi-pooling, etc., to support multiple images in each minibatch.

* **It supports multiple GPUs training**. We use a multiple GPU wrapper (nn.DataParallel here) to make it flexible to use one or more GPUs, as a merit of the above two features.

* **It supports three pooling methods**. We integrate three pooling methods: roi pooling, roi align and roi crop. More importantly, we modify all of them to support multi-image batch training.

* **It is memory efficient**. We limit the image aspect ratio, and group images with similar aspect ratios into a minibatch. As such, we can train resnet101 and VGG16 with batchsize = 4 (4 images) on a single Titan X (12 GB). When training with 8 GPU, the maximum batchsize for each GPU is 3 (Res101), totaling 24.

* **It is faster**. Based on the above modifications, the training is much faster. We report the training speed on NVIDIA TITAN Xp in the tables below.

### What we are doing and going to do

- [x] Support both python2 and python3 (great thanks to [cclauss](https://github.com/cclauss)).
- [x] Add deformable pooling layer (mainly supported by [Xander](https://github.com/xanderchf)).
- [x] Support pytorch-0.4.0 (this branch).
- [x] Support tensorboardX.
- [x] Support pytorch-1.0 (go to pytorch-1.0 branch).

## Other Implementations

* [Feature Pyramid Network (FPN)](https://github.com/jwyang/fpn.pytorch)

* [Mask R-CNN](https://github.com/roytseng-tw/mask-rcnn.pytorch) (~~ongoing~~ already implemented by [roytseng-tw](https://github.com/roytseng-tw))

* [Graph R-CNN](https://github.com/jwyang/graph-rcnn.pytorch) (extension to scene graph generation)

## Tutorial

* [Blog](http://www.telesens.co/2018/03/11/object-detection-and-classification-using-r-cnns/) by [ankur6ue](https://github.com/ankur6ue)

## Benchmarking

We benchmark our code thoroughly on three datasets: pascal voc, coco and visual genome, using two different network architectures: vgg16 and resnet101. Below are the results:

1). PASCAL VOC 2007 (Train/Test: 07trainval/07test, scale=600, ROI Align)

model    | #GPUs | batch size | lr        | lr_decay | max_epoch     |  time/epoch | mem/GPU | mAP
---------|--------|-----|--------|-----|-----|-------|--------|-----
[VGG-16](https://www.dropbox.com/s/6ief4w7qzka6083/faster_rcnn_1_6_10021.pth?dl=0)     | 1 | 1 | 1e-3 | 5   | 6   |  0.76 hr | 3265MB   | 70.1
[VGG-16](https://www.dropbox.com/s/cpj2nu35am0f9hp/faster_rcnn_1_9_2504.pth?dl=0)     | 1 | 4 | 4e-3 | 8   | 9  |  0.50 hr | 9083MB   | 69.6
[VGG-16](https://www.dropbox.com/s/1a31y7vicby0kvy/faster_rcnn_1_10_625.pth?dl=0)     | 8 | 16| 1e-2 | 8   | 10  |  0.19 hr | 5291MB   | 69.4
[VGG-16](https://www.dropbox.com/s/hkj7i6mbhw9tq4k/faster_rcnn_1_11_416.pth?dl=0)     | 8 | 24| 1e-2 | 10  | 11  |  0.16 hr | 11303MB  | 69.2
[Res-101](https://www.dropbox.com/s/4v3or0054kzl19q/faster_rcnn_1_7_10021.pth?dl=0)   | 1 | 1 | 1e-3 | 5   | 7   |  0.88 hr | 3200 MB  | 75.2
[Res-101](https://www.dropbox.com/s/8bhldrds3mf0yuj/faster_rcnn_1_10_2504.pth?dl=0)    | 1 | 4 | 4e-3 | 8   | 10  |  0.60 hr | 9700 MB  | 74.9
[Res-101](https://www.dropbox.com/s/5is50y01m1l9hbu/faster_rcnn_1_10_625.pth?dl=0)    | 8 | 16| 1e-2 | 8   | 10  |  0.23 hr | 8400 MB  | 75.2 
[Res-101](https://www.dropbox.com/s/cn8gneumg4gjo9i/faster_rcnn_1_12_416.pth?dl=0)    | 8 | 24| 1e-2 | 10  | 12  |  0.17 hr | 10327MB  | 75.1  


2). COCO (Train/Test: coco_train+coco_val-minival/minival, scale=800, max_size=1200, ROI Align)

model     | #GPUs | batch size |lr        | lr_decay | max_epoch     |  time/epoch | mem/GPU | mAP
---------|--------|-----|--------|-----|-----|-------|--------|-----
VGG-16     | 8 | 16    |1e-2| 4   | 6  |  4.9 hr | 7192 MB  | 29.2
[Res-101](https://www.dropbox.com/s/5if6l7mqsi4rfk9/faster_rcnn_1_6_14657.pth?dl=0)    | 8 | 16    |1e-2| 4   | 6  |  6.0 hr    |10956 MB  | 36.2
[Res-101](https://www.dropbox.com/s/be0isevd22eikqb/faster_rcnn_1_10_14657.pth?dl=0)    | 8 | 16    |1e-2| 4   | 10  |  6.0 hr    |10956 MB  | 37.0

**NOTE**. Since the above models use scale=800, you need add "--ls" at the end of test command.

3). COCO (Train/Test: coco_train+coco_val-minival/minival, scale=600, max_size=1000, ROI Align)

model     | #GPUs | batch size |lr        | lr_decay | max_epoch     |  time/epoch | mem/GPU | mAP
---------|--------|-----|--------|-----|-----|-------|--------|-----
[Res-101](https://www.dropbox.com/s/y171ze1sdw1o2ph/faster_rcnn_1_6_9771.pth?dl=0)    | 8 | 24    |1e-2| 4   | 6  |  5.4 hr    |10659 MB  | 33.9
[Res-101](https://www.dropbox.com/s/dpq6qv0efspelr3/faster_rcnn_1_10_9771.pth?dl=0)    | 8 | 24    |1e-2| 4   | 10  |  5.4 hr    |10659 MB  | 34.5

4). Visual Genome (Train/Test: vg_train/vg_test, scale=600, max_size=1000, ROI Align, category=2500)

model     | #GPUs | batch size |lr        | lr_decay | max_epoch     |  time/epoch | mem/GPU | mAP
---------|--------|-----|--------|-----|-----|-------|--------|-----
[VGG-16](http://data.lip6.fr/cadene/faster-rcnn.pytorch/faster_rcnn_1_19_48611.pth)    | 1 P100 | 4    |1e-3| 5   | 20  |  3.7 hr    |12707 MB  | 4.4

Thanks to [Remi](https://github.com/Cadene) for providing the pretrained detection model on visual genome!

* Click the links in the above tables to download our pre-trained faster r-cnn models.
* If not mentioned, the GPU we used is NVIDIA Titan X Pascal (12GB).

## Preparation


First of all, clone the code
```
git clone https://github.com/jwyang/faster-rcnn.pytorch.git
```

Then, create a folder:
```
cd faster-rcnn.pytorch && mkdir data
```

### prerequisites

* Python 2.7 or 3.6
* Pytorch 0.4.0 (**now it does not support 0.4.1 or higher**)
* CUDA 8.0 or higher

### Data Preparation

* **PASCAL_VOC 07+12**: Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare VOC datasets. Actually, you can refer to any others. After downloading the data, create softlinks in the folder data/.

* **COCO**: Please also follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare the data.

* **Visual Genome**: Please follow the instructions in [bottom-up-attention](https://github.com/peteanderson80/bottom-up-attention) to prepare Visual Genome dataset. You need to download the images and object annotation files first, and then perform preprocessing to obtain the vocabulary and cleansed annotations based on the scripts provided in this repository.

### Pretrained Model

We used two pretrained models in our experiments, VGG and ResNet101. You can download these two models from:

* VGG16: [Dropbox](https://www.dropbox.com/s/s3brpk0bdq60nyb/vgg16_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/vgg16_caffe.pth)

* ResNet101: [Dropbox](https://www.dropbox.com/s/iev3tkbz5wyyuz9/resnet101_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth)

Download them and put them into the data/pretrained_model/.

**NOTE**. We compare the pretrained models from Pytorch and Caffe, and surprisingly find Caffe pretrained models have slightly better performance than Pytorch pretrained. We would suggest to use Caffe pretrained models from the above link to reproduce our results.

**If you want to use pytorch pre-trained models, please remember to transpose images from BGR to RGB, and also use the same data transformer (minus mean and normalize) as used in pretrained model.**

### Compilation

As pointed out by [ruotianluo/pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn), choose the right `-arch` in `make.sh` file, to compile the cuda code:

  | GPU model  | Architecture |
  | ------------- | ------------- |
  | TitanX (Maxwell/Pascal) | sm_52 |
  | GTX 960M | sm_50 |
  | GTX 1080 (Ti) | sm_61 |
  | Grid K520 (AWS g2.2xlarge) | sm_30 |
  | Tesla K80 (AWS p2.xlarge) | sm_37 |

More details about setting the architecture can be found [here](https://developer.nvidia.com/cuda-gpus) or [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)

Install all the python dependencies using pip:
```
pip install -r requirements.txt
```

Compile the cuda dependencies using following simple commands:

```
cd lib
sh make.sh
```

It will compile all the modules you need, including NMS, ROI_Pooling, ROI_Align and ROI_Crop. The default version is compiled with Python 2.7; please compile it yourself if you are using a different Python version.

**As pointed out in this [issue](https://github.com/jwyang/faster-rcnn.pytorch/issues/16), if you encounter errors during the compilation, you may have forgotten to export the CUDA paths to your environment.**

## Train

Before training, set the right directory to save and load the trained models. Change the arguments "save_dir" and "load_dir" in trainval_net.py and test_net.py to adapt to your environment.

To train a faster R-CNN model with vgg16 on pascal_voc, simply run:
```
CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net.py \
                   --dataset pascal_voc --net vgg16 \
                   --bs $BATCH_SIZE --nw $WORKER_NUMBER \
                   --lr $LEARNING_RATE --lr_decay_step $DECAY_STEP \
                   --cuda
```
where 'bs' is the batch size with default 1. Alternatively, to train with resnet101 on pascal_voc, simply run:
```
 CUDA_VISIBLE_DEVICES=$GPU_ID python trainval_net.py \
                    --dataset pascal_voc --net res101 \
                    --bs $BATCH_SIZE --nw $WORKER_NUMBER \
                    --lr $LEARNING_RATE --lr_decay_step $DECAY_STEP \
                    --cuda
```
Above, BATCH_SIZE and WORKER_NUMBER can be set adaptively according to your GPU memory size. **On Titan Xp with 12G memory, it can be up to 4**.

If you have multiple (say 8) Titan Xp GPUs, then just use them all! Try:
```
python trainval_net.py --dataset pascal_voc --net vgg16 \
                       --bs 24 --nw 8 \
                       --lr $LEARNING_RATE --lr_decay_step $DECAY_STEP \
                       --cuda --mGPUs

```

Change dataset to "coco" or 'vg' if you want to train on COCO or Visual Genome.

## Test

If you want to evaluate the detection performance of a pre-trained vgg16 model on pascal_voc test set, simply run
```
python test_net.py --dataset pascal_voc --net vgg16 \
                   --checksession $SESSION --checkepoch $EPOCH --checkpoint $CHECKPOINT \
                   --cuda
```
Specify the specific model session, checkepoch and checkpoint, e.g., SESSION=1, EPOCH=6, CHECKPOINT=416.

## Demo

If you want to run detection on your own images with a pre-trained model, download the pretrained model listed in above tables or train your own models at first, then add images to folder $ROOT/images, and then run
```
python demo.py --net vgg16 \
               --checksession $SESSION --checkepoch $EPOCH --checkpoint $CHECKPOINT \
               --cuda --load_dir path/to/model/directory
```

Then you will find the detection results in folder $ROOT/images.

**Note that the default demo.py only supports pascal_voc categories. You need to change this [line](https://github.com/jwyang/faster-rcnn.pytorch/blob/530f3fdccaa60d05fa068bc2148695211586bd88/demo.py#L156) to adapt it to your own model.**

Below are some detection results:

<div style="color:#0000FF" align="center">
<img src="images/img3_det_res101.jpg" width="430"/> <img src="images/img4_det_res101.jpg" width="430"/>
</div>

## Webcam Demo

You can use a webcam in a real-time demo by running
```
python demo.py --net vgg16 \
               --checksession $SESSION --checkepoch $EPOCH --checkpoint $CHECKPOINT \
               --cuda --load_dir path/to/model/directory \
               --webcam_num $WEBCAM_ID
```
The demo is stopped by clicking the image window and then pressing the 'q' key.

## Authorship

This project is equally contributed by [Jianwei Yang](https://github.com/jwyang) and [Jiasen Lu](https://github.com/jiasenlu), and many others (thanks to them!).

## Citation

    @article{jjfaster2rcnn,
        Author = {Jianwei Yang and Jiasen Lu and Dhruv Batra and Devi Parikh},
        Title = {A Faster Pytorch Implementation of Faster R-CNN},
        Journal = {https://github.com/jwyang/faster-rcnn.pytorch},
        Year = {2017}
    }

    @inproceedings{renNIPS15fasterrcnn,
        Author = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun},
        Title = {Faster {R-CNN}: Towards Real-Time Object Detection
                 with Region Proposal Networks},
        Booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
        Year = {2015}
    }


================================================
FILE: _init_paths.py
================================================
import os.path as osp
import sys

def add_path(path):
    """Put `path` at the front of sys.path unless it is already listed."""
    if path in sys.path:
        # Already importable from this location; keep sys.path untouched.
        return
    sys.path.insert(0, path)

# Directory containing this file; both search paths below are built from it.
this_dir = osp.dirname(__file__)
lib_path = osp.join(this_dir, 'lib')
coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')

# Add lib to PYTHONPATH first, then the COCO PythonAPI directory. Each call
# inserts at index 0, so the COCO path ends up ahead of lib when both are new.
add_path(lib_path)
add_path(coco_path)


================================================
FILE: cfgs/res101.yml
================================================
EXP_DIR: res101
TRAIN:
  HAS_RPN: True
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
  DISPLAY: 20
  BATCH_SIZE: 128
  WEIGHT_DECAY: 0.0001
  DOUBLE_BIAS: False
  LEARNING_RATE: 0.001
TEST:
  HAS_RPN: True
POOLING_SIZE: 7
POOLING_MODE: align
CROP_RESIZE_WITH_MAX_POOL: False


================================================
FILE: cfgs/res101_ls.yml
================================================
EXP_DIR: res101
TRAIN:
  HAS_RPN: True
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
  DISPLAY: 20
  BATCH_SIZE: 128
  WEIGHT_DECAY: 0.0001
  SCALES: [800]
  DOUBLE_BIAS: False
  LEARNING_RATE: 0.001
TEST:
  HAS_RPN: True
  SCALES: [800]
  MAX_SIZE: 1200
  RPN_POST_NMS_TOP_N: 1000
POOLING_SIZE: 7
POOLING_MODE: align
CROP_RESIZE_WITH_MAX_POOL: False


================================================
FILE: cfgs/res50.yml
================================================
EXP_DIR: res50
TRAIN:
  HAS_RPN: True
  # IMS_PER_BATCH: 1
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
  DISPLAY: 20
  BATCH_SIZE: 256
  WEIGHT_DECAY: 0.0001
  DOUBLE_BIAS: False
  SNAPSHOT_PREFIX: res50_faster_rcnn
TEST:
  HAS_RPN: True
POOLING_MODE: crop


================================================
FILE: cfgs/vgg16.yml
================================================
EXP_DIR: vgg16
TRAIN:
  HAS_RPN: True
  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
  RPN_POSITIVE_OVERLAP: 0.7
  RPN_BATCHSIZE: 256
  PROPOSAL_METHOD: gt
  BG_THRESH_LO: 0.0
  BATCH_SIZE: 256
  LEARNING_RATE: 0.01
TEST:
  HAS_RPN: True
POOLING_MODE: align
CROP_RESIZE_WITH_MAX_POOL: False


================================================
FILE: demo.py
================================================
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Jiasen Lu, Jianwei Yang, based on code from Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import _init_paths
import os
import sys
import numpy as np
import argparse
import pprint
import pdb
import time
import cv2
import imutils
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

import torchvision.transforms as transforms
import torchvision.datasets as dset
from scipy.misc import imread
from roi_data_layer.roidb import combined_roidb
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.rpn.bbox_transform import clip_boxes
from model.nms.nms_wrapper import nms
from model.rpn.bbox_transform import bbox_transform_inv
from model.utils.net_utils import save_net, load_net, vis_detections
from model.utils.blob import im_list_to_blob
from model.faster_rcnn.vgg16 import vgg16
from model.faster_rcnn.resnet import resnet
import pdb

# Python 2/3 compatibility shim: after this block the name `xrange` is
# defined on both interpreters (aliased to `range` when it does not exist).
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3


def parse_args():
  """
  Declare the demo's command-line options and parse sys.argv.

  Returns:
    argparse.Namespace holding one attribute per option listed below
    (attribute names are the `dest` values).
  """
  # (flag, add_argument keyword options), listed in declaration order so the
  # generated --help text keeps the same ordering.
  option_table = [
    ('--dataset', dict(dest='dataset',
                       help='training dataset',
                       default='pascal_voc', type=str)),
    ('--cfg', dict(dest='cfg_file',
                   help='optional config file',
                   default='cfgs/vgg16.yml', type=str)),
    ('--net', dict(dest='net',
                   help='vgg16, res50, res101, res152',
                   default='res101', type=str)),
    # Everything after --set on the command line is collected verbatim.
    ('--set', dict(dest='set_cfgs',
                   help='set config keys', default=None,
                   nargs=argparse.REMAINDER)),
    ('--load_dir', dict(dest='load_dir',
                        help='directory to load models',
                        default="/srv/share/jyang375/models")),
    ('--image_dir', dict(dest='image_dir',
                         help='directory to load images for demo',
                         default="images")),
    ('--cuda', dict(dest='cuda',
                    help='whether use CUDA',
                    action='store_true')),
    ('--mGPUs', dict(dest='mGPUs',
                     help='whether use multiple GPUs',
                     action='store_true')),
    ('--cag', dict(dest='class_agnostic',
                   help='whether perform class_agnostic bbox regression',
                   action='store_true')),
    ('--parallel_type', dict(dest='parallel_type',
                             help='which part of model to parallel, 0: all, 1: model before roi pooling',
                             default=0, type=int)),
    ('--checksession', dict(dest='checksession',
                            help='checksession to load model',
                            default=1, type=int)),
    ('--checkepoch', dict(dest='checkepoch',
                          help='checkepoch to load network',
                          default=1, type=int)),
    ('--checkpoint', dict(dest='checkpoint',
                          help='checkpoint to load network',
                          default=10021, type=int)),
    ('--bs', dict(dest='batch_size',
                  help='batch_size',
                  default=1, type=int)),
    ('--vis', dict(dest='vis',
                   help='visualization mode',
                   action='store_true')),
    ('--webcam_num', dict(dest='webcam_num',
                          help='webcam ID number',
                          default=-1, type=int)),
  ]

  parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
  for flag, options in option_table:
    parser.add_argument(flag, **options)

  return parser.parse_args()

# Module-level copies of three training settings, read from cfg when this
# module is loaded (i.e. before the __main__ block applies any --cfg/--set
# overrides).
lr, momentum, weight_decay = (
  cfg.TRAIN.LEARNING_RATE,
  cfg.TRAIN.MOMENTUM,
  cfg.TRAIN.WEIGHT_DECAY,
)

def _get_image_blob(im):
  """Build the network input blob for a single image.

  The image is converted to float32, the configured pixel means are
  subtracted, and one rescaled copy is produced per entry of
  ``cfg.TEST.SCALES``. Each scale maps the shorter side to the target size
  unless that would push the longer side past ``cfg.TEST.MAX_SIZE``, in which
  case the scale is reduced so the longer side equals ``MAX_SIZE``.

  Arguments:
    im (ndarray): a color image in BGR order
  Returns:
    blob: whatever ``im_list_to_blob`` builds from the rescaled images
    im_scale_factors (ndarray): the scale (relative to im) used for each
      rescaled image, in the order of ``cfg.TEST.SCALES``
  """
  pixels = im.astype(np.float32, copy=True)
  pixels -= cfg.PIXEL_MEANS

  height_width = pixels.shape[0:2]
  short_side = np.min(height_width)
  long_side = np.max(height_width)

  scaled_images = []
  scales = []
  for target_size in cfg.TEST.SCALES:
    scale = float(target_size) / float(short_side)
    # Cap the scale so the longer side never exceeds MAX_SIZE.
    if np.round(scale * long_side) > cfg.TEST.MAX_SIZE:
      scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
    resized = cv2.resize(pixels, None, None, fx=scale, fy=scale,
                         interpolation=cv2.INTER_LINEAR)
    scales.append(scale)
    scaled_images.append(resized)

  # Pack the rescaled images into a single blob.
  blob = im_list_to_blob(scaled_images)

  return blob, np.array(scales)

if __name__ == '__main__':
  # Demo driver: load a trained Faster R-CNN checkpoint and run detection
  # either on every file found in --image_dir (each result is written next to
  # its input as "<name>_det.jpg") or on live frames from a webcam
  # (--webcam_num >= 0), where the loop runs until 'q' is pressed.

  args = parse_args()

  print('Called with args:')
  print(args)

  if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
  if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

  cfg.USE_GPU_NMS = args.cuda

  print('Using config:')
  pprint.pprint(cfg)
  np.random.seed(cfg.RNG_SEED)

  # Checkpoints are looked up under <load_dir>/<net>/<dataset>/.
  input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
  if not os.path.exists(input_dir):
    raise Exception('There is no input directory for loading network from ' + input_dir)
  load_name = os.path.join(input_dir,
    'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

  pascal_classes = np.asarray(['__background__',
                       'aeroplane', 'bicycle', 'bird', 'boat',
                       'bottle', 'bus', 'car', 'cat', 'chair',
                       'cow', 'diningtable', 'dog', 'horse',
                       'motorbike', 'person', 'pottedplant',
                       'sheep', 'sofa', 'train', 'tvmonitor'])

  # Initialize the network here.
  if args.net == 'vgg16':
    fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic)
  elif args.net == 'res101':
    fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
  elif args.net == 'res50':
    fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
  elif args.net == 'res152':
    fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
  else:
    print("network is not defined")
    pdb.set_trace()

  fasterRCNN.create_architecture()

  print("load checkpoint %s" % (load_name))
  if args.cuda > 0:
    checkpoint = torch.load(load_name)
  else:
    # Keep every storage on the CPU when CUDA is not requested.
    checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
  fasterRCNN.load_state_dict(checkpoint['model'])
  if 'pooling_mode' in checkpoint.keys():
    cfg.POOLING_MODE = checkpoint['pooling_mode']

  print('load model successfully!')

  # Initialize the tensor holders here; they are resized and refilled for
  # every image inside the loop below.
  im_data = torch.FloatTensor(1)
  im_info = torch.FloatTensor(1)
  num_boxes = torch.LongTensor(1)
  gt_boxes = torch.FloatTensor(1)

  # ship to cuda
  if args.cuda > 0:
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

  # make variable
  with torch.no_grad():
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

  if args.cuda > 0:
    cfg.CUDA = True
    fasterRCNN.cuda()

  fasterRCNN.eval()

  thresh = 0.05
  # NOTE: visualization is forced on here, so the --vis flag has no effect.
  vis = True

  webcam_num = args.webcam_num
  use_webcam = webcam_num >= 0
  # Set up webcam or get image directories
  if use_webcam:
    cap = cv2.VideoCapture(webcam_num)
    num_images = 0
  else:
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)

  print('Loaded Photo: {} images.'.format(num_images))

  # Directory mode counts num_images down to zero, visiting imglist from its
  # last entry to its first exactly once. The previous `num_images >= 0`
  # condition ran one extra iteration with index -1, which re-processed the
  # last file and raised IndexError for an empty directory. Webcam mode keeps
  # looping until the user presses 'q' or a frame cannot be read.
  while use_webcam or num_images > 0:
    total_tic = time.time()

    if use_webcam:
      # Get image from the webcam
      if not cap.isOpened():
        raise RuntimeError("Webcam could not open. Please check connection.")
      ret, frame = cap.read()
      if not ret:
        # No frame was delivered; stop instead of feeding None to the net.
        print('Failed to grab a frame from the webcam; stopping.')
        break
      im_in = np.array(frame)
    else:
      # Load the demo image
      num_images -= 1
      im_file = os.path.join(args.image_dir, imglist[num_images])
      im_in = np.array(imread(im_file))
      if len(im_in.shape) == 2:
        # Grayscale input: replicate the single channel three times.
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
      # rgb -> bgr
      im_in = im_in[:, :, ::-1]
    im = im_in

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    # im_info row: blob dims 1 and 2 (presumably height and width, since the
    # blob is permuted from channels-last below) followed by the image scale.
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    # No ground truth at demo time: feed zeroed placeholders.
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    det_tic = time.time()

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
      rois, cls_prob, bbox_pred, \
      rpn_loss_cls, rpn_loss_box, \
      RCNN_loss_cls, RCNN_loss_bbox, \
      rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
      # Apply bounding-box regression deltas
      box_deltas = bbox_pred.data
      if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Undo the precomputed mean/stdev normalization of the targets.
        stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        if args.cuda > 0:
          stds = stds.cuda()
          means = means.cuda()
        box_deltas = box_deltas.view(-1, 4) * stds + means
        if args.class_agnostic:
          box_deltas = box_deltas.view(1, -1, 4)
        else:
          box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

      pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
      pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
      # Simply repeat the boxes, once for each class
      tiled = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
      pred_boxes = tiled.cuda() if args.cuda > 0 else tiled

    # Map boxes back to the coordinates of the original (unscaled) image.
    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
      im2show = np.copy(im)
    # Class 0 is '__background__', so start from 1.
    for j in range(1, len(pascal_classes)):
      inds = torch.nonzero(scores[:, j] > thresh).view(-1)
      # if there is det
      if inds.numel() > 0:
        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if args.class_agnostic:
          cls_boxes = pred_boxes[inds, :]
        else:
          cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # Sort by descending score before NMS.
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        if vis:
          # The trailing 0.5 is presumably the score threshold for drawing.
          im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5)

    misc_toc = time.time()
    nms_time = misc_toc - misc_tic

    if not use_webcam:
      sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                       .format(num_images + 1, len(imglist), detect_time, nms_time))
      sys.stdout.flush()

      if vis:
        # splitext copes with extensions of any length (e.g. ".jpeg"),
        # unlike slicing off a fixed four characters.
        stem = os.path.splitext(imglist[num_images])[0]
        result_path = os.path.join(args.image_dir, stem + "_det.jpg")
        cv2.imwrite(result_path, im2show)
    else:
      cv2.imshow("frame", im2show)
      total_toc = time.time()
      total_time = total_toc - total_tic
      frame_rate = 1 / total_time
      print('Frame rate:', frame_rate)
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break

  if use_webcam:
    cap.release()
    cv2.destroyAllWindows()


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m
================================================
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Load the PASCAL VOC devkit options found under PATH.
%   Temporarily changes into PATH, puts its 'VOCcode' folder on the MATLAB
%   path and runs VOCinit. The working directory and the MATLAB path are
%   restored before returning, on both the success and the failure path.
%
%   NOTE(review): VOCopts is never assigned in this file, so VOCinit
%   presumably defines it in this function's workspace (i.e. it runs as a
%   script) -- keep local variable names here in mind when editing.

% Remember where we came from so the directory can be restored.
tmp = pwd;
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Undo the path/directory changes before reporting the failure.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
rmpath('VOCcode');
cd(tmp);


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m
================================================
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate detections for every VOC class and print the APs.
%   RES is a struct array with one element per entry of VOCopts.classes,
%   each produced by voc_eval_cls. After all classes are evaluated the
%   per-class AP values and their mean are printed as percentages.

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

num_classes = length(VOCopts.classes);
for k = 1:num_classes
  class_name = VOCopts.classes{k};
  res(k) = voc_eval_cls(class_name, VOCopts, comp_id, output_dir);
end

% Summary: one AP per line, followed by the mean AP.
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');

function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate detections for a single class.
%   Runs VOCevaldet for CLS when evaluation is possible, saves the
%   precision/recall plot as '<cls>_pr.jpg' and the raw numbers as
%   '<cls>_pr.mat' in OUTPUT_DIR, and returns a struct with fields
%   recall, prec, ap and ap_auc. When evaluation is skipped the curves are
%   empty and both AP values are 0.

test_set = VOCopts.testset;
% Dataset names look like a three-character prefix followed by the year.
year = VOCopts.dataset(4:end);

addpath(fullfile(VOCopts.datadir, 'VOCcode'));

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate for years up to 2007, or for any split other than 'test'.
% str2double parses the text without evaluating it (str2num runs eval), and
% || short-circuits on the scalar operands.
do_eval = (str2double(year) <= 2007) || ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  print(gcf, '-djpeg', '-r0', ...
        fullfile(output_dir, [cls '_pr.jpg']));
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

save(fullfile(output_dir, [cls '_pr.mat']), ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));


================================================
FILE: lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m
================================================
function ap = xVOCap(rec,prec)
% XVOCAP  Area under the precision/recall curve.
%   From the PASCAL VOC 2011 devkit.
%   REC and PREC are concatenated vertically with sentinel values, so they
%   are presumably column vectors of equal length.

% Pad both curves with sentinel end points.
recall_pad = [0 ; rec ; 1];
prec_env = [0 ; prec ; 0];

% Sweep from right to left so each precision is the maximum of itself and
% everything after it.
for k = numel(prec_env)-1:-1:1
    prec_env(k) = max(prec_env(k), prec_env(k+1));
end

% Sum rectangle areas at the positions where recall changes.
steps = find(recall_pad(2:end) ~= recall_pad(1:end-1)) + 1;
ap = sum((recall_pad(steps) - recall_pad(steps-1)) .* prec_env(steps));


================================================
FILE: lib/datasets/__init__.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------


================================================
FILE: lib/datasets/coco.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
from model.utils.config import cfg
import os.path as osp
import sys
import os
import numpy as np
import scipy.sparse
import scipy.io as sio
import pickle
import json
import uuid
# COCO API
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as COCOmask

class coco(imdb):
  """Image database backed by the COCO API for one ``image_set``/``year``.

  Annotations are read from ``<cfg.DATA_DIR>/coco/annotations`` and images
  are expected under ``<cfg.DATA_DIR>/coco/images/<data_name>``. Class index
  0 is ``'__background__'``; the remaining classes follow the order returned
  by the COCO API.
  """

  def __init__(self, image_set, year):
    imdb.__init__(self, 'coco_' + year + '_' + image_set)
    # COCO specific config options
    self.config = {'use_salt': True,
                   'cleanup': True}
    # name, paths
    self._year = year
    self._image_set = image_set
    self._data_path = osp.join(cfg.DATA_DIR, 'coco')
    # load COCO API, classes, class <-> id mappings
    self._COCO = COCO(self._get_ann_file())
    cats = self._COCO.loadCats(self._COCO.getCatIds())
    self._classes = tuple(['__background__'] + [c['name'] for c in cats])
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
                                          self._COCO.getCatIds()))
    self._image_index = self._load_image_set_index()
    # Default to roidb handler
    self.set_proposal_method('gt')
    self.competition_mode(False)

    # Some image sets are "views" (i.e. subsets) into others.
    # For example, minival2014 is a random 5000 image subset of val2014.
    # This mapping tells us where the view's images and proposals come from.
    self._view_map = {
      'minival2014': 'val2014',  # 5k val2014 subset
      'valminusminival2014': 'val2014',  # val2014 \setminus minival2014
      'test-dev2015': 'test2015',
      'valminuscapval2014': 'val2014',
      'capval2014': 'val2014',
      'captest2014': 'val2014'
    }
    coco_name = image_set + year  # e.g., "val2014"
    self._data_name = (self._view_map[coco_name]
                       if coco_name in self._view_map
                       else coco_name)
    # Dataset splits that have ground-truth annotations (test splits
    # do not have gt annotations)
    self._gt_splits = ('train', 'val', 'minival')

  def _get_ann_file(self):
    """Return the annotation json path for this image set and year."""
    # Test sets only ship image info, not instance annotations.
    prefix = 'instances' if self._image_set.find('test') == -1 \
      else 'image_info'
    return osp.join(self._data_path, 'annotations',
                    prefix + '_' + self._image_set + self._year + '.json')

  def _load_image_set_index(self):
    """
    Load image ids.
    """
    image_ids = self._COCO.getImgIds()
    return image_ids

  def image_path_at(self, i):
    """
    Return the absolute path to image i in the image sequence.
    """
    return self.image_path_from_index(self._image_index[i])

  def image_id_at(self, i):
    """
    Return the COCO image id of image i in the image sequence.
    """
    return self._image_index[i]

  def image_path_from_index(self, index):
    """
    Construct an image path from the image's "index" identifier.
    """
    # Example image path for index=119993:
    #   images/train2014/COCO_train2014_000000119993.jpg
    file_name = ('COCO_' + self._data_name + '_' +
                 str(index).zfill(12) + '.jpg')
    image_path = osp.join(self._data_path, 'images',
                          self._data_name, file_name)
    assert osp.exists(image_path), \
      'Path does not exist: {}'.format(image_path)
    return image_path

  def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.
    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
      with open(cache_file, 'rb') as fid:
        roidb = pickle.load(fid)
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_coco_annotation(index)
                for index in self._image_index]

    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))
    return gt_roidb

  def _load_coco_annotation(self, index):
    """
    Loads COCO bounding-box instance annotations. Crowd instances are
    handled by marking their overlaps (with all categories) to -1. This
    overlap value means that crowd "instances" are excluded from training.
    """
    im_ann = self._COCO.loadImgs(index)[0]
    width = im_ann['width']
    height = im_ann['height']

    annIds = self._COCO.getAnnIds(imgIds=index, iscrowd=None)
    objs = self._COCO.loadAnns(annIds)
    # Sanitize bboxes -- some are invalid
    valid_objs = []
    for obj in objs:
      # COCO boxes are [x, y, w, h]; clip them to the image and convert to
      # inclusive [x1, y1, x2, y2] corners.
      x1 = np.max((0, obj['bbox'][0]))
      y1 = np.max((0, obj['bbox'][1]))
      x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
      y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
      if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
        obj['clean_bbox'] = [x1, y1, x2, y2]
        valid_objs.append(obj)
    objs = valid_objs
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    seg_areas = np.zeros((num_objs), dtype=np.float32)

    # Lookup table to map from COCO category ids to our internal class
    # indices
    coco_cat_id_to_class_ind = {self._class_to_coco_cat_id[cls]:
                                self._class_to_ind[cls]
                                for cls in self._classes[1:]}

    for ix, obj in enumerate(objs):
      cls = coco_cat_id_to_class_ind[obj['category_id']]
      boxes[ix, :] = obj['clean_bbox']
      gt_classes[ix] = cls
      seg_areas[ix] = obj['area']
      if obj['iscrowd']:
        # Set overlap to -1 for all classes for crowd objects
        # so they will be excluded during training
        overlaps[ix, :] = -1.0
      else:
        overlaps[ix, cls] = 1.0

    ds_utils.validate_boxes(boxes, width=width, height=height)
    overlaps = scipy.sparse.csr_matrix(overlaps)
    return {'width': width,
            'height': height,
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas}

  def _get_widths(self):
    """Return the image width stored in every roidb entry."""
    # This is the definition that was in effect before as well: an earlier
    # variant based on the COCO image records was shadowed by it and removed.
    return [r['width'] for r in self.roidb]

  def append_flipped_images(self):
    """Append a horizontally flipped copy of every roidb entry."""
    num_images = self.num_images
    widths = self._get_widths()
    for i in range(num_images):
      boxes = self.roidb[i]['boxes'].copy()
      oldx1 = boxes[:, 0].copy()
      oldx2 = boxes[:, 2].copy()
      # Mirror the x coordinates around the image's vertical axis.
      boxes[:, 0] = widths[i] - oldx2 - 1
      boxes[:, 2] = widths[i] - oldx1 - 1
      assert (boxes[:, 2] >= boxes[:, 0]).all()
      entry = {'width': widths[i],
               'height': self.roidb[i]['height'],
               'boxes': boxes,
               'gt_classes': self.roidb[i]['gt_classes'],
               'gt_overlaps': self.roidb[i]['gt_overlaps'],
               'flipped': True,
               'seg_areas': self.roidb[i]['seg_areas']}

      self.roidb.append(entry)
    # Flipped entry i + num_images refers to the same image id as entry i.
    self._image_index = self._image_index * 2

  def _get_box_file(self, index):
    """Return the relative path of the .mat box file for image ``index``."""
    # first 14 chars / first 22 chars / all chars + .mat
    # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
    file_name = ('COCO_' + self._data_name +
                 '_' + str(index).zfill(12) + '.mat')
    return osp.join(file_name[:14], file_name[:22], file_name)

  def _print_detection_eval_metrics(self, coco_eval):
    """Print mean and per-category AP over IoU 0.50:0.95, then the summary."""
    IoU_lo_thresh = 0.5
    IoU_hi_thresh = 0.95

    def _get_thr_ind(coco_eval, thr):
      ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                     (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
      iou_thr = coco_eval.params.iouThrs[ind]
      assert np.isclose(iou_thr, thr)
      return ind

    ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh)
    ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh)
    # precision has dims (iou, recall, cls, area range, max dets)
    # area range index 0: all area ranges
    # max dets index 2: 100 per image
    precision = \
      coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2]
    ap_default = np.mean(precision[precision > -1])
    print(('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] '
           '~~~~').format(IoU_lo_thresh, IoU_hi_thresh))
    print('{:.1f}'.format(100 * ap_default))
    for cls_ind, cls in enumerate(self.classes):
      if cls == '__background__':
        continue
      # minus 1 because of __background__
      precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2]
      ap = np.mean(precision[precision > -1])
      print('{:.1f}'.format(100 * ap))

    print('~~~~ Summary metrics ~~~~')
    coco_eval.summarize()

  def _do_detection_eval(self, res_file, output_dir):
    """Run COCO bbox evaluation on ``res_file`` and pickle the evaluator."""
    ann_type = 'bbox'
    coco_dt = self._COCO.loadRes(res_file)
    coco_eval = COCOeval(self._COCO, coco_dt)
    coco_eval.params.useSegm = (ann_type == 'segm')
    coco_eval.evaluate()
    coco_eval.accumulate()
    self._print_detection_eval_metrics(coco_eval)
    eval_file = osp.join(output_dir, 'detection_results.pkl')
    with open(eval_file, 'wb') as fid:
      pickle.dump(coco_eval, fid, pickle.HIGHEST_PROTOCOL)
    print('Wrote COCO eval results to: {}'.format(eval_file))

  def _coco_results_one_category(self, boxes, cat_id):
    """Convert one category's detections into COCO result dicts.

    ``boxes[im_ind]`` holds the detections of image ``im_ind`` as rows of
    ``[x1, y1, x2, y2, ..., score]`` (the score is the last column); images
    without detections may be given as an empty array or an empty list.
    Returns a list of dicts with ``image_id``, ``category_id``, ``bbox``
    (``[x, y, w, h]``) and ``score``.
    """
    results = []
    for im_ind, index in enumerate(self.image_index):
      # np.float was removed from NumPy; float64 is the same dtype.
      # asarray also accepts a plain empty list.
      dets = np.asarray(boxes[im_ind], dtype=np.float64)
      # Comparing an ndarray with [] is not a reliable emptiness test.
      if dets.size == 0:
        continue
      scores = dets[:, -1]
      xs = dets[:, 0]
      ys = dets[:, 1]
      ws = dets[:, 2] - xs + 1
      hs = dets[:, 3] - ys + 1
      results.extend(
        [{'image_id': index,
          'category_id': cat_id,
          'bbox': [xs[k], ys[k], ws[k], hs[k]],
          'score': scores[k]} for k in range(dets.shape[0])])
    return results

  def _write_coco_results_file(self, all_boxes, res_file):
    """Write the detections of all classes to ``res_file`` as COCO json."""
    # [{"image_id": 42,
    #   "category_id": 18,
    #   "bbox": [258.15,41.29,348.26,243.78],
    #   "score": 0.236}, ...]
    results = []
    for cls_ind, cls in enumerate(self.classes):
      if cls == '__background__':
        continue
      print('Collecting {} results ({:d}/{:d})'.format(cls, cls_ind,
                                                       self.num_classes - 1))
      coco_cat_id = self._class_to_coco_cat_id[cls]
      results.extend(self._coco_results_one_category(all_boxes[cls_ind],
                                                     coco_cat_id))
    print('Writing results json to {}'.format(res_file))
    with open(res_file, 'w') as fid:
      json.dump(results, fid)

  def evaluate_detections(self, all_boxes, output_dir):
    """Write the results json and, for non-test sets, evaluate it."""
    res_file = osp.join(output_dir, ('detections_' +
                                     self._image_set +
                                     self._year +
                                     '_results'))
    # A random suffix keeps result files from different runs apart.
    if self.config['use_salt']:
      res_file += '_{}'.format(str(uuid.uuid4()))
    res_file += '.json'
    self._write_coco_results_file(all_boxes, res_file)
    # Only do evaluation on non-test sets
    if self._image_set.find('test') == -1:
      self._do_detection_eval(res_file, output_dir)
    # Optionally cleanup results json file
    if self.config['cleanup']:
      os.remove(res_file)

  def competition_mode(self, on):
    """Competition mode disables both the filename salt and the cleanup."""
    if on:
      self.config['use_salt'] = False
      self.config['cleanup'] = False
    else:
      self.config['use_salt'] = True
      self.config['cleanup'] = True


================================================
FILE: lib/datasets/ds_utils.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def unique_boxes(boxes, scale=1.0):
  """Return the sorted row indices of the first occurrence of each box.

  Boxes are scaled and rounded, then each row is collapsed into a single
  number by a weighted sum of its four coordinates; rows with equal sums are
  treated as duplicates.
  """
  weights = np.array([1, 1e3, 1e6, 1e9])
  rounded = np.round(boxes * scale)
  keys = rounded.dot(weights)
  first_seen = np.unique(keys, return_index=True)[1]
  return np.sort(first_seen)


def xywh_to_xyxy(boxes):
  """Convert [x y w h] rows to inclusive-corner [x1 y1 x2 y2] rows."""
  top_left = boxes[:, 0:2]
  # Corners are inclusive, hence the -1.
  bottom_right = top_left + boxes[:, 2:4] - 1
  return np.hstack((top_left, bottom_right))


def xyxy_to_xywh(boxes):
  """Convert inclusive-corner [x1 y1 x2 y2] rows to [x y w h] rows."""
  top_left = boxes[:, 0:2]
  # Corners are inclusive, hence the +1.
  size = boxes[:, 2:4] - top_left + 1
  return np.hstack((top_left, size))


def validate_boxes(boxes, width=0, height=0):
  """Assert that every [x1 y1 x2 y2] row lies inside a width x height image.

  Raises AssertionError when a coordinate is negative, when a box has
  x2 < x1 or y2 < y1, or when x2/y2 is not strictly below width/height.
  """
  left, top = boxes[:, 0], boxes[:, 1]
  right, bottom = boxes[:, 2], boxes[:, 3]
  assert np.all(left >= 0)
  assert np.all(top >= 0)
  assert np.all(right >= left)
  assert np.all(bottom >= top)
  assert np.all(right < width)
  assert np.all(bottom < height)


def filter_small_boxes(boxes, min_size):
  """Return the indices of boxes whose sides are both at least ``min_size``.

  Sides are measured as plain coordinate differences (``x2 - x1`` and
  ``y2 - y1``), without the ``+ 1`` used by the box-format converters.

  Arguments:
    boxes (ndarray): rows of [x1 y1 x2 y2]
    min_size (number): smallest accepted width and height
  Returns:
    keep (ndarray): indices of the rows that pass the size test
  """
  w = boxes[:, 2] - boxes[:, 0]
  h = boxes[:, 3] - boxes[:, 1]
  # Width and height use the same inclusive bound; the height test used a
  # strict '>' before, which dropped boxes exactly min_size tall.
  keep = np.where((w >= min_size) & (h >= min_size))[0]
  return keep


================================================
FILE: lib/datasets/factory.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Factory method for easily getting imdbs by name."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Registry mapping a dataset name to a zero-argument callable that builds
# the corresponding imdb (see get_imdb, which looks the name up and calls it).
__sets = {}
from datasets.pascal_voc import pascal_voc
from datasets.coco import coco
from datasets.imagenet import imagenet
from datasets.vg import vg

import numpy as np

# Each registration below stores a lambda whose default arguments capture the
# current loop values at definition time, so every entry constructs its own
# split/year instead of the values left over from the last iteration.

# Set up voc_<year>_<split>
for year in ['2007', '2012']:
  for split in ['train', 'val', 'trainval', 'test']:
    name = 'voc_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

# Set up coco_2014_<split>
for year in ['2014']:
  for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up coco_2014_cap_<split>
# Note: the keys use the same 'coco_<year>_<split>' pattern as above, so
# 'train', 'val' and 'trainval' are simply re-registered with equivalent
# factories; only 'capval' and 'valminuscapval' are new entries.
for year in ['2014']:
  for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up coco_2015_<split>
for year in ['2015']:
  for split in ['test', 'test-dev']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up vg_<version>_<split>
# (earlier single-version variant, kept for reference)
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version,split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
        name = 'vg_{}_{}'.format(version,split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))
        
# Set up imagenet_<split>; the devkit and data locations are fixed relative
# paths, so they resolve against the current working directory.
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))

def get_imdb(name):
  """Build and return the imdb (image database) registered under ``name``.

  Raises KeyError when no dataset with that name has been registered.
  """
  if name in __sets:
    # Registry values are factories; call one to construct the imdb.
    return __sets[name]()
  raise KeyError('Unknown dataset: {}'.format(name))


def list_imdbs():
  """Return the names of all registered imdbs as a new list."""
  return [dataset_name for dataset_name in __sets]


================================================
FILE: lib/datasets/imagenet.py
================================================
from __future__ import print_function
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import datasets
import datasets.imagenet
import os, sys
from datasets.imdb import imdb
import xml.dom.minidom as minidom
import numpy as np
import scipy.sparse
import scipy.io as sio
import subprocess
import pdb
import pickle
# Compatibility shim: keep the name `xrange` usable when the builtin does not
# exist by aliasing it to `range`.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3


class imagenet(imdb):
    """Image database backed by an ILSVRC devkit and data directory.

    Two synset tables are read from the devkit: ``meta_det.mat`` (200 entries,
    stored in the ``*_image`` attributes) and ``meta_vid.mat`` (30 entries,
    which become the classes of this imdb).  Index 0 of every table is
    reserved for ``'__background__'``.
    """

    def __init__(self, image_set, devkit_path, data_path):
        """Load the synset tables and the image index for `image_set`.

        Args:
            image_set: split name; ``'train'`` triggers index generation,
                any other value reads ``ImageSets/val.txt``.
            devkit_path: directory containing ``data/meta_det.mat`` and
                ``data/meta_vid.mat``.
            data_path: directory containing ``ImageSets``, ``Data`` and
                ``Annotations``.

        Raises:
            AssertionError: if `devkit_path` or `data_path` does not exist.
        """
        imdb.__init__(self, image_set)
        self._image_set = image_set
        self._devkit_path = devkit_path
        self._data_path = data_path

        # Validate the locations before reading from them so that a wrong
        # path produces these messages instead of a low-level I/O error.
        assert os.path.exists(self._devkit_path), 'Devkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path)

        synsets_image = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_det.mat'))
        synsets_video = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_vid.mat'))
        image_rows = synsets_image['synsets'][0]
        video_rows = synsets_video['synsets'][0]

        # Column 2 holds the class name and column 1 the WordNet id.
        self._classes_image = ('__background__',) + tuple(
            image_rows[i][2][0] for i in range(200))
        self._wnid_image = (0,) + tuple(
            image_rows[i][1][0] for i in range(200))

        self._classes = ('__background__',) + tuple(
            video_rows[i][2][0] for i in range(30))
        self._wnid = (0,) + tuple(
            video_rows[i][1][0] for i in range(30))

        self._wnid_to_ind_image = dict(zip(self._wnid_image, range(201)))
        self._class_to_ind_image = dict(zip(self._classes_image, range(201)))

        self._wnid_to_ind = dict(zip(self._wnid, range(31)))
        self._class_to_ind = dict(zip(self._classes, range(31)))

        # Flag the image classes whose WordNet id also appears among the
        # video classes (index 0, the background, is never flagged).
        self._valid_image_flag = [0] * 201
        for i in range(1, 201):
            if self._wnid_image[i] in self._wnid_to_ind:
                self._valid_image_flag[i] = 1

        self._image_ext = ['.JPEG']

        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self._roidb_handler = self.gt_roidb

        # Specific config options
        self.config = {'cleanup'  : True,
                       'use_salt' : True,
                       'top_k'    : 2000}

    def image_path_at(self, i):
        """
        Return the path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.

        Raises AssertionError if the resulting file does not exist.
        """
        image_path = os.path.join(self._data_path, 'Data', self._image_set, index + self._image_ext[0])
        assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path)
        return image_path

    @staticmethod
    def _sample_cyclic(items, count=2000):
        """Return `count` entries of `items` following one random permutation.

        The permutation is walked cyclically, so entries repeat when `items`
        has fewer than `count` elements.  An empty `items` raises
        ZeroDivisionError (modulo by zero).
        """
        num_items = len(items)
        ids = np.random.permutation(num_items)
        return [items[ids[k % num_items]] for k in range(count)]

    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.

        For the 'train' split the index is read from ImageSets/trainr.txt when
        that file exists; otherwise it is generated by random sampling and
        written to that file.  Every other split reads ImageSets/val.txt.
        """
        # Local import: only the index-generation path needs it.
        import glob

        image_sets_dir = os.path.join(self._data_path, 'ImageSets')

        if self._image_set != 'train':
            with open(os.path.join(image_sets_dir, 'val.txt')) as f:
                return [x.strip() for x in f.readlines()]

        generated_index_file = os.path.join(image_sets_dir, 'trainr.txt')
        if os.path.exists(generated_index_file):
            with open(generated_index_file, 'r') as f:
                # split() never yields empty strings, so no filtering needed.
                return f.read().split()

        image_index = []

        # NOTE(review): this range stops at train_199.txt although the loop
        # further below covers 1..200 -- confirm whether 200 is skipped on
        # purpose before changing it.
        for i in range(1, 200):
            print(i)
            image_set_file = os.path.join(image_sets_dir, 'DET', 'train_' + str(i) + '.txt')
            with open(image_set_file) as f:
                # Only the first space-separated field of each line is used.
                subdirs = [x.strip().split(' ')[0] for x in f.readlines()]

            candidates = []
            for subdir in subdirs:
                # glob replaces the former `ls` shell call: same pattern, but
                # no shell is spawned and paths with whitespace stay intact.
                pattern = self._data_path + '/Data/DET/train/' + subdir + '/*.JPEG'
                # Drop the 5-character '.JPEG' suffix from every match.
                candidates.extend(path[:-5] for path in sorted(glob.glob(pattern)))

            image_index.extend(self._sample_cyclic(candidates))

        for i in range(1, 201):
            if self._valid_image_flag[i] == 1:
                image_set_file = os.path.join(image_sets_dir, 'train_pos_' + str(i) + '.txt')
                with open(image_set_file) as f:
                    tmp_index = [x.strip() for x in f.readlines()]
                image_index.extend(self._sample_cyclic(tmp_index))

        # Persist the sampled index so later runs reuse the same selection.
        with open(generated_index_file, 'w') as f:
            for entry in image_index:
                f.write(entry + '\n')
        return image_index

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.
        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            # NOTE: pickle.load executes arbitrary code from the file; the
            # cache directory must only contain files written by this code.
            with open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = [self._load_imagenet_annotation(index)
                    for index in self.image_index]
        with open(cache_file, 'wb') as fid:
            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))

        return gt_roidb

    def _load_imagenet_annotation(self, index):
        """
        Load bounding boxes and class labels from the XML annotation file of
        image `index`.

        Returns a dict with keys 'boxes' (num_objs x 4, uint16),
        'gt_classes' (int32), 'gt_overlaps' (sparse num_objs x num_classes
        with 1.0 at each object's class) and 'flipped' (always False).
        Raises KeyError when an object's name is not a known video WordNet id.
        """
        filename = os.path.join(self._data_path, 'Annotations', self._image_set, index + '.xml')

        def get_data_from_tag(node, tag):
            # Text content of the first <tag> element below `node`.
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            x1 = float(get_data_from_tag(obj, 'xmin'))
            y1 = float(get_data_from_tag(obj, 'ymin'))
            x2 = float(get_data_from_tag(obj, 'xmax'))
            y2 = float(get_data_from_tag(obj, 'ymax'))
            # The <name> tag is looked up in the WordNet-id table.
            cls = self._wnid_to_ind[
                    str(get_data_from_tag(obj, "name")).lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_overlaps' : overlaps,
                'flipped' : False}

if __name__ == '__main__':
    # Manual smoke test.  The class defined in this module is instantiated
    # directly (``datasets.imagenet`` is the module and is not callable) and
    # all three constructor arguments are supplied, using the locations that
    # datasets/factory.py registers for the imagenet_* datasets.
    d = imagenet('val', 'data/imagenet/ILSVRC/devkit', 'data/imagenet/ILSVRC')
    res = d.roidb
    from IPython import embed; embed()


================================================
FILE: lib/datasets/imdb.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import os.path as osp
import PIL
from model.utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from model.utils.config import cfg
import pdb

# Two directory levels above the directory that contains this module.
ROOT_DIR = osp.join(osp.dirname(__file__), osp.pardir, osp.pardir)

class imdb(object):
  """Image database.

  Base class holding a dataset name, its class list, its image index and a
  lazily built roidb.  Subclasses are expected to provide `image_path_at`,
  `image_id_at`, `evaluate_detections` and at least one `<method>_roidb`
  handler; the versions defined here raise NotImplementedError.
  """

  def __init__(self, name, classes=None):
    """Create an empty database called `name` with optional `classes`."""
    self._name = name
    self._num_classes = 0
    # Any falsy `classes` value (None, empty sequence) becomes a new list.
    self._classes = classes if classes else []
    self._image_index = []
    self._obj_proposer = 'gt'
    self._roidb = None
    self._roidb_handler = self.default_roidb
    # Use this dict for storing dataset specific config options
    self.config = {}

  @property
  def name(self):
    """Dataset name given at construction."""
    return self._name

  @property
  def num_classes(self):
    """Number of entries in the class list."""
    return len(self._classes)

  @property
  def classes(self):
    """The class list itself."""
    return self._classes

  @property
  def image_index(self):
    """List of image identifiers."""
    return self._image_index

  @property
  def roidb_handler(self):
    """Callable that builds the roidb on first access of `roidb`."""
    return self._roidb_handler

  @roidb_handler.setter
  def roidb_handler(self, val):
    self._roidb_handler = val

  def set_proposal_method(self, method):
    """Select the `<method>_roidb` bound method as the roidb handler.

    Raises:
      AttributeError: if this object has no attribute `<method>_roidb`.
    """
    # getattr performs the same lookup the former eval() did, without
    # evaluating `method` as Python source.
    self.roidb_handler = getattr(self, method + '_roidb')

  @property
  def roidb(self):
    # A roidb is a list of dictionaries, each with the following keys:
    #   boxes
    #   gt_overlaps
    #   gt_classes
    #   flipped
    # It is built once by the current handler and then reused.
    if self._roidb is not None:
      return self._roidb
    self._roidb = self.roidb_handler()
    return self._roidb

  @property
  def cache_path(self):
    """Absolute path of `<cfg.DATA_DIR>/cache`, created when missing."""
    cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
    if not os.path.exists(cache_path):
      os.makedirs(cache_path)
    return cache_path

  @property
  def num_images(self):
    """Number of entries in the image index."""
    return len(self.image_index)

  def image_path_at(self, i):
    raise NotImplementedError

  def image_id_at(self, i):
    raise NotImplementedError

  def default_roidb(self):
    raise NotImplementedError

  def evaluate_detections(self, all_boxes, output_dir=None):
    """
    all_boxes is a list of length number-of-classes.
    Each list element is a list of length number-of-images.
    Each of those list elements is either an empty list []
    or a numpy array of detection.

    all_boxes[class][image] = [] or np.array of shape #dets x 5
    """
    raise NotImplementedError

  def _get_widths(self):
    """Return the pixel width of every image, in index order."""
    # `import PIL` alone does not load the Image submodule, so import it
    # explicitly here instead of relying on another module having done so.
    from PIL import Image
    widths = []
    for i in range(self.num_images):
      # The context manager closes the file once the size has been read.
      with Image.open(self.image_path_at(i)) as im:
        widths.append(im.size[0])
    return widths

  def append_flipped_images(self):
    """Append a horizontally flipped copy of every roidb entry.

    The flipped entry shares 'gt_overlaps' and 'gt_classes' with its source
    and carries only the four keys set below.  The image index is doubled so
    that entry i + num_images refers to the same image as entry i.
    """
    num_images = self.num_images
    widths = self._get_widths()
    for i in range(num_images):
      boxes = self.roidb[i]['boxes'].copy()
      oldx1 = boxes[:, 0].copy()
      oldx2 = boxes[:, 2].copy()
      # Mirror the x coordinates around the vertical image axis.
      boxes[:, 0] = widths[i] - oldx2 - 1
      boxes[:, 2] = widths[i] - oldx1 - 1
      assert (boxes[:, 2] >= boxes[:, 0]).all()
      entry = {'boxes': boxes,
               'gt_overlaps': self.roidb[i]['gt_overlaps'],
               'gt_classes': self.roidb[i]['gt_classes'],
               'flipped': True}
      self.roidb.append(entry)
    self._image_index = self._image_index * 2

  def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                      area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: per-image list of box arrays; when None, the roidb
            boxes whose gt class is 0 are used instead.
        thresholds: IoU thresholds; defaults to 0.5, 0.55, ..., 0.95.
        area: one of the keys of `areas` below, selecting the gt area range.
        limit: when given, only the first `limit` boxes per image are used.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0 ** 2, 1e5 ** 2],  # all
                   [0 ** 2, 32 ** 2],  # small
                   [32 ** 2, 96 ** 2],  # medium
                   [96 ** 2, 1e5 ** 2],  # large
                   [96 ** 2, 128 ** 2],  # 96-128
                   [128 ** 2, 256 ** 2],  # 128-256
                   [256 ** 2, 512 ** 2],  # 256-512
                   [512 ** 2, 1e5 ** 2],  # 512-inf
                   ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
      # Checking for max_overlaps == 1 avoids including crowd annotations
      # (...pretty hacking :/)
      max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
      gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                         (max_gt_overlaps == 1))[0]
      gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
      gt_areas = self.roidb[i]['seg_areas'][gt_inds]
      valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                               (gt_areas <= area_range[1]))[0]
      gt_boxes = gt_boxes[valid_gt_inds, :]
      num_pos += len(valid_gt_inds)

      if candidate_boxes is None:
        # If candidate_boxes is not supplied, the default is to use the
        # non-ground-truth boxes from this roidb
        non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
        boxes = self.roidb[i]['boxes'][non_gt_inds, :]
      else:
        boxes = candidate_boxes[i]
      if boxes.shape[0] == 0:
        continue
      if limit is not None and boxes.shape[0] > limit:
        boxes = boxes[:limit, :]

      # np.float64 is what the removed `np.float` alias resolved to.
      overlaps = bbox_overlaps(boxes.astype(np.float64),
                               gt_boxes.astype(np.float64))

      _gt_overlaps = np.zeros((gt_boxes.shape[0]))
      for j in range(gt_boxes.shape[0]):
        # find which proposal box maximally covers each gt box
        argmax_overlaps = overlaps.argmax(axis=0)
        # and get the iou amount of coverage for each gt box
        max_overlaps = overlaps.max(axis=0)
        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ind = max_overlaps.argmax()
        gt_ovr = max_overlaps.max()
        assert (gt_ovr >= 0)
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps[j] = overlaps[box_ind, gt_ind]
        assert (_gt_overlaps[j] == gt_ovr)
        # mark the proposal box and the gt box as used
        overlaps[box_ind, :] = -1
        overlaps[:, gt_ind] = -1
      # append recorded iou coverage level
      gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
      step = 0.05
      thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}

  def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from one array of proposal boxes per image.

    When `gt_roidb` is given and has boxes for an image, each proposal's
    overlap row records its best IoU under the class of the gt box that
    achieved it (only where that IoU is positive).  All proposals receive
    gt class 0 and seg area 0.
    """
    assert len(box_list) == self.num_images, \
      'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
      boxes = box_list[i]
      num_boxes = boxes.shape[0]
      overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

      if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
        gt_boxes = gt_roidb[i]['boxes']
        gt_classes = gt_roidb[i]['gt_classes']
        # np.float64 is what the removed `np.float` alias resolved to.
        gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                    gt_boxes.astype(np.float64))
        argmaxes = gt_overlaps.argmax(axis=1)
        maxes = gt_overlaps.max(axis=1)
        I = np.where(maxes > 0)[0]
        overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

      overlaps = scipy.sparse.csr_matrix(overlaps)
      roidb.append({
        'boxes': boxes,
        'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
        'gt_overlaps': overlaps,
        'flipped': False,
        'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
      })
    return roidb

  @staticmethod
  def merge_roidbs(a, b):
    """Append the rows of every entry of `b` to the matching entry of `a`.

    `a` is modified in place and returned.  Both lists must have the same
    length, and every entry needs the keys 'boxes', 'gt_classes',
    'gt_overlaps' and 'seg_areas'.
    """
    assert len(a) == len(b)
    for i in range(len(a)):
      a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
      a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                      b[i]['gt_classes']))
      a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                 b[i]['gt_overlaps']])
      a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                     b[i]['seg_areas']))
    return a

  def competition_mode(self, on):
    """Turn competition mode on or off."""
    pass


================================================
FILE: lib/datasets/pascal_voc.py
================================================
from __future__ import print_function
from __future__ import absolute_import
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import xml.dom.minidom as minidom

import os
# import PIL
import numpy as np
import scipy.sparse
import subprocess
import math
import glob
import uuid
import scipy.io as sio
import xml.etree.ElementTree as ET
import pickle
from .imdb import imdb
from .imdb import ROOT_DIR
from . import ds_utils
from .voc_eval import voc_eval

# TODO: make fast_rcnn irrelevant
# >>>> obsolete, because it depends on sth outside of this project
from model.utils.config import cfg

try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3

# <<<< obsolete


class pascal_voc(imdb):
    """Image database for one year/split of the PASCAL VOC devkit.

    The database is named ``voc_<year>_<image_set>`` and uses the 20 VOC
    object classes plus ``'__background__'`` at index 0.
    """

    def __init__(self, image_set, year, devkit_path=None):
        """Set up paths, classes and the image index.

        Args:
            image_set: name of the split; ``ImageSets/Main/<image_set>.txt``
                is read for the image index.
            year: year as a string, e.g. ``'2007'``.
            devkit_path: devkit root; defaults to `_get_default_path()`.

        Raises:
            AssertionError: if the devkit, the data directory or the image
                set file does not exist.
        """
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set
        self._devkit_path = self._get_default_path() if devkit_path is None \
            else devkit_path
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

        # Check the roots before the image set file is read, so a wrong
        # devkit location is reported as such.
        assert os.path.exists(self._devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)

        self._classes = ('__background__',  # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._image_ext = '.jpg'
        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        # self._roidb_handler = self.selective_search_roidb
        self._roidb_handler = self.gt_roidb
        # Random suffix that keeps result files of separate runs apart.
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options
        self.config = {'cleanup': True,
                       'use_salt': True,
                       'use_diff': False,
                       'matlab_eval': False,
                       'rpn_file': None,
                       'min_size': 2}

    def image_path_at(self, i):
        """
        Return the path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_id_at(self, i):
        """
        Return the id of image i, which for this dataset is i itself.
        """
        return i

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.

        Raises AssertionError if the resulting file does not exist.
        """
        image_path = os.path.join(self._data_path, 'JPEGImages',
                                  index + self._image_ext)
        assert os.path.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.
        """
        # Example path to image set file:
        # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
        image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                      self._image_set + '.txt')
        assert os.path.exists(image_set_file), \
            'Path does not exist: {}'.format(image_set_file)
        with open(image_set_file) as f:
            image_index = [x.strip() for x in f.readlines()]
        return image_index

    def _get_default_path(self):
        """
        Return the default path where PASCAL VOC is expected to be installed.
        """
        return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year)

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            # NOTE: pickle.load executes arbitrary code from the file; the
            # cache directory must only contain files written by this code.
            with open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = [self._load_pascal_annotation(index)
                    for index in self.image_index]
        with open(cache_file, 'wb') as fid:
            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))

        return gt_roidb

    def selective_search_roidb(self):
        """
        Return the database of selective search regions of interest.
        Ground-truth ROIs are also included.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_selective_search_roidb.pkl')

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        # Ground truth is merged in for 2007 and for every non-test split.
        if int(self._year) == 2007 or self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            ss_roidb = self._load_selective_search_roidb(gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)
        else:
            roidb = self._load_selective_search_roidb(None)
        with open(cache_file, 'wb') as fid:
            pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))

        return roidb

    def rpn_roidb(self):
        """
        Return the roidb built from the boxes stored in config['rpn_file'],
        merged with the ground truth for 2007 and for every non-test split.
        The result is not cached.
        """
        if int(self._year) == 2007 or self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            rpn_roidb = self._load_rpn_roidb(gt_roidb)
            roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb)
        else:
            roidb = self._load_rpn_roidb(None)

        return roidb

    def _load_rpn_roidb(self, gt_roidb):
        """Unpickle the box list at config['rpn_file'] and turn it into a roidb."""
        filename = self.config['rpn_file']
        print('loading {}'.format(filename))
        assert os.path.exists(filename), \
            'rpn data not found at: {}'.format(filename)
        with open(filename, 'rb') as f:
            box_list = pickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def _load_selective_search_roidb(self, gt_roidb):
        """Load selective search boxes from the .mat file named after this imdb."""
        filename = os.path.abspath(os.path.join(cfg.DATA_DIR,
                                                'selective_search_data',
                                                self.name + '.mat'))
        assert os.path.exists(filename), \
            'Selective search data not found at: {}'.format(filename)
        raw_data = sio.loadmat(filename)['boxes'].ravel()

        box_list = []
        for i in range(raw_data.shape[0]):
            # Swap the column pairs (1, 0, 3, 2) and shift by one to get
            # 0-based boxes.
            boxes = raw_data[i][:, (1, 0, 3, 2)] - 1
            keep = ds_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
            boxes = boxes[keep, :]
            box_list.append(boxes)

        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.

        Returns a dict with keys 'boxes', 'gt_classes', 'gt_ishard',
        'gt_overlaps', 'flipped' (always False) and 'seg_areas'.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        tree = ET.parse(filename)
        objs = tree.findall('object')
        # NOTE: config['use_diff'] is not applied here; objects marked as
        # difficult are kept and flagged through 'gt_ishard' instead.
        # if not self.config['use_diff']:
        #     # Exclude the samples labeled as difficult
        #     non_diff_objs = [
        #         obj for obj in objs if int(obj.find('difficult').text) == 0]
        #     objs = non_diff_objs
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        seg_areas = np.zeros((num_objs), dtype=np.float32)
        ishards = np.zeros((num_objs), dtype=np.int32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based
            x1 = float(bbox.find('xmin').text) - 1
            y1 = float(bbox.find('ymin').text) - 1
            x2 = float(bbox.find('xmax').text) - 1
            y2 = float(bbox.find('ymax').text) - 1

            # A missing <difficult> tag counts as "not difficult".
            diffc = obj.find('difficult')
            difficult = 0 if diffc is None else int(diffc.text)
            ishards[ix] = difficult

            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes': boxes,
                'gt_classes': gt_classes,
                'gt_ishard': ishards,
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': seg_areas}

    def _get_comp_id(self):
        """Return the competition id, with the salt appended when 'use_salt' is set."""
        if self.config['use_salt']:
            return self._comp_id + '_' + self._salt
        return self._comp_id

    def _get_voc_results_file_template(self):
        """
        Return the results file path with a '{:s}' placeholder for the class
        name.  The results directory is created when it does not exist.
        """
        # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
        filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
        filedir = os.path.join(self._devkit_path, 'results', 'VOC' + self._year, 'Main')
        if not os.path.exists(filedir):
            os.makedirs(filedir)
        path = os.path.join(filedir, filename)
        return path

    def _write_voc_results_file(self, all_boxes):
        """Write one detection results file per non-background class.

        `all_boxes[cls_ind][im_ind]` is either an empty list or an array with
        one detection per row: four box coordinates followed by the score in
        the last column.
        """
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print('Writing {} VOC results file'.format(cls))
            filename = self._get_voc_results_file_template().format(cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_index):
                    dets = all_boxes[cls_ind][im_ind]
                    # A length check covers both [] and empty arrays; the
                    # former `dets == []` is an element-wise comparison for
                    # arrays and raises on shape mismatch in recent NumPy.
                    if len(dets) == 0:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],
                                       dets[k, 0] + 1, dets[k, 1] + 1,
                                       dets[k, 2] + 1, dets[k, 3] + 1))

    def _do_python_eval(self, output_dir='output'):
        """Run `voc_eval` per class, print the APs and pickle the PR data.

        One '<class>_pr.pkl' file with 'rec', 'prec' and 'ap' is written to
        `output_dir`, which is created when missing.
        """
        annopath = os.path.join(
            self._devkit_path,
            'VOC' + self._year,
            'Annotations',
            '{:s}.xml')
        imagesetfile = os.path.join(
            self._devkit_path,
            'VOC' + self._year,
            'ImageSets',
            'Main',
            self._image_set + '.txt')
        cachedir = os.path.join(self._devkit_path, 'annotations_cache')
        aps = []
        # The PASCAL VOC metric changed in 2010
        use_07_metric = int(self._year) < 2010
        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
        if not os.path.isdir(output_dir):
            # makedirs also creates missing parent directories.
            os.makedirs(output_dir)
        for i, cls in enumerate(self._classes):
            if cls == '__background__':
                continue
            filename = self._get_voc_results_file_template().format(cls)
            rec, prec, ap = voc_eval(
                filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
                use_07_metric=use_07_metric)
            aps += [ap]
            print('AP for {} = {:.4f}'.format(cls, ap))
            with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
                pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
        print('Mean AP = {:.4f}'.format(np.mean(aps)))
        print('~~~~~~~~')
        print('Results:')
        for ap in aps:
            print('{:.3f}'.format(ap))
        print('{:.3f}'.format(np.mean(aps)))
        print('~~~~~~~~')
        print('')
        print('--------------------------------------------------------------')
        print('Results computed with the **unofficial** Python eval code.')
        print('Results should be very close to the official MATLAB eval code.')
        print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
        print('-- Thanks, The Management')
        print('--------------------------------------------------------------')

    def _do_matlab_eval(self, output_dir='output'):
        """Run the MATLAB `voc_eval` wrapper through the shell.

        The command is assembled from configuration values and instance
        paths and executed with shell=True, so these must be trusted.  The
        exit status of the command is not inspected.
        """
        print('-----------------------------------------------------')
        print('Computing results with the official MATLAB eval code.')
        print('-----------------------------------------------------')
        path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets',
                            'VOCdevkit-matlab-wrapper')
        cmd = 'cd {} && '.format(path)
        cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
        cmd += '-r "dbstop if error; '
        cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
            .format(self._devkit_path, self._get_comp_id(),
                    self._image_set, output_dir)
        print('Running:\n{}'.format(cmd))
        subprocess.call(cmd, shell=True)

    def evaluate_detections(self, all_boxes, output_dir):
        """Write the result files, evaluate them and optionally clean up.

        The MATLAB evaluation runs only when config['matlab_eval'] is set;
        the per-class result files are removed when config['cleanup'] is set.
        """
        self._write_voc_results_file(all_boxes)
        self._do_python_eval(output_dir)
        if self.config['matlab_eval']:
            self._do_matlab_eval(output_dir)
        if self.config['cleanup']:
            for cls in self._classes:
                if cls == '__background__':
                    continue
                filename = self._get_voc_results_file_template().format(cls)
                os.remove(filename)

    def competition_mode(self, on):
        """Competition mode disables both the salt and the cleanup step."""
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True


if __name__ == '__main__':
    # Manual smoke test: build the VOC2007 trainval roidb, then open an
    # interactive IPython shell with `d` and `res` in scope.
    d = pascal_voc('trainval', '2007')
    res = d.roidb
    from IPython import embed
    embed()


================================================
FILE: lib/datasets/pascal_voc_rbg.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
import model.utils.cython_bbox
import pickle
import subprocess
import uuid
from .voc_eval import voc_eval
from model.utils.config import cfg
import pdb


class pascal_voc(imdb):
  """PASCAL VOC detection dataset for one year / image set.

  Resolves the devkit layout on disk, loads ground-truth boxes from the XML
  annotations, and writes and evaluates per-class detection result files.
  """

  def __init__(self, image_set, year, devkit_path=None):
    """Set up paths, the class list and the default config.

    Args:
      image_set: name of the split file under ImageSets/Main (no '.txt').
      year: dataset year as a string, e.g. '2007'.
      devkit_path: devkit root; defaults to cfg.DATA_DIR/VOCdevkit<year>.

    Raises:
      AssertionError: if the image-set file, the devkit directory or the
        data directory does not exist.
    """
    imdb.__init__(self, 'voc_' + year + '_' + image_set)
    self._year = year
    self._image_set = image_set
    self._devkit_path = self._get_default_path() if devkit_path is None \
      else devkit_path

    self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
    self._classes = ('__background__',  # always index 0
                     'aeroplane', 'bicycle', 'bird', 'boat',
                     'bottle', 'bus', 'car', 'cat', 'chair',
                     'cow', 'diningtable', 'dog', 'horse',
                     'motorbike', 'person', 'pottedplant',
                     'sheep', 'sofa', 'train', 'tvmonitor')
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._image_ext = '.jpg'
    self._image_index = self._load_image_set_index()
    # Default to roidb handler
    self._roidb_handler = self.gt_roidb
    # Random salt appended to the competition id when config['use_salt'] is
    # set (see _get_comp_id).
    self._salt = str(uuid.uuid4())
    self._comp_id = 'comp4'

    # PASCAL specific config options
    self.config = {'cleanup': True,       # delete result files after eval
                   'use_salt': True,      # salt the result file names
                   'use_diff': False,     # keep objects marked 'difficult'
                   'matlab_eval': False,  # also run the MATLAB evaluation
                   'rpn_file': None}      # pickle of RPN proposals

    assert os.path.exists(self._devkit_path), \
      'VOCdevkit path does not exist: {}'.format(self._devkit_path)
    assert os.path.exists(self._data_path), \
      'Path does not exist: {}'.format(self._data_path)

  def image_path_at(self, i):
    """
    Return the absolute path to image i in the image sequence.
    """
    return self.image_path_from_index(self._image_index[i])

  def image_path_from_index(self, index):
    """
    Construct an image path from the image's "index" identifier.

    Raises AssertionError if the resulting file does not exist.
    """
    image_path = os.path.join(self._data_path, 'JPEGImages',
                              index + self._image_ext)
    assert os.path.exists(image_path), \
      'Path does not exist: {}'.format(image_path)
    return image_path

  def _load_image_set_index(self):
    """
    Load the indexes listed in this dataset's image set file.

    Returns a list with one stripped line per entry of the file.
    """
    # Example path to image set file:
    # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
    image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                  self._image_set + '.txt')

    assert os.path.exists(image_set_file), \
      'Path does not exist: {}'.format(image_set_file)
    with open(image_set_file) as f:
      return [line.strip() for line in f]

  def _get_default_path(self):
    """
    Return the default path where PASCAL VOC is expected to be installed.
    """
    return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year)

  def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
      # NOTE: unpickling executes arbitrary code; only use trusted caches.
      with open(cache_file, 'rb') as fid:
        try:
          roidb = pickle.load(fid)
        except UnicodeDecodeError:
          # Cache written by Python 2: retry with byte strings. The failed
          # attempt already consumed part of the stream, so rewind first.
          fid.seek(0)
          roidb = pickle.load(fid, encoding='bytes')
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb

  def rpn_roidb(self):
    """
    Return the roidb built from RPN proposals.

    Ground truth is merged in unless this is the 'test' split of a year
    other than 2007.
    """
    if int(self._year) == 2007 or self._image_set != 'test':
      gt_roidb = self.gt_roidb()
      rpn_roidb = self._load_rpn_roidb(gt_roidb)
      roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb)
    else:
      roidb = self._load_rpn_roidb(None)

    return roidb

  def _load_rpn_roidb(self, gt_roidb):
    """
    Load the proposal pickle named by config['rpn_file'] and turn it into a
    roidb via create_roidb_from_box_list.
    """
    filename = self.config['rpn_file']
    print('loading {}'.format(filename))
    assert os.path.exists(filename), \
      'rpn data not found at: {}'.format(filename)
    with open(filename, 'rb') as f:
      box_list = pickle.load(f)
    return self.create_roidb_from_box_list(box_list, gt_roidb)

  def _load_pascal_annotation(self, index):
    """
    Load image and bounding boxes info from XML file in the PASCAL VOC
    format.

    Returns a dict with keys 'boxes', 'gt_classes', 'gt_overlaps' (sparse),
    'flipped' and 'seg_areas'.
    """
    filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
    tree = ET.parse(filename)
    objs = tree.findall('object')
    if not self.config['use_diff']:
      # Exclude the samples labeled as difficult
      objs = [obj for obj in objs if int(obj.find('difficult').text) == 0]
    num_objs = len(objs)

    # NOTE: boxes are stored unsigned, so a coordinate of 0 in the XML would
    # wrap around after the 1-based -> 0-based shift below.
    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
    # "Seg" area for pascal is just the box area
    seg_areas = np.zeros((num_objs), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
      bbox = obj.find('bndbox')
      # Make pixel indexes 0-based
      x1, y1, x2, y2 = [float(bbox.find(tag).text) - 1
                        for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
      cls = self._class_to_ind[obj.find('name').text.lower().strip()]
      boxes[ix, :] = [x1, y1, x2, y2]
      gt_classes[ix] = cls
      overlaps[ix, cls] = 1.0
      seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas}

  def _get_comp_id(self):
    """Return the competition id, salted when config['use_salt'] is set."""
    if self.config['use_salt']:
      return self._comp_id + '_' + self._salt
    return self._comp_id

  def _get_voc_results_file_template(self):
    """Return the result-file path with a '{:s}' slot for the class name."""
    # VOCdevkit/results/VOC2007/Main/<comp_id>_det_test_aeroplane.txt
    filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'
    return os.path.join(
      self._devkit_path,
      'results',
      'VOC' + self._year,
      'Main',
      filename)

  def _write_voc_results_file(self, all_boxes):
    """
    Write one detection file per foreground class.

    all_boxes[cls_ind][im_ind] is either an empty list or an array whose rows
    are (x1, y1, x2, y2, ..., score); the score is read from the last column.
    """
    for cls_ind, cls in enumerate(self.classes):
      if cls == '__background__':
        continue
      print('Writing {} VOC results file'.format(cls))
      filename = self._get_voc_results_file_template().format(cls)
      with open(filename, 'wt') as f:
        for im_ind, index in enumerate(self.image_index):
          dets = all_boxes[cls_ind][im_ind]
          # Length test instead of `dets == []`: comparing an ndarray with a
          # list is elementwise and is not a reliable emptiness check.
          if len(dets) == 0:
            continue
          # the VOCdevkit expects 1-based indices
          for det in dets:
            f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                    format(index, det[-1],
                           det[0] + 1, det[1] + 1,
                           det[2] + 1, det[3] + 1))

  def _do_python_eval(self, output_dir='output'):
    """
    Score the written result files with voc_eval, print per-class and mean
    AP, and pickle each class's precision/recall data into output_dir.
    """
    annopath = os.path.join(
      self._devkit_path,
      'VOC' + self._year,
      'Annotations',
      '{:s}.xml')
    imagesetfile = os.path.join(
      self._devkit_path,
      'VOC' + self._year,
      'ImageSets',
      'Main',
      self._image_set + '.txt')
    cachedir = os.path.join(self._devkit_path, 'annotations_cache')
    aps = []
    # The PASCAL VOC metric changed in 2010
    use_07_metric = int(self._year) < 2010
    print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    if not os.path.isdir(output_dir):
      # makedirs so that a nested output directory also works.
      os.makedirs(output_dir)
    for cls in self._classes:
      if cls == '__background__':
        continue
      filename = self._get_voc_results_file_template().format(cls)
      rec, prec, ap = voc_eval(
        filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
        use_07_metric=use_07_metric)
      aps.append(ap)
      print('AP for {} = {:.4f}'.format(cls, ap))
      with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
        pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
    print('Mean AP = {:.4f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('Results:')
    for ap in aps:
      print('{:.3f}'.format(ap))
    print('{:.3f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('')
    print('--------------------------------------------------------------')
    print('Results computed with the **unofficial** Python eval code.')
    print('Results should be very close to the official MATLAB eval code.')
    print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
    print('-- Thanks, The Management')
    print('--------------------------------------------------------------')

  def _do_matlab_eval(self, output_dir='output'):
    """
    Run the MATLAB voc_eval wrapper in a subprocess on the written results.
    """
    print('-----------------------------------------------------')
    print('Computing results with the official MATLAB eval code.')
    print('-----------------------------------------------------')
    path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets',
                        'VOCdevkit-matlab-wrapper')
    cmd = 'cd {} && '.format(path)
    cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
    cmd += '-r "dbstop if error; '
    cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
      .format(self._devkit_path, self._get_comp_id(),
              self._image_set, output_dir)
    print('Running:\n{}'.format(cmd))
    # NOTE(review): the command is a shell string built from paths; it is
    # only safe while those paths come from trusted configuration.
    subprocess.call(cmd, shell=True)

  def evaluate_detections(self, all_boxes, output_dir):
    """
    Write the detections, evaluate them, and remove the result files again
    when config['cleanup'] is set.
    """
    self._write_voc_results_file(all_boxes)
    self._do_python_eval(output_dir)
    if self.config['matlab_eval']:
      self._do_matlab_eval(output_dir)
    if self.config['cleanup']:
      for cls in self._classes:
        if cls == '__background__':
          continue
        filename = self._get_voc_results_file_template().format(cls)
        os.remove(filename)

  def competition_mode(self, on):
    """
    Turn competition mode on or off: when on, result files are neither
    salted nor cleaned up; when off, both are re-enabled.
    """
    if on:
      self.config['use_salt'] = False
      self.config['cleanup'] = False
    else:
      self.config['use_salt'] = True
      self.config['cleanup'] = True


if __name__ == '__main__':
  # Manual smoke test that ends in an IPython shell.
  # NOTE(review): this import rebinds `pascal_voc` to the class from
  # datasets.pascal_voc, so the class defined in this file is not the one
  # exercised below -- confirm that this is intended.
  from datasets.pascal_voc import pascal_voc

  d = pascal_voc('trainval', '2007')
  res = d.roidb
  from IPython import embed;

  embed()


================================================
FILE: lib/datasets/tools/mcg_munge.py
================================================
from __future__ import print_function
import os
import sys

"""Hacky tool to convert file system layout of MCG boxes downloaded from
http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
so that it's consistent with those computed by Jan Hosang (see:
http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
  computing/research/object-recognition-and-scene-understanding/how-
  good-are-detection-proposals-really/)

NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
"""

def munge(src_dir):
    """Move every file in *src_dir* into the nested MCG/mat/ layout.

    The destination is relative to the current working directory:
    ``MCG/mat/<first 14 chars of stem>/<first 22 chars of stem>/<file>``.
    """
    # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
    # want:      ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat
    for fn in os.listdir(src_dir):
        stem = os.path.splitext(fn)[0]
        # e.g. COCO_val2014_0 / COCO_val2014_000000447 / <full name>.mat
        dst_dir = os.path.join('MCG', 'mat', stem[:14], stem[:22])
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)

if __name__ == '__main__':
    # The first command-line argument is the source directory and should
    # look something like 'MCG-COCO-val2014-boxes'.
    munge(sys.argv[1])


================================================
FILE: lib/datasets/vg.py
================================================
from __future__ import print_function
from __future__ import absolute_import
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import gzip
import PIL
import json
from .vg_eval import vg_eval
from model.utils.config import cfg
import pickle
import pdb
# Python 2/3 compatibility shim: if the name `xrange` does not resolve,
# bind it to `range` so the rest of the module can keep calling `xrange`.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3


class vg(imdb):
    """Visual Genome dataset: objects, attributes and relations.

    Vocabularies are read from ``cfg.DATA_DIR/genome/<version>/``, the
    annotations from ``cfg.DATA_DIR/genome/xml/`` and the images from
    ``cfg.DATA_DIR/vg/``.
    """

    def __init__(self, version, image_set):
        """Load the vocabularies and the (cached) image index.

        Args:
            version: name of the vocabulary sub-directory under the data path.
            image_set: split name; 'minitrain', 'smalltrain', 'minival' and
                'smallval' are truncated views of train.txt / val.txt.
        """
        imdb.__init__(self, 'vg_' + version + '_' + image_set)
        self._version = version
        self._image_set = image_set
        self._data_path = os.path.join(cfg.DATA_DIR, 'genome')
        self._img_path = os.path.join(cfg.DATA_DIR, 'vg')
        # VG specific config options
        self.config = {'cleanup': False}

        # Index 0 of each vocabulary is a reserved "nothing" entry.
        self._classes, self._class_to_ind = self._load_vocab(
            'objects_vocab.txt', '__background__')
        self._attributes, self._attribute_to_ind = self._load_vocab(
            'attributes_vocab.txt', '__no_attribute__')
        self._relations, self._relation_to_ind = self._load_vocab(
            'relations_vocab.txt', '__no_relation__')

        self._image_ext = '.jpg'
        index_cache = os.path.join(
            self._data_path, "vg_image_index_{}.p".format(self._image_set))
        dir_cache = os.path.join(
            self._data_path, "vg_id_to_dir_{}.p".format(self._image_set))
        if os.path.exists(index_cache) and os.path.exists(dir_cache):
            with open(index_cache, 'rb') as fp:
                self._image_index = pickle.load(fp)
            with open(dir_cache, 'rb') as fp:
                self._id_to_dir = pickle.load(fp)
        else:
            # Either cache is missing: rebuild both so they stay in sync.
            self._image_index, self._id_to_dir = self._load_image_set_index()
            with open(index_cache, 'wb') as fp:
                pickle.dump(self._image_index, fp)
            with open(dir_cache, 'wb') as fp:
                pickle.dump(self._id_to_dir, fp)

        self._roidb_handler = self.gt_roidb

    def _load_vocab(self, vocab_file, null_name):
        """Read one vocabulary file into (names, name -> index).

        Each line holds comma-separated aliases for one entry; the first
        alias becomes the canonical name and every alias maps to the same
        1-based index. ``null_name`` occupies index 0.
        """
        names = [null_name]
        name_to_ind = {null_name: 0}
        path = os.path.join(self._data_path, self._version, vocab_file)
        with open(path) as f:
            for ind, line in enumerate(f, 1):
                aliases = [n.lower().strip() for n in line.split(',')]
                names.append(aliases[0])
                for alias in aliases:
                    name_to_ind[alias] = ind
        return names, name_to_ind

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_id_at(self, i):
        """
        Return the id used for image i, which is the position i itself
        (not the entry of the image index).
        """
        return i

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.

        Raises AssertionError if the resulting file does not exist.
        """
        folder = self._id_to_dir[index]
        image_path = os.path.join(self._img_path, folder,
                                  str(index) + self._image_ext)
        assert os.path.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def _image_split_path(self):
        """Return the split file backing the current image set."""
        subset_sources = {'minitrain': 'train.txt',
                          'smalltrain': 'train.txt',
                          'minival': 'val.txt',
                          'smallval': 'val.txt'}
        split_file = subset_sources.get(self._image_set,
                                        self._image_set + '.txt')
        return os.path.join(self._data_path, split_file)

    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.

        Returns (image_index, id_to_dir): the ids of all images that have an
        annotation file with at least one in-vocabulary object, and a map
        from image id to the first path component of its image file.
        """
        subset_limits = {'minitrain': 1000,
                         'smalltrain': 20000,
                         'minival': 100,
                         'smallval': 2000}
        training_split_file = self._image_split_path()
        assert os.path.exists(training_split_file), \
            'Path does not exist: {}'.format(training_split_file)
        with open(training_split_file) as f:
            metadata = f.readlines()
        limit = subset_limits.get(self._image_set)
        if limit is not None:
            metadata = metadata[:limit]

        image_index = []
        id_to_dir = {}
        for line in metadata:
            im_file, ann_file = line.split()
            image_id = int(ann_file.split('/')[-1].split('.')[0])
            filename = self._annotation_path(image_id)
            # Some images have no bboxes after object filtering, so there
            # is no xml annotation for these.
            if not os.path.exists(filename):
                continue
            # We have to actually load and check these to make sure they
            # have at least one object actually in vocab
            tree = ET.parse(filename)
            if any(obj.find('name').text.lower().strip() in self._class_to_ind
                   for obj in tree.findall('object')):
                image_index.append(image_id)
                id_to_dir[image_id] = im_file.split('/')[0]
        return image_index, id_to_dir

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a gzip-compressed cache file to
        speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            # NOTE: unpickling executes arbitrary code; only use trusted
            # cache files.
            with gzip.open(cache_file, 'rb') as fid:
                roidb = pickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = [self._load_vg_annotation(index)
                    for index in self.image_index]
        with gzip.open(cache_file, 'wb') as fid:
            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        return gt_roidb

    def _get_size(self, index):
        """Return the (width, height) of the image with the given index."""
        # `import PIL` alone does not guarantee that the Image submodule is
        # loaded, so import it explicitly here.
        from PIL import Image
        return Image.open(self.image_path_from_index(index)).size

    def _annotation_path(self, index):
        """Return the path of the XML annotation for the given image id."""
        return os.path.join(self._data_path, 'xml', str(index) + '.xml')

    def _load_vg_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.

        Objects whose name is not in the object vocabulary are dropped; all
        per-object arrays are trimmed to the objects that were kept.

        Returns a dict with keys 'boxes', 'gt_classes', 'gt_attributes'
        (sparse), 'gt_relations' (rows of subject index, relation index,
        object index), 'gt_overlaps' (sparse), 'width', 'height', 'flipped'
        and 'seg_areas'.
        """
        # Max of 16 attributes are observed in the data
        max_attributes = 16

        width, height = self._get_size(index)
        filename = self._annotation_path(index)
        tree = ET.parse(filename)
        objs = tree.findall('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_attributes = np.zeros((num_objs, max_attributes), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        obj_dict = {}  # XML object_id -> row index of the kept object
        ix = 0
        for obj in objs:
            obj_name = obj.find('name').text.lower().strip()
            if obj_name not in self._class_to_ind:
                continue
            bbox = obj.find('bndbox')
            # Clamp the box to the image.
            x1 = max(0, float(bbox.find('xmin').text))
            y1 = max(0, float(bbox.find('ymin').text))
            x2 = min(width - 1, float(bbox.find('xmax').text))
            y2 = min(height - 1, float(bbox.find('ymax').text))
            # If bboxes are not positive, just give whole image coords
            # (there are a few examples)
            if x2 < x1 or y2 < y1:
                print('Failed bbox in %s, object %s' % (filename, obj_name))
                x1 = 0
                y1 = 0
                x2 = width - 1
                y2 = height - 1
            cls = self._class_to_ind[obj_name]
            obj_dict[obj.find('object_id').text] = ix

            # Record up to max_attributes in-vocabulary attributes.
            n = 0
            for att in obj.findall('attribute'):
                att_name = att.text.lower().strip()
                if att_name in self._attribute_to_ind:
                    gt_attributes[ix, n] = self._attribute_to_ind[att_name]
                    n += 1
                if n >= max_attributes:
                    break

            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
            ix += 1

        # Trim every per-object array to the kept objects so that their
        # lengths agree when some objects were out of vocabulary.
        boxes = boxes[:ix, :]
        gt_classes = gt_classes[:ix]
        gt_attributes = gt_attributes[:ix, :]
        overlaps = overlaps[:ix, :]
        seg_areas = seg_areas[:ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_attributes = scipy.sparse.csr_matrix(gt_attributes)

        gt_relations = set()  # Avoid duplicates
        for rel in tree.findall('relation'):
            pred = rel.find('predicate').text
            if not pred:  # One is empty
                continue
            pred = pred.lower().strip()
            if pred not in self._relation_to_ind:
                continue
            try:
                triple = (obj_dict[rel.find('subject_id').text],
                          self._relation_to_ind[pred],
                          obj_dict[rel.find('object_id').text])
            except (KeyError, AttributeError):
                # Subject/object missing or not among the kept objects.
                continue
            gt_relations.add(triple)
        gt_relations = np.array(list(gt_relations), dtype=np.int32)

        return {'boxes': boxes,
                'gt_classes': gt_classes,
                'gt_attributes': gt_attributes,
                'gt_relations': gt_relations,
                'gt_overlaps': overlaps,
                'width': width,
                'height': height,
                'flipped': False,
                'seg_areas': seg_areas}

    def evaluate_detections(self, all_boxes, output_dir):
        """Write, evaluate and (if config['cleanup']) delete object results."""
        self._write_voc_results_file(self.classes, all_boxes, output_dir)
        self._do_python_eval(output_dir)
        if self.config['cleanup']:
            for cls in self._classes:
                if cls == '__background__':
                    continue
                filename = self._get_vg_results_file_template(output_dir).format(cls)
                os.remove(filename)

    def evaluate_attributes(self, all_boxes, output_dir):
        """Write, evaluate and (if config['cleanup']) delete attribute results."""
        self._write_voc_results_file(self._attributes, all_boxes, output_dir)
        self._do_python_eval(output_dir, eval_attributes=True)
        if self.config['cleanup']:
            for cls in self._attributes:
                if cls == '__no_attribute__':
                    continue
                filename = self._get_vg_results_file_template(output_dir).format(cls)
                os.remove(filename)

    def _get_vg_results_file_template(self, output_dir):
        """Return the result-file path with a '{:s}' slot for the class name."""
        filename = 'detections_' + self._image_set + '_{:s}.txt'
        return os.path.join(output_dir, filename)

    def _write_voc_results_file(self, classes, all_boxes, output_dir):
        """Write one detection file per entry of *classes* (bar background).

        all_boxes[cls_ind][im_ind] is either an empty list or an array whose
        rows are (x1, y1, x2, y2, ..., score); the score is read from the
        last column.
        """
        for cls_ind, cls in enumerate(classes):
            if cls == '__background__':
                continue
            print('Writing "{}" vg results file'.format(cls))
            filename = self._get_vg_results_file_template(output_dir).format(cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_index):
                    dets = all_boxes[cls_ind][im_ind]
                    # Length test instead of `dets == []`: comparing an
                    # ndarray with a list is elementwise and is not a
                    # reliable emptiness check.
                    if len(dets) == 0:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for det in dets:
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(str(index), det[-1],
                                       det[0] + 1, det[1] + 1,
                                       det[2] + 1, det[3] + 1))

    def _do_python_eval(self, output_dir, pickle=True, eval_attributes=False):
        """Score the written result files and report AP statistics.

        Args:
            output_dir: directory holding the result files; per-class
                precision/recall pickles and the threshold file go here too.
            pickle: when true, dump each class's evaluation data to
                ``<class>_pr.pkl``.
            eval_attributes: evaluate attributes instead of object classes.
        """
        # The `pickle` parameter shadows the module of the same name, so
        # bind the module under a separate local name.
        import pickle as pickle_module

        # We re-use parts of the pascal voc python code for visual genome
        aps = []
        nposs = []
        thresh = []
        # The PASCAL VOC metric changed in 2010
        use_07_metric = False
        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
        if not os.path.isdir(output_dir):
            # makedirs so that a nested output directory also works.
            os.makedirs(output_dir)
        # Load ground truth
        gt_roidb = self.gt_roidb()
        classes = self._attributes if eval_attributes else self._classes
        for i, cls in enumerate(classes):
            if cls == '__background__' or cls == '__no_attribute__':
                continue
            filename = self._get_vg_results_file_template(output_dir).format(cls)
            rec, prec, ap, scores, npos = vg_eval(
                filename, gt_roidb, self.image_index, i, ovthresh=0.5,
                use_07_metric=use_07_metric, eval_attributes=eval_attributes)

            # Determine per class detection thresholds that maximise f score.
            # vg_eval returns scalar zeros when there are no detections, so
            # only use the curve when it really is a non-empty array.
            if npos > 1 and np.ndim(scores) > 0 and np.size(scores) > 0:
                with np.errstate(divide='ignore', invalid='ignore'):
                    f_score = np.nan_to_num((prec * rec) / (prec + rec))
                thresh.append(scores[np.argmax(f_score)])
            else:
                thresh.append(0)
            aps.append(ap)
            nposs.append(float(npos))
            print('AP for {} = {:.4f} (npos={:,})'.format(cls, ap, npos))
            if pickle:
                with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as fid:
                    pickle_module.dump({'rec': rec, 'prec': prec, 'ap': ap,
                                        'scores': scores, 'npos': npos}, fid)

        # Set thresh to mean for classes with poor results. Force a float
        # array so the mean can be written back even if every entry was 0.
        thresh = np.array(thresh, dtype=np.float64)
        usable = thresh[thresh != 0]
        avg_thresh = np.mean(usable) if usable.size else 0.0
        thresh[thresh == 0] = avg_thresh
        if eval_attributes:
            filename = 'attribute_thresholds_' + self._image_set + '.txt'
        else:
            filename = 'object_thresholds_' + self._image_set + '.txt'
        path = os.path.join(output_dir, filename)
        with open(path, 'wt') as f:
            # classes[0] is the reserved null entry and has no threshold.
            for cls, cls_thresh in zip(classes[1:], thresh):
                f.write('{:s} {:.3f}\n'.format(cls, cls_thresh))

        weights = np.array(nposs)
        weights /= weights.sum()
        print('Mean AP = {:.4f}'.format(np.mean(aps)))
        print('Weighted Mean AP = {:.4f}'.format(np.average(aps, weights=weights)))
        print('Mean Detection Threshold = {:.3f}'.format(avg_thresh))
        print('~~~~~~~~')
        print('Results:')
        for ap, npos in zip(aps, nposs):
            print('{:.3f}\t{:.3f}'.format(ap, npos))
        print('{:.3f}'.format(np.mean(aps)))
        print('~~~~~~~~')
        print('')
        print('--------------------------------------------------------------')
        print('Results computed with the **unofficial** PASCAL VOC Python eval code.')
        print('--------------------------------------------------------------')


if __name__ == '__main__':
    # Manual smoke test: build a dataset and open an IPython shell.
    # vg() needs both a vocabulary version and an image set, so the version
    # is taken from the command line; the image set defaults to 'val'.
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: vg.py VERSION [IMAGE_SET]')
    d = vg(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else 'val')
    res = d.roidb
    from IPython import embed; embed()


================================================
FILE: lib/datasets/vg_eval.py
================================================
from __future__ import absolute_import
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

import xml.etree.ElementTree as ET
import os
import numpy as np
from .voc_eval import voc_ap

def vg_eval(detpath,
            gt_roidb,
            image_index,
            classindex,
            ovthresh=0.5,
            use_07_metric=False,
            eval_attributes=False):
    """rec, prec, ap, sorted_scores, npos = vg_eval(
                                detpath,
                                gt_roidb,
                                image_index,
                                classindex,
                                [ovthresh],
                                [use_07_metric],
                                [eval_attributes])

    Top level function that does the Visual Genome evaluation for one
    category (or one attribute).

    detpath: Path to the detections file. Each line is
        "<image id> <score> <x1> <y1> <x2> <y2>", separated by single spaces.
    gt_roidb: List of ground truth structs, aligned with image_index. Each
        struct provides 'boxes' plus 'gt_classes' (or 'gt_attributes', an
        object with a toarray() method, when eval_attributes is set).
    image_index: List of image ids; str(id) must match the ids in detpath.
    classindex: Category (or attribute) index to evaluate.
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    [eval_attributes]: Match ground truth on 'gt_attributes' instead of
        'gt_classes' (default False)

    Returns (rec, prec, ap, sorted_scores, npos). When there is no ground
    truth for the class, or the detections file is empty, the first four
    entries are 0 and only npos is meaningful.
    """
    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for item, imagename in zip(gt_roidb, image_index):
        if eval_attributes:
            attributes = item['gt_attributes'].toarray()
            rows = np.where(np.any(attributes == classindex, axis=1))[0]
        else:
            rows = np.where(item['gt_classes'] == classindex)[0]
        bbox = item['boxes'][rows, :]
        # No box is flagged as difficult here, so every ground truth box
        # counts as a positive.  The builtin ``bool`` is used because the
        # ``np.bool`` alias no longer exists in current NumPy releases.
        difficult = np.zeros((bbox.shape[0],), dtype=bool)
        det = [False] * bbox.shape[0]
        npos += int(np.sum(~difficult))
        class_recs[str(imagename)] = {'bbox': bbox,
                                      'difficult': difficult,
                                      'det': det}
    if npos == 0:
        # No ground truth examples
        return 0, 0, 0, 0, npos

    # read dets
    with open(detpath, 'r') as f:
        lines = f.readlines()
    if len(lines) == 0:
        # No detection examples
        return 0, 0, 0, 0, npos

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence (highest first)
    sorted_ind = np.argsort(-confidence)
    sorted_scores = -np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection (the +1 treats coordinates as inclusive pixels)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    # first detection to claim this ground truth box
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # duplicate detection of an already matched box
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, sorted_scores, npos


================================================
FILE: lib/datasets/voc_eval.py
================================================
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import xml.etree.ElementTree as ET
import os
import pickle
import numpy as np

def parse_rec(filename):
  """Read a PASCAL VOC xml annotation file.

  Returns a list with one dict per <object> element, holding its 'name',
  'pose', integer 'truncated' and 'difficult' flags, and 'bbox' as
  [xmin, ymin, xmax, ymax] integers.
  """
  def _int_field(node, tag):
    return int(node.find(tag).text)

  records = []
  for node in ET.parse(filename).findall('object'):
    box = node.find('bndbox')
    records.append({
      'name': node.find('name').text,
      'pose': node.find('pose').text,
      'truncated': _int_field(node, 'truncated'),
      'difficult': _int_field(node, 'difficult'),
      'bbox': [_int_field(box, corner)
               for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
    })
  return records


def voc_ap(rec, prec, use_07_metric=False):
  """ ap = voc_ap(rec, prec, [use_07_metric])
  Average precision from matching recall / precision arrays.
  With use_07_metric the VOC 07 11 point interpolation is used;
  otherwise (default) the area under the monotone precision envelope.
  """
  if use_07_metric:
    # 11 point metric: best precision at recall >= 0.0, 0.1, ..., 1.0
    ap = 0.
    for t in np.arange(0., 1.1, 0.1):
      reached = rec >= t
      p = np.max(prec[reached]) if np.any(reached) else 0
      ap = ap + p / 11.
    return ap

  # pad both curves with sentinel values
  mrec = np.concatenate(([0.], rec, [1.]))
  mpre = np.concatenate(([0.], prec, [0.]))

  # precision envelope: running maximum taken from the right-hand end
  mpre = np.maximum.accumulate(mpre[::-1])[::-1]

  # positions where the recall value changes
  steps = np.where(mrec[1:] != mrec[:-1])[0]

  # sum of (\Delta recall) * precision
  return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
  """rec, prec, ap = voc_eval(detpath,
                              annopath,
                              imagesetfile,
                              classname,
                              cachedir,
                              [ovthresh],
                              [use_07_metric])

  Top level function that does the PASCAL VOC evaluation.

  detpath: Path to detections
      detpath.format(classname) should produce the detection results file.
  annopath: Path to annotations
      annopath.format(imagename) should be the xml annotations file.
  imagesetfile: Text file containing the list of images, one image per line.
  classname: Category name (duh)
  cachedir: Directory for caching the annotations
  [ovthresh]: Overlap threshold (default = 0.5)
  [use_07_metric]: Whether to use VOC07's 11 point AP computation
      (default False)
  """
  # assumes detections are in detpath.format(classname)
  # assumes annotations are in annopath.format(imagename)
  # assumes imagesetfile is a text file with each line an image name
  # cachedir caches the annotations in a pickle file

  # first load gt
  if not os.path.isdir(cachedir):
    os.mkdir(cachedir)
  # NOTE(review): the file name embeds the whole imagesetfile string; when that
  # is an absolute path os.path.join drops cachedir and the cache lands next to
  # the image set file.  Kept as-is so existing caches are still found.
  cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
  # read list of images
  with open(imagesetfile, 'r') as f:
    lines = f.readlines()
  imagenames = [x.strip() for x in lines]

  if not os.path.isfile(cachefile):
    # load annotations
    recs = {}
    for i, imagename in enumerate(imagenames):
      recs[imagename] = parse_rec(annopath.format(imagename))
      if i % 100 == 0:
        print('Reading annotation for {:d}/{:d}'.format(
          i + 1, len(imagenames)))
    # save
    print('Saving cached annotations to {:s}'.format(cachefile))
    with open(cachefile, 'wb') as f:
      pickle.dump(recs, f)
  else:
    # load
    # NOTE: the cache is unpickled as-is, so cachedir must only contain
    # files written by this function.
    with open(cachefile, 'rb') as f:
      try:
        recs = pickle.load(f)
      except UnicodeDecodeError:
        # Cache written under Python 2: the failed attempt has already
        # consumed part of the stream, so rewind before retrying.
        f.seek(0)
        recs = pickle.load(f, encoding='bytes')

  # extract gt objects for this class
  class_recs = {}
  npos = 0
  for imagename in imagenames:
    R = [obj for obj in recs[imagename] if obj['name'] == classname]
    bbox = np.array([x['bbox'] for x in R])
    # builtin bool: the np.bool alias no longer exists in current NumPy
    difficult = np.array([x['difficult'] for x in R]).astype(bool)
    det = [False] * len(R)
    # difficult boxes are not counted as positives
    npos = npos + sum(~difficult)
    class_recs[imagename] = {'bbox': bbox,
                             'difficult': difficult,
                             'det': det}

  # read dets
  detfile = detpath.format(classname)
  with open(detfile, 'r') as f:
    lines = f.readlines()

  splitlines = [x.strip().split(' ') for x in lines]
  image_ids = [x[0] for x in splitlines]
  confidence = np.array([float(x[1]) for x in splitlines])
  BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

  nd = len(image_ids)
  tp = np.zeros(nd)
  fp = np.zeros(nd)

  if BB.shape[0] > 0:
    # sort by confidence (highest first)
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    for d in range(nd):
      R = class_recs[image_ids[d]]
      bb = BB[d, :].astype(float)
      ovmax = -np.inf
      BBGT = R['bbox'].astype(float)

      if BBGT.size > 0:
        # compute overlaps
        # intersection (the +1 treats coordinates as inclusive pixels)
        ixmin = np.maximum(BBGT[:, 0], bb[0])
        iymin = np.maximum(BBGT[:, 1], bb[1])
        ixmax = np.minimum(BBGT[:, 2], bb[2])
        iymax = np.minimum(BBGT[:, 3], bb[3])
        iw = np.maximum(ixmax - ixmin + 1., 0.)
        ih = np.maximum(iymax - iymin + 1., 0.)
        inters = iw * ih

        # union
        uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
               (BBGT[:, 2] - BBGT[:, 0] + 1.) *
               (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

        overlaps = inters / uni
        ovmax = np.max(overlaps)
        jmax = np.argmax(overlaps)

      if ovmax > ovthresh:
        # a match with a difficult box is neither a TP nor an FP
        if not R['difficult'][jmax]:
          if not R['det'][jmax]:
            tp[d] = 1.
            R['det'][jmax] = 1
          else:
            # duplicate detection of an already matched box
            fp[d] = 1.
      else:
        fp[d] = 1.

  # compute precision recall
  fp = np.cumsum(fp)
  tp = np.cumsum(tp)
  rec = tp / float(npos)
  # avoid divide by zero in case the first detection matches a difficult
  # ground truth
  prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
  ap = voc_ap(rec, prec, use_07_metric)

  return rec, prec, ap


================================================
FILE: lib/make.sh
================================================
#!/usr/bin/env bash
# Builds the Python extension via setup.py, then compiles the CUDA kernels of
# the nms, roi_pooling, roi_align and roi_crop modules and runs each module's
# build.py.  All paths are relative, so the script has to be started from the
# directory that contains setup.py and model/.

# CUDA_PATH=/usr/local/cuda/

export CUDA_PATH=/usr/local/cuda/
# You may also want to add the following
#export C_INCLUDE_PATH=/opt/cuda/include

export CXXFLAGS="-std=c++11"
export CFLAGS="-std=c99"

# Build the setup.py extensions in place and drop the temporary build tree.
python setup.py build_ext --inplace
rm -rf build

# Code-generation targets handed to every nvcc call below
# (compute capabilities 3.0, 3.5, 5.0, 5.2, 6.0 and 6.1).
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \
           -gencode arch=compute_35,code=sm_35 \
           -gencode arch=compute_50,code=sm_50 \
           -gencode arch=compute_52,code=sm_52 \
           -gencode arch=compute_60,code=sm_60 \
           -gencode arch=compute_61,code=sm_61 "

# compile NMS
cd model/nms/src
echo "Compiling nms kernels by nvcc..."
nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH

# back in model/nms: run the module's build script
cd ../
python build.py

# compile roi_pooling
# (cd ../../ returns to the starting directory first)
cd ../../
cd model/roi_pooling/src
echo "Compiling roi pooling kernels by nvcc..."
nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# compile roi_align
cd ../../
cd model/roi_align/src
echo "Compiling roi align kernels by nvcc..."
nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py

# compile roi_crop
cd ../../
cd model/roi_crop/src
echo "Compiling roi crop kernels by nvcc..."
nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH
cd ../
python build.py


================================================
FILE: lib/model/__init__.py
================================================


================================================
FILE: lib/model/faster_rcnn/__init__.py
================================================


================================================
FILE: lib/model/faster_rcnn/faster_rcnn.py
================================================
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.models as models
from torch.autograd import Variable
import numpy as np
from model.utils.config import cfg
from model.rpn.rpn import _RPN
from model.roi_pooling.modules.roi_pool import _RoIPooling
from model.roi_crop.modules.roi_crop import _RoICrop
from model.roi_align.modules.roi_align import RoIAlignAvg
from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer
import time
import pdb
from model.utils.net_utils import _smooth_l1_loss, _crop_pool_layer, _affine_grid_gen, _affine_theta

class _fasterRCNN(nn.Module):
    """Backbone-independent part of the Faster R-CNN detector.

    Wires together the RPN, the proposal-target layer, the RoI feature
    extractors and the detection losses.  Several members are read here but
    never defined in this class -- ``dout_base_model``, ``RCNN_base``,
    ``RCNN_cls_score``, ``RCNN_bbox_pred``, ``_head_to_tail`` and
    ``_init_modules`` -- so a subclass has to provide them
    (``dout_base_model`` before ``__init__`` runs, since it is read there).
    """
    def __init__(self, classes, class_agnostic):
        """Store the class list and build the backbone-independent layers.

        classes: sequence of classes; stored as-is, its length becomes
            ``n_classes``.
        class_agnostic: when true, forward() skips the per-class selection
            of box regression outputs.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # Both pooling layers are always built; forward() picks one of them
        # (or the crop path) from cfg.POOLING_MODE.
        # NOTE(review): 1.0/16.0 is presumably the feature-map stride of the
        # backbone -- confirm against the subclasses.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        # The crop path samples at twice the pooling size when it is followed
        # by the 2x2 max pool in forward().
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        """Run the detector on one batch.

        Returns the tuple (rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label).
        Outside training mode the four losses are 0 and rois_label is None.
        cls_prob and bbox_pred are reshaped to
        (batch_size, rois.size(1), -1) before being returned.
        """
        batch_size = im_data.size(0)

        # work on the raw tensors of the auxiliary inputs
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the per-image dimension so every row is one roi
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            # whatever the RPN returned as losses is discarded outside training
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        # NOTE(review): if cfg.POOLING_MODE is none of 'crop', 'align' or
        # 'pool', pooled_feat is never assigned and the _head_to_tail call
        # below fails on the unbound name.

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # swap the last-axis components (x, y) -> (y, x) before sampling
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            # (view as (num_rois, num_groups, 4), then gather the group
            # indexed by each roi's label)
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        # restore the per-image dimension for the caller
        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label

    def _init_weights(self):
        """Initialise the RPN layers and the two detection heads with normal
        noise; the truncated variant is selected by cfg.TRAIN.TRUNCATED."""
        def normal_init(m, mean, stddev, truncated=False):
            """
            weight initalizer: truncated normal and random normal.
            """
            # x is a parameter
            # NOTE(review): the bias is zeroed only in the non-truncated
            # branch; with truncated=True it keeps its existing values.
            if truncated:
                m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
            else:
                m.weight.data.normal_(mean, stddev)
                m.bias.data.zero_()

        normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
        normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
        # the box regressor starts with a 10x smaller spread than the rest
        normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)

    def create_architecture(self):
        """Build the subclass-specific modules, then initialise the weights."""
        self._init_modules()
        self._init_weights()


================================================
FILE: lib/model/faster_rcnn/resnet.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from model.utils.config import cfg
from model.faster_rcnn.faster_rcnn import _fasterRCNN

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import math
import torch.utils.model_zoo as model_zoo
import pdb

# Names exported by a star-import of this module.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
       'resnet152']


# Checkpoint download locations, keyed by architecture name; the resnetNN()
# constructors below look them up when called with pretrained=True.
model_urls = {
  'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth',
  'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth',
  'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth',
  'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth',
  'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth',
}

def conv3x3(in_planes, out_planes, stride=1):
  """Return a bias-free 3x3 convolution with one pixel of padding."""
  layer = nn.Conv2d(
    in_planes,
    out_planes,
    kernel_size=3,
    stride=stride,
    padding=1,
    bias=False,
  )
  return layer


class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  ``expansion`` is 1: the block outputs ``planes`` channels.  ``downsample``,
  when given, is applied to the input to produce the shortcut branch.
  """
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(BasicBlock, self).__init__()
    # only the first convolution carries the block's stride
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))

    # shortcut branch: the input itself, or its projection
    shortcut = x if self.downsample is None else self.downsample(x)

    out += shortcut
    return self.relu(out)


class Bottleneck(nn.Module):
  """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions.

  The block outputs ``planes * 4`` channels.  The stride sits on the first
  1x1 convolution; the 3x3 convolution always uses stride 1.  ``downsample``,
  when given, is applied to the input to produce the shortcut branch.
  """
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(Bottleneck, self).__init__()
    out_planes = planes * 4
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1,
                           stride=stride, bias=False)  # change
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)  # change
    self.bn2 = nn.BatchNorm2d(planes)
    self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_planes)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    # shortcut branch: the input itself, or its projection
    shortcut = x if self.downsample is None else self.downsample(x)

    out += shortcut
    return self.relu(out)


class ResNet(nn.Module):
  """ResNet classifier assembled from ``block`` instances.

  block: block class exposing an ``expansion`` attribute and the constructor
    signature ``block(inplanes, planes, stride=1, downsample=None)``.
  layers: number of blocks in each of the four stages.
  num_classes: width of the final fully connected layer.
  """
  def __init__(self, block, layers, num_classes=1000):
    super(ResNet, self).__init__()
    # running channel count, advanced by _make_layer
    self.inplanes = 64
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0,
                                ceil_mode=True)  # change
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    # it is slightly better whereas slower to set stride = 1
    # self.layer4 = self._make_layer(block, 512, layers[3], stride=1)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.avgpool = nn.AvgPool2d(7)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # convolutions: zero-mean normal with std sqrt(2 / (k_h * k_w * out));
    # batch norms: weight 1, bias 0
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        k_h, k_w = module.kernel_size[0], module.kernel_size[1]
        fan = k_h * k_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` blocks; only the first one gets ``stride`` and, when
    the shape changes, a 1x1 conv + batch norm projection for its shortcut."""
    out_planes = planes * block.expansion
    if stride != 1 or self.inplanes != out_planes:
      downsample = nn.Sequential(
        nn.Conv2d(self.inplanes, out_planes,
                  kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_planes),
      )
    else:
      downsample = None

    stage = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = out_planes
    stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

    return nn.Sequential(*stage)

  def forward(self, x):
    pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                self.layer1, self.layer2, self.layer3, self.layer4)
    for step in pipeline:
      x = step(x)

    pooled = self.avgpool(x)
    flat = pooled.view(pooled.size(0), -1)
    return self.fc(flat)


def resnet18(pretrained=False):
  """Build a ResNet-18 (BasicBlock, stage depths 2-2-2-2).
  Args:
    pretrained (bool): If True, the checkpoint at model_urls['resnet18']
      is loaded into the model before it is returned
  """
  net = ResNet(BasicBlock, [2, 2, 2, 2])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet18'])
  net.load_state_dict(weights)
  return net


def resnet34(pretrained=False):
  """Build a ResNet-34 (BasicBlock, stage depths 3-4-6-3).
  Args:
    pretrained (bool): If True, the checkpoint at model_urls['resnet34']
      is loaded into the model before it is returned
  """
  net = ResNet(BasicBlock, [3, 4, 6, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet34'])
  net.load_state_dict(weights)
  return net


def resnet50(pretrained=False):
  """Build a ResNet-50 (Bottleneck, stage depths 3-4-6-3).
  Args:
    pretrained (bool): If True, the checkpoint at model_urls['resnet50']
      is loaded into the model before it is returned
  """
  net = ResNet(Bottleneck, [3, 4, 6, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet50'])
  net.load_state_dict(weights)
  return net


def resnet101(pretrained=False):
  """Build a ResNet-101 (Bottleneck, stage depths 3-4-23-3).
  Args:
    pretrained (bool): If True, the checkpoint at model_urls['resnet101']
      is loaded into the model before it is returned
  """
  net = ResNet(Bottleneck, [3, 4, 23, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet101'])
  net.load_state_dict(weights)
  return net


def resnet152(pretrained=False):
  """Build a ResNet-152 (Bottleneck, stage depths 3-8-36-3).
  Args:
    pretrained (bool): If True, the checkpoint at model_urls['resnet152']
      is loaded into the model before it is returned
  """
  net = ResNet(Bottleneck, [3, 8, 36, 3])
  if not pretrained:
    return net
  weights = model_zoo.load_url(model_urls['resnet152'])
  net.load_state_dict(weights)
  return net

class resnet(_fasterRCNN):
  """Faster R-CNN with a ResNet backbone.

  conv1 through layer3 of the ResNet form the shared base network
  (``RCNN_base``); layer4 is applied to each pooled region
  (``RCNN_top``) and its output is averaged over the spatial dimensions
  before the classification and box-regression layers.

  classes: sequence of classes, passed on to ``_fasterRCNN``.
  num_layers: ResNet depth (18, 34, 50, 101 or 152); any other value falls
    back to ResNet-101.
  pretrained: when true, weights are read from ``model_path`` while the
    modules are built.
  class_agnostic: when true, a single 4-value box regressor is used instead
    of one per class.
  """

  def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False):
    # NOTE(review): the checkpoint path names the ResNet-101 weights whatever
    # num_layers is; set model_path before create_architecture() to use
    # another file.
    self.model_path = 'data/pretrained_model/resnet101_caffe.pth'
    # Channel count produced by layer3: 256 * block.expansion, i.e. 256 for
    # the BasicBlock depths (18, 34) and 1024 for the Bottleneck ones.
    self.dout_base_model = 256 if num_layers in (18, 34) else 1024
    self.pretrained = pretrained
    self.class_agnostic = class_agnostic
    self.num_layers = num_layers

    _fasterRCNN.__init__(self, classes, class_agnostic)

  def _init_modules(self):
    """Build the backbone, split it into base/top and freeze what is fixed."""
    builders = {18: resnet18, 34: resnet34, 50: resnet50,
                101: resnet101, 152: resnet152}
    # Only the requested depth is instantiated; unknown depths keep the
    # historical fallback to ResNet-101.
    backbone = builders.get(self.num_layers, resnet101)()

    if self.pretrained:
      print("Loading pretrained weights from %s" %(self.model_path))
      state_dict = torch.load(self.model_path)
      known = backbone.state_dict()
      # ignore checkpoint entries the backbone does not have
      backbone.load_state_dict({k: v for k, v in state_dict.items() if k in known})

    # Build resnet.
    self.RCNN_base = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu,
      backbone.maxpool, backbone.layer1, backbone.layer2, backbone.layer3)

    self.RCNN_top = nn.Sequential(backbone.layer4)

    # Width of the layer4 output (512 * block.expansion), read from the
    # backbone's own classifier so it is right for every depth.
    head_dim = backbone.fc.in_features
    self.RCNN_cls_score = nn.Linear(head_dim, self.n_classes)
    if self.class_agnostic:
      self.RCNN_bbox_pred = nn.Linear(head_dim, 4)
    else:
      self.RCNN_bbox_pred = nn.Linear(head_dim, 4 * self.n_classes)

    # Fix blocks: the stem (conv1, bn1) is always frozen
    for p in self.RCNN_base[0].parameters(): p.requires_grad=False
    for p in self.RCNN_base[1].parameters(): p.requires_grad=False

    # RCNN_base[4], [5], [6] are layer1, layer2, layer3
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS >= 3:
      for p in self.RCNN_base[6].parameters(): p.requires_grad=False
    if cfg.RESNET.FIXED_BLOCKS >= 2:
      for p in self.RCNN_base[5].parameters(): p.requires_grad=False
    if cfg.RESNET.FIXED_BLOCKS >= 1:
      for p in self.RCNN_base[4].parameters(): p.requires_grad=False

    def set_bn_fix(m):
      # batch norm parameters are never trained
      classname = m.__class__.__name__
      if classname.find('BatchNorm') != -1:
        for p in m.parameters(): p.requires_grad=False

    self.RCNN_base.apply(set_bn_fix)
    self.RCNN_top.apply(set_bn_fix)

  def train(self, mode=True):
    """Switch training mode while keeping batch norm layers in eval mode.

    Returns self, like nn.Module.train, so calls can be chained.
    """
    # Override train so that the training mode is set as we want
    nn.Module.train(self, mode)
    if mode:
      # Set fixed blocks to be in eval mode
      self.RCNN_base.eval()
      self.RCNN_base[5].train()
      self.RCNN_base[6].train()

      def set_bn_eval(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
          m.eval()

      self.RCNN_base.apply(set_bn_eval)
      self.RCNN_top.apply(set_bn_eval)
    return self

  def _head_to_tail(self, pool5):
    """Run layer4 on the pooled features and average over height and width."""
    fc7 = self.RCNN_top(pool5).mean(3).mean(2)
    return fc7


================================================
FILE: lib/model/faster_rcnn/vgg16.py
================================================
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import math
import torchvision.models as models
from model.faster_rcnn.faster_rcnn import _fasterRCNN
import pdb

class vgg16(_fasterRCNN):
  """Faster R-CNN with a VGG-16 backbone.

  The VGG feature extractor without its last layer is the shared base
  network; the VGG classifier without its last layer is applied to each
  flattened pooled region.
  """

  def __init__(self, classes, pretrained=False, class_agnostic=False):
    self.model_path = 'data/pretrained_model/vgg16_caffe.pth'
    self.dout_base_model = 512
    self.pretrained = pretrained
    self.class_agnostic = class_agnostic

    _fasterRCNN.__init__(self, classes, class_agnostic)

  def _init_modules(self):
    vgg = models.vgg16()
    if self.pretrained:
        print("Loading pretrained weights from %s" %(self.model_path))
        checkpoint = torch.load(self.model_path)
        known = vgg.state_dict()
        # ignore checkpoint entries the model does not have
        vgg.load_state_dict({name: tensor for name, tensor in checkpoint.items()
                             if name in known})

    # classifier without its final layer
    classifier_layers = list(vgg.classifier._modules.values())
    vgg.classifier = nn.Sequential(*classifier_layers[:-1])

    # not using the last maxpool layer
    feature_layers = list(vgg.features._modules.values())
    self.RCNN_base = nn.Sequential(*feature_layers[:-1])

    # Fix the layers before conv3:
    for idx in range(10):
      for param in self.RCNN_base[idx].parameters():
        param.requires_grad = False

    self.RCNN_top = vgg.classifier

    self.RCNN_cls_score = nn.Linear(4096, self.n_classes)

    # one 4-value regressor in total, or one per class
    num_box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
    self.RCNN_bbox_pred = nn.Linear(4096, num_box_outputs)

  def _head_to_tail(self, pool5):
    # flatten every pooled region to a vector before the fc layers
    flattened = pool5.view(pool5.size(0), -1)
    return self.RCNN_top(flattened)



================================================
FILE: lib/model/nms/.gitignore
================================================
*.c
*.cpp
*.so


================================================
FILE: lib/model/nms/__init__.py
================================================


================================================
FILE: lib/model/nms/_ext/__init__.py
================================================


================================================
FILE: lib/model/nms/_ext/nms/__init__.py
================================================

from torch.utils.ffi import _wrap_function
from ._nms import lib as _lib, ffi as _ffi

__all__ = []


def _import_symbols(locals):
    """Copy every attribute of the compiled ``_lib`` into ``locals``.

    Callables are wrapped with ``_wrap_function`` first; every name is also
    appended to ``__all__``.
    """
    for symbol in dir(_lib):
        attr = getattr(_lib, symbol)
        locals[symbol] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(symbol)


_import_symbols(locals())


================================================
FILE: lib/model/nms/build.py
================================================
from __future__ import print_function
import os
import torch
from torch.utils.ffi import create_extension

#this_file = os.path.dirname(__file__)

# The C wrapper, its header and the WITH_CUDA define are only added when a
# CUDA device is usable.
with_cuda = False
sources = []
headers = []
defines = []

if torch.cuda.is_available():
    print('Including CUDA code.')
    with_cuda = True
    sources.append('src/nms_cuda.c')
    headers.append('src/nms_cuda.h')
    defines.append(('WITH_CUDA', None))

# Absolute path of the kernel object expected under src/ next to this file.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
extra_objects = [
    os.path.join(this_file, fname)
    for fname in ['src/nms_cuda_kernel.cu.o']
]
print(extra_objects)

ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects,
)

if __name__ == '__main__':
    ffi.build()


================================================
FILE: lib/model/nms/make.sh
================================================
#!/usr/bin/env bash
# Compiles the NMS CUDA kernel into an object file and then runs build.py.
# Paths are relative, so the script has to be started from the directory that
# contains src/ and build.py.

# CUDA_PATH=/usr/local/cuda/

cd src
# NOTE(review): "stnm" in the message below looks like a leftover from another
# module; the file being compiled is the nms kernel.
echo "Compiling stnm kernels by nvcc..."
# Only sm_52 code is generated here.
nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52

cd ../
python build.py


================================================
FILE: lib/model/nms/nms_cpu.py
================================================
from __future__ import absolute_import

import numpy as np
import torch

def nms_cpu(dets, thresh):
    """Run greedy non-maximum suppression on the CPU with NumPy.

    Args:
        dets: 2-D tensor whose rows are ``[x1, y1, x2, y2, score]``. It may
            live on any device and may require grad; a detached CPU copy is
            used for the computation.
        thresh: IoU threshold. A box is dropped when its IoU with an
            already-kept, higher-scoring box is strictly greater than this.

    Returns:
        A ``torch.IntTensor`` with the row indices of the kept boxes, ordered
        by decreasing score.
    """
    # Tensor.numpy() raises for CUDA tensors and for tensors that require
    # grad, so normalise the input before converting.
    dets = dets.detach().cpu().numpy()
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Widths and heights use the (max - min + 1) convention throughout.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Candidate indices, highest score first.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # The best remaining box is always kept ...
        i = order.item(0)
        keep.append(i)
        rest = order[1:]

        # ... and compared against every other remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Keep only boxes that do not overlap box i too much. `inds` indexes
        # into `rest`, hence the +1 to map back into `order`.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return torch.IntTensor(keep)


================================================
FILE: lib/model/nms/nms_gpu.py
================================================
from __future__ import absolute_import
import torch
import numpy as np
from ._ext import nms
import pdb

def nms_gpu(dets, thresh):
    """Run NMS through the compiled ``nms.nms_cuda`` extension.

    Two int buffers are allocated from ``dets``: one ``(N, 1)`` tensor that
    receives the kept indices and one single-element tensor that receives
    their count. Only the filled prefix of the index buffer is returned.
    """
    n_boxes = dets.size(0)
    kept_idx = dets.new(n_boxes, 1).zero_().int()
    kept_count = dets.new(1).zero_().int()
    nms.nms_cuda(kept_idx, dets, kept_count, thresh)
    return kept_idx[:kept_count[0]]


================================================
FILE: lib/model/nms/nms_kernel.cu
================================================
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

#include "gpu_nms.hpp"
#include <vector>
#include <iostream>

// Evaluate a CUDA runtime call once and, if it did not return cudaSuccess,
// print the corresponding error string to stdout. The error is only reported;
// execution continues afterwards.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division of m by n, rounded up when there is a positive remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long. Used below both as the CUDA block
// size and as the number of boxes whose flags fit in one mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes.
// `a` and `b` each point at (at least) four floats read as x1, y1, x2, y2.
// Widths and heights are computed as (max - min + 1).
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the intersection rectangle.
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  // Clamp at zero so disjoint boxes contribute no intersection area.
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  // Areas of the two input boxes.
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  // IoU = intersection / union.
  return interS / (Sa + Sb - interS);
}

// Build the pairwise suppression bitmask used by NMS.
//
// The grid is two-dimensional: blockIdx.y selects a tile of "row" boxes and
// blockIdx.x a tile of "column" boxes, each tile holding up to threadsPerBlock
// boxes. Each thread owns one row box and writes one mask word in which bit i
// is set when the IoU between its row box and column box i of the tile is
// greater than nms_overlap_thresh.
//
//   n_boxes             number of boxes
//   nms_overlap_thresh  IoU threshold
//   dev_boxes           boxes, indexed with a stride of 5 floats per box
//   dev_mask            output, indexed as [box][column tile] with
//                       DIVUP(n_boxes, threadsPerBlock) words per box
//
// The host code in this file launches the kernel with threadsPerBlock threads
// per block; the indexing below relies on that.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // Number of valid boxes in the row / column tile (the last tile may be
  // only partially filled).
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile in shared memory: one box (5 floats) per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // Every column box must be staged before any thread reads block_boxes.
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile, compare only against boxes with a higher index
    // than the current one; this also skips the box itself.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    // One word per (box, column tile) pair.
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Make `device_id` the current CUDA device, skipping the switch when it is
// already the current one.
void _set_device(int device_id) {
  int active_device;
  CUDA_CHECK(cudaGetDevice(&active_device));
  if (active_device != device_id) {
    // The call to cudaSetDevice must come before any calls to Get, which
    // may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
  }
}

// Run NMS on `boxes_num` boxes held in host memory.
//
// The boxes are uploaded to device `device_id`, nms_kernel builds the pairwise
// suppression mask, and the mask is then scanned on the host in box order.
// Indices of the surviving boxes are written to `keep_out` and their count to
// `*num_out`.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  // Upload the boxes.
  float* boxes_dev = NULL;
  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  // One mask word per (box, column tile) pair.
  unsigned long long* mask_dev = NULL;
  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 grid(col_blocks, col_blocks);
  dim3 block(threadsPerBlock);
  nms_kernel<<<grid, block>>>(boxes_num,
                              nms_overlap_thresh,
                              boxes_dev,
                              mask_dev);

  // Bring the mask back to the host for the sequential scan.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Bit b of suppressed[w] is set once box (w * threadsPerBlock + b) has been
  // suppressed by an earlier kept box.
  std::vector<unsigned long long> suppressed(col_blocks);
  memset(&suppressed[0], 0, sizeof(unsigned long long) * col_blocks);

  int kept = 0;
  for (int box = 0; box < boxes_num; box++) {
    const int word = box / threadsPerBlock;
    const int bit = box % threadsPerBlock;

    if (suppressed[word] & (1ULL << bit)) {
      continue;
    }

    keep_out[kept++] = box;
    // Fold this box's mask row into the running suppression state.
    const unsigned long long* row = &mask_host[0] + box * col_blocks;
    for (int j = word; j < col_blocks; j++) {
      suppressed[j] |= row[j];
    }
  }
  *num_out = kept;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}


================================================
FILE: lib/model/nms/nms_wrapper.py
================================================
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import torch
from model.utils.config import cfg
if torch.cuda.is_available():
    from model.nms.nms_gpu import nms_gpu
from model.nms.nms_cpu import nms_cpu

def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    Args:
        dets: tensor of detections, one per row; forwarded unchanged to the
            selected implementation.
        thresh: overlap threshold forwarded to the implementation.
        force_cpu: when true, always use the CPU implementation.

    Returns:
        An empty list when ``dets`` has no rows; otherwise whatever the
        selected implementation returns.
    """
    if dets.shape[0] == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---

    # nms_gpu is only imported when CUDA is available and its extension takes
    # CUDA tensors, so CPU tensors are routed to the CPU implementation
    # instead of failing inside the GPU path.
    if force_cpu or not dets.is_cuda:
        return nms_cpu(dets, thresh)
    return nms_gpu(dets, thresh)


================================================
FILE: lib/model/nms/src/nms_cuda.h
================================================
// int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
//             THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);

// CUDA NMS entry point exposed through the FFI extension. nms_gpu.py calls it
// as nms.nms_cuda(keep, dets, num_out, thresh), where keep and num_out are int
// tensors created from dets, and dets holds the boxes.
// NOTE(review): the definition (src/nms_cuda.c) is not part of this header;
// confirm the meaning of the int return value there.
int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh);


================================================
FILE: lib/model/nms/src/nms_cuda_kernel.cu
================================================
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

#include <stdbool.h>
#include <stdio.h>
#include <vector>
#include <iostream>
#include "nms_cuda_kernel.h"

// Run the CUDA call XXX exactly once, print a message with the error string
// and the source line to stdout if it failed, then synchronize the device
// (the synchronize happens whether or not the call failed).
//
// The result is stored in a local first: expanding XXX a second time inside
// cudaGetErrorString() would execute a failing call again.
#define CUDA_WARN(XXX) \
    do { \
        const cudaError_t cuda_warn_status = (XXX); \
        if (cuda_warn_status != cudaSuccess) \
            std::cout << "CUDA Error: " << \
                cudaGetErrorString(cuda_warn_status) << ", at line " << __LINE__ \
                << std::endl; \
        cudaDeviceSynchronize(); \
    } while (0)

// Evaluate a CUDA runtime call once and, if it did not return cudaSuccess,
// print the corresponding error string to stdout. The error is only reported;
// execution continues afterwards.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division of m by n, rounded up when there is a positive remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long. Used below both as the CUDA block
// size and as the number of boxes whose flags fit in one mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes.
// `a` and `b` each point at (at least) four floats read as x1, y1, x2, y2.
// Widths and heights are computed as (max - min + 1).
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the intersection rectangle.
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  // Clamp at zero so disjoint boxes contribute no intersection area.
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  // Areas of the two input boxes.
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  // IoU = intersection / union.
  return interS / (Sa + Sb - interS);
}

// Build the pairwise suppression bitmask used by NMS.
//
// The grid is two-dimensional: blockIdx.y selects a tile of "row" boxes and
// blockIdx.x a tile of "column" boxes, each tile holding up to threadsPerBlock
// boxes. Each thread owns one row box and writes one mask word in which bit i
// is set when the IoU between its row box and column box i of the tile is
// greater than nms_overlap_thresh.
//
//   n_boxes             number of boxes
//   nms_overlap_thresh  IoU threshold
//   dev_boxes           boxes, indexed with a stride of 5 floats per box
//   dev_mask            output, indexed as [box][column tile] with
//                       DIVUP(n_boxes, threadsPerBlock) words per box
//
// The host code in this file launches the kernel with threadsPerBlock threads
// per block; the indexing below relies on that.
__global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
                           float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // Number of valid boxes in the row / column tile (the last tile may be
  // only partially filled).
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile in shared memory: one box (5 floats) per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // Every column box must be staged before any thread reads block_boxes.
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile, compare only against boxes with a higher index
    // than the current one; this also skips the box itself.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    // One word per (box, column tile) pair.
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Host-side driver for GPU non-maximum suppression.
//
// Copies the boxes into a scratch device buffer, launches nms_kernel to build
// the pairwise suppression bitmask, then scans the mask on the CPU in box
// order to choose the survivors. Because boxes are visited in index order, a
// box can only be suppressed by a kept box with a smaller index.
//
//   keep_out            destination for the indices of the kept boxes. It is
//                       written with cudaMemcpy(..., cudaMemcpyHostToDevice),
//                       i.e. treated as a device buffer. Only the first
//                       num_to_keep entries are written.
//   num_out             destination (same copy direction) for the kept count.
//   boxes_host          boxes_num * boxes_dim floats, source of the upload.
//   boxes_num           number of boxes.
//   boxes_dim           floats per box; nms_kernel indexes boxes with a fixed
//                       stride of 5, so this is expected to be 5.
//   nms_overlap_thresh  IoU above which a kept box suppresses a later one.
//
// NOTE(review): the FFI entry point declared in nms_cuda.h takes the boxes as
// a THCudaTensor. If `boxes_host` is in fact a device pointer, the
// cudaMemcpyHostToDevice kind used for the upload below does not match it.
// Confirm against the wrapper in nms_cuda.c.
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh) {

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  // One mask word per (box, column tile) pair.
  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);

  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Bit b of remv[w] is set once box (w * threadsPerBlock + b) has been
  // suppressed. std::vector value-initialises its elements, so every word
  // starts at zero.
  std::vector<unsigned long long> remv(col_blocks);

  // keep_out is not written directly from the host loop below, so the kept
  // indices are staged in a host buffer first.
  std::vector<int> keep_out_cpu(boxes_num);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      // original: keep_out[num_to_keep++] = i;
      keep_out_cpu[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  // Copy only the entries that were actually produced; the rest of the
  // staging buffer carries no information.
  if (num_to_keep > 0) {
    CUDA_WARN(cudaMemcpy(keep_out, &keep_out_cpu[0], num_to_keep * sizeof(int), cudaMemcpyHostToDevice));
  }

  // original: *num_out = num_to_keep;
  CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int), cudaMemcpyHostToDevice));

  // release cuda memory
  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}


================================================
FILE: lib/model/nms/src/nms_cuda_kernel.h
================================================
#ifdef __cplusplus
extern "C" {
#endif

// Host-side NMS driver implemented in nms_cuda_kernel.cu; declared with C
// linkage when included from C++.
//   keep_out            receives the indices of the kept boxes, starting at
//                       element 0
//   num_out             receives the number of kept boxes
//   boxes_host          boxes_num * boxes_dim floats describing the boxes
//   boxes_num           number of boxes
//   boxes_dim           number of floats per box
//   nms_overlap_thresh  IoU threshold used for suppression
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh);

#ifdef __cplusplus
}
#endif


================================================
FILE: lib/model/roi_align/__init__.py
================================================


================================================
FILE: lib/model/roi_align/_ext/__init__.py
================================================


================================================
FILE: lib/model/roi_align/_ext/roi_align/__init__.py
================================================

from torch.utils.ffi import _wrap_function
from ._roi_align import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    """Re-export every attribute of the compiled ``_lib`` into *locals*.

    Callable attributes are wrapped with ``_wrap_function`` (bound to
    ``_ffi``); everything else is copied unchanged. Each exported name is
    also appended to the module's ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)

_import_symbols(locals())


================================================
FILE: lib/model/roi_align/build.py
================================================
from __future__ import print_function
import os
import torch
from torch.utils.ffi import create_extension

# The CPU implementation is always part of the extension.
sources = ['src/roi_align.c']
headers = ['src/roi_align.h']
extra_objects = []
#sources = []
#headers = []
defines = []
with_cuda = False

# Absolute directory of this script, used to build the absolute path of the
# object file that make.sh compiles with nvcc before running this script.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)

# Add the CUDA wrapper and the precompiled kernel only when CUDA is available.
if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_align_cuda.c']
    headers += ['src/roi_align_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True
    
    extra_objects = ['src/roi_align_kernel.cu.o']
    extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

# Describe the `_ext.roi_align` FFI extension; nothing is compiled until build().
ffi = create_extension(
    '_ext.roi_align',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()


================================================
FILE: lib/model/roi_align/functions/__init__.py
================================================


================================================
FILE: lib/model/roi_align/functions/roi_align.py
================================================
import torch
from torch.autograd import Function
from .._ext import roi_align


# TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """RoI Align as an autograd function backed by the compiled extension.

    Configuration and the tensors needed for the backward pass are kept on
    the instance, so a new instance is created for every call.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Store the output size and the input-to-feature-map scale."""
        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)
        # Both are filled in by forward() and read back by backward().
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        """Pool ``features`` over ``rois``.

        Returns a zero-initialised tensor of shape
        ``(num_rois, channels, aligned_height, aligned_width)`` filled in by
        the CUDA or CPU kernel, depending on where ``features`` lives.
        """
        self.rois = rois
        self.feature_size = features.size()

        # Unpacking into four names also requires `features` to be 4-D.
        _, channels, _, _ = features.size()
        out_shape = (rois.size(0), channels,
                     self.aligned_height, self.aligned_width)
        output = features.new(*out_shape).zero_()

        kernel = (roi_align.roi_align_forward_cuda if features.is_cuda
                  else roi_align.roi_align_forward)
        kernel(self.aligned_height, self.aligned_width, self.spatial_scale,
               features, rois, output)
        return output

    def backward(self, grad_output):
        """Return ``(grad wrt features, None)``; only CUDA gradients are accepted."""
        assert(self.feature_size is not None and grad_output.is_cuda)

        n_images, channels, feat_h, feat_w = self.feature_size
        # Gradient buffer with the shape of the forward input, allocated from
        # the stored rois tensor.
        grad_input = self.rois.new(n_images, channels, feat_h, feat_w).zero_()
        roi_align.roi_align_backward_cuda(
            self.aligned_height, self.aligned_width, self.spatial_scale,
            grad_output, self.rois, grad_input)

        # The rois input receives no gradient.
        return grad_input, None


================================================
FILE: lib/model/roi_align/make.sh
================================================
#!/usr/bin/env bash
# Build the RoI Align extension: compile the CUDA kernel with nvcc, then run
# build.py, which links the resulting object into the `_ext.roi_align` module.

# Stop at the first failing command so that a failed `cd` or nvcc compile does
# not fall through to build.py with a missing or stale object file.
set -e

CUDA_PATH=/usr/local/cuda/

cd src
echo "Compiling roi_align kernels by nvcc..."
nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52

cd ../
python build.py


================================================
FILE: lib/model/roi_align/modules/__init__.py
================================================


================================================
FILE: lib/model/roi_align/modules/roi_align.py
================================================
from torch.nn.modules.module import Module
from torch.nn.functional import avg_pool2d, max_pool2d
from ..functions.roi_align import RoIAlignFunction


class RoIAlign(Module):
    """Module wrapper that applies ``RoIAlignFunction`` with a fixed output size."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlign, self).__init__()

        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Return the ``aligned_height x aligned_width`` pooled features."""
        align = RoIAlignFunction(self.aligned_height, self.aligned_width,
                                 self.spatial_scale)
        return align(features, rois)

class RoIAlignAvg(Module):
    """RoI Align at one extra row and column, followed by 2x2 average pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Pool to ``(H+1) x (W+1)``, then average 2x2 windows with stride 1."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        oversized = align(features, rois)
        # A 2x2 window with stride 1 trims the extra row and column again.
        return avg_pool2d(oversized, kernel_size=2, stride=1)

class RoIAlignMax(Module):
    """RoI Align at one extra row and column, followed by 2x2 max pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        self.aligned_width, self.aligned_height = (int(aligned_width),
                                                   int(aligned_height))
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Pool to ``(H+1) x (W+1)``, then take the max of 2x2 windows with stride 1."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 self.spatial_scale)
        oversized = align(features, rois)
        # A 2x2 window with stride 1 trims the extra row and column again.
        return max_pool2d(oversized, kernel_size=2, stride=1)


================================================
FILE: lib/model/roi_align/src/roi_align.c
================================================
#include <TH/TH.h>
#include <math.h>
#include <omp.h>


// Forward declarations of the CPU kernels defined at the bottom of this file.

// Forward pass: bilinearly samples `bottom_data` for every ROI in
// `bottom_rois` (5 floats per ROI) and writes the result to `top_data`.
void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data);

// Backward pass: accumulates gradients derived from `top_diff` into the last
// argument. NOTE: the definition names that argument `bottom_diff`; the name
// `top_data` used in this declaration is a leftover.
void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data);

// CPU entry point for the RoI Align forward pass.
// Returns 0 without touching `output` when `rois` does not have 5 columns,
// and 1 after the kernel has run.
int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
                     THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
{
    // ROI layout: one row per ROI, 5 values per row.
    int roi_count = THFloatTensor_size(rois, 0);
    int roi_cols = THFloatTensor_size(rois, 1);
    if (roi_cols != 5)
    {
        return 0;
    }

    // Feature map layout: dim 1 = channels, dim 2 = height, dim 3 = width.
    int feat_h = THFloatTensor_size(features, 2);
    int feat_w = THFloatTensor_size(features, 3);
    int feat_c = THFloatTensor_size(features, 1);

    // Raw data pointers of the three tensors.
    float * feat_ptr = THFloatTensor_data(features);
    float * roi_ptr = THFloatTensor_data(rois);
    float * out_ptr = THFloatTensor_data(output);

    ROIAlignForwardCpu(feat_ptr, spatial_scale, roi_count, feat_h, feat_w, feat_c,
            aligned_height, aligned_width, roi_ptr, out_ptr);

    return 1;
}

// CPU entry point for the RoI Align backward pass.
// Returns 0 without touching `bottom_grad` when `rois` does not have 5
// columns, and 1 after the kernel has run.
int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
                       THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
{
    // ROI layout: one row per ROI, 5 values per row.
    int roi_count = THFloatTensor_size(rois, 0);
    int roi_cols = THFloatTensor_size(rois, 1);
    if (roi_cols != 5)
    {
        return 0;
    }

    // The gradient buffer has the shape of the forward input:
    // dim 1 = channels, dim 2 = height, dim 3 = width.
    int feat_h = THFloatTensor_size(bottom_grad, 2);
    int feat_w = THFloatTensor_size(bottom_grad, 3);
    int feat_c = THFloatTensor_size(bottom_grad, 1);

    // Raw data pointers of the three tensors.
    float * top_grad_ptr = THFloatTensor_data(top_grad);
    float * roi_ptr = THFloatTensor_data(rois);
    float * bottom_grad_ptr = THFloatTensor_data(bottom_grad);

    ROIAlignBackwardCpu(top_grad_ptr, spatial_scale, roi_count, feat_h,
            feat_w, feat_c, aligned_height, aligned_width, roi_ptr, bottom_grad_ptr);

    return 1;
}

void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* top_data)
{
    const int output_size = num_rois * aligned_height * aligned_width * channels;

    int idx = 0;
    for (idx = 0; idx < output_size; ++idx)
    {
        // (n, c, ph, pw) is an element in the aligned output
        int pw = idx % aligned_width;
        int ph = (idx / aligned_width) % aligned_height;
        int c = (idx / aligned_width / aligned_height) % channels;
        int n = idx / aligned_width / aligned_height / channels;

        float roi_batch_ind = bottom_rois[n * 5 + 0];
        float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
        float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
        float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
        float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

        // Force malformed ROI to be 1x1
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (h < 0 || h >= height || w < 0 || w >= width)
        {
            top_data[idx] = 0.;
        }
        else
        {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            top_data[idx] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                + bottom_data[upright] * (1. - h_ratio) * w_ratio
                + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                + bottom_data[downright] * h_ratio * w_ratio;
        }
    }
}

void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
                     const int height, const int width, const int channels,
                     const int aligned_height, const int aligned_width, const float * bottom_rois,
                     float* bottom_diff)
{
    const int output_size = num_rois * aligned_height * aligned_width * channels;

    int idx = 0;
    for (idx = 0; idx < output_size; ++idx)
    {
        // (n, c, ph, pw) is an element in the aligned output
        int pw = idx % aligned_width;
        int ph = (idx / aligned_width) % aligned_height;
        int c = (idx / aligned_width / aligned_height) % channels;
        int n = idx / aligned_width / aligned_height / channels;

        float roi_batch_ind = bottom_rois[n * 5 + 0];
        float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
        float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
        float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
        float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

        // Force malformed ROI to be 1x1
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (h < 0 || h >= height || w < 0 || w >= width)
        {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            bottom_diff[upleft] += top_diff[idx] * (1. - h_ratio) * (1. - w_ratio);
            bottom_diff[upright] += top_diff[idx] * (1. - h_ratio) *  w_ratio;
            bottom_diff[downleft] += top_diff[idx] * h_ratio * (1. - w_ratio);
            bottom_diff[downright] += top_diff[idx] * h_ratio * w_ratio;
        }
    }
}


================================================
FILE: lib/model/roi_align/src/roi_align.h
================================================
// CPU RoI Align forward pass. Fills `output` from `features` and `rois`.
// Returns 0 when `rois` does not have 5 columns, 1 otherwise.
int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
                      THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);

// CPU RoI Align backward pass. Accumulates into `bottom_grad` from `top_grad`
// and `rois`. Returns 0 when `rois` does not have 5 columns, 1 otherwise.
int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
                      THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);


================================================
FILE: lib/model/roi_align/src/roi_align_cuda.c
================================================
#include <THC/THC.h>
#include <math.h>
#include "roi_align_kernel.h"

extern THCState *state;

// CUDA entry point for the RoI Align forward pass.
// Returns 0 without launching anything when `rois` does not have 5 columns,
// and 1 after the launcher has been called on the current THC stream.
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
{
    // ROI layout: one row per ROI, 5 values per row.
    int roi_count = THCudaTensor_size(state, rois, 0);
    int roi_cols = THCudaTensor_size(state, rois, 1);
    if (roi_cols != 5)
    {
        return 0;
    }

    // Feature map layout: dim 1 = channels, dim 2 = height, dim 3 = width.
    int feat_h = THCudaTensor_size(state, features, 2);
    int feat_w = THCudaTensor_size(state, features, 3);
    int feat_c = THCudaTensor_size(state, features, 1);

    // Raw data pointers of the three tensors.
    float * feat_ptr = THCudaTensor_data(state, features);
    float * roi_ptr = THCudaTensor_data(state, rois);
    float * out_ptr = THCudaTensor_data(state, output);

    cudaStream_t stream = THCState_getCurrentStream(state);

    ROIAlignForwardLaucher(
        feat_ptr, spatial_scale, roi_count, feat_h,
        feat_w, feat_c, aligned_height,
        aligned_width, roi_ptr,
        out_ptr, stream);

    return 1;
}

// CUDA entry point for the RoI Align backward pass.
// Returns 0 without launching anything when `rois` does not have 5 columns,
// and 1 after the launcher has been called on the current THC stream.
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
{
    // ROI layout: one row per ROI, 5 values per row.
    int roi_count = THCudaTensor_size(state, rois, 0);
    int roi_cols = THCudaTensor_size(state, rois, 1);
    if (roi_cols != 5)
    {
        return 0;
    }

    // The gradient buffer has the shape of the forward input:
    // dim 0 = batch, dim 1 = channels, dim 2 = height, dim 3 = width.
    int n_images = THCudaTensor_size(state, bottom_grad, 0);
    int feat_h = THCudaTensor_size(state, bottom_grad, 2);
    int feat_w = THCudaTensor_size(state, bottom_grad, 3);
    int feat_c = THCudaTensor_size(state, bottom_grad, 1);

    // Raw data pointers of the three tensors.
    float * top_grad_ptr = THCudaTensor_data(state, top_grad);
    float * roi_ptr = THCudaTensor_data(state, rois);
    float * bottom_grad_ptr = THCudaTensor_data(state, bottom_grad);

    cudaStream_t stream = THCState_getCurrentStream(state);
    ROIAlignBackwardLaucher(
        top_grad_ptr, spatial_scale, n_images, roi_count, feat_h,
        feat_w, feat_c, aligned_height,
        aligned_width, roi_ptr,
        bottom_grad_ptr, stream);

    return 1;
}


================================================
FILE: lib/model/roi_align/src/roi_align_cuda.h
================================================
// CUDA RoI Align forward pass. Fills `output` from `features` and `rois`.
// Returns 0 when `rois` does not have 5 columns, 1 otherwise.
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

// CUDA RoI Align backward pass. Writes the feature-map gradient to
// `bottom_grad` from `top_grad` and `rois`.
// Returns 0 when `rois` does not have 5 columns, 1 otherwise.
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);


================================================
FILE: lib/model/roi_align/src/roi_align_kernel.cu
================================================
#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <math.h>
#include <float.h>
#include "roi_align_kernel.h"

// Loop header that lets a 1-D launch of any size cover the range [0, n):
// each thread starts at its global index (blockIdx.x * blockDim.x +
// threadIdx.x) and then advances by the total number of launched threads
// (blockDim.x * gridDim.x).
// NOTE: `n` is expanded without parentheses, so pass a plain identifier or
// an already-parenthesised expression.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
            i += blockDim.x * gridDim.x)


    // Forward RoIAlign kernel: each loop iteration produces one element of
    // the pooled output by bilinear interpolation of the input features.
    //
    //   nthreads        number of output elements to produce
    //   bottom_data     input features, indexed as (batch, channels, height, width)
    //   spatial_scale   factor applied to the ROI coordinates
    //   height, width   spatial size of the input features
    //   channels        number of feature channels
    //   aligned_height, aligned_width   spatial size of each pooled ROI
    //   bottom_rois     5 floats per ROI: batch index, start_w, start_h, end_w, end_h
    //   top_data        output, indexed as (roi, channels, aligned_height, aligned_width)
    __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                    const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
        CUDA_1D_KERNEL_LOOP(index, nthreads) {
            // Decompose the flat output index into (n, c, ph, pw): ROI,
            // channel, pooled row and pooled column (pw varies fastest).
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // Read ROI n and scale its corner coordinates.
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

            // Clamp the ROI extent to be non-negative (a malformed ROI gets
            // extent 0, not 1).
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Sample spacing: the aligned_* samples span the ROI extent with
            // the first sample on roi_start and the last on roi_start + extent.
            // NOTE(review): divides by zero when aligned_height or
            // aligned_width is 1.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Continuous sampling position of this output element.
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left neighbour, capped at (size - 2) so that the +1
            // neighbours used below do not run past the last row/column.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the image this ROI belongs to inside bottom_data.
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation; samples outside the feature map yield 0
            if (h < 0 || h >= height || w < 0 || w >= width) {
                top_data[index] = 0.;
            } else {
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                // Flat indices of the four neighbouring cells.
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                    + bottom_data[upright] * (1. - h_ratio) * w_ratio
                    + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                    + bottom_data[downright] * h_ratio * w_ratio;
            }
        }
    }


    // Launch ROIAlignForward on `stream` for every element of the pooled
    // output (num_rois * channels * aligned_height * aligned_width values).
    //
    // Returns 1 on success. If the launch is rejected by the CUDA runtime
    // the error is printed to stderr and the process terminates via exit(-1).
    int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
                               const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
        const int kThreadsPerBlock = 1024;
        const int output_size = num_rois * aligned_height * aligned_width * channels;
        cudaError_t err;

        // Nothing to compute (e.g. zero ROIs). Launching with a grid of 0
        // blocks is an invalid configuration and would otherwise end up in
        // the exit(-1) path below.
        if (output_size == 0) {
            return 1;
        }

        // Enough blocks to give every output element its own thread.
        const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;

        ROIAlignForward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
          output_size, bottom_data, spatial_scale, height, width, channels,
          aligned_height, aligned_width, bottom_rois, top_data);

        // Only launch errors are detected here; the kernel runs asynchronously.
        err = cudaGetLastError();
        if(cudaSuccess != err) {
            fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
            exit( -1 );
        }

        return 1;
    }


    // Backward RoIAlign kernel: each loop iteration takes one element of the
    // pooled-output gradient and scatters it onto the four input cells that
    // the forward pass interpolated from, using the same bilinear weights.
    //
    //   nthreads        number of pooled-output elements
    //   top_diff        gradient w.r.t. the pooled output
    //   spatial_scale   factor applied to the ROI coordinates
    //   height, width   spatial size of the input features
    //   channels        number of feature channels
    //   aligned_height, aligned_width   spatial size of each pooled ROI
    //   bottom_diff     gradient w.r.t. the input features; only ever added
    //                   to, so it presumably must be zeroed by the caller
    //   bottom_rois     5 floats per ROI: batch index, start_w, start_h, end_w, end_h
    __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
                                     const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
        CUDA_1D_KERNEL_LOOP(index, nthreads) {

            // Decompose the flat index into (n, c, ph, pw): ROI, channel,
            // pooled row and pooled column (pw varies fastest).
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // Read ROI n and scale its corner coordinates (kept as floats,
            // no rounding).
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

            // Clamp the ROI extent to be non-negative (a malformed ROI gets
            // extent 0, not 1).
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Sample spacing, identical to the forward kernel.
            // NOTE(review): divides by zero when aligned_height or
            // aligned_width is 1.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Continuous sampling position of this output element.
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left neighbour, capped at (size - 2) so that the +1
            // neighbours used below do not run past the last row/column.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the image this ROI belongs to inside bottom_diff.
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation; samples outside the feature map
            // contribute no gradient
            if (!(h < 0 || h >= height || w < 0 || w >= width)) {
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                // Flat indices of the four neighbouring cells.
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                // Different output elements can map to the same input cell,
                // hence the atomic accumulation.
                atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
                atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
                atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
                atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
            }
        }
    }

    // Launch ROIAlignBackward on `stream` for every element of the pooled
    // output gradient (num_rois * channels * aligned_height * aligned_width
    // values). `batch_size` is accepted for interface compatibility but is
    // not used by the launch.
    //
    // Returns 1 on success. If the launch is rejected by the CUDA runtime
    // the error is printed to stderr and the process terminates via exit(-1).
    int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
        const int kThreadsPerBlock = 1024;
        const int output_size = num_rois * aligned_height * aligned_width * channels;
        cudaError_t err;

        // No gradient to propagate (e.g. zero ROIs). Launching with a grid
        // of 0 blocks is an invalid configuration and would otherwise end up
        // in the exit(-1) path below.
        if (output_size == 0) {
            return 1;
        }

        // Enough blocks to give every output element its own thread.
        const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;

        ROIAlignBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
          output_size, top_diff, spatial_scale, height, width, channels,
          aligned_height, aligned_width, bottom_diff, bottom_rois);

        // Only launch errors are detected here; the kernel runs asynchronously.
        err = cudaGetLastError();
        if(cudaSuccess != err) {
            fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
            exit( -1 );
        }

        return 1;
    }


#ifdef __cplusplus
}
#endif


================================================
FILE: lib/model/roi_align/src/roi_align_kernel.h
================================================
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

// Forward kernel: writes one bilinearly interpolated value per element of
// top_data. bottom_rois holds 5 floats per ROI.
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

// Host-side launcher for ROIAlignForward on the given stream. Returns 1;
// terminates the process if the launch fails.
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

// Backward kernel: atomically accumulates each element of top_diff into the
// four neighbouring cells of bottom_diff.
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

// Host-side launcher for ROIAlignBackward on the given stream. Returns 1;
// terminates the process if the launch fails. batch_size is not used by the
// launch itself.
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif



================================================
FILE: lib/model/roi_crop/__init__.py
================================================


================================================
FILE: lib/model/roi_crop/_ext/__init__.py
================================================


================================================
FILE: lib/model/roi_crop/_ext/crop_resize/__init__.py
================================================

from torch.utils.ffi import _wrap_function
from ._crop_resize import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    """Re-export every attribute of the compiled ``_lib`` through *locals*.

    Callable attributes are passed through ``_wrap_function`` before being
    stored; anything else (e.g. constants) is stored unchanged, since wrapping
    a non-callable would replace its value with a function. Every exported
    name is also appended to ``__all__``.
    """
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        if callable(fn):
            locals[symbol] = _wrap_function(fn, _ffi)
        else:
            locals[symbol] = fn
        __all__.append(symbol)

_import_symbols(locals())


================================================
FILE: lib/model/roi_crop/_ext/roi_crop/__init__.py
================================================

from torch.utils.ffi import _wrap_function
from ._roi_crop import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    """Publish the contents of the compiled ``_lib`` in *locals*.

    Each name listed by ``dir(_lib)`` is stored under the same key: callables
    go through ``_wrap_function`` first, other attributes are stored as they
    are. The name is then recorded in ``__all__``.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)

_import_symbols(locals())


================================================
FILE: lib/model/roi_crop/build.py
================================================
from __future__ import print_function
import os
import torch
from torch.utils.ffi import create_extension

#this_file = os.path.dirname(__file__)

# CPU implementation: always part of the extension.
sources = ['src/roi_crop.c']
headers = ['src/roi_crop.h']
defines = []
extra_objects = []
with_cuda = False

# Absolute directory of this script, used to locate pre-built objects.
this_file = os.path.dirname(os.path.realpath(__file__))

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_crop_cuda.c']
    headers += ['src/roi_crop_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True
    # The nvcc-compiled kernel object (produced by make.sh) is only linked
    # into CUDA builds; a CPU-only build neither has nor needs it.
    extra_objects = [os.path.join(this_file, 'src/roi_crop_cuda_kernel.cu.o')]

print(this_file)

ffi = create_extension(
    '_ext.roi_crop',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()


================================================
FILE: lib/model/roi_crop/functions/__init__.py
================================================


================================================
FILE: lib/model/roi_crop/functions/crop_resize.py
================================================
# functions/crop_resize.py
import torch
from torch.autograd import Function
from .._ext import roi_crop
from cffi import FFI
ffi = FFI()

class RoICropFunction(Function):
    """Legacy-style autograd function around the ``BilinearSamplerBHWD`` kernels.

    ``forward`` produces a tensor of size
    ``(input2.size(0), input1.size(1), input2.size(1), input2.size(2))`` and
    ``backward`` returns one gradient per input. The CPU or the CUDA kernel
    is chosen from the ``is_cuda`` flag of the incoming tensor.
    """

    def forward(self, input1, input2):
        """Run the sampler on ``input1`` / ``input2`` and return the output."""
        # Both inputs are kept for the backward pass.
        self.input1 = input1
        self.input2 = input2
        self.device_c = ffi.new("int *")
        out_size = (input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
        output = torch.zeros(*out_size)
        #print('decice %d' % torch.cuda.current_device())
        use_cuda = input1.is_cuda
        # -1 marks "no CUDA device".
        self.device = torch.cuda.current_device() if use_cuda else -1
        self.device_c[0] = self.device
        if use_cuda:
            output = output.cuda(self.device)
            roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
        else:
            roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
        return output

    def backward(self, grad_output):
        """Return the gradients w.r.t. ``input1`` and ``input2``."""
        # Zero-initialised buffers that are handed to the gradient kernel.
        grads = [torch.zeros(self.input1.size()), torch.zeros(self.input2.size())]
        #print('backward decice %d' % self.device)
        if grad_output.is_cuda:
            # Move the buffers to the device remembered by forward().
            grads = [buf.cuda(self.device) for buf in grads]
            update_grad = roi_crop.BilinearSamplerBHWD_updateGradInput_cuda
        else:
            update_grad = roi_crop.BilinearSamplerBHWD_updateGradInput
        update_grad(self.input1, self.input2, grads[0], grads[1], grad_output)
        return grads[0], grads[1]


================================================
FILE: lib/model/roi_crop/functions/gridgen.py
================================================
# functions/gridgen.py
import torch
from torch.autograd import Function
import numpy as np


class AffineGridGenFunction(Function):
    """Legacy-style autograd function turning affine parameters into grids.

    A fixed base grid of size ``(height, width, 3)`` is built once: channel 0
    holds the row coordinate, channel 1 the column coordinate (both sampled
    from ``[-1, 1)`` in ``height`` / ``width`` equal steps) and channel 2 is
    constantly 1. ``forward`` multiplies that grid with a batch of ``2 x 3``
    matrices.
    """

    def __init__(self, height, width,lr=1):
        super(AffineGridGenFunction, self).__init__()
        self.lr = lr
        self.height, self.width = height, width
        # linspace(..., endpoint=False) yields exactly `height` / `width`
        # samples with step 2/height and 2/width; np.arange with a float step
        # can be off by one element because of rounding.
        row_coords = np.linspace(-1, 1, self.height, endpoint=False)
        col_coords = np.linspace(-1, 1, self.width, endpoint=False)
        grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        grid[:, :, 0] = row_coords[:, np.newaxis]
        grid[:, :, 1] = col_coords[np.newaxis, :]
        grid[:, :, 2] = 1.0
        self.grid = torch.from_numpy(grid)

    def forward(self, input1):
        """Return the ``(batch, height, width, 2)`` grid for ``input1``.

        ``input1`` is used as a batch of ``2 x 3`` matrices: it is transposed
        over dims 1 and 2 and right-multiplied onto the ``(height*width, 3)``
        base grid.
        """
        self.input1 = input1
        batch = input1.size(0)
        # One copy of the base grid per batch item, allocated like input1.
        # copy_ broadcasts over the batch dimension and converts dtype/device
        # (a torch tensor has no ``astype``).
        self.batchgrid = input1.new(torch.Size([batch]) + self.grid.size()).zero_()
        self.batchgrid.copy_(self.grid)

        # A single batched matmul covers every item; there is no need to
        # repeat it per batch element.
        flat_grid = self.batchgrid.view(batch, self.height*self.width, 3)
        output = torch.bmm(flat_grid, torch.transpose(input1, 1, 2))
        return output.view(batch, self.height, self.width, 2)

    def backward(self, grad_output):
        """Return the gradient w.r.t. ``input1`` (same size as ``input1``)."""
        batch = self.input1.size(0)
        grad_input1 = self.input1.new(self.input1.size()).zero_()
        # grad = grad_output^T (batch, 2, H*W) @ grid (batch, H*W, 3)
        grad_input1 = torch.baddbmm(
            grad_input1,
            torch.transpose(grad_output.view(batch, self.height*self.width, 2), 1, 2),
            self.batchgrid.view(batch, self.height*self.width, 3))
        return grad_input1


================================================
FILE: lib/model/roi_crop/functions/roi_crop.py
================================================
# functions/roi_crop.py
import torch
from torch.autograd import Function
from .._ext import roi_crop
import pdb

class RoICropFunction(Function):
    """Legacy-style autograd function around the CUDA ``BilinearSamplerBHWD`` kernels.

    Only the ``*_cuda`` entry points of the extension are used here.
    """

    def forward(self, input1, input2):
        """Return a tensor of size
        ``(input2.size(0), input1.size(1), input2.size(1), input2.size(2))``
        filled by the CUDA sampler.
        """
        # Private copies of both inputs are kept for backward().
        self.input1 = input1.clone()
        self.input2 = input2.clone()
        grid_size = input2.size()
        output = input2.new(grid_size[0], input1.size()[1], grid_size[1], grid_size[2]).zero_()
        out_device = output.get_device()
        assert out_device == input1.get_device(), "output and input1 must on the same device"
        assert out_device == input2.get_device(), "output and input2 must on the same device"
        roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
        return output

    def backward(self, grad_output):
        """Return the gradients w.r.t. ``input1`` and ``input2``."""
        saved1, saved2 = self.input1, self.input2
        # Zeroed buffers with the same type and size as the saved inputs.
        grad_input1 = saved1.new(saved1.size()).zero_()
        grad_input2 = saved2.new(saved2.size()).zero_()
        roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(saved1, saved2, grad_input1, grad_input2, grad_output)
        return grad_input1, grad_input2


================================================
FILE: lib/model/roi_crop/make.sh
================================================
#!/usr/bin/env bash
# Build the roi_crop extension: compile the CUDA kernel with nvcc, then run
# build.py, which links the resulting object into the Python extension.
# Must be run from the directory containing this script.

# Stop at the first failing step so build.py never runs against a missing or
# stale kernel object.
set -e

# NOTE: currently unused; nvcc is resolved through PATH.
CUDA_PATH=/usr/local/cuda/

cd src
echo "Compiling my_lib kernels by nvcc..."
# -arch=sm_52 is hard-coded; adjust it to match the target GPU.
nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52

cd ../
python build.py


================================================
FILE: lib/model/roi_crop/modules/__init__.py
================================================


================================================
FILE: lib/model/roi_crop/modules/gridgen.py
================================================
from torch.nn.modules.module import Module
import torch
from torch.autograd import Variable
import numpy as np
from ..functions.gridgen import AffineGridGenFunction

import pyximport
pyximport.install(setup_args={"include_dirs":np.get_include()},
                  reload_support=True)


class _AffineGridGen(Module):
    def __init__(self, height, width, lr = 1, aux_loss = False):
        super(_AffineGridGen, self).__init__()
        self.height, self.width = height, width
        self.aux_loss = aux_loss
        self.f = AffineGridGenFunction(self.height, self.width, lr=lr)
        self.lr = lr
    def forward(self, input):
        # if not self.aux_loss:
        return self.f(input)
        # else:
        #     identity = torch.from_numpy(np.array([[1,0,0], [0,1,0]], dtype=np.float32))
  
Download .txt
gitextract_3vzfjfkd/

├── .gitignore
├── LICENSE
├── README.md
├── _init_paths.py
├── cfgs/
│   ├── res101.yml
│   ├── res101_ls.yml
│   ├── res50.yml
│   └── vgg16.yml
├── demo.py
├── lib/
│   ├── datasets/
│   │   ├── VOCdevkit-matlab-wrapper/
│   │   │   ├── get_voc_opts.m
│   │   │   ├── voc_eval.m
│   │   │   └── xVOCap.m
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── ds_utils.py
│   │   ├── factory.py
│   │   ├── imagenet.py
│   │   ├── imdb.py
│   │   ├── pascal_voc.py
│   │   ├── pascal_voc_rbg.py
│   │   ├── tools/
│   │   │   └── mcg_munge.py
│   │   ├── vg.py
│   │   ├── vg_eval.py
│   │   └── voc_eval.py
│   ├── make.sh
│   ├── model/
│   │   ├── __init__.py
│   │   ├── faster_rcnn/
│   │   │   ├── __init__.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── resnet.py
│   │   │   └── vgg16.py
│   │   ├── nms/
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── nms/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── make.sh
│   │   │   ├── nms_cpu.py
│   │   │   ├── nms_gpu.py
│   │   │   ├── nms_kernel.cu
│   │   │   ├── nms_wrapper.py
│   │   │   └── src/
│   │   │       ├── nms_cuda.h
│   │   │       ├── nms_cuda_kernel.cu
│   │   │       └── nms_cuda_kernel.h
│   │   ├── roi_align/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   ├── make.sh
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_align.py
│   │   │   └── src/
│   │   │       ├── roi_align.c
│   │   │       ├── roi_align.h
│   │   │       ├── roi_align_cuda.c
│   │   │       ├── roi_align_cuda.h
│   │   │       ├── roi_align_kernel.cu
│   │   │       └── roi_align_kernel.h
│   │   ├── roi_crop/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize/
│   │   │   │   │   └── __init__.py
│   │   │   │   └── roi_crop/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── crop_resize.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   ├── make.sh
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gridgen.py
│   │   │   │   └── roi_crop.py
│   │   │   └── src/
│   │   │       ├── roi_crop.c
│   │   │       ├── roi_crop.h
│   │   │       ├── roi_crop_cuda.c
│   │   │       ├── roi_crop_cuda.h
│   │   │       ├── roi_crop_cuda_kernel.cu
│   │   │       └── roi_crop_cuda_kernel.h
│   │   ├── roi_pooling/
│   │   │   ├── __init__.py
│   │   │   ├── _ext/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pooling/
│   │   │   │       └── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functions/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   ├── modules/
│   │   │   │   ├── __init__.py
│   │   │   │   └── roi_pool.py
│   │   │   └── src/
│   │   │       ├── roi_pooling.c
│   │   │       ├── roi_pooling.h
│   │   │       ├── roi_pooling_cuda.c
│   │   │       ├── roi_pooling_cuda.h
│   │   │       ├── roi_pooling_kernel.cu
│   │   │       └── roi_pooling_kernel.h
│   │   ├── rpn/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_target_layer.py
│   │   │   ├── bbox_transform.py
│   │   │   ├── generate_anchors.py
│   │   │   ├── proposal_layer.py
│   │   │   ├── proposal_target_layer_cascade.py
│   │   │   └── rpn.py
│   │   └── utils/
│   │       ├── .gitignore
│   │       ├── __init__.py
│   │       ├── bbox.pyx
│   │       ├── blob.py
│   │       ├── config.py
│   │       ├── logger.py
│   │       └── net_utils.py
│   ├── pycocotools/
│   │   ├── UPSTREAM_REV
│   │   ├── __init__.py
│   │   ├── _mask.pyx
│   │   ├── coco.py
│   │   ├── cocoeval.py
│   │   ├── license.txt
│   │   ├── mask.py
│   │   ├── maskApi.c
│   │   └── maskApi.h
│   ├── roi_data_layer/
│   │   ├── __init__.py
│   │   ├── minibatch.py
│   │   ├── roibatchLoader.py
│   │   └── roidb.py
│   └── setup.py
├── requirements.txt
├── test_net.py
└── trainval_net.py
Download .txt
SYMBOL INDEX (355 symbols across 59 files)

FILE: _init_paths.py
  function add_path (line 4) | def add_path(path):

FILE: demo.py
  function parse_args (line 46) | def parse_args():
  function _get_image_blob (line 107) | def _get_image_blob(im):

FILE: lib/datasets/coco.py
  class coco (line 27) | class coco(imdb):
    method __init__ (line 28) | def __init__(self, image_set, year):
    method _get_ann_file (line 68) | def _get_ann_file(self):
    method _load_image_set_index (line 74) | def _load_image_set_index(self):
    method _get_widths (line 81) | def _get_widths(self):
    method image_path_at (line 86) | def image_path_at(self, i):
    method image_id_at (line 92) | def image_id_at(self, i):
    method image_path_from_index (line 98) | def image_path_from_index(self, index):
    method gt_roidb (line 112) | def gt_roidb(self):
    method _load_coco_annotation (line 132) | def _load_coco_annotation(self, index):
    method _get_widths (line 190) | def _get_widths(self):
    method append_flipped_images (line 193) | def append_flipped_images(self):
    method _get_box_file (line 214) | def _get_box_file(self, index):
    method _print_detection_eval_metrics (line 221) | def _print_detection_eval_metrics(self, coco_eval):
    method _do_detection_eval (line 254) | def _do_detection_eval(self, res_file, output_dir):
    method _coco_results_one_category (line 267) | def _coco_results_one_category(self, boxes, cat_id):
    method _write_coco_results_file (line 285) | def _write_coco_results_file(self, all_boxes, res_file):
    method evaluate_detections (line 303) | def evaluate_detections(self, all_boxes, output_dir):
    method competition_mode (line 319) | def competition_mode(self, on):

FILE: lib/datasets/ds_utils.py
  function unique_boxes (line 13) | def unique_boxes(boxes, scale=1.0):
  function xywh_to_xyxy (line 21) | def xywh_to_xyxy(boxes):
  function xyxy_to_xywh (line 26) | def xyxy_to_xywh(boxes):
  function validate_boxes (line 31) | def validate_boxes(boxes, width=0, height=0):
  function filter_small_boxes (line 45) | def filter_small_boxes(boxes, min_size):

FILE: lib/datasets/factory.py
  function get_imdb (line 62) | def get_imdb(name):
  function list_imdbs (line 69) | def list_imdbs():

FILE: lib/datasets/imagenet.py
  class imagenet (line 26) | class imagenet(imdb):
    method __init__ (line 27) | def __init__(self, image_set, devkit_path, data_path):
    method image_path_at (line 75) | def image_path_at(self, i):
    method image_path_from_index (line 81) | def image_path_from_index(self, index):
    method _load_image_set_index (line 89) | def _load_image_set_index(self):
    method gt_roidb (line 151) | def gt_roidb(self):
    method _load_imagenet_annotation (line 172) | def _load_imagenet_annotation(self, index):

FILE: lib/datasets/imdb.py
  class imdb (line 22) | class imdb(object):
    method __init__ (line 25) | def __init__(self, name, classes=None):
    method name (line 40) | def name(self):
    method num_classes (line 44) | def num_classes(self):
    method classes (line 48) | def classes(self):
    method image_index (line 52) | def image_index(self):
    method roidb_handler (line 56) | def roidb_handler(self):
    method roidb_handler (line 60) | def roidb_handler(self, val):
    method set_proposal_method (line 63) | def set_proposal_method(self, method):
    method roidb (line 68) | def roidb(self):
    method cache_path (line 80) | def cache_path(self):
    method num_images (line 87) | def num_images(self):
    method image_path_at (line 90) | def image_path_at(self, i):
    method image_id_at (line 93) | def image_id_at(self, i):
    method default_roidb (line 96) | def default_roidb(self):
    method evaluate_detections (line 99) | def evaluate_detections(self, all_boxes, output_dir=None):
    method _get_widths (line 110) | def _get_widths(self):
    method append_flipped_images (line 114) | def append_flipped_images(self):
    method evaluate_recall (line 131) | def evaluate_recall(self, candidate_boxes=None, thresholds=None,
    method create_roidb_from_box_list (line 221) | def create_roidb_from_box_list(self, box_list, gt_roidb):
    method merge_roidbs (line 251) | def merge_roidbs(a, b):
    method competition_mode (line 263) | def competition_mode(self, on):

FILE: lib/datasets/pascal_voc.py
  class pascal_voc (line 40) | class pascal_voc(imdb):
    method __init__ (line 41) | def __init__(self, image_set, year, devkit_path=None):
    method image_path_at (line 76) | def image_path_at(self, i):
    method image_id_at (line 82) | def image_id_at(self, i):
    method image_path_from_index (line 88) | def image_path_from_index(self, index):
    method _load_image_set_index (line 98) | def _load_image_set_index(self):
    method _get_default_path (line 112) | def _get_default_path(self):
    method gt_roidb (line 118) | def gt_roidb(self):
    method selective_search_roidb (line 139) | def selective_search_roidb(self):
    method rpn_roidb (line 167) | def rpn_roidb(self):
    method _load_rpn_roidb (line 177) | def _load_rpn_roidb(self, gt_roidb):
    method _load_selective_search_roidb (line 186) | def _load_selective_search_roidb(self, gt_roidb):
    method _load_pascal_annotation (line 205) | def _load_pascal_annotation(self, index):
    method _get_comp_id (line 258) | def _get_comp_id(self):
    method _get_voc_results_file_template (line 263) | def _get_voc_results_file_template(self):
    method _write_voc_results_file (line 272) | def _write_voc_results_file(self, all_boxes):
    method _do_python_eval (line 290) | def _do_python_eval(self, output_dir='output'):
    method _do_matlab_eval (line 335) | def _do_matlab_eval(self, output_dir='output'):
    method evaluate_detections (line 350) | def evaluate_detections(self, all_boxes, output_dir):
    method competition_mode (line 362) | def competition_mode(self, on):

FILE: lib/datasets/pascal_voc_rbg.py
  class pascal_voc (line 27) | class pascal_voc(imdb):
    method __init__ (line 28) | def __init__(self, image_set, year, devkit_path=None):
    method image_path_at (line 63) | def image_path_at(self, i):
    method image_path_from_index (line 69) | def image_path_from_index(self, index):
    method _load_image_set_index (line 79) | def _load_image_set_index(self):
    method _get_default_path (line 94) | def _get_default_path(self):
    method gt_roidb (line 100) | def gt_roidb(self):
    method rpn_roidb (line 124) | def rpn_roidb(self):
    method _load_rpn_roidb (line 134) | def _load_rpn_roidb(self, gt_roidb):
    method _load_pascal_annotation (line 143) | def _load_pascal_annotation(self, index):
    method _get_comp_id (line 189) | def _get_comp_id(self):
    method _get_voc_results_file_template (line 194) | def _get_voc_results_file_template(self):
    method _write_voc_results_file (line 205) | def _write_voc_results_file(self, all_boxes):
    method _do_python_eval (line 223) | def _do_python_eval(self, output_dir='output'):
    method _do_matlab_eval (line 268) | def _do_matlab_eval(self, output_dir='output'):
    method evaluate_detections (line 283) | def evaluate_detections(self, all_boxes, output_dir):
    method competition_mode (line 296) | def competition_mode(self, on):

FILE: lib/datasets/tools/mcg_munge.py
  function munge (line 16) | def munge(src_dir):

FILE: lib/datasets/vg.py
  class vg (line 29) | class vg(imdb):
    method __init__ (line 30) | def __init__(self, version, image_set, ):
    method image_path_at (line 102) | def image_path_at(self, i):
    method image_id_at (line 108) | def image_id_at(self, i):
    method image_path_from_index (line 115) | def image_path_from_index(self, index):
    method _image_split_path (line 126) | def _image_split_path(self):
    method _load_image_set_index (line 138) | def _load_image_set_index(self):
    method gt_roidb (line 176) | def gt_roidb(self):
    method _get_size (line 198) | def _get_size(self, index):
    method _annotation_path (line 201) | def _annotation_path(self, index):
    method _load_vg_annotation (line 204) | def _load_vg_annotation(self, index):
    method evaluate_detections (line 292) | def evaluate_detections(self, all_boxes, output_dir):
    method evaluate_attributes (line 302) | def evaluate_attributes(self, all_boxes, output_dir):
    method _get_vg_results_file_template (line 312) | def _get_vg_results_file_template(self, output_dir):
    method _write_voc_results_file (line 317) | def _write_voc_results_file(self, classes, all_boxes, output_dir):
    method _do_python_eval (line 336) | def _do_python_eval(self, output_dir, pickle=True, eval_attributes = F...

FILE: lib/datasets/vg_eval.py
  function vg_eval (line 13) | def vg_eval( detpath,

FILE: lib/datasets/voc_eval.py
  function parse_rec (line 15) | def parse_rec(filename):
  function voc_ap (line 35) | def voc_ap(rec, prec, use_07_metric=False):
  function voc_eval (line 69) | def voc_eval(detpath,

FILE: lib/model/faster_rcnn/faster_rcnn.py
  class _fasterRCNN (line 19) | class _fasterRCNN(nn.Module):
    method __init__ (line 21) | def __init__(self, classes, class_agnostic):
    method forward (line 39) | def forward(self, im_data, im_info, gt_boxes, num_boxes):
    method _init_weights (line 116) | def _init_weights(self):
    method create_architecture (line 134) | def create_architecture(self):

FILE: lib/model/faster_rcnn/resnet.py
  function conv3x3 (line 28) | def conv3x3(in_planes, out_planes, stride=1):
  class BasicBlock (line 34) | class BasicBlock(nn.Module):
    method __init__ (line 37) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 47) | def forward(self, x):
  class Bottleneck (line 66) | class Bottleneck(nn.Module):
    method __init__ (line 69) | def __init__(self, inplanes, planes, stride=1, downsample=None):
    method forward (line 82) | def forward(self, x):
  class ResNet (line 105) | class ResNet(nn.Module):
    method __init__ (line 106) | def __init__(self, block, layers, num_classes=1000):
    method _make_layer (line 131) | def _make_layer(self, block, planes, blocks, stride=1):
    method forward (line 148) | def forward(self, x):
  function resnet18 (line 166) | def resnet18(pretrained=False):
  function resnet34 (line 177) | def resnet34(pretrained=False):
  function resnet50 (line 188) | def resnet50(pretrained=False):
  function resnet101 (line 199) | def resnet101(pretrained=False):
  function resnet152 (line 210) | def resnet152(pretrained=False):
  class resnet (line 220) | class resnet(_fasterRCNN):
    method __init__ (line 221) | def __init__(self, classes, num_layers=101, pretrained=False, class_ag...
    method _init_modules (line 230) | def _init_modules(self):
    method train (line 279) | def train(self, mode=True):
    method _head_to_tail (line 296) | def _head_to_tail(self, pool5):

FILE: lib/model/faster_rcnn/vgg16.py
  class vgg16 (line 19) | class vgg16(_fasterRCNN):
    method __init__ (line 20) | def __init__(self, classes, pretrained=False, class_agnostic=False):
    method _init_modules (line 28) | def _init_modules(self):
    method _head_to_tail (line 56) | def _head_to_tail(self, pool5):

FILE: lib/model/nms/_ext/nms/__init__.py
  function _import_symbols (line 6) | def _import_symbols(locals):

FILE: lib/model/nms/nms_cpu.py
  function nms_cpu (line 6) | def nms_cpu(dets, thresh):

FILE: lib/model/nms/nms_gpu.py
  function nms_gpu (line 7) | def nms_gpu(dets, thresh):

FILE: lib/model/nms/nms_wrapper.py
  function nms (line 13) | def nms(dets, thresh, force_cpu=False):

FILE: lib/model/roi_align/_ext/roi_align/__init__.py
  function _import_symbols (line 6) | def _import_symbols(locals):

FILE: lib/model/roi_align/functions/roi_align.py
  class RoIAlignFunction (line 7) | class RoIAlignFunction(Function):
    method __init__ (line 8) | def __init__(self, aligned_height, aligned_width, spatial_scale):
    method forward (line 15) | def forward(self, features, rois):
    method backward (line 37) | def backward(self, grad_output):

FILE: lib/model/roi_align/modules/roi_align.py
  class RoIAlign (line 6) | class RoIAlign(Module):
    method __init__ (line 7) | def __init__(self, aligned_height, aligned_width, spatial_scale):
    method forward (line 14) | def forward(self, features, rois):
  class RoIAlignAvg (line 18) | class RoIAlignAvg(Module):
    method __init__ (line 19) | def __init__(self, aligned_height, aligned_width, spatial_scale):
    method forward (line 26) | def forward(self, features, rois):
  class RoIAlignMax (line 31) | class RoIAlignMax(Module):
    method __init__ (line 32) | def __init__(self, aligned_height, aligned_width, spatial_scale):
    method forward (line 39) | def forward(self, features, rois):

FILE: lib/model/roi_align/src/roi_align.c
  function roi_align_forward (line 16) | int roi_align_forward(int aligned_height, int aligned_width, float spati...
  function roi_align_backward (line 47) | int roi_align_backward(int aligned_height, int aligned_width, float spat...
  function ROIAlignForwardCpu (line 80) | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_sc...
  function ROIAlignBackwardCpu (line 138) | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scal...

FILE: lib/model/roi_align/src/roi_align_cuda.c
  function roi_align_forward_cuda (line 7) | int roi_align_forward_cuda(int aligned_height, int aligned_width, float ...
  function roi_align_backward_cuda (line 42) | int roi_align_backward_cuda(int aligned_height, int aligned_width, float...

FILE: lib/model/roi_crop/_ext/crop_resize/__init__.py
  function _import_symbols (line 6) | def _import_symbols(locals):

FILE: lib/model/roi_crop/_ext/roi_crop/__init__.py
  function _import_symbols (line 6) | def _import_symbols(locals):

FILE: lib/model/roi_crop/functions/crop_resize.py
  class RoICropFunction (line 8) | class RoICropFunction(Function):
    method forward (line 9) | def forward(self, input1, input2):
    method backward (line 27) | def backward(self, grad_output):

FILE: lib/model/roi_crop/functions/gridgen.py
  class AffineGridGenFunction (line 7) | class AffineGridGenFunction(Function):
    method __init__ (line 8) | def __init__(self, height, width,lr=1):
    method forward (line 21) | def forward(self, input1):
    method backward (line 37) | def backward(self, grad_output):

FILE: lib/model/roi_crop/functions/roi_crop.py
  class RoICropFunction (line 7) | class RoICropFunction(Function):
    method forward (line 8) | def forward(self, input1, input2):
    method backward (line 17) | def backward(self, grad_output):

FILE: lib/model/roi_crop/modules/gridgen.py
  class _AffineGridGen (line 12) | class _AffineGridGen(Module):
    method __init__ (line 13) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 19) | def forward(self, input):
  class CylinderGridGen (line 34) | class CylinderGridGen(Module):
    method __init__ (line 35) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 41) | def forward(self, input):
  class AffineGridGenV2 (line 49) | class AffineGridGenV2(Module):
    method __init__ (line 50) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 63) | def forward(self, input1):
  class CylinderGridGenV2 (line 78) | class CylinderGridGenV2(Module):
    method __init__ (line 79) | def __init__(self, height, width, lr = 1):
    method forward (line 88) | def forward(self, input):
  class DenseAffineGridGen (line 108) | class DenseAffineGridGen(Module):
    method __init__ (line 109) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 122) | def forward(self, input1):
  class DenseAffine3DGridGen (line 140) | class DenseAffine3DGridGen(Module):
    method __init__ (line 141) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 168) | def forward(self, input1):
  class DenseAffine3DGridGen_rotate (line 198) | class DenseAffine3DGridGen_rotate(Module):
    method __init__ (line 199) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 226) | def forward(self, input1, input2):
  class Depth3DGridGen (line 265) | class Depth3DGridGen(Module):
    method __init__ (line 266) | def __init__(self, height, width, lr = 1, aux_loss = False):
    method forward (line 293) | def forward(self, depth, trans0, trans1, rotate):
  class Depth3DGridGen_with_mask (line 338) | class Depth3DGridGen_with_mask(Module):
    method __init__ (line 339) | def __init__(self, height, width, lr = 1, aux_loss = False, ray_tracin...
    method forward (line 367) | def forward(self, depth, trans0, trans1, rotate):

FILE: lib/model/roi_crop/modules/roi_crop.py
  class _RoICrop (line 4) | class _RoICrop(Module):
    method __init__ (line 5) | def __init__(self, layout = 'BHWD'):
    method forward (line 7) | def forward(self, input1, input2):

FILE: lib/model/roi_crop/src/roi_crop.c
  function BilinearSamplerBHWD_updateOutput (line 7) | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloat...
  function BilinearSamplerBHWD_updateGradInput (line 104) | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFl...
  function BilinearSamplerBCHW_updateOutput (line 239) | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloat...
  function BilinearSamplerBCHW_updateGradInput (line 342) | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFl...

FILE: lib/model/roi_crop/src/roi_crop_cuda.c
  function BilinearSamplerBHWD_updateOutput_cuda (line 15) | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THC...
  function BilinearSamplerBHWD_updateGradInput_cuda (line 54) | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, ...

FILE: lib/model/roi_pooling/_ext/roi_pooling/__init__.py
  function _import_symbols (line 6) | def _import_symbols(locals):

FILE: lib/model/roi_pooling/functions/roi_pool.py
  class RoIPoolFunction (line 6) | class RoIPoolFunction(Function):
    method __init__ (line 7) | def __init__(ctx, pooled_height, pooled_width, spatial_scale):
    method forward (line 13) | def forward(ctx, features, rois):
    method backward (line 30) | def backward(ctx, grad_output):

FILE: lib/model/roi_pooling/modules/roi_pool.py
  class _RoIPooling (line 5) | class _RoIPooling(Module):
    method __init__ (line 6) | def __init__(self, pooled_height, pooled_width, spatial_scale):
    method forward (line 13) | def forward(self, features, rois):

FILE: lib/model/roi_pooling/src/roi_pooling.c
  function roi_pooling_forward (line 4) | int roi_pooling_forward(int pooled_height, int pooled_width, float spati...

FILE: lib/model/roi_pooling/src/roi_pooling_cuda.c
  function roi_pooling_forward_cuda (line 7) | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float ...
  function roi_pooling_backward_cuda (line 49) | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float...

FILE: lib/model/rpn/anchor_target_layer.py
  class _AnchorTargetLayer (line 31) | class _AnchorTargetLayer(nn.Module):
    method __init__ (line 36) | def __init__(self, feat_stride, scales, ratios):
    method forward (line 48) | def forward(self, input):
    method backward (line 195) | def backward(self, top, propagate_down, bottom):
    method reshape (line 199) | def reshape(self, bottom, top):
  function _unmap (line 203) | def _unmap(data, count, inds, batch_size, fill=0):
  function _compute_targets_batch (line 216) | def _compute_targets_batch(ex_rois, gt_rois):

FILE: lib/model/rpn/bbox_transform.py
  function bbox_transform (line 15) | def bbox_transform(ex_rois, gt_rois):
  function bbox_transform_batch (line 36) | def bbox_transform_batch(ex_rois, gt_rois):
  function bbox_transform_inv (line 77) | def bbox_transform_inv(boxes, deltas, batch_size):
  function clip_boxes_batch (line 105) | def clip_boxes_batch(boxes, im_shape, batch_size):
  function clip_boxes (line 125) | def clip_boxes(boxes, im_shape, batch_size):
  function bbox_overlaps (line 136) | def bbox_overlaps(anchors, gt_boxes):
  function bbox_overlaps_batch (line 168) | def bbox_overlaps_batch(anchors, gt_boxes):

FILE: lib/model/rpn/generate_anchors.py
  function generate_anchors (line 45) | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
  function _whctrs (line 58) | def _whctrs(anchor):
  function _mkanchors (line 69) | def _mkanchors(ws, hs, x_ctr, y_ctr):
  function _ratio_enum (line 83) | def _ratio_enum(anchor, ratios):
  function _scale_enum (line 96) | def _scale_enum(anchor, scales):

FILE: lib/model/rpn/proposal_layer.py
  class _ProposalLayer (line 26) | class _ProposalLayer(nn.Module):
    method __init__ (line 32) | def __init__(self, feat_stride, scales, ratios):
    method forward (line 49) | def forward(self, input):
    method backward (line 163) | def backward(self, top, propagate_down, bottom):
    method reshape (line 167) | def reshape(self, bottom, top):
    method _filter_boxes (line 171) | def _filter_boxes(self, boxes, min_size):

FILE: lib/model/rpn/proposal_target_layer_cascade.py
  class _ProposalTargetLayer (line 20) | class _ProposalTargetLayer(nn.Module):
    method __init__ (line 26) | def __init__(self, nclasses):
    method forward (line 33) | def forward(self, all_rois, gt_boxes, num_boxes):
    method backward (line 58) | def backward(self, top, propagate_down, bottom):
    method reshape (line 62) | def reshape(self, bottom, top):
    method _get_bbox_regression_labels_pytorch (line 66) | def _get_bbox_regression_labels_pytorch(self, bbox_target_data, labels...
    method _compute_targets_pytorch (line 96) | def _compute_targets_pytorch(self, ex_rois, gt_rois):
    method _sample_rois_pytorch (line 116) | def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image, ...

FILE: lib/model/rpn/rpn.py
  class _RPN (line 17) | class _RPN(nn.Module):
    method __init__ (line 19) | def __init__(self, din):
    method reshape (line 48) | def reshape(x, d):
    method forward (line 58) | def forward(self, base_feat, im_info, gt_boxes, num_boxes):

FILE: lib/model/utils/blob.py
  function im_list_to_blob (line 20) | def im_list_to_blob(ims):
  function prep_im_for_blob (line 35) | def prep_im_for_blob(im, pixel_means, target_size, max_size):

FILE: lib/model/utils/config.py
  function get_output_dir (line 305) | def get_output_dir(imdb, weights_filename):
  function get_output_tb_dir (line 321) | def get_output_tb_dir(imdb, weights_filename):
  function _merge_a_into_b (line 337) | def _merge_a_into_b(a, b):
  function cfg_from_file (line 370) | def cfg_from_file(filename):
  function cfg_from_list (line 379) | def cfg_from_list(cfg_list):

FILE: lib/model/utils/logger.py
  class Logger (line 11) | class Logger(object):
    method __init__ (line 13) | def __init__(self, log_dir):
    method scalar_summary (line 17) | def scalar_summary(self, tag, value, step):
    method image_summary (line 22) | def image_summary(self, tag, images, step):
    method histo_summary (line 45) | def histo_summary(self, tag, values, step, bins=1000):

FILE: lib/model/utils/net_utils.py
  function save_net (line 13) | def save_net(fname, net):
  function load_net (line 19) | def load_net(fname, net):
  function weights_normal_init (line 26) | def weights_normal_init(model, dev=0.01):
  function clip_gradient (line 38) | def clip_gradient(model, clip_norm):
  function vis_detections (line 51) | def vis_detections(im, class_name, dets, thresh=0.8):
  function adjust_learning_rate (line 63) | def adjust_learning_rate(optimizer, decay=0.1):
  function save_checkpoint (line 69) | def save_checkpoint(state, filename):
  function _smooth_l1_loss (line 72) | def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_o...
  function _crop_pool_layer (line 88) | def _crop_pool_layer(bottom, rois, max_pool=True):
  function _affine_grid_gen (line 142) | def _affine_grid_gen(rois, input_size, grid_size):
  function _affine_theta (line 166) | def _affine_theta(rois, input_size):
  function compare_grid_sample (line 197) | def compare_grid_sample():

FILE: lib/pycocotools/coco.py
  class COCO (line 67) | class COCO:
    method __init__ (line 68) | def __init__(self, annotation_file=None):
    method createIndex (line 90) | def createIndex(self):
    method info (line 128) | def info(self):
    method getAnnIds (line 136) | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
    method getCatIds (line 165) | def getCatIds(self, catNms=[], supNms=[], catIds=[]):
    method getImgIds (line 187) | def getImgIds(self, imgIds=[], catIds=[]):
    method loadAnns (line 208) | def loadAnns(self, ids=[]):
    method loadCats (line 219) | def loadCats(self, ids=[]):
    method loadImgs (line 230) | def loadImgs(self, ids=[]):
    method showAnns (line 241) | def showAnns(self, anns):
    method loadRes (line 287) | def loadRes(self, resFile):
    method download (line 335) | def download( self, tarDir = None, imgIds = [] ):

FILE: lib/pycocotools/cocoeval.py
  class COCOeval (line 18) | class COCOeval:
    method __init__ (line 67) | def __init__(self, cocoGt=None, cocoDt=None):
    method _prepare (line 90) | def _prepare(self):
    method evaluate (line 137) | def evaluate(self):
    method computeIoU (line 171) | def computeIoU(self, imgId, catId):
    method evaluateImg (line 197) | def evaluateImg(self, imgId, catId, aRng, maxDet):
    method accumulate (line 282) | def accumulate(self, p = None):
    method summarize (line 384) | def summarize(self):
    method __str__ (line 436) | def __str__(self):
  class Params (line 439) | class Params:
    method __init__ (line 443) | def __init__(self):

FILE: lib/pycocotools/maskApi.c
  function uint (line 11) | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
  function uint (line 12) | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
  function rleInit (line 14) | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
  function rleFree (line 19) | void rleFree( RLE *R ) {
  function rlesInit (line 23) | void rlesInit( RLE **R, siz n ) {
  function rlesFree (line 28) | void rlesFree( RLE **R, siz n ) {
  function rleEncode (line 32) | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
  function rleDecode (line 43) | void rleDecode( const RLE *R, byte *M, siz n ) {
  function rleMerge (line 49) | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ) {
  function rleArea (line 72) | void rleArea( const RLE *R, siz n, uint *a ) {
  function rleIou (line 77) | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  function bbIou (line 98) | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
  function rleToBbox (line 111) | void rleToBbox( const RLE *R, BB bb, siz n ) {
  function rleFrBbox (line 126) | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
  function uintCompare (line 135) | int uintCompare(const void *a, const void *b) {
  function rleFrPoly (line 139) | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
  function rleFrString (line 195) | void rleFrString( RLE *R, char *s, siz h, siz w ) {

FILE: lib/pycocotools/maskApi.h
  type uint (line 10) | typedef unsigned int uint;
  type siz (line 11) | typedef unsigned long siz;
  type byte (line 12) | typedef unsigned char byte;
  type RLE (line 14) | typedef struct { siz h, w, m; uint *cnts; } RLE;

FILE: lib/roi_data_layer/minibatch.py
  function get_minibatch (line 19) | def get_minibatch(roidb, num_classes):
  function _get_image_blob (line 56) | def _get_image_blob(roidb, scale_inds):

FILE: lib/roi_data_layer/roibatchLoader.py
  class roibatchLoader (line 22) | class roibatchLoader(data.Dataset):
    method __init__ (line 23) | def __init__(self, roidb, ratio_list, ratio_index, batch_size, num_cla...
    method __getitem__ (line 57) | def __getitem__(self, index):
    method __len__ (line 215) | def __len__(self):

FILE: lib/roi_data_layer/roidb.py
  function prepare_roidb (line 16) | def prepare_roidb(imdb):
  function rank_roidb_ratio (line 62) | def rank_roidb_ratio(roidb):
  function filter_roidb (line 88) | def filter_roidb(roidb):
  function combined_roidb (line 101) | def combined_roidb(imdb_names, training=True):

FILE: lib/setup.py
  function find_in_path (line 17) | def find_in_path(name, path):
  function customize_compiler_for_nvcc (line 69) | def customize_compiler_for_nvcc(self):
  class custom_build_ext (line 109) | class custom_build_ext(build_ext):
    method build_extensions (line 110) | def build_extensions(self):

FILE: test_net.py
  function parse_args (line 43) | def parse_args():

FILE: trainval_net.py
  function parse_args (line 36) | def parse_args():
  class sampler (line 123) | class sampler(Sampler):
    method __init__ (line 124) | def __init__(self, train_size, batch_size):
    method __iter__ (line 134) | def __iter__(self):
    method __len__ (line 145) | def __len__(self):
Condensed preview — 122 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (476K chars).
[
  {
    "path": ".gitignore",
    "chars": 2889,
    "preview": "data/*\n\n# READ THIS BEFORE YOU REFACTOR ME\n#\n# setup.py uses the list of patterns in this file to decide\n# what to delet"
  },
  {
    "path": "LICENSE",
    "chars": 1069,
    "preview": "MIT License\n\nCopyright (c) 2017 Jianwei Yang\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
  },
  {
    "path": "README.md",
    "chars": 14901,
    "preview": "# A *Faster* Pytorch Implementation of Faster R-CNN\n\n## Write at the beginning\n\n[05/29/2020] This repo was initaited abo"
  },
  {
    "path": "_init_paths.py",
    "chars": 312,
    "preview": "import os.path as osp\nimport sys\n\ndef add_path(path):\n    if path not in sys.path:\n        sys.path.insert(0, path)\n\nthi"
  },
  {
    "path": "cfgs/res101.yml",
    "chars": 363,
    "preview": "EXP_DIR: res101\nTRAIN:\n  HAS_RPN: True\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n  RPN_POSITIVE_OVERLAP: 0.7\n  RPN_BATC"
  },
  {
    "path": "cfgs/res101_ls.yml",
    "chars": 439,
    "preview": "EXP_DIR: res101\nTRAIN:\n  HAS_RPN: True\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n  RPN_POSITIVE_OVERLAP: 0.7\n  RPN_BATC"
  },
  {
    "path": "cfgs/res50.yml",
    "chars": 347,
    "preview": "EXP_DIR: res50\nTRAIN:\n  HAS_RPN: True\n  # IMS_PER_BATCH: 1\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n  RPN_POSITIVE_OVE"
  },
  {
    "path": "cfgs/vgg16.yml",
    "chars": 287,
    "preview": "EXP_DIR: vgg16\nTRAIN:\n  HAS_RPN: True\n  BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True\n  RPN_POSITIVE_OVERLAP: 0.7\n  RPN_BATCH"
  },
  {
    "path": "demo.py",
    "chars": 13700,
    "preview": "# --------------------------------------------------------\n# Tensorflow Faster R-CNN\n# Licensed under The MIT License [s"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m",
    "chars": 231,
    "preview": "function VOCopts = get_voc_opts(path)\n\ntmp = pwd;\ncd(path);\ntry\n  addpath('VOCcode');\n  VOCinit;\ncatch\n  rmpath('VOCcode"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m",
    "chars": 1332,
    "preview": "function res = voc_eval(path, comp_id, test_set, output_dir)\n\nVOCopts = get_voc_opts(path);\nVOCopts.testset = test_set;\n"
  },
  {
    "path": "lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m",
    "chars": 258,
    "preview": "function ap = xVOCap(rec,prec)\r\n% From the PASCAL VOC 2011 devkit\r\n\r\nmrec=[0 ; rec ; 1];\r\nmpre=[0 ; prec ; 0];\r\nfor i=nu"
  },
  {
    "path": "lib/datasets/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/coco.py",
    "chars": 12051,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/datasets/ds_utils.py",
    "chars": 1402,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/datasets/factory.py",
    "chars": 2670,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/imagenet.py",
    "chars": 8415,
    "preview": "from __future__ import print_function\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyrigh"
  },
  {
    "path": "lib/datasets/imdb.py",
    "chars": 9109,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/pascal_voc.py",
    "chars": 14942,
    "preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n# -----------------------------------------"
  },
  {
    "path": "lib/datasets/pascal_voc_rbg.py",
    "chars": 11236,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/datasets/tools/mcg_munge.py",
    "chars": 1490,
    "preview": "from __future__ import print_function\nimport os\nimport sys\n\n\"\"\"Hacky tool to convert file system layout of MCG boxes dow"
  },
  {
    "path": "lib/datasets/vg.py",
    "chars": 16780,
    "preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n# -----------------------------------------"
  },
  {
    "path": "lib/datasets/vg_eval.py",
    "chars": 4176,
    "preview": "from __future__ import absolute_import\n# --------------------------------------------------------\n# Fast/er R-CNN\n# Lice"
  },
  {
    "path": "lib/datasets/voc_eval.py",
    "chars": 6659,
    "preview": "# --------------------------------------------------------\n# Fast/er R-CNN\n# Licensed under The MIT License [see LICENSE"
  },
  {
    "path": "lib/make.sh",
    "chars": 1468,
    "preview": "#!/usr/bin/env bash\n\n# CUDA_PATH=/usr/local/cuda/\n\nexport CUDA_PATH=/usr/local/cuda/\n#You may also want to ad the follow"
  },
  {
    "path": "lib/model/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/faster_rcnn/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/faster_rcnn/faster_rcnn.py",
    "chars": 5785,
    "preview": "import random\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimp"
  },
  {
    "path": "lib/model/faster_rcnn/resnet.py",
    "chars": 9060,
    "preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom model"
  },
  {
    "path": "lib/model/faster_rcnn/vgg16.py",
    "chars": 2018,
    "preview": "# --------------------------------------------------------\n# Tensorflow Faster R-CNN\n# Licensed under The MIT License [s"
  },
  {
    "path": "lib/model/nms/.gitignore",
    "chars": 15,
    "preview": "*.c\n*.cpp\n*.so\n"
  },
  {
    "path": "lib/model/nms/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/nms/_ext/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/nms/_ext/nms/__init__.py",
    "chars": 377,
    "preview": "\nfrom torch.utils.ffi import _wrap_function\nfrom ._nms import lib as _lib, ffi as _ffi\n\n__all__ = []\ndef _import_symbols"
  },
  {
    "path": "lib/model/nms/build.py",
    "chars": 850,
    "preview": "from __future__ import print_function\nimport os\nimport torch\nfrom torch.utils.ffi import create_extension\n\n#this_file = "
  },
  {
    "path": "lib/model/nms/make.sh",
    "chars": 209,
    "preview": "#!/usr/bin/env bash\n\n# CUDA_PATH=/usr/local/cuda/\n\ncd src\necho \"Compiling stnm kernels by nvcc...\"\nnvcc -c -o nms_cuda_k"
  },
  {
    "path": "lib/model/nms/nms_cpu.py",
    "chars": 862,
    "preview": "from __future__ import absolute_import\n\nimport numpy as np\nimport torch\n\ndef nms_cpu(dets, thresh):\n    dets = dets.nump"
  },
  {
    "path": "lib/model/nms/nms_gpu.py",
    "chars": 299,
    "preview": "from __future__ import absolute_import\nimport torch\nimport numpy as np\nfrom ._ext import nms\nimport pdb\n\ndef nms_gpu(det"
  },
  {
    "path": "lib/model/nms/nms_kernel.cu",
    "chars": 5064,
    "preview": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n//"
  },
  {
    "path": "lib/model/nms/nms_wrapper.py",
    "chars": 757,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/model/nms/src/nms_cuda.h",
    "chars": 272,
    "preview": "// int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,\n//             THCudaTensor *boxes_host, THCudaTensor *nm"
  },
  {
    "path": "lib/model/nms/src/nms_cuda_kernel.cu",
    "chars": 5623,
    "preview": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n//"
  },
  {
    "path": "lib/model/nms/src/nms_cuda_kernel.h",
    "chars": 206,
    "preview": "#ifdef __cplusplus\nextern \"C\" {\n#endif\n\nvoid nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_"
  },
  {
    "path": "lib/model/roi_align/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_align/_ext/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_align/_ext/roi_align/__init__.py",
    "chars": 383,
    "preview": "\nfrom torch.utils.ffi import _wrap_function\nfrom ._roi_align import lib as _lib, ffi as _ffi\n\n__all__ = []\ndef _import_s"
  },
  {
    "path": "lib/model/roi_align/build.py",
    "chars": 902,
    "preview": "from __future__ import print_function\nimport os\nimport torch\nfrom torch.utils.ffi import create_extension\n\nsources = ['s"
  },
  {
    "path": "lib/model/roi_align/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_align/functions/roi_align.py",
    "chars": 2006,
    "preview": "import torch\nfrom torch.autograd import Function\nfrom .._ext import roi_align\n\n\n# TODO use save_for_backward instead\ncla"
  },
  {
    "path": "lib/model/roi_align/make.sh",
    "chars": 211,
    "preview": "#!/usr/bin/env bash\n\nCUDA_PATH=/usr/local/cuda/\n\ncd src\necho \"Compiling my_lib kernels by nvcc...\"\nnvcc -c -o roi_align_"
  },
  {
    "path": "lib/model/roi_align/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_align/modules/roi_align.py",
    "chars": 1672,
    "preview": "from torch.nn.modules.module import Module\nfrom torch.nn.functional import avg_pool2d, max_pool2d\nfrom ..functions.roi_a"
  },
  {
    "path": "lib/model/roi_align/src/roi_align.c",
    "chars": 7571,
    "preview": "#include <TH/TH.h>\n#include <math.h>\n#include <omp.h>\n\n\nvoid ROIAlignForwardCpu(const float* bottom_data, const float sp"
  },
  {
    "path": "lib/model/roi_align/src/roi_align.h",
    "chars": 361,
    "preview": "int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,\n                      THFloatTensor * "
  },
  {
    "path": "lib/model/roi_align/src/roi_align_cuda.c",
    "chars": 2425,
    "preview": "#include <THC/THC.h>\n#include <math.h>\n#include \"roi_align_kernel.h\"\n\nextern THCState *state;\n\nint roi_align_forward_cud"
  },
  {
    "path": "lib/model/roi_align/src/roi_align_cuda.h",
    "chars": 369,
    "preview": "int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,\n                        THCudaTen"
  },
  {
    "path": "lib/model/roi_align/src/roi_align_kernel.cu",
    "chars": 7732,
    "preview": "#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <stdio.h>\n#include <math.h>\n#include <float.h>\n#include \"roi_align_kern"
  },
  {
    "path": "lib/model/roi_align/src/roi_align_kernel.h",
    "chars": 1263,
    "preview": "#ifndef _ROI_ALIGN_KERNEL\n#define _ROI_ALIGN_KERNEL\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n__global__ void ROIAlignFor"
  },
  {
    "path": "lib/model/roi_crop/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_crop/_ext/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_crop/_ext/crop_resize/__init__.py",
    "chars": 310,
    "preview": "\nfrom torch.utils.ffi import _wrap_function\nfrom ._crop_resize import lib as _lib, ffi as _ffi\n\n__all__ = []\ndef _import"
  },
  {
    "path": "lib/model/roi_crop/_ext/roi_crop/__init__.py",
    "chars": 382,
    "preview": "\nfrom torch.utils.ffi import _wrap_function\nfrom ._roi_crop import lib as _lib, ffi as _ffi\n\n__all__ = []\ndef _import_sy"
  },
  {
    "path": "lib/model/roi_crop/build.py",
    "chars": 881,
    "preview": "from __future__ import print_function\nimport os\nimport torch\nfrom torch.utils.ffi import create_extension\n\n#this_file = "
  },
  {
    "path": "lib/model/roi_crop/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_crop/functions/crop_resize.py",
    "chars": 1545,
    "preview": "# functions/add.py\nimport torch\nfrom torch.autograd import Function\nfrom .._ext import roi_crop\nfrom cffi import FFI\nffi"
  },
  {
    "path": "lib/model/roi_crop/functions/gridgen.py",
    "chars": 2233,
    "preview": "# functions/add.py\nimport torch\nfrom torch.autograd import Function\nimport numpy as np\n\n\nclass AffineGridGenFunction(Fun"
  },
  {
    "path": "lib/model/roi_crop/functions/roi_crop.py",
    "chars": 1002,
    "preview": "# functions/add.py\nimport torch\nfrom torch.autograd import Function\nfrom .._ext import roi_crop\nimport pdb\n\nclass RoICro"
  },
  {
    "path": "lib/model/roi_crop/make.sh",
    "chars": 219,
    "preview": "#!/usr/bin/env bash\n\nCUDA_PATH=/usr/local/cuda/\n\ncd src\necho \"Compiling my_lib kernels by nvcc...\"\nnvcc -c -o roi_crop_c"
  },
  {
    "path": "lib/model/roi_crop/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_crop/modules/gridgen.py",
    "chars": 16532,
    "preview": "from torch.nn.modules.module import Module\nimport torch\nfrom torch.autograd import Variable\nimport numpy as np\nfrom ..fu"
  },
  {
    "path": "lib/model/roi_crop/modules/roi_crop.py",
    "chars": 287,
    "preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_crop import RoICropFunction\n\nclass _RoICrop(Module):\n   "
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop.c",
    "chars": 23113,
    "preview": "#include <TH/TH.h>\n#include <stdbool.h>\n#include <stdio.h>\n\n#define real float\n\nint BilinearSamplerBHWD_updateOutput(THF"
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop.h",
    "chars": 659,
    "preview": "int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);\n\nint Bili"
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop_cuda.c",
    "chars": 4711,
    "preview": "#include <THC/THC.h>\n#include <stdbool.h>\n#include <stdio.h>\n#include \"roi_crop_cuda_kernel.h\"\n\n#define real float\n\n// t"
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop_cuda.h",
    "chars": 481,
    "preview": "// Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)\n// we assume BHWD format in inputImages\n// we a"
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop_cuda_kernel.cu",
    "chars": 17168,
    "preview": "#include <stdbool.h>\n#include <stdio.h>\n#include \"roi_crop_cuda_kernel.h\"\n\n#define real float\n\n// Bilinear sampling is d"
  },
  {
    "path": "lib/model/roi_crop/src/roi_crop_cuda_kernel.h",
    "chars": 2816,
    "preview": "#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n\nint BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,\n   "
  },
  {
    "path": "lib/model/roi_pooling/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_pooling/_ext/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_pooling/_ext/roi_pooling/__init__.py",
    "chars": 385,
    "preview": "\nfrom torch.utils.ffi import _wrap_function\nfrom ._roi_pooling import lib as _lib, ffi as _ffi\n\n__all__ = []\ndef _import"
  },
  {
    "path": "lib/model/roi_pooling/build.py",
    "chars": 875,
    "preview": "from __future__ import print_function\nimport os\nimport torch\nfrom torch.utils.ffi import create_extension\n\n\nsources = ['"
  },
  {
    "path": "lib/model/roi_pooling/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_pooling/functions/roi_pool.py",
    "chars": 1773,
    "preview": "import torch\nfrom torch.autograd import Function\nfrom .._ext import roi_pooling\nimport pdb\n\nclass RoIPoolFunction(Functi"
  },
  {
    "path": "lib/model/roi_pooling/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/roi_pooling/modules/roi_pool.py",
    "chars": 524,
    "preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_pool import RoIPoolFunction\n\n\nclass _RoIPooling(Module):"
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling.c",
    "chars": 4104,
    "preview": "#include <TH/TH.h>\n#include <math.h>\n\nint roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,\n"
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling.h",
    "chars": 178,
    "preview": "int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,\n                        THFloatTensor "
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling_cuda.c",
    "chars": 2836,
    "preview": "#include <THC/THC.h>\n#include <math.h>\n#include \"roi_pooling_kernel.h\"\n\nextern THCState *state;\n\nint roi_pooling_forward"
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling_cuda.h",
    "chars": 420,
    "preview": "int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,\n                        THCudaTen"
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling_kernel.cu",
    "chars": 9573,
    "preview": "// #ifdef __cplusplus\n// extern \"C\" {\n// #endif\n\n#include <stdio.h>\n#include <vector>\n#include <math.h>\n#include <float."
  },
  {
    "path": "lib/model/roi_pooling/src/roi_pooling_kernel.h",
    "chars": 767,
    "preview": "#ifndef _ROI_POOLING_KERNEL\n#define _ROI_POOLING_KERNEL\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\nint ROIPoolForwardLauch"
  },
  {
    "path": "lib/model/rpn/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/rpn/anchor_target_layer.py",
    "chars": 9001,
    "preview": "from __future__ import absolute_import\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyr"
  },
  {
    "path": "lib/model/rpn/bbox_transform.py",
    "chars": 9351,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/model/rpn/generate_anchors.py",
    "chars": 3248,
    "preview": "from __future__ import print_function\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyri"
  },
  {
    "path": "lib/model/rpn/proposal_layer.py",
    "chars": 7032,
    "preview": "from __future__ import absolute_import\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyr"
  },
  {
    "path": "lib/model/rpn/proposal_target_layer_cascade.py",
    "chars": 9322,
    "preview": "from __future__ import absolute_import\n# --------------------------------------------------------\n# Faster R-CNN\n# Copyr"
  },
  {
    "path": "lib/model/rpn/rpn.py",
    "chars": 4291,
    "preview": "from __future__ import absolute_import\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.aut"
  },
  {
    "path": "lib/model/utils/.gitignore",
    "chars": 15,
    "preview": "*.c\n*.cpp\n*.so\n"
  },
  {
    "path": "lib/model/utils/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "lib/model/utils/bbox.pyx",
    "chars": 3431,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/model/utils/blob.py",
    "chars": 1642,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/model/utils/config.py",
    "chars": 11815,
    "preview": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\n"
  },
  {
    "path": "lib/model/utils/logger.py",
    "chars": 2467,
    "preview": "# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514\nimport tensorflow as tf\nimport nu"
  },
  {
    "path": "lib/model/utils/net_utils.py",
    "chars": 7492,
    "preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd import Variable\nimport numpy as n"
  },
  {
    "path": "lib/pycocotools/UPSTREAM_REV",
    "chars": 80,
    "preview": "https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574\n"
  },
  {
    "path": "lib/pycocotools/__init__.py",
    "chars": 21,
    "preview": "__author__ = 'tylin'\n"
  },
  {
    "path": "lib/pycocotools/_mask.pyx",
    "chars": 10709,
    "preview": "# distutils: language = c\n# distutils: sources = ../MatlabAPI/private/maskApi.c\n\n#**************************************"
  },
  {
    "path": "lib/pycocotools/coco.py",
    "chars": 15064,
    "preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n__author__ = 'tylin'\n__version__ = '1.0.1'\n"
  },
  {
    "path": "lib/pycocotools/cocoeval.py",
    "chars": 19907,
    "preview": "from __future__ import print_function\nfrom __future__ import absolute_import\n__author__ = 'tsungyi'\n\nimport numpy as np\n"
  },
  {
    "path": "lib/pycocotools/license.txt",
    "chars": 1533,
    "preview": "Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin\nAll rights reserved.\n\nRedistribution and use in source and binary form"
  },
  {
    "path": "lib/pycocotools/mask.py",
    "chars": 4044,
    "preview": "__author__ = 'tsungyi'\n\nfrom . import _mask\n\n# Interface for manipulating masks stored in RLE format.\n#\n# RLE is a simpl"
  },
  {
    "path": "lib/pycocotools/maskApi.c",
    "chars": 7704,
    "preview": "/**************************************************************************\n* Microsoft COCO Toolbox.      version 2.0\n*"
  },
  {
    "path": "lib/pycocotools/maskApi.h",
    "chars": 1928,
    "preview": "/**************************************************************************\n* Microsoft COCO Toolbox.      version 2.0\n*"
  },
  {
    "path": "lib/roi_data_layer/__init__.py",
    "chars": 248,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/roi_data_layer/minibatch.py",
    "chars": 2923,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "lib/roi_data_layer/roibatchLoader.py",
    "chars": 8878,
    "preview": "\n\"\"\"The data layer used during training to train a Fast R-CNN network.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom "
  },
  {
    "path": "lib/roi_data_layer/roidb.py",
    "chars": 4522,
    "preview": "\"\"\"Transform a roidb into a trainable roidb by adding a bunch of metadata.\"\"\"\nfrom __future__ import absolute_import\nfro"
  },
  {
    "path": "lib/setup.py",
    "chars": 4802,
    "preview": "from __future__ import print_function\n# --------------------------------------------------------\n# Fast R-CNN\n# Copyrigh"
  },
  {
    "path": "requirements.txt",
    "chars": 80,
    "preview": "cython\ncffi\nopencv-python\nscipy\nmsgpack\neasydict\nmatplotlib\npyyaml\ntensorboardX\n"
  },
  {
    "path": "test_net.py",
    "chars": 12236,
    "preview": "# --------------------------------------------------------\n# Tensorflow Faster R-CNN\n# Licensed under The MIT License [s"
  },
  {
    "path": "trainval_net.py",
    "chars": 14559,
    "preview": "# --------------------------------------------------------\n# Pytorch multi-GPU Faster R-CNN\n# Licensed under The MIT Lic"
  }
]

About this extraction

This page contains the full source code of the jwyang/faster-rcnn.pytorch GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 122 files (444.5 KB), approximately 126.7k tokens, and a symbol index with 355 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!