Full Code of chanyn/Reasoning-RCNN for AI

master 9bd7c7ab0ffd cached
179 files
138.1 MB
175.1k tokens
544 symbols
1 requests
Download .txt
Showing preview only (709K chars total). Download the full file or copy to clipboard to get everything.
Repository: chanyn/Reasoning-RCNN
Branch: master
Commit: 9bd7c7ab0ffd
Files: 179
Total size: 138.1 MB

Directory structure:
gitextract_155bakyx/

├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs/
│   ├── ade_faster_rcnn_r101_fpn_1x.py
│   ├── coco_faster_rcnn_r101_fpn_1x.py
│   ├── coco_sgrb_fpn_ms.py
│   ├── hkrm/
│   │   ├── ade_faster_rcnn_r50_fpn_1x.py
│   │   ├── coco_faster_rcnn_r101_fpn_1x.py
│   │   └── vg_faster_rcnn_r101_fpn_1x.py
│   ├── pascal_voc/
│   │   ├── faster_rcnn_r50_fpn_1x_voc0712.py
│   │   ├── ssd300_voc.py
│   │   └── ssd512_voc.py
│   ├── rrcnn/
│   │   ├── ade_reasoning_rcnn_r101_fpn_1x.py
│   │   ├── coco_reasoning_rcnn_r101_fpn_1x.py
│   │   └── vg_reasoning_rcnn_r101_fpn_1x.py
│   ├── vg_faster_rcnn_r101_fpn_1x.py
│   └── vgbig_faster_rcnn_r101_fpn_1x.py
├── mmdet/
│   ├── __init__.py
│   ├── apis/
│   │   ├── __init__.py
│   │   ├── env.py
│   │   ├── inference.py
│   │   └── train.py
│   ├── core/
│   │   ├── __init__.py
│   │   ├── anchor/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   └── anchor_target.py
│   │   ├── bbox/
│   │   │   ├── __init__.py
│   │   │   ├── assign_sampling.py
│   │   │   ├── assigners/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── assign_result.py
│   │   │   │   ├── base_assigner.py
│   │   │   │   └── max_iou_assigner.py
│   │   │   ├── bbox_target.py
│   │   │   ├── geometry.py
│   │   │   ├── samplers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   ├── combined_sampler.py
│   │   │   │   ├── instance_balanced_pos_sampler.py
│   │   │   │   ├── iou_balanced_neg_sampler.py
│   │   │   │   ├── ohem_sampler.py
│   │   │   │   ├── pseudo_sampler.py
│   │   │   │   ├── random_sampler.py
│   │   │   │   ├── random_sampler_fixnum.py
│   │   │   │   └── sampling_result.py
│   │   │   └── transforms.py
│   │   ├── evaluation/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_overlaps.py
│   │   │   ├── class_names.py
│   │   │   ├── coco_utils.py
│   │   │   ├── eval_hooks.py
│   │   │   ├── mean_ap.py
│   │   │   └── recall.py
│   │   ├── loss/
│   │   │   ├── __init__.py
│   │   │   └── losses.py
│   │   ├── mask/
│   │   │   ├── __init__.py
│   │   │   ├── mask_target.py
│   │   │   └── utils.py
│   │   ├── post_processing/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_nms.py
│   │   │   └── merge_augs.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── dist_utils.py
│   │       └── misc.py
│   ├── datasets/
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── concat_dataset.py
│   │   ├── custom.py
│   │   ├── extra_aug.py
│   │   ├── loader/
│   │   │   ├── __init__.py
│   │   │   ├── build_loader.py
│   │   │   └── sampler.py
│   │   ├── repeat_dataset.py
│   │   ├── transforms.py
│   │   ├── utils.py
│   │   ├── voc.py
│   │   └── xml_style.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── anchor_heads/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_head.py
│   │   │   ├── retina_head.py
│   │   │   ├── rpn_head.py
│   │   │   └── ssd_head.py
│   │   ├── backbones/
│   │   │   ├── __init__.py
│   │   │   ├── resnet.py
│   │   │   ├── resnext.py
│   │   │   └── ssd_vgg.py
│   │   ├── bbox_heads/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_head.py
│   │   │   ├── convfc_bbox_head.py
│   │   │   ├── convfc_bbox_head_enhanced.py
│   │   │   └── graph_bbox_head.py
│   │   ├── builder.py
│   │   ├── detectors/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── cascade_rcnn.py
│   │   │   ├── fast_rcnn.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── hkrm_rcnn.py
│   │   │   ├── mask_rcnn.py
│   │   │   ├── reasoning_rcnn.py
│   │   │   ├── retinanet.py
│   │   │   ├── rpn.py
│   │   │   ├── sgrn.py
│   │   │   ├── single_stage.py
│   │   │   ├── test_mixins.py
│   │   │   └── two_stage.py
│   │   ├── mask_heads/
│   │   │   ├── __init__.py
│   │   │   └── fcn_mask_head.py
│   │   ├── necks/
│   │   │   ├── __init__.py
│   │   │   └── fpn.py
│   │   ├── registry.py
│   │   ├── roi_extractors/
│   │   │   ├── __init__.py
│   │   │   └── single_level.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── conv_module.py
│   │       ├── norm.py
│   │       └── weight_init.py
│   └── ops/
│       ├── __init__.py
│       ├── dcn/
│       │   ├── __init__.py
│       │   ├── functions/
│       │   │   ├── __init__.py
│       │   │   ├── deform_conv.py
│       │   │   └── deform_pool.py
│       │   ├── modules/
│       │   │   ├── __init__.py
│       │   │   ├── deform_conv.py
│       │   │   └── deform_pool.py
│       │   ├── setup.py
│       │   └── src/
│       │       ├── deform_conv_cuda.cpp
│       │       ├── deform_conv_cuda_kernel.cu
│       │       ├── deform_pool_cuda.cpp
│       │       └── deform_pool_cuda_kernel.cu
│       ├── nms/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── __init__.py
│       │   ├── cpu_nms.pyx
│       │   ├── cpu_soft_nms.pyx
│       │   ├── gpu_nms.hpp
│       │   ├── gpu_nms.pyx
│       │   ├── nms_kernel.cu
│       │   ├── nms_wrapper.py
│       │   └── setup.py
│       ├── roi_align/
│       │   ├── __init__.py
│       │   ├── functions/
│       │   │   ├── __init__.py
│       │   │   └── roi_align.py
│       │   ├── gradcheck.py
│       │   ├── modules/
│       │   │   ├── __init__.py
│       │   │   └── roi_align.py
│       │   ├── setup.py
│       │   └── src/
│       │       ├── roi_align_cuda.cpp
│       │       └── roi_align_kernel.cu
│       └── roi_pool/
│           ├── __init__.py
│           ├── functions/
│           │   ├── __init__.py
│           │   └── roi_pool.py
│           ├── gradcheck.py
│           ├── modules/
│           │   ├── __init__.py
│           │   └── roi_pool.py
│           ├── setup.py
│           └── src/
│               ├── roi_pool_cuda.cpp
│               └── roi_pool_kernel.cu
├── setup.py
└── tools/
    ├── coco_eval.py
    ├── convert_datasets/
    │   └── pascal_voc.py
    ├── dist_train.sh
    ├── graph/
    │   ├── new_COCO_graph_a.pkl
    │   ├── new_COCO_graph_r.pkl
    │   ├── new_ade_graph_a.pkl
    │   ├── new_ade_graph_r.pkl
    │   ├── new_vg_big_graph_a.pkl
    │   ├── new_vg_big_graph_r.pkl
    │   ├── new_vg_graph_a.pkl
    │   └── new_vg_graph_r.pkl
    ├── test.py
    ├── train.py
    ├── vis_subgraph.py
    └── voc_eval.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
tools/work_dirs/

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# cython generated cpp
mmdet/ops/nms/*.cpp
mmdet/version.py
data
.vscode
.idea


================================================
FILE: .travis.yml
================================================
dist: trusty
language: python

install:
  - pip install flake8

python:
  - "3.5"
  - "3.6"

script:
  - flake8

================================================
FILE: INSTALL.md
================================================
## Installation

### Requirements

- Linux (tested on Ubuntu 16.04 and CentOS 7.2)
- Python 3.4+
- PyTorch 0.4.1
- Cython
- [mmcv](https://github.com/open-mmlab/mmcv)

### Install mmdetection

a. Install PyTorch 0.4.1 and torchvision following the [official instructions](https://pytorch.org/).

b. Clone the mmdetection repository.

```shell
git clone https://github.com/open-mmlab/mmdetection.git
```

c. Compile cuda extensions.

```shell
cd mmdetection
pip install cython  # or "conda install cython" if you prefer conda
./compile.sh  # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments
```

d. Install mmdetection (other dependencies will be installed automatically).

```shell
python(3) setup.py install  # add --user if you want to install it locally
# or "pip install ."
```

Note: You need to run the last step each time you pull updates from github.
The git commit id will be written to the version number and also saved in trained models.

### Prepare COCO dataset.

It is recommended to symlink the dataset root to `$MMDETECTION/data`.

```
mmdetection
├── mmdet
├── tools
├── configs
├── data
│   ├── coco
│   │   ├── annotations
│   │   ├── train2017
│   │   ├── val2017
│   │   ├── test2017
│   ├── VOCdevkit
│   │   ├── VOC2007
│   │   ├── VOC2012

```

### Scripts
Just for reference, [Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
a script for setting up mmdetection with conda.


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MODEL_ZOO.md
================================================
# Benchmark and Model Zoo

## Environment

### Hardware

- 8 NVIDIA Tesla V100 GPUs
- Intel Xeon 4114 CPU @ 2.20GHz

### Software environment

- Python 3.6 / 3.7
- PyTorch 0.4.1
- CUDA 9.0.176
- CUDNN 7.0.4
- NCCL 2.1.15


## Common settings

- All baselines were trained using 8 GPU with a batch size of 16 (2 images per GPU).
- All models were trained on `coco_2017_train`, and tested on the `coco_2017_val`.
- We use distributed training and BN layer stats are fixed.
- We adopt the same training schedules as Detectron. 1x indicates 12 epochs and 2x indicates 24 epochs, which corresponds to slightly fewer iterations than Detectron and the difference can be ignored.
- All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo.
- We report the training GPU memory as the maximum value of `torch.cuda.max_memory_cached()`
for all 8 GPUs. Note that this value is usually less than what `nvidia-smi` shows, but
closer to the actual requirements.
- We report the inference time as the overall time including data loading,
network forwarding and post processing.
- The training memory and time of 2x schedule is simply copied from 1x.
It should be very close to the actual memory and time.


## Baselines

We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More models with different backbones will be added to the model zoo.

### RPN

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR1000 | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe   | 1x      | 4.5      | 0.379               | 14.4           | 58.2   | -        |
| R-50-FPN | pytorch | 1x      | 4.8      | 0.407               | 14.5           | 57.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth) |
| R-50-FPN | pytorch | 2x      | 4.8      | 0.407               | 14.5           | 57.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth) |
| R-101-FPN | caffe   | 1x      | 7.4      | 0.513               | 11.1           | 59.4   | -        |
| R-101-FPN | pytorch | 1x      | 8.0      | 0.552               | 11.1           | 58.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_1x_20181129-f50da4bd.pth) |
| R-101-FPN | pytorch | 2x      | 8.0      | 0.552               | 11.1           | 59.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_2x_20181129-e42c6c9a.pth) |
| X-101-32x4d-FPN | pytorch |1x | 9.9      | 0.691               | 8.3            | 59.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_1x_20181218-7e379d26.pth)
| X-101-32x4d-FPN | pytorch |2x | 9.9      | 0.691               | 8.3            | 59.9   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_2x_20181218-0510af40.pth)
| X-101-64x4d-FPN | pytorch |1x | 14.6     | 1.032               | 6.2            | 59.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_1x_20181218-c1a24f1f.pth)
| X-101-64x4d-FPN | pytorch |2x | 14.6     | 1.032               | 6.2            | 60.0   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_2x_20181218-c22bdd70.pth)

### Faster R-CNN

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe   | 1x      | 4.9      | 0.525               | 10.0           | 36.7   | -        |
| R-50-FPN | pytorch | 1x      | 5.1      | 0.554               | 9.9            | 36.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth) |
| R-50-FPN | pytorch | 2x      | 5.1      | 0.554               | 9.9            | 37.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth) |
| R-101-FPN | caffe   | 1x      | 7.4      | 0.663               | 8.4           | 38.8   | -        |
| R-101-FPN | pytorch | 1x      | 8.0      | 0.698               | 8.3           | 38.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth) |
| R-101-FPN | pytorch | 2x      | 8.0      | 0.698               | 8.3           | 39.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_2x_20181129-73e7ade7.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 9.9      | 0.842               | 7.0           | 40.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_1x_20181218-ad81c133.pth)
| X-101-32x4d-FPN | pytorch | 2x| 9.9      | 0.842               | 7.0           | 40.5    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_2x_20181218-0ed58946.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1     | 1.181               | 5.2           | 41.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_1x_20181218-c9c69c8f.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.1     | 1.181               | 5.2           | 40.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_2x_20181218-fe94f9b8.pth)

### Mask R-CNN

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe   | 1x      | 5.9      | 0.658               | 7.7            | 37.5   | 34.4    | -        |
| R-50-FPN | pytorch | 1x      | 5.8      | 0.690               | 7.7            | 37.3   | 34.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) |
| R-50-FPN | pytorch | 2x      | 5.8      | 0.690               | 7.7            | 38.6   | 35.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth) |
| R-101-FPN | caffe   | 1x      | 8.8      | 0.791               | 7.0            | 39.9   | 36.1    | -        |
| R-101-FPN | pytorch | 1x      | 9.1      | 0.825               | 6.7            | 39.4   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_1x_20181129-34ad1961.pth) |
| R-101-FPN | pytorch | 2x      | 9.1      | 0.825               | 6.7            | 40.4   | 36.6    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_2x_20181129-a254bdfc.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.9     | 0.972               | 5.8            | 41.2   | 37.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_1x_20181218-44e635cc.pth)
| X-101-32x4d-FPN | pytorch | 2x| 10.9     | 0.972               | 5.8            | 41.4   | 37.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_2x_20181218-f023dffa.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1     | 1.302               | 4.7            | 42.2   | 38.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_1x_20181218-cb159987.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.1     | 1.302               | 4.7            | 42.0   | 37.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_2x_20181218-ea936e44.pth)

### Fast R-CNN (with pre-computed proposals)

| Backbone | Style   | Type   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe   | Faster | 1x      | 3.5      | 0.348               | 14.6           | 36.6   | -       | -        |
| R-50-FPN | pytorch | Faster | 1x      | 4.0      | 0.375               | 14.5           | 35.8   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth) |
| R-50-FPN | pytorch | Faster | 2x      | 4.0      | 0.375               | 14.5           | 37.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth) |
| R-101-FPN| caffe   | Faster | 1x      | 7.1      | 0.484               | 11.9           | 38.4   | -       | -        |
| R-101-FPN| pytorch | Faster | 1x      | 7.6      | 0.540               | 11.8           | 38.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_1x_20181129-ffaa2eb0.pth) |
| R-101-FPN| pytorch | Faster | 2x      | 7.6      | 0.540               | 11.8           | 38.8   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_2x_20181129-9dba92ce.pth) |
| R-50-FPN | caffe   | Mask   | 1x      | 5.4      | 0.473               | 10.7           | 37.3   | 34.5    | -        |
| R-50-FPN | pytorch | Mask   | 1x      | 5.3      | 0.504               | 10.6           | 36.8   | 34.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth) |
| R-50-FPN | pytorch | Mask   | 2x      | 5.3      | 0.504               | 10.6           | 37.9   | 34.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) |
| R-101-FPN| caffe   | Mask   | 1x      | 8.6      | 0.607               | 9.5            | 39.4   | 36.1    | -        |
| R-101-FPN| pytorch | Mask   | 1x      | 9.0      | 0.656               | 9.3            | 38.9   | 35.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_1x_20181129-2273fa9b.pth) |
| R-101-FPN| pytorch | Mask   | 2x      | 9.0      | 0.656               | 9.3            | 39.9   | 36.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_2x_20181129-bf63ec5e.pth) |

### RetinaNet

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe   | 1x      | 6.7      | 0.468               | 9.4            | 35.8   | -        |
| R-50-FPN | pytorch | 1x      | 6.9      | 0.496               | 9.1            | 35.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-3d3c2142.pth) |
| R-50-FPN | pytorch | 2x      | 6.9      | 0.496               | 9.1            | 36.5   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_2x_20181125-e0dbec97.pth) |
| R-101-FPN | caffe   | 1x      | 9.2      | 0.614               | 8.2            | 37.8   | -        |
| R-101-FPN | pytorch | 1x      | 9.6      | 0.643               | 8.1            | 37.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_1x_20181129-f738a02f.pth) |
| R-101-FPN | pytorch | 2x      | 9.6      | 0.643               | 8.1            | 38.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_2x_20181129-f654534b.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.8     | 0.792               | 6.7            | 38.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_1x_20181218-c140fb82.pth)
| X-101-32x4d-FPN | pytorch | 2x| 10.8     | 0.792               | 6.7            | 39.3   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_2x_20181218-605dcd0a.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.6     | 1.128               | 5.3            | 40.0   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_1x_20181218-2f6f778b.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.6     | 1.128               | 5.3            | 39.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_2x_20181218-2f598dc5.pth)

### Cascade R-CNN

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe   | 1x      | 5.0      | 0.592               | 8.1            | 40.3   | -        |
| R-50-FPN | pytorch | 1x      | 5.5      | 0.622               | 8.0            | 40.3   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_1x_20181123-b1987c4a.pth) |
| R-50-FPN | pytorch | 20e     | 5.5      | 0.622               | 8.0            | 41.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_20e_20181123-db483a09.pth) |
| R-101-FPN | caffe   | 1x      | 8.5      | 0.731               | 7.0            | 42.2   | -        |
| R-101-FPN | pytorch | 1x      | 8.7      | 0.766               | 6.9            | 42.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_1x_20181129-d64ebac7.pth) |
| R-101-FPN | pytorch | 20e     | 8.7      | 0.766               | 6.9            | 42.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_20e_20181129-b46dcede.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.6     | 0.902               | 5.7            | 43.5   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_1x_20181218-941c0925.pth)
| X-101-32x4d-FPN | pytorch |20e| 10.6     | 0.902               | 5.7            | 44.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_2x_20181218-28f73c4c.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1     | 1.251               | 4.6            | 44.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_1x_20181218-e2dc376a.pth)
| X-101-64x4d-FPN | pytorch |20e| 14.1     | 1.251               | 4.6            | 44.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_2x_20181218-5add321e.pth)

### Cascade Mask R-CNN

| Backbone | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe   | 1x      | 7.5      | 0.880               | 5.8            | 41.0   | 35.6    | -        |
| R-50-FPN | pytorch | 1x      | 7.6      | 0.910               | 5.7            | 41.3   | 35.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) |
| R-50-FPN | pytorch | 20e     | 7.6      | 0.910               | 5.7            | 42.4   | 36.6    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_20e_20181123-6e0c9713.pth) |
| R-101-FPN | caffe   | 1x      | 10.5     | 1.024               | 5.3            | 43.1   | 37.3    | -        |
| R-101-FPN | pytorch | 1x      | 10.9     | 1.055               | 5.2            | 42.7   | 37.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_1x_20181129-64f00602.pth) |
| R-101-FPN | pytorch | 20e     | 10.9     | 1.055               | 5.2            | 43.4   | 37.6    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_20e_20181129-cb85151d.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 12.67    | 1.181               | 4.2            | 44.4   | 38.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_1x_20181218-1d944c89.pth)
| X-101-32x4d-FPN | pytorch |20e| 12.67    | 1.181               | 4.2            | 44.9   | 38.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_20e_20181218-761a3473.pth)
| X-101-64x4d-FPN | pytorch | 1x| 10.87    | 1.125               | 3.6            | 45.5   | 39.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_1x_20181218-85953a91.pth)
| X-101-64x4d-FPN | pytorch |20e| 10.87    | 1.125               | 3.6            | 45.8   | 39.5    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_20e_20181218-630773a7.pth)

**Notes:**

- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs.
- Cascade Mask R-CNN with X-101-64x4d-FPN was trained using 16 GPUs with a batch size of 16 (1 image per GPU).

### SSD

| Backbone | Size | Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:----:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| VGG16    | 300  | caffe  | 120e    | 3.5      | 0.286               | 22.9 / 29.2    | 25.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth)  |
| VGG16    | 512  | caffe  | 120e    | 6.3      | 0.458               | 17.3 / 21.2    | 29.3   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_coco_vgg16_caffe_120e_20181221-d48b0be8.pth) |

### SSD (PASCAL VOC)

| Backbone | Size | Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:----:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| VGG16    | 300  | caffe  | 240e    | 1.2      | 0.189               | 40.1 / 58.0    | 77.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20181221-2f05dd40.pth)  |
| VGG16    | 512  | caffe  | 240e    | 2.9      | 0.261               | 28.1 / 36.2    | 80.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20181221-7652ee18.pth) |

**Notes:**

- `cudnn.benchmark` is set as `True` for SSD training and testing.
- Inference time is reported for batch size = 1 and batch size = 8.
- The speed difference between VOC and COCO is caused by model parameters and nms.

### Group Normalization (GN)

| Backbone      | model      | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN (d)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.9   | 36.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
| R-50-FPN (d)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.2   | 36.5    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
| R-101-FPN (d) | Mask R-CNN | 2x      | 9.9      | 0.970               | 4.8            | 41.6   | 37.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
| R-101-FPN (d) | Mask R-CNN | 3x      | 9.9      | 0.970               | 4.8            | 41.7   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
| R-50-FPN (c)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.7   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
| R-50-FPN (c)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.1   | 36.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |

**Notes:**
- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk).
- The `3x` schedule is epoch [28, 34, 36].
- The memory is measured with `torch.cuda.max_memory_allocated()` instead of `torch.cuda.max_memory_cached()`. We will update the memory usage of other models in the future.

### Deformable Convolution v2

| Backbone  | Model        | Style   | Conv          | Pool   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:---------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN  | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 3.9      | 0.594               | 10.2           | 40.0   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-e41688c9.pth) |
| R-50-FPN  | Faster       | pytorch | mdconv(c3-c5) | -      | 1x      | 3.7      | 0.598               | 10.0           | 40.3   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-1b768045.pth) |
| R-50-FPN  | Faster       | pytorch | -             | dpool  | 1x      | 4.6      | 0.714               | 8.7            | 37.9   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dpool_r50_fpn_1x_20190125-f4fc1d70.pth) |
| R-50-FPN  | Faster       | pytorch | -             | mdpool | 1x      | 5.2      | 0.769               | 8.2            | 38.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdpool_r50_fpn_1x_20190125-473d0f3d.pth) |
| R-101-FPN | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 5.8      | 0.811               | 8.0            | 42.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-a7e31b65.pth) |
| X-101-32x4d-FPN | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 7.1      | 1.126               | 6.6            | 43.5   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x_20190201-6d46376f.pth) |
| R-50-FPN  | Mask         | pytorch | dconv(c3-c5)  | -      | 1x      | 4.5      | 0.712               | 7.7            | 41.1   | 37.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-4f94ff79.pth) |
| R-50-FPN  | Mask         | pytorch | mdconv(c3-c5) | -      | 1x      | 4.5      | 0.712               | 7.7            | 41.4   | 37.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-c5601dc3.pth) |
| R-101-FPN | Mask         | pytorch | dconv(c3-c5)  | -      | 1x      | 6.4      | 0.939               | 6.5            | 43.2   | 38.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-decb6db5.pth) |
| R-50-FPN  | Cascade      | pytorch | dconv(c3-c5)  | -      | 1x      | 4.4      | 0.660               | 7.6            | 44.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth) |
| R-101-FPN | Cascade      | pytorch | dconv(c3-c5)  | -      | 1x      | 6.3      | 0.881               | 6.8            | 45.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-aaa877cc.pth) |
| R-50-FPN  | Cascade Mask | pytorch | dconv(c3-c5)  | -      | 1x      | 6.6      | 0.942               | 5.7            | 44.5   | 38.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-09d8a443.pth) |
| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5)  | -      | 1x      | 8.5      | 1.156               | 5.1            | 45.8   | 39.5    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-0d62c190.pth) |

**Notes:**

- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling.
- The memory is measured with `torch.cuda.max_memory_allocated()`. The batch size is 16 (2 images per GPU).
- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster.

## Comparison with Detectron

We compare mmdetection with [Detectron](https://github.com/facebookresearch/Detectron)
and [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch),
a third-party port of Detectron to Pytorch. The backbone used is R-50-FPN.

In general, mmdetection has 3 advantages over Detectron.

- **Higher performance** (especially in terms of mask AP)
- **Faster training speed**
- **Memory efficient**

### Performance

Detectron and Detectron.pytorch use caffe-style ResNet as the backbone.
In order to utilize the PyTorch model zoo, we use pytorch-style ResNet in our experiments.

In the meanwhile, we train models with caffe-style ResNet in 1x experiments for comparison.
We find that pytorch-style ResNet usually converges more slowly than caffe-style ResNet,
thus leading to slightly lower results with the 1x schedule, but the final results
of the 2x schedule are higher.

We report results using both caffe-style (weights converted from
[here](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#imagenet-pretrained-models))
and pytorch-style (weights from the official model zoo) ResNet backbone,
indicated as *pytorch-style results* / *caffe-style results*.

<table>
  <tr>
    <th>Type</th>
    <th>Lr schd</th>
    <th>Detectron</th>
    <th>Detectron.pytorch</th>
    <th>mmdetection</th>
  </tr>
  <tr>
    <td rowspan="2">RPN</td>
    <td>1x</td>
    <td>57.2</td>
    <td>-</td>
    <td>57.1 / 58.2</td>
  </tr>
  <tr>
    <td>2x</td>
    <td>-</td>
    <td>-</td>
    <td>57.6 / -</td>
  </tr>
  <tr>
    <td rowspan="2">Faster R-CNN</td>
    <td>1x</td>
    <td>36.7</td>
    <td>37.1</td>
    <td>36.4 / 36.7</td>
  </tr>
  <tr>
    <td>2x</td>
    <td>37.9</td>
    <td>-</td>
    <td>37.7 / -</td>
  </tr>
  <tr>
    <td rowspan="2">Mask R-CNN</td>
    <td>1x</td>
    <td>37.7 &amp; 33.9</td>
    <td>37.7 &amp; 33.7</td>
    <td>37.3 &amp; 34.2 / 37.5 &amp; 34.4</td>
  </tr>
  <tr>
    <td>2x</td>
    <td>38.6 &amp; 34.5</td>
    <td>-</td>
    <td>38.6 &amp; 35.1 / -</td>
  </tr>
  <tr>
    <td rowspan="2">Fast R-CNN</td>
    <td>1x</td>
    <td>36.4</td>
    <td>-</td>
    <td>35.8 / 36.6</td>
  </tr>
  <tr>
    <td>2x</td>
    <td>36.8</td>
    <td>-</td>
    <td>37.1 / -</td>
  </tr>
  <tr>
    <td rowspan="2">Fast R-CNN (w/mask)</td>
    <td>1x</td>
    <td>37.3 &amp; 33.7</td>
    <td>-</td>
    <td>36.8 &amp; 34.1 / 37.3 &amp; 34.5</td>
  </tr>
  <tr>
    <td>2x</td>
    <td>37.7 &amp; 34.0</td>
    <td>-</td>
    <td>37.9 &amp; 34.8 / -</td>
  </tr>
</table>

### Training Speed

The training speed is measured in s/iter. The lower, the better.

<table>
  <tr>
    <th>Type</th>
    <th>Detectron (P100<sup>1</sup>)</th>
    <th>Detectron.pytorch (XP<sup>2</sup>)</th>
    <th>mmdetection<sup>3</sup> (V100<sup>4</sup> / XP)</th>
  </tr>
  <tr>
    <td>RPN</td>
    <td>0.416</td>
    <td>-</td>
    <td>0.407 / 0.413</td>
  </tr>
  <tr>
    <td>Faster R-CNN</td>
    <td>0.544</td>
    <td>1.015</td>
    <td>0.554 / 0.579</td>
  </tr>
  <tr>
    <td>Mask R-CNN</td>
    <td>0.889</td>
    <td>1.435</td>
    <td>0.690 / 0.732</td>
  </tr>
  <tr>
    <td>Fast R-CNN</td>
    <td>0.285</td>
    <td>-</td>
    <td>0.375 / 0.398</td>
  </tr>
  <tr>
    <td>Fast R-CNN (w/mask)</td>
    <td>0.377</td>
    <td>-</td>
    <td>0.504 / 0.574</td>
  </tr>
</table>

\*1. Detectron reports the speed on Facebook's Big Basin servers (P100),
on our V100 servers it is slower so we use the official reported values.

\*2. Detectron.pytorch does not report the runtime and we encountered some issue to
run it on V100, so we report the speed on TITAN XP.

\*3. The speed of pytorch-style ResNet is approximately 5% slower than caffe-style,
and we report the pytorch-style results here.

\*4. We also run the models on a DGX-1 server (P100) and the speed is almost the same as our V100 servers.

### Inference Speed

The inference speed is measured in fps (img/s) on a single GPU. The higher, the better.

<table>
  <tr>
    <th>Type</th>
    <th>Detectron (P100)</th>
    <th>Detectron.pytorch (XP)</th>
    <th>mmdetection (V100 / XP)</th>
  </tr>
  <tr>
    <td>RPN</td>
    <td>12.5</td>
    <td>-</td>
    <td>14.5 / 15.4</td>
  </tr>
  <tr>
    <td>Faster R-CNN</td>
    <td>10.3</td>
    <td></td>
    <td>9.9 / 9.8</td>
  </tr>
  <tr>
    <td>Mask R-CNN</td>
    <td>8.5</td>
    <td></td>
    <td>7.7 / 7.4</td>
  </tr>
  <tr>
    <td>Fast R-CNN</td>
    <td>12.5</td>
    <td></td>
    <td>14.5 / 14.1</td>
  </tr>
  <tr>
    <td>Fast R-CNN (w/mask)</td>
    <td>9.9</td>
    <td></td>
    <td>10.6 / 10.3</td>
  </tr>
</table>

### Training memory

We perform various tests and there is no doubt that mmdetection is more memory
efficient than Detectron, and the main cause is the deep learning framework itself, not our efforts.
Besides, Caffe2 and PyTorch have different apis to obtain memory usage
whose implementation is not exactly the same.

`nvidia-smi` shows a larger memory usage for both detectron and mmdetection, e.g.,
we observe a much higher memory usage when we train Mask R-CNN with 2 images per GPU using detectron (10.6G) and mmdetection (9.3G), which is obviously more than actually required.

> With mmdetection, we can train R-50 FPN Mask R-CNN with **4** images per GPU (TITAN XP, 12G),
which is a promising result.


================================================
FILE: README.md
================================================
# Environments
- PyTorch 0.3.0/0.4.1
- an early (legacy) version of mmdetection


# Reasoning-RCNN
Reasoning-RCNN: Unifying Adaptive Global Reasoning into Large-scale Object Detection (CVPR2019 Oral)

```
# core files
configs/rrcnn/*
mmdet/models/detectors/reasoning_rcnn.py
mmdet/models/bbox_heads/graph_bbox_head.py
```


# SGRN
Spatial-Aware Graph Relation Network for Large-Scale Object Detection (CVPR2019)

```
# core files
configs/coco_sgrb_fpn_ms.py 
mmdet/models/detectors/sgrn.py
mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py
```


================================================
FILE: TECHNICAL_DETAILS.md
================================================
## Overview

In this section, we will introduce the main units of training a detector:
data loading, model and iteration pipeline.

## Data loading

Following typical conventions, we use `Dataset` and `DataLoader` for data loading
with multiple workers. `Dataset` returns a dict of data items corresponding to
the arguments of the model's forward method.
Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
data of different size.
See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.

## Model

In mmdetection, model components are basically categorized as 4 types.

- backbone: usually a FCN network to extract feature maps, e.g., ResNet.
- neck: the part between backbones and heads, e.g., FPN, ASPP.
- head: the part for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting features from feature maps, e.g., RoI Align.

We also implement some general detection pipelines with the above components,
such as `SingleStageDetector` and `TwoStageDetector`.

### Build a model with basic components

Following some basic pipelines (e.g., two-stage detectors), the model structure
can be customized through config files with no pain.

If we want to implement some new components, e.g., the path aggregation
FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.

1. create a new file in `mmdet/models/necks/pafpn.py`.

    ```python
    class PAFPN(nn.Module):

        def __init__(self,
                    in_channels,
                    out_channels,
                    num_outs,
                    start_level=0,
                    end_level=-1,
                    add_extra_convs=False):
            pass
        
        def forward(self, inputs):
            # implementation is ignored
            pass
    ```

2. modify the config file from

    ```python
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5)
    ```

    to

    ```python
    neck=dict(
        type='PAFPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5)
    ```

We will release more components (backbones, necks, heads) for research purpose.

### Write a new model

To write a new detection pipeline, you need to inherit from `BaseDetector`,
which defines the following abstract methods.

- `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
- `forward_train()`: forward method of the training mode
- `simple_test()`: single scale testing without augmentation
- `aug_test()`: testing with augmentation (multi-scale, flip, etc.)

[TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
is a good example which shows how to do that.

## Iteration pipeline

We adopt distributed training for both single machine and multiple machines.
Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.

Each process keeps an isolated model, data loader, and optimizer.
Model parameters are only synchronized once at the beginning.
After a forward and backward pass, gradients will be allreduced among all GPUs,
and the optimizer will update model parameters.
Since the gradients are allreduced, the model parameters stay the same across all processes after each iteration.


================================================
FILE: compile.sh
================================================
#!/usr/bin/env bash
# Build the native extension ops in place: roi_align, roi_pool, nms, dcn.
# Override the Python interpreter with:  PYTHON=python3 ./compile.sh
#
# Fail fast: without `set -e` a failed `cd` or a failed build would let
# the remaining steps run in the wrong directory, and the script would
# still exit with status 0, masking the error.
set -e

PYTHON=${PYTHON:-"python"}

echo "Building roi align op..."
cd mmdet/ops/roi_align
# Remove any stale build artifacts so the extension is rebuilt from scratch.
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace

echo "Building roi pool op..."
cd ../roi_pool
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace

echo "Building nms op..."
cd ../nms
# nms uses a Makefile instead of setuptools; pass the interpreter through.
make clean
make PYTHON=${PYTHON}

echo "Building dcn..."
cd ../dcn
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace


================================================
FILE: configs/ade_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN (ResNet-101 backbone + FPN neck) for the ADE detection split.
# NOTE(review): num_classes=446 presumably means 445 object categories plus
# one background class -- confirm against the dataset's category list.
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',  # ImageNet weights from the torchvision model zoo
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose all four stage outputs (C2-C5) to the neck
        frozen_stages=1,  # keep the stem and first stage frozen during training
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],  # channel widths of C2-C5
        out_channels=256,
        num_outs=5),  # five pyramid levels, matching the five anchor strides below
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),  # sigmoid objectness instead of 2-way softmax
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=446,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))  # a separate box regressor per class
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,  # no upper bound on the negative:positive ratio
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1 means use the default loss weight for positives
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the R-CNN loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),  # mix GT boxes in so positives always exist
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,  # NMS per FPN level rather than across levels
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'  # ADE split is loaded through the COCO-format dataset class
data_root = '/home/cyan/data/Detection/ADE_new/'  # machine-specific path; edit for your setup
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # ImageNet RGB stats
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        img_scale=(1333, 800),  # NOTE(review): presumably (max long side, max short side) -- mmdet convention
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad image dims to a multiple of 32
        flip_ratio=0.5,  # random horizontal flip for training only
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flipping at evaluation time
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',  # test set reuses the val annotations
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.02 presumably assumes a total batch size of 16
# (imgs_per_gpu=2 on 8 GPUs); scale the lr if the batch size changes.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # decay the lr at epochs 8 and 11 (standard "1x" schedule)
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]


================================================
FILE: configs/coco_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN (ResNet-101 backbone + FPN neck) for COCO 2017
# (num_classes=81: 80 object categories + 1 background).
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',  # ImageNet weights from the torchvision model zoo
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose all four stage outputs (C2-C5) to the neck
        frozen_stages=1,  # keep the stem and first stage frozen during training
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],  # channel widths of C2-C5
        out_channels=256,
        num_outs=5),  # five pyramid levels, matching the five anchor strides below
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),  # sigmoid objectness instead of 2-way softmax
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))  # a separate box regressor per class
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,  # no upper bound on the negative:positive ratio
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1 means use the default loss weight for positives
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            # NOTE(review): project-specific sampler variant (the ADE config
            # uses the stock 'RandomSampler'); presumably samples a fixed
            # number of RoIs -- see mmdet/core/bbox/samplers to confirm.
            type='RandomSamplerFixnum',
            num=512,  # RoIs sampled per image for the R-CNN loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),  # mix GT boxes in so positives always exist
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,  # NMS per FPN level rather than across levels
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'  # machine-specific; adjust per host
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # multi-scale training: short side sampled between 600 and 1000
        img_scale=[(1333,600),(1333,1000)],
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad images so both sides are multiples of 32 (FPN)
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fixed: train and test both read images from the 'images/' subdirectory;
        # val previously pointed at data_root + 'val2017/', which is inconsistent
        # with the other splits of this same config.
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))  # clip L2 grad norm at 35
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # decay the learning rate at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12  # "1x" schedule
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/coco_sgrb_fpn_ms.py
================================================
# model settings
# Three-stage graph detector: standard FC RoI head followed by a
# graph-enhanced RoI head, wired through a learned latent graph.
model = dict(
    type='ThreeStageGraphDetector',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # Two cascaded bbox heads: first a plain shared-FC head, then a
    # conv+FC head that consumes the graph-enhanced features.
    bbox_head=[dict(
        type='SharedFCRoIHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False),
        dict(
            type='ConvFCRoIHeadEnhance',
            enhance_channels=256,
            num_shared_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=False)
    ],
    # Latent graph-convolution settings consumed by the detector.
    graph_convolution=dict(
        latent_graph_channel=256,
        n_kernels_gc=8,
        n_graph_node=512,
        neigh_size=32)
)
# model training and testing settings
# NOTE(review): this config uses a flat train_cfg layout (no assigner/sampler
# sub-dicts) unlike the other configs in this repo -- presumably an older
# config style that ThreeStageGraphDetector still expects; confirm.
train_cfg = dict(
    rpn=dict(
        pos_fraction=0.5,
        pos_balance_sampling=False,
        neg_pos_ub=256,
        allowed_border=0,
        crowd_thr=1.1,
        anchor_batch_size=256,
        pos_iou_thr=0.7,
        neg_iou_thr=0.3,
        neg_balance_thr=0,
        min_pos_iou=0.3,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        crowd_thr=1.1,
        roi_batch_size=512,
        add_gt_as_proposals=False,
        pos_fraction=0.25,
        pos_balance_sampling=False,
        neg_pos_ub=512,
        neg_balance_thr=0,
        min_pos_iou=0.5,
        pos_weight=-1,
        debug=False),
    # Second RCNN stage uses a tighter IoU threshold (0.6 vs 0.5).
    rcnn2=dict(
        pos_iou_thr=0.6,
        neg_iou_thr=0.6,
        crowd_thr=1.1,
        roi_batch_size=512,
        add_gt_as_proposals=False,
        pos_fraction=0.25,
        pos_balance_sampling=False,
        neg_pos_ub=512,
        neg_balance_thr=0,
        min_pos_iou=0.5,
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(score_thr=0.001, max_per_img=150, nms_thr=0.55))
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/xuhang/data/detection_data/COCO2017/'  # machine-specific path
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=1,  # single image per GPU (heavier model than plain Faster R-CNN)
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        # NOTE(review): this host's COCO layout has train2017/ directly under the
        # root (no images/ subdir), unlike the other COCO configs -- confirm.
        img_prefix=data_root + 'train2017/',
        # multi-scale training: short side sampled between 600 and 1000
        img_scale=[(1333, 600),(1333, 1000)],
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# Small LR (0.001): this run fine-tunes from a transferred checkpoint (see
# load_from below) rather than training from ImageNet weights.
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[4])  # single LR decay at epoch 4
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 5  # short fine-tuning schedule
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
#load_from = None
#resume_from = './exps/coco_three_stage_graph_fpn_ms/epoch_12.pth'
# NOTE(review): 'transfer_domian' looks like a typo for 'transfer_domain';
# confirm the on-disk directory name before renaming either side.
load_from = './tools/transfer_domian/model/vg_transfer_coco.pth'
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/hkrm/ade_faster_rcnn_r50_fpn_1x.py
================================================
# model settings
# HKRM detector on ADE (446 classes), ResNet-50 + FPN, with a graph bbox head
# driven by precomputed knowledge-graph adjacency matrices.
model = dict(
    type='HKRMRCNN',
    pretrained='modelzoo://resnet50',
    # Precomputed adjacency pickles -- presumably attribute ('a') and
    # relationship ('r') knowledge graphs for ADE; machine-specific paths.
    adja_gt='/home/cyan/code/mmdetection/tools/graph/new_ade_graph_a.pkl',
    adjr_gt='/home/cyan/code/mmdetection/tools/graph/new_ade_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=446,  # ADE class count (incl. background, presumably)
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        # Per-branch graph-conv depths and switches for the attribute,
        # relationship and spatial knowledge modules.
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True,))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSamplerFixnum',
            num=256,  # RoIs per image (other configs use 512)
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
# ADE annotations in COCO json format, hence CocoDataset.
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/ADE_new/'  # machine-specific path
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',  # test on the val split
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # decay LR at epochs 8 and 11
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hkrm_r101_fpn_1x'  # NOTE(review): says r101 but backbone is r50
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/hkrm/coco_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# HKRM detector for COCO (81 classes), ResNet-101 + FPN.
model = dict(
    type='HKRMRCNN',
    # NOTE(review): no 'pretrained' key here, unlike the ADE config -- the
    # backbone is presumably initialized elsewhere (or from scratch); confirm.
    # NOTE(review): these pickles are the ADE graphs ('new_ade_graph_*') even
    # though this is the COCO config -- verify the intended graph files.
    adja_gt= './graph/new_ade_graph_a.pkl',
    adjr_gt= './graph/new_ade_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=81,  # presumably 80 COCO classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True,))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSamplerFixnum',
            num=256,  # RoIs per image (plain COCO config uses 512)
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'  # machine-specific; adjust per host
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # multi-scale training: short side sampled between 600 and 1000
        img_scale=[(1333,600),(1333,1000)],
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fixed: train and test both read images from the 'images/' subdirectory;
        # val previously pointed at data_root + 'val2017/', inconsistent with
        # the other splits of this same config.
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        # NOTE(review): (800, 800) differs from the (1333, 800) used by every
        # other COCO config in this repo -- confirm this is intentional.
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # decay LR at epochs 8 and 11
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hkrm_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/hkrm/vg_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# HKRM detector for Visual Genome (1001 classes), ResNet-101 + FPN,
# with VG-specific knowledge-graph adjacency pickles.
model = dict(
    type='HKRMRCNN',
    #pretrained='modelzoo://resnet101',
    adja_gt='./graph/new_vg_graph_a.pkl',
    adjr_gt='./graph/new_vg_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=1001,  # presumably 1000 VG categories + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True, ))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSamplerFixnum',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=512,  # fewer proposals kept at test time than other configs (2000)
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
# Visual Genome annotations exported in COCO json format.
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'  # machine-specific path
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): short side of 200 is far below the 600-1000 used
        # elsewhere in this repo -- looks like a typo for 800; confirm.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        # NOTE(review): test paths ('annotations/val.json', 'VG/') differ from
        # the val split ('val.json', 'val/') -- verify the intended layout.
        ann_file=data_root + 'annotations/val.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # decay LR at epochs 8 and 11
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hkrm_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
================================================
# model settings
# Faster R-CNN, ResNet-50 + FPN, for Pascal VOC 07+12 (21 classes).
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=21,  # 20 VOC classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',  # plain sampler here (not RandomSamplerFixnum)
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',  # to avoid reloading datasets frequently
        times=3,  # each "epoch" iterates the 07+12 trainval set 3 times
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(1000, 600),
            img_norm_cfg=img_norm_cfg,
            size_divisor=32,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=True,
            with_label=True)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(1000, 600),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(1000, 600),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# No warmup here, unlike the COCO configs; epochs count RepeatDataset passes.
lr_config = dict(policy='step', step=[3])  # actual epoch = 3 * 3 = 9
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 4  # actual epoch = 4 * 3 = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x_voc0712'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/pascal_voc/ssd300_voc.py
================================================
# model settings
input_size = 300
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256),
        num_classes=21,
        anchor_strides=(8, 16, 32, 64, 100, 300),
        basesize_ratio_range=(0.2, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# model training and testing settings
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# SSD convention: subtract the mean only (std of 1), no per-channel scaling.
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
data = dict(
    imgs_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=10,  # each "epoch" iterates the 07+12 trainval set 10 times
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,  # fixed 300x300 input, no padding needed
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            # SSD-style photometric + geometric augmentation pipeline.
            extra_aug=dict(
                photo_metric_distortion=dict(
                    brightness_delta=32,
                    contrast_range=(0.5, 1.5),
                    saturation_range=(0.5, 1.5),
                    hue_delta=18),
                expand=dict(
                    mean=img_norm_cfg['mean'],
                    to_rgb=img_norm_cfg['to_rgb'],
                    ratio_range=(1, 4)),
                random_crop=dict(
                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),
            resize_keep_ratio=False)),  # squash to exactly 300x300
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()  # no gradient clipping for SSD
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 20])  # decay LR at epochs 16 and 20 (of 24)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24  # each epoch is 10 passes via RepeatDataset
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd300_voc'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/pascal_voc/ssd512_voc.py
================================================
# model settings
# SSD512 with a VGG16 backbone for PASCAL VOC (config executed by mmcv.Config;
# every module-level name here becomes a config key).
input_size = 512
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    # SSD attaches its head directly to the backbone's multi-scale outputs.
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256, 256),
        # 21 = 20 VOC classes + background.
        num_classes=21,
        anchor_strides=(8, 16, 32, 64, 128, 256, 512),
        basesize_ratio_range=(0.15, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
# Fixed input size, so the cudnn autotuner is worthwhile.
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    # Hard-negative mining keeps at most 3 negatives per positive.
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# model training and testing settings
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# Caffe-style VGG pretraining: mean subtraction only (std of 1).
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
data = dict(
    imgs_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        # Repeat VOC07+12 trainval 10x per "epoch" to amortize dataloader
        # restarts, matching the short step schedule below.
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(512, 512),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            # Standard SSD augmentation pipeline: color jitter, expand, crop.
            extra_aug=dict(
                photo_metric_distortion=dict(
                    brightness_delta=32,
                    contrast_range=(0.5, 1.5),
                    saturation_range=(0.5, 1.5),
                    hue_delta=18),
                expand=dict(
                    mean=img_norm_cfg['mean'],
                    to_rgb=img_norm_cfg['to_rgb'],
                    ratio_range=(1, 4)),
                random_crop=dict(
                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),
            # SSD uses a fixed square input, so the aspect ratio is not kept.
            resize_keep_ratio=False)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(512, 512),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(512, 512),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False))
# optimizer
# Plain SGD; the empty optimizer_config below adds no extra hooks
# (in particular, no gradient clipping).
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()
# learning policy
# Step decay at epochs 16 and 20 after a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 20])
# Save a checkpoint every epoch.
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd512_voc'
load_from = None
resume_from = None
# Train-only workflow: one training epoch per cycle.
workflow = [('train', 1)]


================================================
FILE: configs/rrcnn/ade_reasoning_rcnn_r101_fpn_1x.py
================================================
# model settings
# Reasoning-RCNN on ADE (445 classes + background), ResNet-101 + FPN.
model = dict(
    type='ReasoningRCNN',
    # Two cascaded bbox stages; the second stage consumes graph-enhanced
    # features (hence its larger in_channels below).
    num_stages=2,
    # Pre-built class-relation graph used by the reasoning module.
    adj_gt='./graph/new_ade_graph_r.pkl',
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            num_classes=446,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # 1024 + 256 graph feature channels from the reasoning module.
            in_channels=1280,
            roi_feat_size=1,
            num_classes=446,
            target_means=[0., 0., 0., 0.],
            # Tighter regression stds for the refinement stage.
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # One rcnn cfg per stage; cascade-style increasing IoU thresholds
    # (0.5 for stage 1, 0.6 for stage 2).
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                # Project-local sampler variant that returns a fixed number
                # of samples per image.
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    # Second-stage losses are down-weighted by half.
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    # NOTE(review): keeps outputs of every stage at test time rather than
    # only the last — confirm intended against ReasoningRCNN's test path.
    keep_all_stages=True)
# dataset settings
# ADE converted to COCO-format json, hence CocoDataset.
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/ADE_new/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): a short side of 200 is unusually small for FPN
        # training (val uses 800) — confirm this is not a typo.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        # NOTE(review): test scale (800, 800) differs from val (1333, 800);
        # verify this asymmetry is deliberate.
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Gradient clipping guards early training stability.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Standard 1x schedule: LR drops at epochs 8 and 11, 12 epochs total.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Warm-starts from a plain FPN baseline trained on ADE.
load_from = './work_dirs/ade_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/rrcnn/coco_reasoning_rcnn_r101_fpn_1x.py
================================================
# model settings
# Reasoning-RCNN on COCO (80 classes + background), ResNet-101 + FPN.
model = dict(
    type='ReasoningRCNN',
    # Two cascaded bbox stages; stage 2 consumes graph-enhanced features.
    num_stages=2,
    adj_gt='./graph/new_COCO_graph_r.pkl', # relation graph: './graph/new_ade_graph_r.pkl'
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # 1024 + 256 graph feature channels from the reasoning module.
            in_channels=1280,
            roi_feat_size=1,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            # Tighter regression stds for the refinement stage.
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # One rcnn cfg per stage; cascade-style increasing IoU thresholds
    # (0.5 for stage 1, 0.6 for stage 2).
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                # Project-local sampler variant returning a fixed sample count.
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    # Second-stage losses are down-weighted by half.
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    # NOTE(review): keeps outputs of every stage at test time — confirm
    # intended against ReasoningRCNN's test path.
    keep_all_stages=True)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # NOTE(review): a short side of 200 is unusually small for FPN
        # training (val/test use 800) — confirm this is not a typo.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fixed: added the trailing slash for consistency with the val split
        # (harmless with os.path.join, but keeps prefixes uniform).
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Gradient clipping guards early training stability.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Standard 1x schedule: LR drops at epochs 8 and 11, 12 epochs total.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Warm-starts from a plain FPN baseline trained on COCO.
load_from = './work_dirs/coco_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/rrcnn/vg_reasoning_rcnn_r101_fpn_1x.py
================================================
# model settings
# Reasoning-RCNN on Visual Genome (3000 classes + background), R101 + FPN.
model = dict(
    type='ReasoningRCNN',
    num_stages=2,
    # NOTE(review): this VG config points at the COCO relation graph;
    # confirm a VG-specific graph was not intended here.
    adj_gt='./graph/new_COCO_graph_r.pkl',
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            num_classes=3001,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # 1024 + 256 graph feature channels from the reasoning module.
            in_channels=1280,
            roi_feat_size=1,
            num_classes=3001,
            target_means=[0., 0., 0., 0.],
            # Tighter regression stds for the refinement stage.
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # One rcnn cfg per stage; cascade-style increasing IoU thresholds
    # (0.5 for stage 1, 0.6 for stage 2).
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                # Project-local sampler variant returning a fixed sample count.
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    # Second-stage losses are down-weighted by half.
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    # NOTE(review): keeps outputs of every stage at test time — confirm
    # intended against ReasoningRCNN's test path.
    keep_all_stages=True)
# dataset settings
# Visual Genome converted to COCO-format json, hence CocoDataset.
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        # NOTE(review): train/val annotation names mirror the COCO config
        # (instances_*2017.json) while test uses val_big.json — confirm the
        # intended VG annotation files.
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # NOTE(review): a short side of 200 is unusually small for FPN
        # training (val uses 800) — confirm this is not a typo.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/val_big.json',
        # Fixed: added the trailing slash, matching 'VG/' in the sibling VG
        # configs (harmless with os.path.join, but keeps prefixes uniform).
        img_prefix=data_root + 'VG/',
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Gradient clipping guards early training stability.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Standard 1x schedule: LR drops at epochs 8 and 11, 12 epochs total.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Warm-starts from a plain FPN baseline trained on VG.
load_from = './work_dirs/vg_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/vg_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN baseline on Visual Genome (1000 classes + background).
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=1001,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        # Per-class regression, unlike the class-agnostic reasoning configs.
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        # NOTE(review): train/val read '{train,val}.json' at the dataset root
        # while test reads 'annotations/val.json' under a 'VG/' image dir —
        # confirm these mixed layouts are all valid on disk.
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): a short side of 200 is unusually small for FPN
        # training (val uses 800) — confirm this is not a typo.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/val.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Gradient clipping guards early training stability.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Standard 1x schedule: LR drops at epochs 8 and 11, 12 epochs total.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: configs/vgbig_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN baseline on the "big" Visual Genome split
# (1000 classes + background).
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=1001,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        # Per-class regression, unlike the class-agnostic reasoning configs.
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/train_big.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/val_big.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/val_big.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 400),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=True,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Gradient clipping guards early training stability.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Standard 1x schedule: LR drops at epochs 8 and 11, 12 epochs total.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# NOTE(review): directory says r50 but the model above is ResNet-101 —
# likely copied from another config; confirm before reusing results paths.
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]


================================================
FILE: mmdet/__init__.py
================================================
# Re-export the version info defined in the sibling `version` module as the
# package's public API.
from .version import __version__, short_version

__all__ = ['__version__', 'short_version']


================================================
FILE: mmdet/apis/__init__.py
================================================
# Public API of mmdet.apis: distributed-environment helpers, the training
# entry point, and single-image inference utilities.
from .env import init_dist, get_root_logger, set_random_seed
from .train import train_detector
from .inference import inference_detector, show_result

__all__ = [
    'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
    'inference_detector', 'show_result'
]


================================================
FILE: mmdet/apis/env.py
================================================
import logging
import os
import random

import numpy as np
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from mmcv.runner import get_dist_info


def init_dist(launcher, backend='nccl', **kwargs):
    """Initialize the distributed environment for the given launcher.

    Ensures the multiprocessing start method is 'spawn' (set once, only if
    no method has been chosen yet), then dispatches to the launcher-specific
    setup routine. Raises ValueError for an unknown launcher.
    """
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    elif launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
    else:
        raise ValueError('Invalid launcher type: {}'.format(launcher))


def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    # Pin this process to one visible GPU (round-robin by global rank), then
    # join the default process group. Reads the 'RANK' env var — presumably
    # set by the pytorch distributed launcher; raises KeyError if unset.
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_mpi(backend, **kwargs):
    # MPI launcher support is a declared-but-unimplemented stub.
    raise NotImplementedError


def _init_dist_slurm(backend, **kwargs):
    # SLURM launcher support is a declared-but-unimplemented stub.
    raise NotImplementedError


def set_random_seed(seed):
    """Seed every RNG used in training for reproducibility.

    Covers Python's `random` module, NumPy, and PyTorch (CPU and all GPUs).
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)


def get_root_logger(log_level=logging.INFO):
    """Return the root logger, configuring basic formatting on first use.

    Ranks other than 0 are raised to ERROR level so only the main process
    emits info-level logs.
    """
    root = logging.getLogger()
    if not root.hasHandlers():
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(message)s',
            level=log_level)
    rank, _ = get_dist_info()
    if rank != 0:
        root.setLevel('ERROR')
    return root


================================================
FILE: mmdet/apis/inference.py
================================================
import mmcv
import numpy as np
import torch

from mmdet.datasets import to_tensor
from mmdet.datasets.transforms import ImageTransform
from mmdet.core import get_classes


def _prepare_data(img, img_transform, cfg, device):
    """Transform one raw image into the batched dict a detector forward expects."""
    ori_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img, scale=cfg.data.test.img_scale)
    img = to_tensor(img).to(device).unsqueeze(0)
    meta = dict(
        ori_shape=ori_shape,
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=False)
    # detectors expect lists (one entry per augmentation) of batched inputs
    return dict(img=[img], img_meta=[[meta]])


def _inference_single(model, img, img_transform, cfg, device):
    """Run the detector on one image (path or ndarray) without gradients."""
    data = _prepare_data(mmcv.imread(img), img_transform, cfg, device)
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)


def _inference_generator(model, imgs, img_transform, cfg, device):
    """Lazily yield one detection result per image in `imgs`."""
    for single_img in imgs:
        yield _inference_single(model, single_img, img_transform, cfg, device)


def inference_detector(model, imgs, cfg, device='cuda:0'):
    """Run inference on one image or a list of images.

    Returns a single result for a single image, or a generator of results
    when `imgs` is a list.
    """
    img_transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    model = model.to(device)
    model.eval()
    if isinstance(imgs, list):
        return _inference_generator(model, imgs, img_transform, cfg, device)
    return _inference_single(model, imgs, img_transform, cfg, device)


def show_result(img, result, dataset='coco', score_thr=0.3):
    """Draw per-class detection bboxes above `score_thr` on `img` and show them."""
    class_names = get_classes(dataset)
    bboxes = np.vstack(result)
    # flat label array: class index i for every bbox of result[i]
    labels = np.concatenate([
        np.full(cls_bboxes.shape[0], cls_idx, dtype=np.int32)
        for cls_idx, cls_bboxes in enumerate(result)
    ])
    mmcv.imshow_det_bboxes(
        mmcv.imread(img).copy(),
        bboxes,
        labels,
        class_names=class_names,
        score_thr=score_thr)


================================================
FILE: mmdet/apis/train.py
================================================
from __future__ import division

from collections import OrderedDict

import torch
from mmcv.runner import Runner, DistSamplerSeedHook
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel

from mmdet.core import (DistOptimizerHook, DistEvalmAPHook,
                        CocoDistEvalRecallHook, CocoDistEvalmAPHook)
from mmdet.datasets import build_dataloader
from mmdet.models import RPN
from .env import get_root_logger


def parse_losses(losses):
    """Reduce raw loss outputs to a total loss and scalar log variables.

    Each value in `losses` may be a tensor or a list of tensors; each is
    mean-reduced. The total loss sums every entry whose key contains
    'loss'. Returns (total_loss_tensor, log_vars) where log_vars maps each
    name (plus 'loss') to a Python float.

    Raises:
        TypeError: if a loss value is neither a tensor nor a list of tensors.
    """
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
        elif isinstance(value, list):
            log_vars[name] = sum(item.mean() for item in value)
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(name))

    # only keys containing 'loss' contribute to the optimized total
    total = sum(v for k, v in log_vars.items() if 'loss' in k)
    log_vars['loss'] = total
    for key in log_vars:
        log_vars[key] = log_vars[key].item()

    return total, log_vars


def batch_processor(model, data, train_mode):
    """mmcv Runner batch hook: forward one batch and package losses for logging."""
    loss, log_vars = parse_losses(model(**data))
    return dict(
        loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))


def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   logger=None):
    """Train entry point: dispatch to distributed or single-node training."""
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # start training
    train_fn = _dist_train if distributed else _non_dist_train
    train_fn(model, dataset, cfg, validate=validate)


def _dist_train(model, dataset, cfg, validate=False):
    """Train with MMDistributedDataParallel via an mmcv Runner.

    Builds one distributed dataloader, wraps the model, registers training
    (and optionally evaluation) hooks, then runs `cfg.workflow` for
    `cfg.total_epochs` epochs.
    """
    # prepare data loaders
    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            dist=True)
    ]
    # put model on gpus
    model = MMDistributedDataParallel(model.cuda())
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    # register hooks; distributed variant of the optimizer hook averages grads
    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    # keeps per-epoch shuffling deterministic and in sync across ranks
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        if isinstance(model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        else:
            if cfg.data.val.type == 'CocoDataset':
                runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
            else:
                runner.register_hook(DistEvalmAPHook(cfg.data.val))

    # resuming (restores runner state) takes precedence over weight loading
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)


def _non_dist_train(model, dataset, cfg, validate=False):
    """Train with MMDataParallel on `cfg.gpus` GPUs of a single node.

    NOTE(review): unlike `_dist_train`, the `validate` flag is accepted but
    no evaluation hook is registered here.
    """
    # prepare data loaders
    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            cfg.gpus,
            dist=False)
    ]
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    # resuming (restores runner state) takes precedence over weight loading
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)


================================================
FILE: mmdet/core/__init__.py
================================================
from .anchor import *  # noqa: F401, F403
from .bbox import *  # noqa: F401, F403
from .mask import *  # noqa: F401, F403
from .loss import *  # noqa: F401, F403
from .evaluation import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403


================================================
FILE: mmdet/core/anchor/__init__.py
================================================
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target

__all__ = ['AnchorGenerator', 'anchor_target']


================================================
FILE: mmdet/core/anchor/anchor_generator.py
================================================
import torch


class AnchorGenerator(object):
    """Generate base anchors and tile them over a feature-map grid.

    Base anchors are built around a `base_size` square (centered at `ctr`
    or at the box center by default), one per (ratio, scale) pair.
    """

    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        self.base_size = base_size
        self.scales = torch.Tensor(scales)
        self.ratios = torch.Tensor(ratios)
        self.scale_major = scale_major
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_base_anchors(self):
        """Number of anchors per feature-map location."""
        return self.base_anchors.size(0)

    def gen_base_anchors(self):
        """Build the (num_ratios * num_scales, 4) base anchor tensor."""
        size = self.base_size
        if self.ctr is not None:
            x_ctr, y_ctr = self.ctr
        else:
            x_ctr = y_ctr = 0.5 * (size - 1)

        h_ratios = self.ratios.sqrt()
        w_ratios = 1 / h_ratios
        # enumeration order differs: scale-major varies scales fastest
        if self.scale_major:
            ws = (size * w_ratios[:, None] * self.scales[None, :]).view(-1)
            hs = (size * h_ratios[:, None] * self.scales[None, :]).view(-1)
        else:
            ws = (size * self.scales[:, None] * w_ratios[None, :]).view(-1)
            hs = (size * self.scales[:, None] * h_ratios[None, :]).view(-1)

        half_ws = 0.5 * (ws - 1)
        half_hs = 0.5 * (hs - 1)
        return torch.stack(
            [x_ctr - half_ws, y_ctr - half_hs,
             x_ctr + half_ws, y_ctr + half_hs],
            dim=-1).round()

    def _meshgrid(self, x, y, row_major=True):
        """Return flattened grid coordinates; x varies fastest if row_major."""
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        return (xx, yy) if row_major else (yy, xx)

    def grid_anchors(self, featmap_size, stride=16, device='cuda'):
        """Tile base anchors over every feature-map cell, spaced by `stride`.

        Returns a (feat_h * feat_w * num_base_anchors, 4) tensor; the first
        `num_base_anchors` rows belong to cell (0, 0), then (0, 1), ...
        """
        base = self.base_anchors.to(device)
        feat_h, feat_w = featmap_size
        xs = torch.arange(0, feat_w, device=device) * stride
        ys = torch.arange(0, feat_h, device=device) * stride
        xx, yy = self._meshgrid(xs, ys)
        shifts = torch.stack([xx, yy, xx, yy], dim=-1).type_as(base)
        # broadcast (1, A, 4) base anchors against (K, 1, 4) shifts
        return (base[None, :, :] + shifts[:, None, :]).view(-1, 4)

    def valid_flags(self, featmap_size, valid_size, device='cuda'):
        """Flag anchors whose grid cell lies in the valid (unpadded) region."""
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        vx = torch.zeros(feat_w, dtype=torch.uint8, device=device)
        vy = torch.zeros(feat_h, dtype=torch.uint8, device=device)
        vx[:valid_w] = 1
        vy[:valid_h] = 1
        vxx, vyy = self._meshgrid(vx, vy)
        cell_valid = vxx & vyy
        # replicate the per-cell flag for every base anchor at that cell
        expanded = cell_valid[:, None].expand(-1, self.num_base_anchors)
        return expanded.contiguous().view(-1)


================================================
FILE: mmdet/core/anchor/anchor_target.py
================================================
import torch

from ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta
from ..utils import multi_apply


def anchor_target(anchor_list,
                  valid_flag_list,
                  gt_bboxes_list,
                  img_metas,
                  target_means,
                  target_stds,
                  cfg,
                  gt_labels_list=None,
                  label_channels=1,
                  sampling=True,
                  unmap_outputs=True):
    """Compute regression and classification targets for anchors.

    Args:
        anchor_list (list[list]): Multi level anchors of each image.
        valid_flag_list (list[list]): Multi level valid flags of each image.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        target_means (Iterable): Mean value of regression targets.
        target_stds (Iterable): Std value of regression targets.
        cfg (dict): RPN train configs.
        gt_labels_list (list[Tensor], optional): GT labels per image; when
            None, positives get binary label 1.
        label_channels (int): Channels to expand binary labels to.
        sampling (bool): Whether to subsample anchors via cfg's sampler.
        unmap_outputs (bool): Whether to map targets back to the full
            anchor set (before inside-image filtering).

    Returns:
        tuple: (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, num_total_pos, num_total_neg) with one
            tensor per feature level in each *_list, or None when some
            image has no valid anchors.

    NOTE(review): `anchor_list` and `valid_flag_list` are mutated in place
    (their per-level tensors are concatenated per image).
    """
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        anchor_list[i] = torch.cat(anchor_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])

    # compute targets for each image
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
     pos_inds_list, neg_inds_list) = multi_apply(
         anchor_target_single,
         anchor_list,
         valid_flag_list,
         gt_bboxes_list,
         gt_labels_list,
         img_metas,
         target_means=target_means,
         target_stds=target_stds,
         cfg=cfg,
         label_channels=label_channels,
         sampling=sampling,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images; counts clamped to >= 1, presumably so
    # later loss normalization never divides by zero — confirm against callers
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
    return (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, num_total_pos, num_total_neg)


def images_to_levels(target, num_level_anchors):
    """Convert per-image targets into per-level targets.

    [target_img0, target_img1] -> [target_level0, target_level1, ...]
    """
    stacked = torch.stack(target, 0)
    level_targets = []
    end = 0
    for num in num_level_anchors:
        begin, end = end, end + num
        # squeeze(0) drops the image dim only in the single-image case
        level_targets.append(stacked[:, begin:end].squeeze(0))
    return level_targets


def anchor_target_single(flat_anchors,
                         valid_flags,
                         gt_bboxes,
                         gt_labels,
                         img_meta,
                         target_means,
                         target_stds,
                         cfg,
                         label_channels=1,
                         sampling=True,
                         unmap_outputs=True):
    """Compute anchor cls/reg targets for a single image.

    Returns a 6-tuple (labels, label_weights, bbox_targets, bbox_weights,
    pos_inds, neg_inds), or (None,) * 6 if no anchor lies inside the
    allowed image border.
    """
    # keep only anchors inside the image (plus cfg.allowed_border margin)
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 6
    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags, :]

    if sampling:
        assign_result, sampling_result = assign_and_sample(
            anchors, gt_bboxes, None, None, cfg)
    else:
        # no subsampling: assign only, then wrap all anchors via PseudoSampler
        bbox_assigner = build_assigner(cfg.assigner)
        assign_result = bbox_assigner.assign(anchors, gt_bboxes, None,
                                             gt_labels)
        bbox_sampler = PseudoSampler()
        sampling_result = bbox_sampler.sample(assign_result, anchors,
                                              gt_bboxes)

    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    if len(pos_inds) > 0:
        # regression targets are encoded deltas from anchor to assigned gt
        pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
                                      sampling_result.pos_gt_bboxes,
                                      target_means, target_stds)
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        if gt_labels is None:
            # class-agnostic case (e.g. RPN): every positive is labelled 1
            labels[pos_inds] = 1
        else:
            labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
        # non-positive cfg.pos_weight means "weight positives like negatives"
        if cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0

    # map up to original set of anchors
    if unmap_outputs:
        num_total_anchors = flat_anchors.size(0)
        labels = unmap(labels, num_total_anchors, inside_flags)
        label_weights = unmap(label_weights, num_total_anchors, inside_flags)
        if label_channels > 1:
            labels, label_weights = expand_binary_labels(
                labels, label_weights, label_channels)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
            neg_inds)


def expand_binary_labels(labels, label_weights, label_channels):
    """One-hot encode positive labels into `label_channels` binary channels.

    Label value k (>= 1) sets channel k - 1; label 0 leaves all channels 0.
    Weights are broadcast so each sample's weight applies to every channel.
    """
    num = labels.size(0)
    bin_labels = labels.new_zeros((num, label_channels))
    pos = torch.nonzero(labels >= 1).squeeze()
    if pos.numel() > 0:
        bin_labels[pos, labels[pos] - 1] = 1
    bin_label_weights = label_weights.view(-1, 1).expand(num, label_channels)
    return bin_labels, bin_label_weights


def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
                        allowed_border=0):
    """Flag anchors lying inside the image expanded by `allowed_border`.

    A negative `allowed_border` disables the geometric check and returns
    `valid_flags` unchanged.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border < 0:
        return valid_flags
    inside = ((flat_anchors[:, 0] >= -allowed_border)
              & (flat_anchors[:, 1] >= -allowed_border)
              & (flat_anchors[:, 2] < img_w + allowed_border)
              & (flat_anchors[:, 3] < img_h + allowed_border))
    return valid_flags & inside


def unmap(data, count, inds, fill=0):
    """Scatter `data` into a size-`count` tensor at `inds`; elsewhere `fill`.

    Inverse of boolean/index selection: restores a subset's values to their
    positions in the original set.
    """
    if data.dim() == 1:
        shape = (count, )
    else:
        shape = (count, ) + data.size()[1:]
    ret = data.new_full(shape, fill)
    ret[inds] = data
    return ret


================================================
FILE: mmdet/core/bbox/__init__.py
================================================
from .geometry import bbox_overlaps
from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult
from .samplers import (BaseSampler, PseudoSampler, RandomSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       CombinedSampler, SamplingResult, RandomSamplerFixnum)
from .assign_sampling import build_assigner, build_sampler, assign_and_sample
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target

__all__ = [
    'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
    'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
    'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
    'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target',
    'RandomSamplerFixnum'
]


================================================
FILE: mmdet/core/bbox/assign_sampling.py
================================================
import mmcv

from . import assigners, samplers


def build_assigner(cfg, **kwargs):
    """Build a bbox assigner from a config dict, or pass an instance through.

    Args:
        cfg (dict | BaseAssigner): assigner config (with a 'type' key) or an
            already-constructed assigner instance.
        **kwargs: default constructor arguments forwarded to obj_from_dict.

    Returns:
        BaseAssigner: the constructed (or passed-through) assigner.

    Raises:
        TypeError: if cfg is neither a dict nor a BaseAssigner.
    """
    if isinstance(cfg, assigners.BaseAssigner):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, assigners, default_args=kwargs)
    else:
        # BUGFIX: the message previously said "sampler" (copy-paste from
        # build_sampler), which made the error misleading.
        raise TypeError('Invalid type {} for building an assigner'.format(
            type(cfg)))


def build_sampler(cfg, **kwargs):
    """Build a bbox sampler from a config dict, or pass an instance through."""
    if isinstance(cfg, samplers.BaseSampler):
        return cfg
    if isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
    raise TypeError('Invalid type {} for building a sampler'.format(
        type(cfg)))


def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Assign gts to bboxes with cfg.assigner, then sample with cfg.sampler.

    Returns:
        tuple: (assign_result, sampling_result).
    """
    assigner = build_assigner(cfg.assigner)
    sampler = build_sampler(cfg.sampler)
    assign_result = assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
                                    gt_labels)
    sampling_result = sampler.sample(assign_result, bboxes, gt_bboxes,
                                     gt_labels)
    return assign_result, sampling_result


================================================
FILE: mmdet/core/bbox/assigners/__init__.py
================================================
from .base_assigner import BaseAssigner
from .max_iou_assigner import MaxIoUAssigner
from .assign_result import AssignResult

__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']


================================================
FILE: mmdet/core/bbox/assigners/assign_result.py
================================================
import torch


class AssignResult(object):
    """Result of assigning ground-truth boxes to candidate bboxes.

    Attributes:
        num_gts: number of ground-truth boxes.
        gt_inds: per-bbox assigned gt index (1-based; 0 = negative, -1 = ignore).
        max_overlaps: per-bbox max IoU with any gt.
        labels: per-bbox assigned class label, or None.
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        self.num_gts = num_gts
        self.gt_inds = gt_inds
        self.max_overlaps = max_overlaps
        self.labels = labels

    def add_gt_(self, gt_labels):
        """Prepend the gt boxes themselves as assigned samples (in place)."""
        num = len(gt_labels)
        gt_self_inds = torch.arange(
            1, num + 1, dtype=torch.long, device=gt_labels.device)
        self.gt_inds = torch.cat([gt_self_inds, self.gt_inds])
        # gt boxes trivially overlap themselves with IoU 1
        gt_overlaps = self.max_overlaps.new_ones(self.num_gts)
        self.max_overlaps = torch.cat([gt_overlaps, self.max_overlaps])
        if self.labels is not None:
            self.labels = torch.cat([gt_labels, self.labels])


================================================
FILE: mmdet/core/bbox/assigners/base_assigner.py
================================================
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Abstract interface for gt-to-bbox assigners."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign gt boxes to candidate bboxes; implementations return an AssignResult."""
        pass


================================================
FILE: mmdet/core/bbox/assigners/max_iou_assigner.py
================================================
import torch

from .base_assigner import BaseAssigner
from .assign_result import AssignResult
from ..geometry import bbox_overlaps


class MaxIoUAssigner(BaseAssigner):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposals will be assigned with `-1`, `0`, or a positive integer
    indicating the ground truth index.

    - -1: don't care
    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        pos_iou_thr (float): IoU threshold for positive bboxes.
        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
        min_pos_iou (float): Minimum iou for a bbox to be considered as a
            positive bbox. Positive samples can have smaller IoU than
            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
        gt_max_assign_all (bool): Whether to assign all bboxes with the same
            highest overlap with some gt to that gt.
        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
            `gt_bboxes_ignore` is specified). Negative values mean not
            ignoring any bboxes.
    """

    def __init__(self,
                 pos_iou_thr,
                 neg_iou_thr,
                 min_pos_iou=.0,
                 gt_max_assign_all=True,
                 ignore_iof_thr=-1):
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
        self.min_pos_iou = min_pos_iou
        self.gt_max_assign_all = gt_max_assign_all
        self.ignore_iof_thr = ignore_iof_thr

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign gt to bboxes.

        This method assign a gt bbox to every bbox (proposal/anchor), each bbox
        will be assigned with -1, 0, or a positive number. -1 means don't care,
        0 means negative sample, positive number is the index (1-based) of
        assigned gt.
        The assignment is done in following steps, the order matters.

        1. assign every bbox to -1
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that bbox
        4. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself

        Args:
            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
            raise ValueError('No gt or bboxes')
        bboxes = bboxes[:, :4]
        overlaps = bbox_overlaps(gt_bboxes, bboxes)  # shape (num_gts, num_bboxes)

        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
                gt_bboxes_ignore.numel() > 0):
            # per-bbox max intersection-over-foreground with any ignore region
            ignore_overlaps = bbox_overlaps(
                bboxes, gt_bboxes_ignore, mode='iof')
            ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            ignore_bboxes_inds = torch.nonzero(
                ignore_max_overlaps > self.ignore_iof_thr).squeeze(-1)
            if ignore_bboxes_inds.numel() > 0:
                # BUGFIX: these indices enumerate *bboxes*, which are the
                # columns of `overlaps`; the old code indexed rows and also
                # applied `[:, 0]` to an already-squeezed 1-D tensor, which
                # raised IndexError. Mark the ignored bboxes with -1 overlap
                # against every gt so step 2/3 treat them as "don't care".
                overlaps[:, ignore_bboxes_inds] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        return assign_result

    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Args:
            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
                shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        if overlaps.numel() == 0:
            raise ValueError('No gt or proposals')

        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below the negative IoU threshold (or band)
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0

        # 3. assign positive: above positive IoU threshold
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        # 4. assign fg: for each gt, proposals with highest IoU (so every gt
        # gets at least one positive, provided IoU >= min_pos_iou)
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        if gt_labels is not None:
            assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
            pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)


================================================
FILE: mmdet/core/bbox/bbox_target.py
================================================
import torch

from .transforms import bbox2delta
from ..utils import multi_apply


def bbox_target(pos_bboxes_list,
                neg_bboxes_list,
                pos_gt_bboxes_list,
                pos_gt_labels_list,
                cfg,
                reg_classes=1,
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0],
                concat=True):
    """Compute bbox cls/reg targets for a batch of images.

    Delegates to `bbox_target_single` per image; when `concat` is True the
    per-image results are concatenated along dim 0.

    Returns:
        tuple: (labels, label_weights, bbox_targets, bbox_weights), each a
            tensor when `concat` else a list of per-image tensors.
    """
    outputs = multi_apply(
        bbox_target_single,
        pos_bboxes_list,
        neg_bboxes_list,
        pos_gt_bboxes_list,
        pos_gt_labels_list,
        cfg=cfg,
        reg_classes=reg_classes,
        target_means=target_means,
        target_stds=target_stds)

    if concat:
        outputs = tuple(torch.cat(item, 0) for item in outputs)
    labels, label_weights, bbox_targets, bbox_weights = outputs
    return labels, label_weights, bbox_targets, bbox_weights


def bbox_target_single(pos_bboxes,
                       neg_bboxes,
                       pos_gt_bboxes,
                       pos_gt_labels,
                       cfg,
                       reg_classes=1,
                       target_means=[.0, .0, .0, .0],
                       target_stds=[1.0, 1.0, 1.0, 1.0]):
    """Build cls/reg targets for one image's sampled positives and negatives.

    Output tensors list positives first, then negatives. With
    `reg_classes > 1` the regression targets are expanded to
    class-specific 4-value slots.
    """
    num_pos = pos_bboxes.size(0)
    num_neg = neg_bboxes.size(0)
    total = num_pos + num_neg
    labels = pos_bboxes.new_zeros(total, dtype=torch.long)
    label_weights = pos_bboxes.new_zeros(total)
    bbox_targets = pos_bboxes.new_zeros(total, 4)
    bbox_weights = pos_bboxes.new_zeros(total, 4)

    if num_pos > 0:
        labels[:num_pos] = pos_gt_labels
        # non-positive cfg.pos_weight means "weight positives like negatives"
        label_weights[:num_pos] = cfg.pos_weight if cfg.pos_weight > 0 else 1.0
        bbox_targets[:num_pos, :] = bbox2delta(pos_bboxes, pos_gt_bboxes,
                                               target_means, target_stds)
        bbox_weights[:num_pos, :] = 1
    if num_neg > 0:
        label_weights[-num_neg:] = 1.0
    if reg_classes > 1:
        bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights,
                                                   labels, reg_classes)

    return labels, label_weights, bbox_targets, bbox_weights


def expand_target(bbox_targets, bbox_weights, labels, num_classes):
    """Scatter class-agnostic bbox targets into class-specific 4-value slots.

    Row i with label c (> 0) has its 4 target/weight values copied into
    columns [4c, 4c + 4); all other columns stay zero.
    """
    num = bbox_targets.size(0)
    targets_expand = bbox_targets.new_zeros((num, 4 * num_classes))
    weights_expand = bbox_weights.new_zeros((num, 4 * num_classes))
    for i in torch.nonzero(labels > 0).squeeze(-1):
        begin = labels[i] * 4
        targets_expand[i, begin:begin + 4] = bbox_targets[i]
        weights_expand[i, begin:begin + 4] = bbox_weights[i]
    return targets_expand, weights_expand


================================================
FILE: mmdet/core/bbox/geometry.py
================================================
import torch


def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
    """Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, the full overlap matrix between every
    bbox of ``bboxes1`` and every bbox of ``bboxes2`` is computed; otherwise
    only aligned pairs are compared (which requires m == n).

    Args:
        bboxes1 (Tensor): shape (m, 4)
        bboxes2 (Tensor): shape (n, 4); if is_aligned is ``True``, then m
            and n must be equal.
        mode (str): "iou" (intersection over union) or "iof" (intersection
            over foreground, i.e. normalized by the area of bboxes1).
        is_aligned (bool): pairwise matrix vs. aligned-pair computation.

    Returns:
        Tensor: overlaps of shape (m, n) when not aligned, otherwise a
        1-D tensor with one value per aligned pair.
    """
    assert mode in ['iou', 'iof']

    num1 = bboxes1.size(0)
    num2 = bboxes2.size(0)
    if is_aligned:
        assert num1 == num2

    if num1 == 0 or num2 == 0:
        # preserve the original empty-result shapes
        return bboxes1.new(num1, 1) if is_aligned else bboxes1.new(num1, num2)

    # areas use the +1 pixel convention, matching the rest of the codebase
    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
        bboxes1[:, 3] - bboxes1[:, 1] + 1)
    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
        bboxes2[:, 3] - bboxes2[:, 1] + 1)

    if is_aligned:
        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [m, 2]
        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [m, 2]
        wh = (rb - lt + 1).clamp(min=0)  # [m, 2]
        overlap = wh[:, 0] * wh[:, 1]
        if mode == 'iou':
            return overlap / (area1 + area2 - overlap)
        return overlap / area1

    # broadcast bboxes1 against bboxes2 to get the full (m, n) grid
    lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [m, n, 2]
    rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [m, n, 2]
    wh = (rb - lt + 1).clamp(min=0)  # [m, n, 2]
    overlap = wh[:, :, 0] * wh[:, :, 1]
    if mode == 'iou':
        return overlap / (area1[:, None] + area2 - overlap)
    return overlap / area1[:, None]


================================================
FILE: mmdet/core/bbox/samplers/__init__.py
================================================
from .base_sampler import BaseSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .combined_sampler import CombinedSampler
from .ohem_sampler import OHEMSampler
from .sampling_result import SamplingResult
from .random_sampler_fixnum import RandomSamplerFixnum

__all__ = [
    'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'RandomSamplerFixnum'
]


================================================
FILE: mmdet/core/bbox/samplers/base_sampler.py
================================================
from abc import ABCMeta, abstractmethod

import torch

from .sampling_result import SamplingResult


class BaseSampler(metaclass=ABCMeta):
    """Abstract base for bbox samplers.

    Subclasses implement `_sample_pos` / `_sample_neg`; `sample` orchestrates
    both and wraps the outcome in a :obj:`SamplingResult`.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        self.num = num
        self.pos_fraction = pos_fraction
        self.neg_pos_ub = neg_pos_ub
        self.add_gt_as_proposals = add_gt_as_proposals
        # By default this sampler handles both roles itself; subclasses such
        # as CombinedSampler may replace these with dedicated samplers.
        self.pos_sampler = self
        self.neg_sampler = self

    @abstractmethod
    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample positive indices; implemented by subclasses."""
        pass

    @abstractmethod
    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negative indices; implemented by subclasses."""
        pass

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               **kwargs):
        """Sample positive and negative bboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        bboxes = bboxes[:, :4]

        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
        if self.add_gt_as_proposals:
            # prepend gt boxes to the candidate pool and flag them
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
            gt_flags = torch.cat([gt_ones, gt_flags])

        num_expected_pos = int(self.num * self.pos_fraction)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
        # We found that sampled indices have duplicated items occasionally.
        # (may be a bug of PyTorch)
        pos_inds = pos_inds.unique()
        num_sampled_pos = pos_inds.numel()

        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            # cap negatives at neg_pos_ub times the (at least 1) positives
            neg_upper_bound = int(self.neg_pos_ub * max(1, num_sampled_pos))
            num_expected_neg = min(num_expected_neg, neg_upper_bound)
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
        neg_inds = neg_inds.unique()

        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)


================================================
FILE: mmdet/core/bbox/samplers/combined_sampler.py
================================================
from .base_sampler import BaseSampler
from ..assign_sampling import build_sampler


class CombinedSampler(BaseSampler):
    """Sampler that delegates positive and negative sampling to two
    independently-configured samplers."""

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        super(CombinedSampler, self).__init__(**kwargs)
        # replace the self-referential defaults set by BaseSampler
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        """Never called directly; delegated to ``self.pos_sampler``."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Never called directly; delegated to ``self.neg_sampler``."""
        raise NotImplementedError


================================================
FILE: mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
================================================
import numpy as np
import torch

from .random_sampler import RandomSampler


class InstanceBalancedPosSampler(RandomSampler):
    """Positive sampler that spreads the sampled positives evenly across
    ground-truth instances instead of sampling uniformly."""

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample at most ~num_expected/num_gts positives per gt instance,
        topping up randomly from the remaining positives if short."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # fewer candidates than requested: keep them all
            return pos_inds

        unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
        num_gts = len(unique_gt_inds)
        # per-gt quota, rounded up by one so the pool is slightly over-full
        num_per_gt = int(round(num_expected / float(num_gts)) + 1)
        sampled_inds = []
        for gt_ind in unique_gt_inds:
            inds = torch.nonzero(assign_result.gt_inds == gt_ind.item())
            if inds.numel() == 0:
                continue
            inds = inds.squeeze(1)
            if len(inds) > num_per_gt:
                inds = self.random_choice(inds, num_per_gt)
            sampled_inds.append(inds)
        sampled_inds = torch.cat(sampled_inds)

        if len(sampled_inds) < num_expected:
            # top up with positives not sampled yet
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(
                list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
            if len(extra_inds) > num_extra:
                extra_inds = self.random_choice(extra_inds, num_extra)
            extra_inds = torch.from_numpy(extra_inds).to(
                assign_result.gt_inds.device).long()
            sampled_inds = torch.cat([sampled_inds, extra_inds])
        elif len(sampled_inds) > num_expected:
            # over-full pool: trim down randomly
            sampled_inds = self.random_choice(sampled_inds, num_expected)
        return sampled_inds


================================================
FILE: mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
================================================
import numpy as np
import torch

from .random_sampler import RandomSampler


class IoUBalancedNegSampler(RandomSampler):
    """Negative sampler that balances "hard" and "easy" negatives.

    Negatives are split by their max IoU with gt: hard (iou >= hard_thr)
    and easy (0 <= iou < hard_thr). `hard_fraction` of the sampled
    negatives are drawn from the hard set, the rest from the easy set.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 hard_thr=0.1,
                 hard_fraction=0.5,
                 **kwargs):
        super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
                                                    **kwargs)
        assert hard_thr > 0
        assert 0 < hard_fraction < 1
        self.hard_thr = hard_thr
        self.hard_fraction = hard_fraction

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negatives, balancing hard vs. easy by ``hard_fraction``."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        else:
            max_overlaps = assign_result.max_overlaps.cpu().numpy()
            # balance sampling for negative samples
            neg_set = set(neg_inds.cpu().numpy())
            easy_set = set(
                np.where(
                    np.logical_and(max_overlaps >= 0,
                                   max_overlaps < self.hard_thr))[0])
            hard_set = set(np.where(max_overlaps >= self.hard_thr)[0])
            easy_neg_inds = list(easy_set & neg_set)
            hard_neg_inds = list(hard_set & neg_set)

            num_expected_hard = int(num_expected * self.hard_fraction)
            if len(hard_neg_inds) > num_expected_hard:
                sampled_hard_inds = self.random_choice(hard_neg_inds,
                                                       num_expected_hard)
            else:
                # NOTE: `np.int` (removed in NumPy 1.24) was an alias of the
                # builtin int, so `dtype=int` preserves the original dtype.
                sampled_hard_inds = np.array(hard_neg_inds, dtype=int)
            num_expected_easy = num_expected - len(sampled_hard_inds)
            if len(easy_neg_inds) > num_expected_easy:
                sampled_easy_inds = self.random_choice(easy_neg_inds,
                                                       num_expected_easy)
            else:
                sampled_easy_inds = np.array(easy_neg_inds, dtype=int)
            sampled_inds = np.concatenate((sampled_easy_inds,
                                           sampled_hard_inds))
            if len(sampled_inds) < num_expected:
                # still short (both pools exhausted): top up with any
                # unsampled negatives
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(list(neg_set - set(sampled_inds)))
                if len(extra_inds) > num_extra:
                    extra_inds = self.random_choice(extra_inds, num_extra)
                sampled_inds = np.concatenate((sampled_inds, extra_inds))
            sampled_inds = torch.from_numpy(sampled_inds).long().to(
                assign_result.gt_inds.device)
            return sampled_inds


================================================
FILE: mmdet/core/bbox/samplers/ohem_sampler.py
================================================
import torch

from .base_sampler import BaseSampler
from ..transforms import bbox2roi


class OHEMSampler(BaseSampler):
    """Online hard example mining sampler.

    Scores candidate boxes with the detector's own bbox head (taken from
    ``context``) and keeps those with the highest classification loss.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 context,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                          add_gt_as_proposals)
        # borrow the detector's RoI extractor and head for loss scoring
        self.bbox_roi_extractor = context.bbox_roi_extractor
        self.bbox_head = context.bbox_head

    def hard_mining(self, inds, num_expected, bboxes, labels, feats):
        """Return the ``num_expected`` indices with the largest cls loss."""
        with torch.no_grad():
            rois = bbox2roi([bboxes])
            roi_feats = self.bbox_roi_extractor(
                feats[:self.bbox_roi_extractor.num_inputs], rois)
            cls_score, _ = self.bbox_head(roi_feats)
            loss = self.bbox_head.loss(
                cls_score=cls_score,
                bbox_pred=None,
                labels=labels,
                label_weights=cls_score.new_ones(cls_score.size(0)),
                bbox_targets=None,
                bbox_weights=None,
                reduce=False)['loss_cls']
            _, hardest = loss.topk(num_expected)
        return inds[hardest]

    def _sample_pos(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        """Keep the hardest positives (highest classification loss)."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
                                assign_result.labels[pos_inds], feats)

    def _sample_neg(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        """Keep the hardest negatives (highest classification loss)."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
                                assign_result.labels[neg_inds], feats)


================================================
FILE: mmdet/core/bbox/samplers/pseudo_sampler.py
================================================
import torch

from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


class PseudoSampler(BaseSampler):
    """A pass-through "sampler" that keeps every assigned box.

    Used where real subsampling is not wanted (e.g. test-time or RPN-less
    pipelines): all positives and all negatives are returned unchanged.
    """

    def __init__(self, **kwargs):
        # intentionally skip BaseSampler.__init__: no sampling config needed
        pass

    def _sample_pos(self, **kwargs):
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Return all assigned positives and negatives without subsampling."""
        pos_inds = torch.nonzero(
            assign_result.gt_inds > 0).squeeze(-1).unique()
        neg_inds = torch.nonzero(
            assign_result.gt_inds == 0).squeeze(-1).unique()
        # no gt boxes are injected, so no proposal is flagged as gt
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)


================================================
FILE: mmdet/core/bbox/samplers/random_sampler.py
================================================
import numpy as np
import torch

from .base_sampler import BaseSampler


class RandomSampler(BaseSampler):
    """Sampler that draws positives and negatives uniformly at random."""

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                            add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Randomly select ``num`` elements from ``gallery``.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        perm = np.arange(len(gallery))
        np.random.shuffle(perm)
        chosen = perm[:num]
        if not isinstance(gallery, np.ndarray):
            # gallery is a tensor: move the index back onto its device
            chosen = torch.from_numpy(chosen).long().to(gallery.device)
        return gallery[chosen]

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        candidates = torch.nonzero(assign_result.gt_inds > 0)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if candidates.numel() <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        candidates = torch.nonzero(assign_result.gt_inds == 0)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if len(candidates) <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)


================================================
FILE: mmdet/core/bbox/samplers/random_sampler_fixnum.py
================================================
import numpy as np
import torch

from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


class RandomSamplerFixnum(BaseSampler):
    """Random sampler that always returns a fixed number of samples.

    Unlike :class:`RandomSampler`, when there are fewer candidates than
    requested, candidate indices are repeated (plus a random remainder) so
    that exactly the expected number is returned. Sampled indices are
    therefore deliberately NOT de-duplicated.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(RandomSamplerFixnum, self).__init__(num, pos_fraction,
                                                  neg_pos_ub,
                                                  add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Random select some elements from the gallery.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        cands = np.arange(len(gallery))
        np.random.shuffle(cands)
        rand_inds = cands[:num]
        if not isinstance(gallery, np.ndarray):
            rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
        return gallery[rand_inds]

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample exactly ``num_expected`` positive indices.

        If the candidate pool is smaller than ``num_expected``, each
        candidate is repeated ``num_expected // num_pos`` times and the
        remainder is filled with a random subset.
        """
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        num_pos = pos_inds.numel()
        if num_pos == 0:
            # No positives at all: nothing to repeat. Guards against the
            # ZeroDivisionError in `num_expected // num_pos`.
            return pos_inds
        if num_pos <= num_expected:
            repeat_ = num_expected // num_pos
            return torch.cat((pos_inds.repeat(repeat_),
                              self.random_choice(pos_inds,
                                                 num_expected % num_pos)))
        return self.random_choice(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample exactly ``num_expected`` negative indices.

        Same repeat-to-fill strategy as :meth:`_sample_pos`.
        """
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        num_neg = neg_inds.numel()
        if num_neg == 0:
            # No negatives available; avoid division by zero below.
            return neg_inds
        if num_neg <= num_expected:
            repeat_ = num_expected // num_neg
            return torch.cat((neg_inds.repeat(repeat_),
                              self.random_choice(neg_inds,
                                                 num_expected % num_neg)))
        return self.random_choice(neg_inds, num_expected)

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               has_roi_score=False,
               **kwargs):
        """Sample positive and negative bboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.
            has_roi_score (bool): If True, ``bboxes`` carry a 5th (score)
                column and gt boxes are padded with score 1 to match.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        if has_roi_score:
            # pad gt boxes with a dummy score of 1 so shapes match bboxes
            gt_bboxes_new = gt_bboxes.new_ones((gt_bboxes.shape[0], 5))
            gt_bboxes_new[:, :4] = gt_bboxes
            gt_bboxes = gt_bboxes_new
        else:
            bboxes = bboxes[:, :4]

        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
        if self.add_gt_as_proposals:
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
            gt_flags = torch.cat([gt_ones, gt_flags])

        num_expected_pos = int(self.num * self.pos_fraction)
        # the number of sampled positives must stay fixed, so the indices
        # are intentionally NOT de-duplicated here (unlike BaseSampler)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
        num_sampled_pos = pos_inds.numel()
        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            _pos = max(1, num_sampled_pos)
            neg_upper_bound = int(self.neg_pos_ub * _pos)
            if num_expected_neg > neg_upper_bound:
                num_expected_neg = neg_upper_bound
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)

        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)




================================================
FILE: mmdet/core/bbox/samplers/sampling_result.py
================================================
import torch


class SamplingResult(object):
    """Container for the outcome of bbox sampling.

    Holds the sampled positive/negative indices together with the tensors
    derived from them: the sampled boxes, their matched gt boxes/labels and
    the "is a gt proposal" flags.
    """

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        self.pos_inds = pos_inds
        self.neg_inds = neg_inds
        # gather the sampled boxes and their gt-proposal flags
        self.pos_bboxes = bboxes[pos_inds]
        self.neg_bboxes = bboxes[neg_inds]
        self.pos_is_gt = gt_flags[pos_inds]

        self.num_gts = gt_bboxes.shape[0]
        # assign_result.gt_inds is 1-based (0 means negative), hence the -1
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
        self.pos_gt_labels = (assign_result.labels[pos_inds]
                              if assign_result.labels is not None else None)

    @property
    def bboxes(self):
        """All sampled boxes, positives first."""
        return torch.cat([self.pos_bboxes, self.neg_bboxes])


================================================
FILE: mmdet/core/bbox/transforms.py
================================================
import mmcv
import numpy as np
import torch


def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
    """Encode gt boxes relative to proposals as (dx, dy, dw, dh) deltas,
    then normalize the deltas with ``means`` and ``stds``."""
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()

    def _to_cxcywh(boxes):
        # center coordinates and +1-convention width/height
        cx = (boxes[..., 0] + boxes[..., 2]) * 0.5
        cy = (boxes[..., 1] + boxes[..., 3]) * 0.5
        w = boxes[..., 2] - boxes[..., 0] + 1.0
        h = boxes[..., 3] - boxes[..., 1] + 1.0
        return cx, cy, w, h

    px, py, pw, ph = _to_cxcywh(proposals)
    gx, gy, gw, gh = _to_cxcywh(gt)

    deltas = torch.stack([(gx - px) / pw,
                          (gy - py) / ph,
                          torch.log(gw / pw),
                          torch.log(gh / ph)], dim=-1)

    mean_t = deltas.new_tensor(means).unsqueeze(0)
    std_t = deltas.new_tensor(stds).unsqueeze(0)
    return deltas.sub_(mean_t).div_(std_t)


def delta2bbox(rois,
               deltas,
               means=[0, 0, 0, 0],
               stds=[1, 1, 1, 1],
               max_shape=None,
               wh_ratio_clip=16 / 1000):
    """Decode regression deltas back into absolute (x1, y1, x2, y2) boxes.

    Args:
        rois (Tensor): shape (n, 4), base boxes.
        deltas (Tensor): shape (n, 4*k), denormalized with ``means``/``stds``.
        means (list[float]): normalization means applied by bbox2delta.
        stds (list[float]): normalization stds applied by bbox2delta.
        max_shape (tuple, optional): (h, w) to clamp decoded boxes into.
        wh_ratio_clip (float): clamp dw/dh to avoid exp() overflow.

    Returns:
        Tensor: decoded boxes, same shape as ``deltas``.
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    # clamp log-space sizes so exp() stays bounded
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Plain arithmetic instead of the deprecated/removed
    # torch.addcmul(input, value, t1, t2) positional-value overload.
    gx = px + pw * dx
    gy = py + ph * dy
    x1 = gx - gw * 0.5 + 0.5
    y1 = gy - gh * 0.5 + 0.5
    x2 = gx + gw * 0.5 - 0.5
    y2 = gy + gh * 0.5 - 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes


def bbox_flip(bboxes, img_shape):
    """Flip bboxes horizontally.

    Args:
        bboxes(Tensor or ndarray): Shape (..., 4*k)
        img_shape(tuple): Image shape.

    Returns:
        Same type as `bboxes`: Flipped bboxes.
    """
    if isinstance(bboxes, torch.Tensor):
        assert bboxes.shape[-1] % 4 == 0
        width = img_shape[1]
        flipped = bboxes.clone()
        # mirror x coordinates (x2 becomes new x1 and vice versa)
        flipped[:, 0::4] = width - bboxes[:, 2::4] - 1
        flipped[:, 2::4] = width - bboxes[:, 0::4] - 1
        return flipped
    elif isinstance(bboxes, np.ndarray):
        return mmcv.bbox_flip(bboxes, img_shape)


def bbox_mapping(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from the original image scale to testing scale."""
    scaled = bboxes * scale_factor
    return bbox_flip(scaled, img_shape) if flip else scaled


def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from testing scale back to the original image scale."""
    if flip:
        bboxes = bbox_flip(bboxes, img_shape)
    return bboxes / scale_factor


def bbox2roi(bbox_list):
    """Convert a list of bboxes to roi format.

    Args:
        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
            of images.

    Returns:
        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
    """
    rois_list = []
    for img_id, bboxes in enumerate(bbox_list):
        if bboxes.size(0) == 0:
            rois_list.append(bboxes.new_zeros((0, 5)))
            continue
        # prefix every box with the index of its image in the batch
        inds_col = bboxes.new_full((bboxes.size(0), 1), img_id)
        rois_list.append(torch.cat([inds_col, bboxes[:, :4]], dim=-1))
    return torch.cat(rois_list, 0)


def roi2bbox(rois):
    """Split (n, 5) rois back into one bbox tensor per image, keyed by the
    batch index stored in column 0."""
    return [
        rois[rois[:, 0] == img_id.item(), 1:]
        for img_id in torch.unique(rois[:, 0].cpu(), sorted=True)
    ]


def bbox2result(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (Tensor): shape (n, 5)
        labels (Tensor): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    num_fg = num_classes - 1
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for _ in range(num_fg)]
    bboxes_np = bboxes.cpu().numpy()
    labels_np = labels.cpu().numpy()
    return [bboxes_np[labels_np == cls, :] for cls in range(num_fg)]


================================================
FILE: mmdet/core/evaluation/__init__.py
================================================
from .class_names import (voc_classes, imagenet_det_classes,
                          imagenet_vid_classes, coco_classes, dataset_aliases,
                          get_classes)
from .coco_utils import coco_eval, fast_eval_recall, results2json
from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook,
                         CocoDistEvalmAPHook)
from .mean_ap import average_precision, eval_map, print_map_summary
from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
                     plot_iou_recall)

__all__ = [
    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
    'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
    'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
    'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
    'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
    'plot_num_recall', 'plot_iou_recall'
]


================================================
FILE: mmdet/core/evaluation/bbox_overlaps.py
================================================
import numpy as np


def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """
    assert mode in ['iou', 'iof']

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    if rows * cols == 0:
        return np.zeros((rows, cols), dtype=np.float32)

    # iterate over the smaller set and transpose the result back at the end
    exchange = rows > cols
    if exchange:
        bboxes1, bboxes2 = bboxes2, bboxes1
    ious = np.zeros((bboxes1.shape[0], bboxes2.shape[0]), dtype=np.float32)

    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
        bboxes1[:, 3] - bboxes1[:, 1] + 1)
    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
        bboxes2[:, 3] - bboxes2[:, 1] + 1)
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
            y_end - y_start + 1, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
            # iof is relative to the original bboxes1 ("foreground"), which
            # after a swap lives in area2
            union = area1[i] if not exchange else area2
        ious[i, :] = overlap / union
    return ious.T if exchange else ious


================================================
FILE: mmdet/core/evaluation/class_names.py
================================================
import mmcv


def voc_classes():
    """Return the 20 PASCAL VOC object class names, in canonical order."""
    names = ('aeroplane bicycle bird boat bottle bus car cat chair cow '
             'diningtable dog horse motorbike person pottedplant sheep '
             'sofa train tvmonitor')
    return names.split()


def imagenet_det_classes():
    """Return the 200 ILSVRC DET challenge class names, in canonical order."""
    names = """
        accordion airplane ant antelope apple armadillo artichoke axe
        baby_bed backpack bagel balance_beam banana band_aid banjo baseball
        basketball bathing_cap beaker bear bee bell_pepper bench bicycle
        binder bird bookshelf bow_tie bow bowl brassiere burrito
        bus butterfly camel can_opener car cart cattle cello
        centipede chain_saw chair chime cocktail_shaker coffee_maker computer_keyboard computer_mouse
        corkscrew cream croquet_ball crutch cucumber cup_or_mug diaper digital_clock
        dishwasher dog domestic_cat dragonfly drum dumbbell electric_fan elephant
        face_powder fig filing_cabinet flower_pot flute fox french_horn frog
        frying_pan giant_panda goldfish golf_ball golfcart guacamole guitar hair_dryer
        hair_spray hamburger hammer hamster harmonica harp hat_with_a_wide_brim head_cabbage
        helmet hippopotamus horizontal_bar horse hotdog iPod isopod jellyfish
        koala_bear ladle ladybug lamp laptop lemon lion lipstick
        lizard lobster maillot maraca microphone microwave milk_can miniskirt
        monkey motorcycle mushroom nail neck_brace oboe orange otter
        pencil_box pencil_sharpener perfume person piano pineapple ping-pong_ball pitcher
        pizza plastic_bag plate_rack pomegranate popsicle porcupine power_drill pretzel
        printer puck punching_bag purse rabbit racket ray red_panda
        refrigerator remote_control rubber_eraser rugby_ball ruler salt_or_pepper_shaker saxophone scorpion
        screwdriver seal sheep ski skunk snail snake snowmobile
        snowplow soap_dispenser soccer_ball sofa spatula squirrel starfish stethoscope
        stove strainer strawberry stretcher sunglasses swimming_trunks swine syringe
        table tape_player tennis_ball tick tie tiger toaster traffic_light
        train trombone trumpet turtle tv_or_monitor unicycle vacuum violin
        volleyball waffle_iron washer water_bottle watercraft whale wine_bottle zebra
        """
    return names.split()


def imagenet_vid_classes():
    """Return the 30 ILSVRC VID challenge class names, in canonical order."""
    names = """
        airplane antelope bear bicycle bird bus car cattle
        dog domestic_cat elephant fox giant_panda hamster horse lion
        lizard monkey motorcycle rabbit red_panda sheep snake squirrel
        tiger train turtle watercraft whale zebra
        """
    return names.split()


def coco_classes():
    """Return the 80 MS COCO object class names, in canonical order."""
    names = """
        person bicycle car motorcycle airplane bus train truck
        boat traffic_light fire_hydrant stop_sign parking_meter bench bird cat
        dog horse sheep cow elephant bear zebra giraffe
        backpack umbrella handbag tie suitcase frisbee skis snowboard
        sports_ball kite baseball_bat baseball_glove skateboard surfboard tennis_racket bottle
        wine_glass cup fork knife spoon bowl banana apple
        sandwich orange broccoli carrot hot_dog pizza donut cake
        chair couch potted_plant bed dining_table toilet tv laptop
        mouse remote keyboard cell_phone microwave oven toaster sink
        refrigerator book clock vase scissors teddy_bear hair_drier toothbrush
        """
    return names.split()


# Accepted alias spellings for each dataset, keyed by the canonical name.
# get_classes() resolves any alias to its canonical name and then calls the
# corresponding '<name>_classes' function defined above.
dataset_aliases = {
    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
    'coco': ['coco', 'mscoco', 'ms_coco']
}


def get_classes(dataset):
    """Get class names of a dataset.

    Args:
        dataset (str): dataset name or one of its aliases registered in
            ``dataset_aliases`` (e.g. 'voc07', 'mscoco').

    Returns:
        list[str]: class names of the resolved dataset.

    Raises:
        ValueError: if the name does not match any known dataset alias.
        TypeError: if ``dataset`` is not a string.
    """
    alias2name = {
        alias: name
        for name, aliases in dataset_aliases.items()
        for alias in aliases
    }

    if mmcv.is_str(dataset):
        if dataset in alias2name:
            # Dispatch to the matching '<name>_classes' function defined in
            # this module.  A globals() lookup replaces the previous eval(),
            # which would have executed arbitrary expressions for a crafted
            # dataset string.
            labels = globals()[alias2name[dataset] + '_classes']()
        else:
            raise ValueError('Unrecognized dataset: {}'.format(dataset))
    else:
        raise TypeError(
            'dataset must be a str, but got {}'.format(type(dataset)))
    return labels


================================================
FILE: mmdet/core/evaluation/coco_utils.py
================================================
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from .recall import eval_recalls


def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
    """Evaluate a result file with the COCO api and print the summary.

    Args:
        result_file (str): path to the results; a .pkl proposal file for
            'proposal_fast', otherwise a COCO-format .json file.
        result_types (list[str]): subset of {'proposal', 'proposal_fast',
            'bbox', 'segm', 'keypoints'}.
        coco (COCO | str): COCO api object or path to an annotation file.
        max_dets (tuple[int]): detection counts at which proposal recall
            is reported.
    """
    valid_types = ['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints']
    for res_type in result_types:
        assert res_type in valid_types

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    # 'proposal_fast' bypasses the COCO api entirely and is exclusive.
    if result_types == ['proposal_fast']:
        ar = fast_eval_recall(result_file, coco, np.array(max_dets))
        for num, rec in zip(max_dets, ar):
            print('AR@{}\t= {:.4f}'.format(num, rec))
        return

    assert result_file.endswith('.json')
    dets = coco.loadRes(result_file)
    img_ids = coco.getImgIds()

    for res_type in result_types:
        # Proposals are evaluated as class-agnostic bboxes.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        evaluator = COCOeval(coco, dets, iou_type)
        evaluator.params.imgIds = img_ids
        if res_type == 'proposal':
            evaluator.params.useCats = 0
            evaluator.params.maxDets = list(max_dets)
        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()


def fast_eval_recall(results,
                     coco,
                     max_dets,
                     iou_thrs=None):
    """Compute average recall of proposals against COCO ground truth.

    Args:
        results (list[ndarray] | str): per-image proposal arrays, or the
            path to a .pkl file containing such a list.
        coco (COCO): COCO api object holding the ground truth annotations.
        max_dets (ndarray): proposal counts at which recall is evaluated.
        iou_thrs (ndarray, optional): IoU thresholds; defaults to
            0.5:0.05:0.95.

    Returns:
        ndarray: average recall (over IoU thresholds) for each entry of
            ``max_dets``.
    """
    # Build the default here instead of using a mutable ndarray default
    # argument, which is shared across calls.
    if iou_thrs is None:
        iou_thrs = np.arange(0.5, 0.96, 0.05)
    if mmcv.is_str(results):
        assert results.endswith('.pkl')
        results = mmcv.load(results)
    elif not isinstance(results, list):
        raise TypeError(
            'results must be a list of numpy arrays or a filename, not {}'.
            format(type(results)))

    gt_bboxes = []
    img_ids = coco.getImgIds()
    for i in range(len(img_ids)):
        ann_ids = coco.getAnnIds(imgIds=img_ids[i])
        ann_info = coco.loadAnns(ann_ids)
        if len(ann_info) == 0:
            gt_bboxes.append(np.zeros((0, 4)))
            continue
        bboxes = []
        for ann in ann_info:
            # Skip ignored and crowd annotations: they are not counted as
            # recallable ground truth.
            if ann.get('ignore', False) or ann['iscrowd']:
                continue
            x1, y1, w, h = ann['bbox']
            # COCO stores xywh; convert to closed-interval xyxy.
            bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
        bboxes = np.array(bboxes, dtype=np.float32)
        if bboxes.shape[0] == 0:
            bboxes = np.zeros((0, 4))
        gt_bboxes.append(bboxes)

    recalls = eval_recalls(
        gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
    # Average over the IoU-threshold axis -> one AR value per max_dets entry.
    ar = recalls.mean(axis=1)
    return ar


def xyxy2xywh(bbox):
    """Convert an [x1, y1, x2, y2, ...] box to COCO-style [x, y, w, h].

    Trailing entries beyond the first four (e.g. a score) are ignored.
    Width and height use the closed-interval (+1) convention.
    """
    vals = bbox.tolist()
    x1, y1 = vals[0], vals[1]
    return [x1, y1, vals[2] - x1 + 1, vals[3] - y1 + 1]


def proposal2json(dataset, results):
    """Convert per-image proposal arrays to COCO-style json records.

    Each proposal row is (x1, y1, x2, y2, score); category_id is fixed to 1
    because proposals are class-agnostic.
    """
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        for row in results[idx]:
            json_results.append(dict(
                image_id=img_id,
                bbox=xyxy2xywh(row),
                score=float(row[4]),
                category_id=1))
    return json_results


def det2json(dataset, results):
    """Convert per-class detection results to COCO-style json records.

    ``results[idx]`` is a list indexed by class label; each entry holds the
    (x1, y1, x2, y2, score) rows for that class in image ``idx``.
    """
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        for label, bboxes in enumerate(results[idx]):
            # Map the contiguous label index back to the COCO category id.
            cat_id = dataset.cat_ids[label]
            for row in bboxes:
                json_results.append(dict(
                    image_id=img_id,
                    bbox=xyxy2xywh(row),
                    score=float(row[4]),
                    category_id=cat_id))
    return json_results


def segm2json(dataset, results):
    """Convert (det, seg) result pairs to COCO-style json records.

    Each RLE mask's 'counts' bytes are decoded to str (mutated in place)
    so the records are json-serializable.
    """
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        det, seg = results[idx]
        for label, bboxes in enumerate(det):
            segms = seg[label]
            cat_id = dataset.cat_ids[label]
            for i, row in enumerate(bboxes):
                rle = segms[i]
                rle['counts'] = rle['counts'].decode()
                json_results.append(dict(
                    image_id=img_id,
                    bbox=xyxy2xywh(row),
                    score=float(row[4]),
                    category_id=cat_id,
                    segmentation=rle))
    return json_results


def results2json(dataset, results, out_file):
    """Dump detection results to ``out_file`` in COCO json format.

    The converter is chosen from the type of the first result entry:
    list -> per-class detections, tuple -> (det, seg) pairs,
    ndarray -> class-agnostic proposals.
    """
    first = results[0]
    if isinstance(first, list):
        converted = det2json(dataset, results)
    elif isinstance(first, tuple):
        converted = segm2json(dataset, results)
    elif isinstance(first, np.ndarray):
        converted = proposal2json(dataset, results)
    else:
        raise TypeError('invalid type of results')
    mmcv.dump(converted, out_file)


================================================
FILE: mmdet/core/evaluation/eval_hooks.py
================================================
import os
import os.path as osp
import shutil
import time

import mmcv
import numpy as np
import torch
from mmcv.runner import Hook, obj_from_dict
from mmcv.parallel import scatter, collate
from pycocotools.cocoeval import COCOeval
from torch.utils.data import Dataset

from .coco_utils import results2json, fast_eval_recall
from .mean_ap import eval_map
from mmdet import datasets


class DistEvalHook(Hook):
    """Distributed evaluation hook.

    Every ``interval`` epochs, each rank runs inference on its stride of
    the dataset, non-zero ranks dump their partial results to temp files,
    and rank 0 merges everything and calls :meth:`evaluate`.
    """

    def __init__(self, dataset, interval=1):
        """
        Args:
            dataset (Dataset | dict): a dataset instance, or a config dict
                from which one is built (``test_mode=True`` is forced).
            interval (int): evaluation period, in epochs.

        Raises:
            TypeError: if ``dataset`` is neither a Dataset nor a dict.
        """
        if isinstance(dataset, Dataset):
            self.dataset = dataset
        elif isinstance(dataset, dict):
            self.dataset = obj_from_dict(dataset, datasets,
                                         {'test_mode': True})
        else:
            raise TypeError(
                'dataset must be a Dataset object or a dict, not {}'.format(
                    type(dataset)))
        self.interval = interval
        self.lock_dir = None  # set in before_run

    def _barrier(self, rank, world_size):
        """Due to some issues with `torch.distributed.barrier()`, we have to
        implement this ugly barrier function.

        Non-zero ranks drop a lock file and wait for rank 0 to delete it;
        rank 0 waits until every lock file exists, then removes them all.
        """
        if rank == 0:
            for i in range(1, world_size):
                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
                while not (osp.exists(tmp)):
                    time.sleep(1)
            for i in range(1, world_size):
                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
                os.remove(tmp)
        else:
            tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank))
            mmcv.dump([], tmp)
            while osp.exists(tmp):
                time.sleep(1)

    def before_run(self, runner):
        # Rank 0 (re)creates a clean lock directory for the barrier files.
        self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook')
        if runner.rank == 0:
            if osp.exists(self.lock_dir):
                shutil.rmtree(self.lock_dir)
            mmcv.mkdir_or_exist(self.lock_dir)

    def after_run(self, runner):
        if runner.rank == 0:
            shutil.rmtree(self.lock_dir)

    def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()
        # Each rank fills only the indices idx ≡ rank (mod world_size);
        # the rest stay None until merged below.
        results = [None for _ in range(len(self.dataset))]
        prog_bar = mmcv.ProgressBar(len(self.dataset))
        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(
                collate([data], samples_per_gpu=1),
                [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                result = runner.model(
                    return_loss=False, rescale=True, **data_gpu)
            results[idx] = result

            # Advance the bar by world_size so it reflects global progress.
            batch_size = runner.world_size
            for _ in range(batch_size):
                prog_bar.update()

        if runner.rank == 0:
            print('\n')
            # Wait for every other rank to dump its partial results,
            # then merge each rank's stride into the full list.
            self._barrier(runner.rank, runner.world_size)
            for i in range(1, runner.world_size):
                tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
                tmp_results = mmcv.load(tmp_file)
                for idx in range(i, len(results), runner.world_size):
                    results[idx] = tmp_results[idx]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        else:
            tmp_file = osp.join(runner.work_dir,
                                'temp_{}.pkl'.format(runner.rank))
            mmcv.dump(results, tmp_file)
            self._barrier(runner.rank, runner.world_size)
        self._barrier(runner.rank, runner.world_size)

    def evaluate(self, runner, results):
        """Compute metrics from the merged results; subclasses override.

        The signature now matches the call site in after_train_epoch
        (``self.evaluate(runner, results)``); the old zero-argument stub
        would have raised TypeError instead of NotImplementedError.
        """
        raise NotImplementedError


class DistEvalmAPHook(DistEvalHook):

    def evaluate(self, runner, results):
        gt_bboxes = []
        gt_labels = []
        gt_ignore = [] if self.dataset.with_crowd else None
        for i in range(len(self.dataset)):
            ann = self.dataset.get_ann_info(i)
            bboxes = ann['bboxes']
            labels = ann['labels']
            if gt_ignore is not None:
                ignore = np.concatenate([
                    np.zeros(bboxes.shape[0], dtype=np.bool),
                    np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)
                ])
                gt_ignore.append(ignore)
                bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
                labels = np.concatenate([labels, ann['labels_ignore']])
            gt_bboxes.append(bboxes)
            gt_labels.append(labels)
        # If the dataset is VOC2007, then use 11 points mAP evaluation.
        if hasattr(self.dataset, 'year') and self.dataset.year == 2007:
            ds_name = 'voc07'
        else:
            ds_name = self.dataset.CLASSES
        mean_ap, eval_results = eval_map(
            results,
            gt_bboxes,
            gt_labels,
            gt_ignore=gt_ignore,
            scale_ranges=None,
            iou_thr=0.5,
            dataset=ds_name,
            print_summary=True)
        runne
Download .txt
gitextract_155bakyx/

├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs/
│   ├── ade_faster_rcnn_r101_fpn_1x.py
│   ├── coco_faster_rcnn_r101_fpn_1x.py
│   ├── coco_sgrb_fpn_ms.py
│   ├── hkrm/
│   │   ├── ade_faster_rcnn_r50_fpn_1x.py
│   │   ├── coco_faster_rcnn_r101_fpn_1x.py
│   │   └── vg_faster_rcnn_r101_fpn_1x.py
│   ├── pascal_voc/
│   │   ├── faster_rcnn_r50_fpn_1x_voc0712.py
│   │   ├── ssd300_voc.py
│   │   └── ssd512_voc.py
│   ├── rrcnn/
│   │   ├── ade_reasoning_rcnn_r101_fpn_1x.py
│   │   ├── coco_reasoning_rcnn_r101_fpn_1x.py
│   │   └── vg_reasoning_rcnn_r101_fpn_1x.py
│   ├── vg_faster_rcnn_r101_fpn_1x.py
│   └── vgbig_faster_rcnn_r101_fpn_1x.py
├── mmdet/
│   ├── __init__.py
│   ├── apis/
│   │   ├── __init__.py
│   │   ├── env.py
│   │   ├── inference.py
│   │   └── train.py
│   ├── core/
│   │   ├── __init__.py
│   │   ├── anchor/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   └── anchor_target.py
│   │   ├── bbox/
│   │   │   ├── __init__.py
│   │   │   ├── assign_sampling.py
│   │   │   ├── assigners/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── assign_result.py
│   │   │   │   ├── base_assigner.py
│   │   │   │   └── max_iou_assigner.py
│   │   │   ├── bbox_target.py
│   │   │   ├── geometry.py
│   │   │   ├── samplers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   ├── combined_sampler.py
│   │   │   │   ├── instance_balanced_pos_sampler.py
│   │   │   │   ├── iou_balanced_neg_sampler.py
│   │   │   │   ├── ohem_sampler.py
│   │   │   │   ├── pseudo_sampler.py
│   │   │   │   ├── random_sampler.py
│   │   │   │   ├── random_sampler_fixnum.py
│   │   │   │   └── sampling_result.py
│   │   │   └── transforms.py
│   │   ├── evaluation/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_overlaps.py
│   │   │   ├── class_names.py
│   │   │   ├── coco_utils.py
│   │   │   ├── eval_hooks.py
│   │   │   ├── mean_ap.py
│   │   │   └── recall.py
│   │   ├── loss/
│   │   │   ├── __init__.py
│   │   │   └── losses.py
│   │   ├── mask/
│   │   │   ├── __init__.py
│   │   │   ├── mask_target.py
│   │   │   └── utils.py
│   │   ├── post_processing/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_nms.py
│   │   │   └── merge_augs.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── dist_utils.py
│   │       └── misc.py
│   ├── datasets/
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── concat_dataset.py
│   │   ├── custom.py
│   │   ├── extra_aug.py
│   │   ├── loader/
│   │   │   ├── __init__.py
│   │   │   ├── build_loader.py
│   │   │   └── sampler.py
│   │   ├── repeat_dataset.py
│   │   ├── transforms.py
│   │   ├── utils.py
│   │   ├── voc.py
│   │   └── xml_style.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── anchor_heads/
│   │   │   ├── __init__.py
│   │   │   ├── anchor_head.py
│   │   │   ├── retina_head.py
│   │   │   ├── rpn_head.py
│   │   │   └── ssd_head.py
│   │   ├── backbones/
│   │   │   ├── __init__.py
│   │   │   ├── resnet.py
│   │   │   ├── resnext.py
│   │   │   └── ssd_vgg.py
│   │   ├── bbox_heads/
│   │   │   ├── __init__.py
│   │   │   ├── bbox_head.py
│   │   │   ├── convfc_bbox_head.py
│   │   │   ├── convfc_bbox_head_enhanced.py
│   │   │   └── graph_bbox_head.py
│   │   ├── builder.py
│   │   ├── detectors/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   ├── cascade_rcnn.py
│   │   │   ├── fast_rcnn.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── hkrm_rcnn.py
│   │   │   ├── mask_rcnn.py
│   │   │   ├── reasoning_rcnn.py
│   │   │   ├── retinanet.py
│   │   │   ├── rpn.py
│   │   │   ├── sgrn.py
│   │   │   ├── single_stage.py
│   │   │   ├── test_mixins.py
│   │   │   └── two_stage.py
│   │   ├── mask_heads/
│   │   │   ├── __init__.py
│   │   │   └── fcn_mask_head.py
│   │   ├── necks/
│   │   │   ├── __init__.py
│   │   │   └── fpn.py
│   │   ├── registry.py
│   │   ├── roi_extractors/
│   │   │   ├── __init__.py
│   │   │   └── single_level.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── conv_module.py
│   │       ├── norm.py
│   │       └── weight_init.py
│   └── ops/
│       ├── __init__.py
│       ├── dcn/
│       │   ├── __init__.py
│       │   ├── functions/
│       │   │   ├── __init__.py
│       │   │   ├── deform_conv.py
│       │   │   └── deform_pool.py
│       │   ├── modules/
│       │   │   ├── __init__.py
│       │   │   ├── deform_conv.py
│       │   │   └── deform_pool.py
│       │   ├── setup.py
│       │   └── src/
│       │       ├── deform_conv_cuda.cpp
│       │       ├── deform_conv_cuda_kernel.cu
│       │       ├── deform_pool_cuda.cpp
│       │       └── deform_pool_cuda_kernel.cu
│       ├── nms/
│       │   ├── .gitignore
│       │   ├── Makefile
│       │   ├── __init__.py
│       │   ├── cpu_nms.pyx
│       │   ├── cpu_soft_nms.pyx
│       │   ├── gpu_nms.hpp
│       │   ├── gpu_nms.pyx
│       │   ├── nms_kernel.cu
│       │   ├── nms_wrapper.py
│       │   └── setup.py
│       ├── roi_align/
│       │   ├── __init__.py
│       │   ├── functions/
│       │   │   ├── __init__.py
│       │   │   └── roi_align.py
│       │   ├── gradcheck.py
│       │   ├── modules/
│       │   │   ├── __init__.py
│       │   │   └── roi_align.py
│       │   ├── setup.py
│       │   └── src/
│       │       ├── roi_align_cuda.cpp
│       │       └── roi_align_kernel.cu
│       └── roi_pool/
│           ├── __init__.py
│           ├── functions/
│           │   ├── __init__.py
│           │   └── roi_pool.py
│           ├── gradcheck.py
│           ├── modules/
│           │   ├── __init__.py
│           │   └── roi_pool.py
│           ├── setup.py
│           └── src/
│               ├── roi_pool_cuda.cpp
│               └── roi_pool_kernel.cu
├── setup.py
└── tools/
    ├── coco_eval.py
    ├── convert_datasets/
    │   └── pascal_voc.py
    ├── dist_train.sh
    ├── graph/
    │   ├── new_COCO_graph_a.pkl
    │   ├── new_COCO_graph_r.pkl
    │   ├── new_ade_graph_a.pkl
    │   ├── new_ade_graph_r.pkl
    │   ├── new_vg_big_graph_a.pkl
    │   ├── new_vg_big_graph_r.pkl
    │   ├── new_vg_graph_a.pkl
    │   └── new_vg_graph_r.pkl
    ├── test.py
    ├── train.py
    ├── vis_subgraph.py
    └── voc_eval.py
Download .txt
SYMBOL INDEX (544 symbols across 97 files)

FILE: mmdet/apis/env.py
  function init_dist (line 12) | def init_dist(launcher, backend='nccl', **kwargs):
  function _init_dist_pytorch (line 25) | def _init_dist_pytorch(backend, **kwargs):
  function _init_dist_mpi (line 33) | def _init_dist_mpi(backend, **kwargs):
  function _init_dist_slurm (line 37) | def _init_dist_slurm(backend, **kwargs):
  function set_random_seed (line 41) | def set_random_seed(seed):
  function get_root_logger (line 48) | def get_root_logger(log_level=logging.INFO):

FILE: mmdet/apis/inference.py
  function _prepare_data (line 10) | def _prepare_data(img, img_transform, cfg, device):
  function _inference_single (line 26) | def _inference_single(model, img, img_transform, cfg, device):
  function _inference_generator (line 34) | def _inference_generator(model, imgs, img_transform, cfg, device):
  function inference_detector (line 39) | def inference_detector(model, imgs, cfg, device='cuda:0'):
  function show_result (line 51) | def show_result(img, result, dataset='coco', score_thr=0.3):

FILE: mmdet/apis/train.py
  function parse_losses (line 16) | def parse_losses(losses):
  function batch_processor (line 36) | def batch_processor(model, data, train_mode):
  function train_detector (line 46) | def train_detector(model,
  function _dist_train (line 62) | def _dist_train(model, dataset, cfg, validate=False):
  function _non_dist_train (line 99) | def _non_dist_train(model, dataset, cfg, validate=False):

FILE: mmdet/core/anchor/anchor_generator.py
  class AnchorGenerator (line 4) | class AnchorGenerator(object):
    method __init__ (line 6) | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=No...
    method num_base_anchors (line 15) | def num_base_anchors(self):
    method gen_base_anchors (line 18) | def gen_base_anchors(self):
    method _meshgrid (line 45) | def _meshgrid(self, x, y, row_major=True):
    method grid_anchors (line 53) | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
    method valid_flags (line 72) | def valid_flags(self, featmap_size, valid_size, device='cuda'):

FILE: mmdet/core/anchor/anchor_target.py
  function anchor_target (line 7) | def anchor_target(anchor_list,
  function images_to_levels (line 75) | def images_to_levels(target, num_level_anchors):
  function anchor_target_single (line 90) | def anchor_target_single(flat_anchors,
  function expand_binary_labels (line 160) | def expand_binary_labels(labels, label_weights, label_channels):
  function anchor_inside_flags (line 170) | def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
  function unmap (line 184) | def unmap(data, count, inds, fill=0):

FILE: mmdet/core/bbox/assign_sampling.py
  function build_assigner (line 6) | def build_assigner(cfg, **kwargs):
  function build_sampler (line 17) | def build_sampler(cfg, **kwargs):
  function assign_and_sample (line 28) | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):

FILE: mmdet/core/bbox/assigners/assign_result.py
  class AssignResult (line 4) | class AssignResult(object):
    method __init__ (line 6) | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
    method add_gt_ (line 12) | def add_gt_(self, gt_labels):

FILE: mmdet/core/bbox/assigners/base_assigner.py
  class BaseAssigner (line 4) | class BaseAssigner(metaclass=ABCMeta):
    method assign (line 7) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...

FILE: mmdet/core/bbox/assigners/max_iou_assigner.py
  class MaxIoUAssigner (line 8) | class MaxIoUAssigner(BaseAssigner):
    method __init__ (line 31) | def __init__(self,
    method assign (line 43) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...
    method assign_wrt_overlaps (line 87) | def assign_wrt_overlaps(self, overlaps, gt_labels=None):

FILE: mmdet/core/bbox/bbox_target.py
  function bbox_target (line 7) | def bbox_target(pos_bboxes_list,
  function bbox_target_single (line 35) | def bbox_target_single(pos_bboxes,
  function expand_target (line 67) | def expand_target(bbox_targets, bbox_weights, labels, num_classes):

FILE: mmdet/core/bbox/geometry.py
  function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):

FILE: mmdet/core/bbox/samplers/base_sampler.py
  class BaseSampler (line 8) | class BaseSampler(metaclass=ABCMeta):
    method __init__ (line 10) | def __init__(self,
    method _sample_pos (line 24) | def _sample_pos(self, assign_result, num_expected, **kwargs):
    method _sample_neg (line 28) | def _sample_neg(self, assign_result, num_expected, **kwargs):
    method sample (line 31) | def sample(self,

FILE: mmdet/core/bbox/samplers/combined_sampler.py
  class CombinedSampler (line 5) | class CombinedSampler(BaseSampler):
    method __init__ (line 7) | def __init__(self, pos_sampler, neg_sampler, **kwargs):
    method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
    method _sample_neg (line 15) | def _sample_neg(self, **kwargs):

FILE: mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
  class InstanceBalancedPosSampler (line 7) | class InstanceBalancedPosSampler(RandomSampler):
    method _sample_pos (line 9) | def _sample_pos(self, assign_result, num_expected, **kwargs):

FILE: mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
  class IoUBalancedNegSampler (line 7) | class IoUBalancedNegSampler(RandomSampler):
    method __init__ (line 9) | def __init__(self,
    method _sample_neg (line 22) | def _sample_neg(self, assign_result, num_expected, **kwargs):

FILE: mmdet/core/bbox/samplers/ohem_sampler.py
  class OHEMSampler (line 7) | class OHEMSampler(BaseSampler):
    method __init__ (line 9) | def __init__(self,
    method hard_mining (line 21) | def hard_mining(self, inds, num_expected, bboxes, labels, feats):
    method _sample_pos (line 38) | def _sample_pos(self,
    method _sample_neg (line 54) | def _sample_neg(self,

FILE: mmdet/core/bbox/samplers/pseudo_sampler.py
  class PseudoSampler (line 7) | class PseudoSampler(BaseSampler):
    method __init__ (line 9) | def __init__(self, **kwargs):
    method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
    method _sample_neg (line 15) | def _sample_neg(self, **kwargs):
    method sample (line 18) | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):

FILE: mmdet/core/bbox/samplers/random_sampler.py
  class RandomSampler (line 7) | class RandomSampler(BaseSampler):
    method __init__ (line 9) | def __init__(self,
    method random_choice (line 19) | def random_choice(gallery, num):
    method _sample_pos (line 35) | def _sample_pos(self, assign_result, num_expected, **kwargs):
    method _sample_neg (line 45) | def _sample_neg(self, assign_result, num_expected, **kwargs):

FILE: mmdet/core/bbox/samplers/random_sampler_fixnum.py
  class RandomSamplerFixnum (line 8) | class RandomSamplerFixnum(BaseSampler):
    method __init__ (line 10) | def __init__(self,
    method random_choice (line 20) | def random_choice(gallery, num):
    method _sample_pos (line 46) | def _sample_pos(self, assign_result, num_expected, **kwargs):
    method _sample_neg (line 71) | def _sample_neg(self, assign_result, num_expected, **kwargs):
    method sample (line 89) | def sample(self,

FILE: mmdet/core/bbox/samplers/sampling_result.py
  class SamplingResult (line 4) | class SamplingResult(object):
    method __init__ (line 6) | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
    method bboxes (line 23) | def bboxes(self):

FILE: mmdet/core/bbox/transforms.py
  function bbox2delta (line 6) | def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
  function delta2bbox (line 34) | def delta2bbox(rois,
  function bbox_flip (line 71) | def bbox_flip(bboxes, img_shape):
  function bbox_mapping (line 91) | def bbox_mapping(bboxes, img_shape, scale_factor, flip):
  function bbox_mapping_back (line 99) | def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
  function bbox2roi (line 106) | def bbox2roi(bbox_list):
  function roi2bbox (line 128) | def roi2bbox(rois):
  function bbox2result (line 138) | def bbox2result(bboxes, labels, num_classes):

FILE: mmdet/core/evaluation/bbox_overlaps.py
  function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):

FILE: mmdet/core/evaluation/class_names.py
  function voc_classes (line 4) | def voc_classes():
  function imagenet_det_classes (line 12) | def imagenet_det_classes():
  function imagenet_vid_classes (line 53) | def imagenet_vid_classes():
  function coco_classes (line 63) | def coco_classes():
  function get_classes (line 89) | def get_classes(dataset):

FILE: mmdet/core/evaluation/coco_utils.py
  function coco_eval (line 9) | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
  function fast_eval_recall (line 41) | def fast_eval_recall(results,
  function xyxy2xywh (line 78) | def xyxy2xywh(bbox):
  function proposal2json (line 88) | def proposal2json(dataset, results):
  function det2json (line 103) | def det2json(dataset, results):
  function segm2json (line 120) | def segm2json(dataset, results):
  function results2json (line 140) | def results2json(dataset, results, out_file):

FILE: mmdet/core/evaluation/eval_hooks.py
  class DistEvalHook (line 19) | class DistEvalHook(Hook):
    method __init__ (line 21) | def __init__(self, dataset, interval=1):
    method _barrier (line 34) | def _barrier(self, rank, world_size):
    method before_run (line 52) | def before_run(self, runner):
    method after_run (line 59) | def after_run(self, runner):
    method after_train_epoch (line 63) | def after_train_epoch(self, runner):
    method evaluate (line 102) | def evaluate(self):
  class DistEvalmAPHook (line 106) | class DistEvalmAPHook(DistEvalHook):
    method evaluate (line 108) | def evaluate(self, runner, results):
  class CocoDistEvalRecallHook (line 144) | class CocoDistEvalRecallHook(DistEvalHook):
    method __init__ (line 146) | def __init__(self,
    method evaluate (line 154) | def evaluate(self, runner, results):
  class CocoDistEvalmAPHook (line 164) | class CocoDistEvalmAPHook(DistEvalHook):
    method evaluate (line 166) | def evaluate(self, runner, results):

FILE: mmdet/core/evaluation/mean_ap.py
  function average_precision (line 9) | def average_precision(recalls, precisions, mode='area'):
  function tpfp_imagenet (line 56) | def tpfp_imagenet(det_bboxes,
  function tpfp_default (line 137) | def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=...
  function get_cls_results (line 202) | def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_...
  function eval_map (line 220) | def eval_map(det_results,
  function print_map_summary (line 332) | def print_map_summary(mean_ap, results, dataset=None):

FILE: mmdet/core/evaluation/recall.py
  function _recalls (line 7) | def _recalls(all_ious, proposal_nums, thrs):
  function set_recall_param (line 40) | def set_recall_param(proposal_nums, iou_thrs):
  function eval_recalls (line 62) | def eval_recalls(gts,
  function print_recall_summary (line 105) | def print_recall_summary(recalls,
  function plot_num_recall (line 138) | def plot_num_recall(recalls, proposal_nums):
  function plot_iou_recall (line 163) | def plot_iou_recall(recalls, iou_thrs):

FILE: mmdet/core/loss/losses.py
  function weighted_nll_loss (line 6) | def weighted_nll_loss(pred, label, weight, avg_factor=None):
  function weighted_cross_entropy (line 13) | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=...
  function weighted_binary_cross_entropy (line 23) | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
  function sigmoid_focal_loss (line 31) | def sigmoid_focal_loss(pred,
  function weighted_sigmoid_focal_loss (line 46) | def weighted_sigmoid_focal_loss(pred,
  function mask_cross_entropy (line 60) | def mask_cross_entropy(pred, target, label):
  function smooth_l1_loss (line 68) | def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'):
  function weighted_smoothl1 (line 84) | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
  function accuracy (line 91) | def accuracy(pred, target, topk=1):

FILE: mmdet/core/mask/mask_target.py
  function mask_target (line 6) | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_...
  function mask_target_single (line 15) | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):

FILE: mmdet/core/mask/utils.py
  function split_combined_polys (line 4) | def split_combined_polys(polys, poly_lens, polys_per_mask):

FILE: mmdet/core/post_processing/bbox_nms.py
  function multiclass_nms (line 6) | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_n...

FILE: mmdet/core/post_processing/merge_augs.py
  function merge_aug_proposals (line 9) | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
  function merge_aug_bboxes (line 41) | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
  function merge_aug_scores (line 68) | def merge_aug_scores(aug_scores):
  function merge_aug_masks (line 76) | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):

FILE: mmdet/core/utils/dist_utils.py
  function _allreduce_coalesced (line 9) | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
  function allreduce_grads (line 31) | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
  class DistOptimizerHook (line 44) | class DistOptimizerHook(OptimizerHook):
    method __init__ (line 46) | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
    method after_train_iter (line 51) | def after_train_iter(self, runner):

FILE: mmdet/core/utils/misc.py
  function tensor2imgs (line 8) | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
  function multi_apply (line 21) | def multi_apply(func, *args, **kwargs):
  function unmap (line 27) | def unmap(data, count, inds, fill=0):

FILE: mmdet/datasets/coco.py
  class CocoDataset (line 7) | class CocoDataset(CustomDataset):
    method load_annotations (line 24) | def load_annotations(self, ann_file):
    method get_ann_info (line 39) | def get_ann_info(self, idx):
    method _filter_imgs (line 45) | def _filter_imgs(self, min_size=32):
    method _parse_ann_info (line 56) | def _parse_ann_info(self, ann_info, with_mask=True):

FILE: mmdet/datasets/concat_dataset.py
  class ConcatDataset (line 5) | class ConcatDataset(_ConcatDataset):
    method __init__ (line 15) | def __init__(self, datasets):

FILE: mmdet/datasets/custom.py
  class CustomDataset (line 14) | class CustomDataset(Dataset):
    method __init__ (line 38) | def __init__(self,
    method __len__ (line 114) | def __len__(self):
    method load_annotations (line 117) | def load_annotations(self, ann_file):
    method load_proposals (line 120) | def load_proposals(self, proposal_file):
    method get_ann_info (line 123) | def get_ann_info(self, idx):
    method _filter_imgs (line 126) | def _filter_imgs(self, min_size=32):
    method _set_group_flag (line 134) | def _set_group_flag(self):
    method _rand_another (line 146) | def _rand_another(self, idx):
    method __getitem__ (line 150) | def __getitem__(self, idx):
    method prepare_train_img (line 160) | def prepare_train_img(self, idx):
    method prepare_test_img (line 239) | def prepare_test_img(self, idx):

FILE: mmdet/datasets/extra_aug.py
  class PhotoMetricDistortion (line 8) | class PhotoMetricDistortion(object):
    method __init__ (line 10) | def __init__(self,
    method __call__ (line 20) | def __call__(self, img, boxes, labels):
  class Expand (line 67) | class Expand(object):
    method __init__ (line 69) | def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
    method __call__ (line 76) | def __call__(self, img, boxes, labels):
  class RandomCrop (line 92) | class RandomCrop(object):
    method __init__ (line 94) | def __init__(self,
    method __call__ (line 101) | def __call__(self, img, boxes, labels):
  class ExtraAugmentation (line 146) | class ExtraAugmentation(object):
    method __init__ (line 148) | def __init__(self,
    method __call__ (line 161) | def __call__(self, img, boxes, labels):

FILE: mmdet/datasets/loader/build_loader.py
  function build_dataloader (line 15) | def build_dataloader(dataset,

FILE: mmdet/datasets/loader/sampler.py
  class GroupSampler (line 11) | class GroupSampler(Sampler):
    method __init__ (line 13) | def __init__(self, dataset, samples_per_gpu=1):
    method __iter__ (line 24) | def __iter__(self):
    method __len__ (line 47) | def __len__(self):
  class DistributedGroupSampler (line 51) | class DistributedGroupSampler(Sampler):
    method __init__ (line 66) | def __init__(self,
    method __iter__ (line 92) | def __iter__(self):
    method __len__ (line 128) | def __len__(self):
    method set_epoch (line 131) | def set_epoch(self, epoch):

FILE: mmdet/datasets/repeat_dataset.py
  class RepeatDataset (line 4) | class RepeatDataset(object):
    method __init__ (line 6) | def __init__(self, dataset, times):
    method __getitem__ (line 15) | def __getitem__(self, idx):
    method __len__ (line 18) | def __len__(self):

FILE: mmdet/datasets/transforms.py
  class ImageTransform (line 8) | class ImageTransform(object):
    method __init__ (line 18) | def __init__(self,
    method __call__ (line 28) | def __call__(self, img, scale, flip=False, keep_ratio=True):
  function bbox_flip (line 49) | def bbox_flip(bboxes, img_shape):
  class BboxTransform (line 64) | class BboxTransform(object):
    method __init__ (line 72) | def __init__(self, max_num_gts=None):
    method __call__ (line 75) | def __call__(self, bboxes, img_shape, scale_factor, flip=False):
  class MaskTransform (line 90) | class MaskTransform(object):
    method __call__ (line 98) | def __call__(self, masks, pad_shape, scale_factor, flip=False):
  class Numpy2Tensor (line 112) | class Numpy2Tensor(object):
    method __init__ (line 114) | def __init__(self):
    method __call__ (line 117) | def __call__(self, *args):

FILE: mmdet/datasets/utils.py
  function to_tensor (line 15) | def to_tensor(data):
  function random_scale (line 36) | def random_scale(img_scales, mode='range'):
  function show_ann (line 70) | def show_ann(coco, img, ann_info):
  function get_dataset (line 77) | def get_dataset(data_cfg):

FILE: mmdet/datasets/voc.py
  class VOCDataset (line 4) | class VOCDataset(XMLDataset):
    method __init__ (line 11) | def __init__(self, **kwargs):

FILE: mmdet/datasets/xml_style.py
  class XMLDataset (line 10) | class XMLDataset(CustomDataset):
    method __init__ (line 12) | def __init__(self, **kwargs):
    method load_annotations (line 16) | def load_annotations(self, ann_file):
    method get_ann_info (line 32) | def get_ann_info(self, idx):

FILE: mmdet/models/anchor_heads/anchor_head.py
  class AnchorHead (line 16) | class AnchorHead(nn.Module):
    method __init__ (line 33) | def __init__(self,
    method _init_layers (line 72) | def _init_layers(self):
    method init_weights (line 77) | def init_weights(self):
    method forward_single (line 81) | def forward_single(self, x):
    method forward (line 86) | def forward(self, feats):
    method get_anchors (line 89) | def get_anchors(self, featmap_sizes, img_metas):
    method loss_single (line 128) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
    method loss (line 172) | def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
    method get_bboxes (line 210) | def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,
    method get_bboxes_single (line 236) | def get_bboxes_single(self,

FILE: mmdet/models/anchor_heads/retina_head.py
  class RetinaHead (line 11) | class RetinaHead(AnchorHead):
    method __init__ (line 13) | def __init__(self,
    method _init_layers (line 34) | def _init_layers(self):
    method init_weights (line 52) | def init_weights(self):
    method forward_single (line 61) | def forward_single(self, x):

FILE: mmdet/models/anchor_heads/rpn_head.py
  class RPNHead (line 13) | class RPNHead(AnchorHead):
    method __init__ (line 15) | def __init__(self, in_channels, **kwargs):
    method _init_layers (line 18) | def _init_layers(self):
    method init_weights (line 25) | def init_weights(self):
    method forward_single (line 30) | def forward_single(self, x):
    method loss (line 37) | def loss(self, cls_scores, bbox_preds, gt_bboxes, img_metas, cfg):
    method get_bboxes_single (line 43) | def get_bboxes_single(self,

FILE: mmdet/models/anchor_heads/ssd_head.py
  class SSDHead (line 14) | class SSDHead(AnchorHead):
    method __init__ (line 16) | def __init__(self,
    method init_weights (line 95) | def init_weights(self):
    method forward (line 100) | def forward(self, feats):
    method loss_single (line 109) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
    method loss (line 133) | def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,

FILE: mmdet/models/backbones/resnet.py
  function conv3x3 (line 14) | def conv3x3(in_planes, out_planes, stride=1, dilation=1):
  class BasicBlock (line 26) | class BasicBlock(nn.Module):
    method __init__ (line 29) | def __init__(self,
    method norm1 (line 55) | def norm1(self):
    method norm2 (line 59) | def norm2(self):
    method forward (line 62) | def forward(self, x):
  class Bottleneck (line 81) | class Bottleneck(nn.Module):
    method __init__ (line 84) | def __init__(self,
    method norm1 (line 176) | def norm1(self):
    method norm2 (line 180) | def norm2(self):
    method norm3 (line 184) | def norm3(self):
    method forward (line 187) | def forward(self, x):
  function make_res_layer (line 229) | def make_res_layer(block,
  class ResNet (line 280) | class ResNet(nn.Module):
    method __init__ (line 312) | def __init__(self,
    method norm1 (line 380) | def norm1(self):
    method _make_stem_layer (line 383) | def _make_stem_layer(self):
    method _freeze_stages (line 392) | def _freeze_stages(self):
    method init_weights (line 403) | def init_weights(self, pretrained=None):
    method forward (line 429) | def forward(self, x):
    method train (line 445) | def train(self, mode=True):

FILE: mmdet/models/backbones/resnext.py
  class Bottleneck (line 12) | class Bottleneck(_Bottleneck):
    method __init__ (line 14) | def __init__(self, *args, groups=1, base_width=4, **kwargs):
  function make_res_layer (line 87) | def make_res_layer(block,
  class ResNeXt (line 144) | class ResNeXt(ResNet):
    method __init__ (line 176) | def __init__(self, groups=1, base_width=4, **kwargs):

FILE: mmdet/models/backbones/ssd_vgg.py
  class SSDVGG (line 13) | class SSDVGG(VGG):
    method __init__ (line 19) | def __init__(self,
    method init_weights (line 55) | def init_weights(self, pretrained=None):
    method forward (line 76) | def forward(self, x):
    method _make_extra_layers (line 92) | def _make_extra_layers(self, outplanes):
  class L2Norm (line 119) | class L2Norm(nn.Module):
    method __init__ (line 121) | def __init__(self, n_dims, scale=20., eps=1e-10):
    method forward (line 128) | def forward(self, x):

FILE: mmdet/models/bbox_heads/bbox_head.py
  class BBoxHead (line 11) | class BBoxHead(nn.Module):
    method __init__ (line 15) | def __init__(self,
    method init_weights (line 49) | def init_weights(self):
    method forward (line 57) | def forward(self, x):
    method get_target (line 65) | def get_target(self, sampling_results, gt_bboxes, gt_labels,
    method loss (line 83) | def loss(self,
    method get_det_bboxes (line 104) | def get_det_bboxes(self,
    method refine_bboxes (line 134) | def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
    method regress_by_class (line 174) | def regress_by_class(self, rois, label, bbox_pred, img_meta):

FILE: mmdet/models/bbox_heads/convfc_bbox_head.py
  class ConvFCBBoxHead (line 9) | class ConvFCBBoxHead(BBoxHead):
    method __init__ (line 18) | def __init__(self,
    method _add_conv_fc_branch (line 82) | def _add_conv_fc_branch(self,
    method init_weights (line 123) | def init_weights(self):
    method forward (line 131) | def forward(self, x):
  class SharedFCBBoxHead (line 171) | class SharedFCBBoxHead(ConvFCBBoxHead):
    method __init__ (line 173) | def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):

FILE: mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py
  class ConvFCRoIHeadEnhance (line 8) | class ConvFCRoIHeadEnhance(BBoxHead):
    method __init__ (line 17) | def __init__(self,
    method _add_conv_fc_branch (line 85) | def _add_conv_fc_branch(self,
    method init_weights (line 126) | def init_weights(self):
    method forward (line 134) | def forward(self, x, enhanced_feature=None):

FILE: mmdet/models/bbox_heads/graph_bbox_head.py
  class GraphBBoxHead (line 10) | class GraphBBoxHead(BBoxHead):
    method __init__ (line 19) | def __init__(self,
    method _add_conv_fc_branch (line 87) | def _add_conv_fc_branch(self,
    method init_weights (line 131) | def init_weights(self):
    method forward (line 140) | def forward(self, x, geom_f, bs):
    method loss (line 211) | def loss(self, cls_score, bbox_pred, A_pred, A_gt, labels, label_weigh...
    method propagate_em (line 232) | def propagate_em(self, x, A, W):

FILE: mmdet/models/builder.py
  function _build_module (line 7) | def _build_module(cfg, registry, default_args):
  function build (line 26) | def build(cfg, registry, default_args=None):
  function build_backbone (line 34) | def build_backbone(cfg):
  function build_neck (line 38) | def build_neck(cfg):
  function build_roi_extractor (line 42) | def build_roi_extractor(cfg):
  function build_head (line 46) | def build_head(cfg):
  function build_detector (line 50) | def build_detector(cfg, train_cfg=None, test_cfg=None):

FILE: mmdet/models/detectors/base.py
  class BaseDetector (line 12) | class BaseDetector(nn.Module):
    method __init__ (line 17) | def __init__(self):
    method with_neck (line 21) | def with_neck(self):
    method with_bbox (line 25) | def with_bbox(self):
    method with_mask (line 29) | def with_mask(self):
    method extract_feat (line 33) | def extract_feat(self, imgs):
    method extract_feats (line 36) | def extract_feats(self, imgs):
    method forward_train (line 42) | def forward_train(self, imgs, img_metas, **kwargs):
    method simple_test (line 46) | def simple_test(self, img, img_meta, **kwargs):
    method aug_test (line 50) | def aug_test(self, imgs, img_metas, **kwargs):
    method init_weights (line 53) | def init_weights(self, pretrained=None):
    method forward_test (line 58) | def forward_test(self, imgs, img_metas, **kwargs):
    method forward (line 78) | def forward(self, img, img_meta, return_loss=True, **kwargs):
    method show_result (line 84) | def show_result(self,

FILE: mmdet/models/detectors/cascade_rcnn.py
  class CascadeRCNN (line 15) | class CascadeRCNN(BaseDetector, RPNTestMixin):
    method __init__ (line 17) | def __init__(self,
    method with_rpn (line 80) | def with_rpn(self):
    method init_weights (line 83) | def init_weights(self, pretrained=None):
    method extract_feat (line 102) | def extract_feat(self, img):
    method forward_train (line 108) | def forward_train(self,
    method simple_test (line 190) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
    method aug_test (line 307) | def aug_test(self, img, img_meta, proposals=None, rescale=False):
    method show_result (line 310) | def show_result(self, data, result, img_norm_cfg, **kwargs):

FILE: mmdet/models/detectors/fast_rcnn.py
  class FastRCNN (line 6) | class FastRCNN(TwoStageDetector):
    method __init__ (line 8) | def __init__(self,
    method forward_test (line 29) | def forward_test(self, imgs, img_metas, proposals, **kwargs):

FILE: mmdet/models/detectors/faster_rcnn.py
  class FasterRCNN (line 6) | class FasterRCNN(TwoStageDetector):
    method __init__ (line 8) | def __init__(self,

FILE: mmdet/models/detectors/hkrm_rcnn.py
  class HKRMRCNN (line 14) | class HKRMRCNN(BaseDetector, RPNTestMixin, BBoxTestMixin,
    method __init__ (line 17) | def __init__(self,
    method with_rpn (line 70) | def with_rpn(self):
    method init_weights (line 73) | def init_weights(self, pretrained=None):
    method extract_feat (line 90) | def extract_feat(self, img):
    method forward_train (line 96) | def forward_train(self,
    method simple_test (line 217) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
    method aug_test (line 238) | def aug_test(self, imgs, img_metas, rescale=False):
    method simple_test_bboxes_hkrm (line 267) | def simple_test_bboxes_hkrm(self,
    method aug_test_bboxes_hkrm (line 305) | def aug_test_bboxes_hkrm(self, feats, img_metas, proposal_list, rcnn_t...

FILE: mmdet/models/detectors/mask_rcnn.py
  class MaskRCNN (line 6) | class MaskRCNN(TwoStageDetector):
    method __init__ (line 8) | def __init__(self,

FILE: mmdet/models/detectors/reasoning_rcnn.py
  class ReasoningRCNN (line 19) | class ReasoningRCNN(BaseDetector, RPNTestMixin):
    method __init__ (line 21) | def __init__(self,
    method with_rpn (line 131) | def with_rpn(self):
    method init_weights (line 134) | def init_weights(self, pretrained=None):
    method extract_feat (line 154) | def extract_feat(self, img):
    method forward_upper_neck (line 160) | def forward_upper_neck(self, x, stage):
    method forward_train (line 167) | def forward_train(self,
    method simple_test (line 299) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
    method aug_test (line 469) | def aug_test(self, img, img_meta, proposals=None, rescale=False):
    method show_result (line 472) | def show_result(self, data, result, img_norm_cfg, **kwargs):

FILE: mmdet/models/detectors/retinanet.py
  class RetinaNet (line 6) | class RetinaNet(SingleStageDetector):
    method __init__ (line 8) | def __init__(self,

FILE: mmdet/models/detectors/rpn.py
  class RPN (line 11) | class RPN(BaseDetector, RPNTestMixin):
    method __init__ (line 13) | def __init__(self,
    method init_weights (line 28) | def init_weights(self, pretrained=None):
    method extract_feat (line 35) | def extract_feat(self, img):
    method forward_train (line 41) | def forward_train(self, img, img_meta, gt_bboxes=None):
    method simple_test (line 52) | def simple_test(self, img, img_meta, rescale=False):
    method aug_test (line 61) | def aug_test(self, imgs, img_metas, rescale=False):
    method show_result (line 74) | def show_result(self, data, result, img_norm_cfg):

FILE: mmdet/models/detectors/sgrn.py
  class ThreeStageGraphDetector (line 13) | class ThreeStageGraphDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
    method __init__ (line 16) | def __init__(self,
    method with_rpn (line 83) | def with_rpn(self):
    method init_weights (line 86) | def init_weights(self, pretrained=None):
    method extract_feat (line 103) | def extract_feat(self, img):
    method forward_train (line 109) | def forward_train(self,
    method simple_test (line 299) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
    method simple_test_bboxes_ms (line 330) | def simple_test_bboxes_ms(self,
    method aug_test (line 472) | def aug_test(self, imgs, img_metas, rescale=False):
    method _compute_pseudo (line 501) | def _compute_pseudo(self, bb_centre):
    method _create_neighbourhood (line 529) | def _create_neighbourhood(self,
    method _create_neighbourhood_feat (line 572) | def _create_neighbourhood_feat(self, image, top_ind):
    method _create_neighbourhood_pseudo (line 590) | def _create_neighbourhood_pseudo(self, pseudo, top_ind):
  class GraphLearner (line 607) | class GraphLearner(nn.Module):
    method __init__ (line 608) | def __init__(self, in_feature_dim, combined_feature_dim, dropout=0.5):
    method forward (line 632) | def forward(self, graph_nodes):
  class NeighbourhoodGraphConvolution (line 658) | class NeighbourhoodGraphConvolution(Module):
    method __init__ (line 664) | def __init__(self,
    method init_parameters (line 699) | def init_parameters(self):
    method forward (line 706) | def forward(self, neighbourhood_features, neighbourhood_pseudo_coord):
    method get_gaussian_weights (line 733) | def get_gaussian_weights(self, pseudo_coord):
    method convolution (line 760) | def convolution(self, neighbourhood, weights):

FILE: mmdet/models/detectors/single_stage.py
  class SingleStageDetector (line 10) | class SingleStageDetector(BaseDetector):
    method __init__ (line 12) | def __init__(self,
    method init_weights (line 28) | def init_weights(self, pretrained=None):
    method extract_feat (line 39) | def extract_feat(self, img):
    method forward_train (line 45) | def forward_train(self, img, img_metas, gt_bboxes, gt_labels):
    method simple_test (line 52) | def simple_test(self, img, img_meta, rescale=False):
    method aug_test (line 63) | def aug_test(self, imgs, img_metas, rescale=False):

FILE: mmdet/models/detectors/test_mixins.py
  class RPNTestMixin (line 5) | class RPNTestMixin(object):
    method simple_test_rpn (line 7) | def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
    method aug_test_rpn (line 13) | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
  class BBoxTestMixin (line 28) | class BBoxTestMixin(object):
    method simple_test_bboxes (line 30) | def simple_test_bboxes(self,
    method aug_test_bboxes (line 53) | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_c...
  class MaskTestMixin (line 88) | class MaskTestMixin(object):
    method simple_test_mask (line 90) | def simple_test_mask(self,
    method aug_test_mask (line 115) | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):

FILE: mmdet/models/detectors/two_stage.py
  class TwoStageDetector (line 12) | class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
    method __init__ (line 15) | def __init__(self,
    method with_rpn (line 53) | def with_rpn(self):
    method init_weights (line 56) | def init_weights(self, pretrained=None):
    method extract_feat (line 74) | def extract_feat(self, img):
    method forward_train (line 80) | def forward_train(self,
    method simple_test (line 155) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
    method aug_test (line 176) | def aug_test(self, imgs, img_metas, rescale=False):

FILE: mmdet/models/mask_heads/fcn_mask_head.py
  class FCNMaskHead (line 13) | class FCNMaskHead(nn.Module):
    method __init__ (line 15) | def __init__(self,
    method init_weights (line 73) | def init_weights(self):
    method forward (line 81) | def forward(self, x):
    method get_target (line 91) | def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
    method loss (line 100) | def loss(self, mask_pred, mask_targets, labels):
    method get_seg_masks (line 110) | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,

FILE: mmdet/models/necks/fpn.py
  class FPN (line 10) | class FPN(nn.Module):
    method __init__ (line 12) | def __init__(self,
    method init_weights (line 90) | def init_weights(self):
    method forward (line 95) | def forward(self, inputs):

FILE: mmdet/models/registry.py
  class Registry (line 4) | class Registry(object):
    method __init__ (line 6) | def __init__(self, name):
    method name (line 11) | def name(self):
    method module_dict (line 15) | def module_dict(self):
    method _register_module (line 18) | def _register_module(self, module_class):
    method register_module (line 34) | def register_module(self, cls):

FILE: mmdet/models/roi_extractors/single_level.py
  class SingleRoIExtractor (line 11) | class SingleRoIExtractor(nn.Module):
    method __init__ (line 24) | def __init__(self,
    method num_inputs (line 36) | def num_inputs(self):
    method init_weights (line 40) | def init_weights(self):
    method build_roi_layers (line 43) | def build_roi_layers(self, layer_cfg, featmap_strides):
    method map_roi_levels (line 52) | def map_roi_levels(self, rois, num_levels):
    method forward (line 73) | def forward(self, feats, rois):

FILE: mmdet/models/utils/conv_module.py
  class ConvModule (line 9) | class ConvModule(nn.Module):
    method __init__ (line 11) | def __init__(self,
    method norm (line 68) | def norm(self):
    method init_weights (line 71) | def init_weights(self):
    method forward (line 77) | def forward(self, x, activate=True, norm=True):

FILE: mmdet/models/utils/norm.py
  function build_norm_layer (line 13) | def build_norm_layer(cfg, num_features, postfix=''):

FILE: mmdet/models/utils/weight_init.py
  function xavier_init (line 5) | def xavier_init(module, gain=1, bias=0, distribution='normal'):
  function normal_init (line 15) | def normal_init(module, mean=0, std=1, bias=0):
  function uniform_init (line 21) | def uniform_init(module, a=0, b=1, bias=0):
  function kaiming_init (line 27) | def kaiming_init(module,
  function bias_init_with_prob (line 43) | def bias_init_with_prob(prior_prob):

FILE: mmdet/ops/dcn/functions/deform_conv.py
  class DeformConvFunction (line 8) | class DeformConvFunction(Function):
    method forward (line 11) | def forward(ctx,
    method backward (line 55) | def backward(ctx, grad_output):
    method _output_size (line 92) | def _output_size(input, weight, padding, dilation, stride):
  class ModulatedDeformConvFunction (line 108) | class ModulatedDeformConvFunction(Function):
    method forward (line 111) | def forward(ctx,
    method backward (line 146) | def backward(ctx, grad_output):
    method _infer_shape (line 168) | def _infer_shape(ctx, input, weight):

FILE: mmdet/ops/dcn/functions/deform_pool.py
  class DeformRoIPoolingFunction (line 7) | class DeformRoIPoolingFunction(Function):
    method forward (line 10) | def forward(ctx,
    method backward (line 50) | def backward(ctx, grad_output):

FILE: mmdet/ops/dcn/modules/deform_conv.py
  class DeformConv (line 10) | class DeformConv(nn.Module):
    method __init__ (line 12) | def __init__(self,
    method reset_parameters (line 46) | def reset_parameters(self):
    method forward (line 53) | def forward(self, input, offset):
  class ModulatedDeformConv (line 59) | class ModulatedDeformConv(nn.Module):
    method __init__ (line 61) | def __init__(self,
    method reset_parameters (line 91) | def reset_parameters(self):
    method forward (line 100) | def forward(self, input, offset, mask):
  class ModulatedDeformConvPack (line 106) | class ModulatedDeformConvPack(ModulatedDeformConv):
    method __init__ (line 108) | def __init__(self,
    method init_offset (line 132) | def init_offset(self):
    method forward (line 136) | def forward(self, input):

FILE: mmdet/ops/dcn/modules/deform_pool.py
  class DeformRoIPooling (line 6) | class DeformRoIPooling(nn.Module):
    method __init__ (line 8) | def __init__(self,
    method forward (line 27) | def forward(self, data, rois, offset):
  class DeformRoIPoolingPack (line 36) | class DeformRoIPoolingPack(DeformRoIPooling):
    method __init__ (line 38) | def __init__(self,
    method forward (line 66) | def forward(self, data, rois):
  class ModulatedDeformRoIPoolingPack (line 89) | class ModulatedDeformRoIPoolingPack(DeformRoIPooling):
    method __init__ (line 91) | def __init__(self,
    method forward (line 128) | def forward(self, data, rois):

FILE: mmdet/ops/dcn/src/deform_conv_cuda.cpp
  function shape_check (line 62) | void shape_check(at::Tensor input, at::Tensor offset,
  function deform_conv_forward_cuda (line 153) | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
  function deform_conv_backward_input_cuda (line 249) | int deform_conv_backward_input_cuda(
  function deform_conv_backward_parameters_cuda (line 347) | int deform_conv_backward_parameters_cuda(
  function modulated_deform_conv_cuda_forward (line 446) | void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor wei...
  function modulated_deform_conv_cuda_backward (line 520) | void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor we...
  function PYBIND11_MODULE (line 623) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)

FILE: mmdet/ops/dcn/src/deform_pool_cuda.cpp
  function deform_psroi_pooling_cuda_forward (line 53) | void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
  function deform_psroi_pooling_cuda_backward (line 92) | void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
  function PYBIND11_MODULE (line 138) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)

FILE: mmdet/ops/nms/nms_wrapper.py
  function nms (line 9) | def nms(dets, iou_thr, device_id=None):
  function soft_nms (line 37) | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):

FILE: mmdet/ops/nms/setup.py
  function customize_compiler_for_nvcc (line 25) | def customize_compiler_for_nvcc(self):
  class custom_build_ext (line 63) | class custom_build_ext(build_ext):
    method build_extensions (line 65) | def build_extensions(self):

FILE: mmdet/ops/roi_align/functions/roi_align.py
  class RoIAlignFunction (line 6) | class RoIAlignFunction(Function):
    method forward (line 9) | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
    method backward (line 39) | def backward(ctx, grad_output):

FILE: mmdet/ops/roi_align/modules/roi_align.py
  class RoIAlign (line 5) | class RoIAlign(Module):
    method __init__ (line 7) | def __init__(self, out_size, spatial_scale, sample_num=0):
    method forward (line 14) | def forward(self, features, rois):

FILE: mmdet/ops/roi_align/src/roi_align_cuda.cpp
  function roi_align_forward_cuda (line 27) | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
  function roi_align_backward_cuda (line 55) | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
  function PYBIND11_MODULE (line 82) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: mmdet/ops/roi_pool/functions/roi_pool.py
  class RoIPoolFunction (line 7) | class RoIPoolFunction(Function):
    method forward (line 10) | def forward(ctx, features, rois, out_size, spatial_scale):
    method backward (line 39) | def backward(ctx, grad_output):

FILE: mmdet/ops/roi_pool/modules/roi_pool.py
  class RoIPool (line 5) | class RoIPool(Module):
    method __init__ (line 7) | def __init__(self, out_size, spatial_scale):
    method forward (line 13) | def forward(self, features, rois):

FILE: mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
  function roi_pooling_forward_cuda (line 26) | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
  function roi_pooling_backward_cuda (line 54) | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
  function PYBIND11_MODULE (line 83) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: setup.py
  function readme (line 7) | def readme():
  function get_git_hash (line 22) | def get_git_hash():
  function get_hash (line 48) | def get_hash():
  function write_version_py (line 63) | def write_version_py():
  function get_version (line 77) | def get_version():

FILE: tools/coco_eval.py
  function main (line 6) | def main():

FILE: tools/convert_datasets/pascal_voc.py
  function parse_xml (line 13) | def parse_xml(args):
  function cvt_annotations (line 67) | def cvt_annotations(devkit_path, years, split, out_file):
  function parse_args (line 94) | def parse_args():
  function main (line 103) | def main():

FILE: tools/test.py
  function single_test (line 14) | def single_test(model, data_loader, show=False):
  function _data_func (line 35) | def _data_func(data, device_id):
  function parse_args (line 40) | def parse_args():
  function main (line 63) | def main():

FILE: tools/train.py
  function parse_args (line 14) | def parse_args():
  function main (line 42) | def main():

FILE: tools/voc_eval.py
  function voc_eval (line 10) | def voc_eval(result_file, dataset, iou_thr=0.5):
  function main (line 46) | def main():
Condensed preview — 179 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (705K chars).
[
  {
    "path": ".gitignore",
    "chars": 1300,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\ntools/work_dirs/\n\n# C extensions\n*.so\n\n# Distr"
  },
  {
    "path": ".travis.yml",
    "chars": 111,
    "preview": "dist: trusty\nlanguage: python\n\ninstall:\n  - pip install flake8\n\npython:\n  - \"3.5\"\n  - \"3.6\"\n\nscript:\n  - flake8"
  },
  {
    "path": "INSTALL.md",
    "chars": 1464,
    "preview": "## Installation\n\n### Requirements\n\n- Linux (tested on Ubuntu 16.04 and CentOS 7.2)\n- Python 3.4+\n- PyTorch 0.4.1\n- Cytho"
  },
  {
    "path": "LICENSE",
    "chars": 11357,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "MODEL_ZOO.md",
    "chars": 31264,
    "preview": "# Benchmark and Model Zoo\n\n## Environment\n\n### Hardware\n\n- 8 NVIDIA Tesla V100 GPUs\n- Intel Xeon 4114 CPU @ 2.20GHz\n\n###"
  },
  {
    "path": "README.md",
    "chars": 525,
    "preview": "# Environments\n- pytorch 0.3.0/0.4.1\n- oldest mmdetection\n\n\n# Reasoning-RCNN\nReasoning-RCNN: Unifying Adaptive Global Re"
  },
  {
    "path": "TECHNICAL_DETAILS.md",
    "chars": 3594,
    "preview": "## Overview\n\nIn this section, we will introduce the main units of training a detector:\ndata loading, model and iteration"
  },
  {
    "path": "compile.sh",
    "chars": 495,
    "preview": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\necho \"Building roi align op...\"\ncd mmdet/ops/roi_align\nif [ -d \"build\" "
  },
  {
    "path": "configs/ade_faster_rcnn_r101_fpn_1x.py",
    "chars": 4405,
    "preview": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        "
  },
  {
    "path": "configs/coco_faster_rcnn_r101_fpn_1x.py",
    "chars": 4528,
    "preview": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        "
  },
  {
    "path": "configs/coco_sgrb_fpn_ms.py",
    "chars": 5173,
    "preview": "# model settings\nmodel = dict(\n    type='ThreeStageGraphDetector',\n    pretrained='modelzoo://resnet101',\n    backbone=d"
  },
  {
    "path": "configs/hkrm/ade_faster_rcnn_r50_fpn_1x.py",
    "chars": 4702,
    "preview": "# model settings\nmodel = dict(\n    type='HKRMRCNN',\n    pretrained='modelzoo://resnet50',\n    adja_gt='/home/cyan/code/m"
  },
  {
    "path": "configs/hkrm/coco_faster_rcnn_r101_fpn_1x.py",
    "chars": 4719,
    "preview": "# model settings\nmodel = dict(\n    type='HKRMRCNN',\n    adja_gt= './graph/new_ade_graph_a.pkl',\n    adjr_gt= './graph/ne"
  },
  {
    "path": "configs/hkrm/vg_faster_rcnn_r101_fpn_1x.py",
    "chars": 4646,
    "preview": "# model settings\nmodel = dict(\n    type='HKRMRCNN',\n    #pretrained='modelzoo://resnet101',\n    adja_gt='./graph/new_vg_"
  },
  {
    "path": "configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py",
    "chars": 4721,
    "preview": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        t"
  },
  {
    "path": "configs/pascal_voc/ssd300_voc.py",
    "chars": 4023,
    "preview": "# model settings\ninput_size = 300\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe"
  },
  {
    "path": "configs/pascal_voc/ssd512_voc.py",
    "chars": 4042,
    "preview": "# model settings\ninput_size = 512\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe"
  },
  {
    "path": "configs/rrcnn/ade_reasoning_rcnn_r101_fpn_1x.py",
    "chars": 5336,
    "preview": "# model settings\nmodel = dict(\n    type='ReasoningRCNN',\n    num_stages=2,\n    adj_gt='./graph/new_ade_graph_r.pkl',\n   "
  },
  {
    "path": "configs/rrcnn/coco_reasoning_rcnn_r101_fpn_1x.py",
    "chars": 5496,
    "preview": "# model settings\nmodel = dict(\n    type='ReasoningRCNN',\n    num_stages=2,\n    adj_gt='./graph/new_COCO_graph_r.pkl', # "
  },
  {
    "path": "configs/rrcnn/vg_reasoning_rcnn_r101_fpn_1x.py",
    "chars": 5421,
    "preview": "# model settings\nmodel = dict(\n    type='ReasoningRCNN',\n    num_stages=2,\n    adj_gt='./graph/new_COCO_graph_r.pkl',\n  "
  },
  {
    "path": "configs/vg_faster_rcnn_r101_fpn_1x.py",
    "chars": 4412,
    "preview": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        "
  },
  {
    "path": "configs/vgbig_faster_rcnn_r101_fpn_1x.py",
    "chars": 4442,
    "preview": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        "
  },
  {
    "path": "mmdet/__init__.py",
    "chars": 92,
    "preview": "from .version import __version__, short_version\n\n__all__ = ['__version__', 'short_version']\n"
  },
  {
    "path": "mmdet/apis/__init__.py",
    "chars": 278,
    "preview": "from .env import init_dist, get_root_logger, set_random_seed\nfrom .train import train_detector\nfrom .inference import in"
  },
  {
    "path": "mmdet/apis/env.py",
    "chars": 1514,
    "preview": "import logging\nimport os\nimport random\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nimport torch.mu"
  },
  {
    "path": "mmdet/apis/inference.py",
    "chars": 1930,
    "preview": "import mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.datasets import to_tensor\nfrom mmdet.datasets.transforms import "
  },
  {
    "path": "mmdet/apis/train.py",
    "chars": 3964,
    "preview": "from __future__ import division\n\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv.runner import Runner, DistS"
  },
  {
    "path": "mmdet/core/__init__.py",
    "chars": 300,
    "preview": "from .anchor import *  # noqa: F401, F403\nfrom .bbox import *  # noqa: F401, F403\nfrom .mask import *  # noqa: F401, F40"
  },
  {
    "path": "mmdet/core/anchor/__init__.py",
    "chars": 135,
    "preview": "from .anchor_generator import AnchorGenerator\nfrom .anchor_target import anchor_target\n\n__all__ = ['AnchorGenerator', 'a"
  },
  {
    "path": "mmdet/core/anchor/anchor_generator.py",
    "chars": 3117,
    "preview": "import torch\n\n\nclass AnchorGenerator(object):\n\n    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=N"
  },
  {
    "path": "mmdet/core/anchor/anchor_target.py",
    "chars": 7498,
    "preview": "import torch\n\nfrom ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta\nfrom ..utils import multi_"
  },
  {
    "path": "mmdet/core/bbox/__init__.py",
    "chars": 1046,
    "preview": "from .geometry import bbox_overlaps\nfrom .assigners import BaseAssigner, MaxIoUAssigner, AssignResult\nfrom .samplers imp"
  },
  {
    "path": "mmdet/core/bbox/assign_sampling.py",
    "chars": 1185,
    "preview": "import mmcv\n\nfrom . import assigners, samplers\n\n\ndef build_assigner(cfg, **kwargs):\n    if isinstance(cfg, assigners.Bas"
  },
  {
    "path": "mmdet/core/bbox/assigners/__init__.py",
    "chars": 187,
    "preview": "from .base_assigner import BaseAssigner\nfrom .max_iou_assigner import MaxIoUAssigner\nfrom .assign_result import AssignRe"
  },
  {
    "path": "mmdet/core/bbox/assigners/assign_result.py",
    "chars": 664,
    "preview": "import torch\n\n\nclass AssignResult(object):\n\n    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):\n       "
  },
  {
    "path": "mmdet/core/bbox/assigners/base_assigner.py",
    "chars": 195,
    "preview": "from abc import ABCMeta, abstractmethod\n\n\nclass BaseAssigner(metaclass=ABCMeta):\n\n    @abstractmethod\n    def assign(sel"
  },
  {
    "path": "mmdet/core/bbox/assigners/max_iou_assigner.py",
    "chars": 6126,
    "preview": "import torch\n\nfrom .base_assigner import BaseAssigner\nfrom .assign_result import AssignResult\nfrom ..geometry import bbo"
  },
  {
    "path": "mmdet/core/bbox/bbox_target.py",
    "chars": 2974,
    "preview": "import torch\n\nfrom .transforms import bbox2delta\nfrom ..utils import multi_apply\n\n\ndef bbox_target(pos_bboxes_list,\n    "
  },
  {
    "path": "mmdet/core/bbox/geometry.py",
    "chars": 2163,
    "preview": "import torch\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):\n    \"\"\"Calculate overlap between two s"
  },
  {
    "path": "mmdet/core/bbox/samplers/__init__.py",
    "chars": 640,
    "preview": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSample"
  },
  {
    "path": "mmdet/core/bbox/samplers/base_sampler.py",
    "chars": 2753,
    "preview": "from abc import ABCMeta, abstractmethod\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass BaseSampler(m"
  },
  {
    "path": "mmdet/core/bbox/samplers/combined_sampler.py",
    "chars": 509,
    "preview": "from .base_sampler import BaseSampler\nfrom ..assign_sampling import build_sampler\n\n\nclass CombinedSampler(BaseSampler):\n"
  },
  {
    "path": "mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py",
    "chars": 1765,
    "preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass InstanceBalancedPosSampler(RandomSamp"
  },
  {
    "path": "mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py",
    "chars": 2757,
    "preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass IoUBalancedNegSampler(RandomSampler):"
  },
  {
    "path": "mmdet/core/bbox/samplers/ohem_sampler.py",
    "chars": 2512,
    "preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom ..transforms import bbox2roi\n\n\nclass OHEMSampler(BaseSampler):\n"
  },
  {
    "path": "mmdet/core/bbox/samplers/pseudo_sampler.py",
    "chars": 829,
    "preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\nclass PseudoSampler(Ba"
  },
  {
    "path": "mmdet/core/bbox/samplers/random_sampler.py",
    "chars": 1858,
    "preview": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\n\n\nclass RandomSampler(BaseSampler):\n\n    def __in"
  },
  {
    "path": "mmdet/core/bbox/samplers/random_sampler_fixnum.py",
    "chars": 5756,
    "preview": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\ncla"
  },
  {
    "path": "mmdet/core/bbox/samplers/sampling_result.py",
    "chars": 790,
    "preview": "import torch\n\n\nclass SamplingResult(object):\n\n    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_resul"
  },
  {
    "path": "mmdet/core/bbox/transforms.py",
    "chars": 5036,
    "preview": "import mmcv\nimport numpy as np\nimport torch\n\n\ndef bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):\n    "
  },
  {
    "path": "mmdet/core/evaluation/__init__.py",
    "chars": 967,
    "preview": "from .class_names import (voc_classes, imagenet_det_classes,\n                          imagenet_vid_classes, coco_classe"
  },
  {
    "path": "mmdet/core/evaluation/bbox_overlaps.py",
    "chars": 1642,
    "preview": "import numpy as np\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou'):\n    \"\"\"Calculate the ious between each bbox of bbo"
  },
  {
    "path": "mmdet/core/evaluation/class_names.py",
    "chars": 5155,
    "preview": "import mmcv\n\n\ndef voc_classes():\n    return [\n        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'c"
  },
  {
    "path": "mmdet/core/evaluation/coco_utils.py",
    "chars": 4807,
    "preview": "import mmcv\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\nfrom .recall"
  },
  {
    "path": "mmdet/core/evaluation/eval_hooks.py",
    "chars": 6788,
    "preview": "import os\nimport os.path as osp\nimport shutil\nimport time\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.runner "
  },
  {
    "path": "mmdet/core/evaluation/mean_ap.py",
    "chars": 15761,
    "preview": "import mmcv\nimport numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\nfrom .cla"
  },
  {
    "path": "mmdet/core/evaluation/recall.py",
    "chars": 5961,
    "preview": "import numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\n\n\ndef _recalls(all_io"
  },
  {
    "path": "mmdet/core/loss/__init__.py",
    "chars": 508,
    "preview": "from .losses import (weighted_nll_loss, weighted_cross_entropy,\n                     weighted_binary_cross_entropy, sigm"
  },
  {
    "path": "mmdet/core/loss/losses.py",
    "chars": 3790,
    "preview": "# TODO merge naive and weighted loss.\nimport torch\nimport torch.nn.functional as F\n\n\ndef weighted_nll_loss(pred, label, "
  },
  {
    "path": "mmdet/core/mask/__init__.py",
    "chars": 128,
    "preview": "from .utils import split_combined_polys\nfrom .mask_target import mask_target\n\n__all__ = ['split_combined_polys', 'mask_t"
  },
  {
    "path": "mmdet/core/mask/mask_target.py",
    "chars": 1427,
    "preview": "import torch\nimport numpy as np\nimport mmcv\n\n\ndef mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_li"
  },
  {
    "path": "mmdet/core/mask/utils.py",
    "chars": 1172,
    "preview": "import mmcv\n\n\ndef split_combined_polys(polys, poly_lens, polys_per_mask):\n    \"\"\"Split the combined 1-D polys into masks"
  },
  {
    "path": "mmdet/core/post_processing/__init__.py",
    "chars": 283,
    "preview": "from .bbox_nms import multiclass_nms\nfrom .merge_augs import (merge_aug_proposals, merge_aug_bboxes,\n                   "
  },
  {
    "path": "mmdet/core/post_processing/bbox_nms.py",
    "chars": 1980,
    "preview": "import torch\n\nfrom mmdet.ops.nms import nms_wrapper\n\n\ndef multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg,"
  },
  {
    "path": "mmdet/core/post_processing/merge_augs.py",
    "chars": 3317,
    "preview": "import torch\n\nimport numpy as np\n\nfrom mmdet.ops import nms\nfrom ..bbox import bbox_mapping_back\n\n\ndef merge_aug_proposa"
  },
  {
    "path": "mmdet/core/utils/__init__.py",
    "chars": 210,
    "preview": "from .dist_utils import allreduce_grads, DistOptimizerHook\nfrom .misc import tensor2imgs, unmap, multi_apply\n\n__all__ = "
  },
  {
    "path": "mmdet/core/utils/dist_utils.py",
    "chars": 1941,
    "preview": "from collections import OrderedDict\n\nimport torch.distributed as dist\nfrom torch._utils import (_flatten_dense_tensors, "
  },
  {
    "path": "mmdet/core/utils/misc.py",
    "chars": 1108,
    "preview": "from functools import partial\n\nimport mmcv\nimport numpy as np\nfrom six.moves import map, zip\n\n\ndef tensor2imgs(tensor, m"
  },
  {
    "path": "mmdet/datasets/__init__.py",
    "chars": 657,
    "preview": "from .custom import CustomDataset\nfrom .xml_style import XMLDataset\nfrom .coco import CocoDataset\nfrom .voc import VOCDa"
  },
  {
    "path": "mmdet/datasets/coco.py",
    "chars": 4871,
    "preview": "import numpy as np\nfrom pycocotools.coco import COCO\n\nfrom .custom import CustomDataset\n\n\nclass CocoDataset(CustomDatase"
  },
  {
    "path": "mmdet/datasets/concat_dataset.py",
    "chars": 698,
    "preview": "import numpy as np\nfrom torch.utils.data.dataset import ConcatDataset as _ConcatDataset\n\n\nclass ConcatDataset(_ConcatDat"
  },
  {
    "path": "mmdet/datasets/custom.py",
    "chars": 11042,
    "preview": "import os.path as osp\n\nimport mmcv\nimport numpy as np\nfrom mmcv.parallel import DataContainer as DC\nfrom torch.utils.dat"
  },
  {
    "path": "mmdet/datasets/extra_aug.py",
    "chars": 5634,
    "preview": "import mmcv\nimport numpy as np\nfrom numpy import random\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\n\n"
  },
  {
    "path": "mmdet/datasets/loader/__init__.py",
    "chars": 183,
    "preview": "from .build_loader import build_dataloader\nfrom .sampler import GroupSampler, DistributedGroupSampler\n\n__all__ = [\n    '"
  },
  {
    "path": "mmdet/datasets/loader/build_loader.py",
    "chars": 1356,
    "preview": "from functools import partial\n\nfrom mmcv.runner import get_dist_info\nfrom mmcv.parallel import collate\nfrom torch.utils."
  },
  {
    "path": "mmdet/datasets/loader/sampler.py",
    "chars": 4682,
    "preview": "from __future__ import division\n\nimport math\nimport torch\nimport numpy as np\n\nfrom torch.distributed import get_world_si"
  },
  {
    "path": "mmdet/datasets/repeat_dataset.py",
    "chars": 479,
    "preview": "import numpy as np\n\n\nclass RepeatDataset(object):\n\n    def __init__(self, dataset, times):\n        self.dataset = datase"
  },
  {
    "path": "mmdet/datasets/transforms.py",
    "chars": 3723,
    "preview": "import mmcv\nimport numpy as np\nimport torch\n\n__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tenso"
  },
  {
    "path": "mmdet/datasets/utils.py",
    "chars": 3683,
    "preview": "import copy\nfrom collections import Sequence\n\nimport mmcv\nfrom mmcv.runner import obj_from_dict\nimport torch\n\nimport mat"
  },
  {
    "path": "mmdet/datasets/voc.py",
    "chars": 638,
    "preview": "from .xml_style import XMLDataset\n\n\nclass VOCDataset(XMLDataset):\n\n    CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat'"
  },
  {
    "path": "mmdet/datasets/xml_style.py",
    "chars": 2687,
    "preview": "import os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom .custom import CustomData"
  },
  {
    "path": "mmdet/models/__init__.py",
    "chars": 640,
    "preview": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSample"
  },
  {
    "path": "mmdet/models/anchor_heads/__init__.py",
    "chars": 194,
    "preview": "from .anchor_head import AnchorHead\nfrom .rpn_head import RPNHead\nfrom .retina_head import RetinaHead\nfrom .ssd_head imp"
  },
  {
    "path": "mmdet/models/anchor_heads/anchor_head.py",
    "chars": 11539,
    "preview": "from __future__ import division\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n"
  },
  {
    "path": "mmdet/models/anchor_heads/retina_head.py",
    "chars": 2459,
    "preview": "import numpy as np\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n\nfrom .anchor_head import AnchorHead\nfrom ..re"
  },
  {
    "path": "mmdet/models/anchor_heads/rpn_head.py",
    "chars": 3870,
    "preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core imp"
  },
  {
    "path": "mmdet/models/anchor_heads/ssd_head.py",
    "chars": 7420,
    "preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\n"
  },
  {
    "path": "mmdet/models/backbones/__init__.py",
    "chars": 127,
    "preview": "from .resnet import ResNet\nfrom .resnext import ResNeXt\nfrom .ssd_vgg import SSDVGG\n\n__all__ = ['ResNet', 'ResNeXt', 'SS"
  },
  {
    "path": "mmdet/models/backbones/resnet.py",
    "chars": 14617,
    "preview": "import logging\n\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\n\nfrom mmcv.cnn import constant_init, kaiming_i"
  },
  {
    "path": "mmdet/models/backbones/resnext.py",
    "chars": 7229,
    "preview": "import math\n\nimport torch.nn as nn\n\nfrom mmdet.ops import DeformConv, ModulatedDeformConv\nfrom .resnet import Bottleneck"
  },
  {
    "path": "mmdet/models/backbones/ssd_vgg.py",
    "chars": 4510,
    "preview": "import logging\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (VGG, xavier_ini"
  },
  {
    "path": "mmdet/models/bbox_heads/__init__.py",
    "chars": 217,
    "preview": "from .bbox_head import BBoxHead\nfrom .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead\nfrom .graph_bbox_head imp"
  },
  {
    "path": "mmdet/models/bbox_heads/bbox_head.py",
    "chars": 7406,
    "preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.core import (delta2bbox, multiclass_nms, "
  },
  {
    "path": "mmdet/models/bbox_heads/convfc_bbox_head.py",
    "chars": 7019,
    "preview": "import torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HE"
  },
  {
    "path": "mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py",
    "chars": 6868,
    "preview": "import torch\nimport torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..utils import ConvModule\n\n\nclass ConvFCRoIHead"
  },
  {
    "path": "mmdet/models/bbox_heads/graph_bbox_head.py",
    "chars": 9497,
    "preview": "import torch.nn as nn\nimport torch\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import Conv"
  },
  {
    "path": "mmdet/models/builder.py",
    "chars": 1500,
    "preview": "import mmcv\nfrom torch import nn\n\nfrom .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS\n\n\ndef _build_"
  },
  {
    "path": "mmdet/models/detectors/__init__.py",
    "chars": 463,
    "preview": "from .base import BaseDetector\nfrom .single_stage import SingleStageDetector\nfrom .two_stage import TwoStageDetector\nfro"
  },
  {
    "path": "mmdet/models/detectors/base.py",
    "chars": 4354,
    "preview": "import logging\nfrom abc import ABCMeta, abstractmethod\n\nimport mmcv\nimport numpy as np\nimport torch.nn as nn\nimport pyco"
  },
  {
    "path": "mmdet/models/detectors/cascade_rcnn.py",
    "chars": 13078,
    "preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins im"
  },
  {
    "path": "mmdet/models/detectors/fast_rcnn.py",
    "chars": 1691,
    "preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FastRCNN(Two"
  },
  {
    "path": "mmdet/models/detectors/faster_rcnn.py",
    "chars": 704,
    "preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FasterRCNN(T"
  },
  {
    "path": "mmdet/models/detectors/hkrm_rcnn.py",
    "chars": 14274,
    "preview": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin,"
  },
  {
    "path": "mmdet/models/detectors/mask_rcnn.py",
    "chars": 849,
    "preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass MaskRCNN(Two"
  },
  {
    "path": "mmdet/models/detectors/reasoning_rcnn.py",
    "chars": 20528,
    "preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins im"
  },
  {
    "path": "mmdet/models/detectors/retinanet.py",
    "chars": 488,
    "preview": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass Retina"
  },
  {
    "path": "mmdet/models/detectors/rpn.py",
    "chars": 3195,
    "preview": "import mmcv\n\nfrom mmdet.core import tensor2imgs, bbox_mapping\nfrom .base import BaseDetector\nfrom .test_mixins import RP"
  },
  {
    "path": "mmdet/models/detectors/sgrn.py",
    "chars": 31745,
    "preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.parameter import Parameter\nfrom torch.n"
  },
  {
    "path": "mmdet/models/detectors/single_stage.py",
    "chars": 2167,
    "preview": "import torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet"
  },
  {
    "path": "mmdet/models/detectors/test_mixins.py",
    "chars": 6167,
    "preview": "from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,\n                        merge_aug_bboxes, merge_aug"
  },
  {
    "path": "mmdet/models/detectors/two_stage.py",
    "chars": 7582,
    "preview": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin,"
  },
  {
    "path": "mmdet/models/mask_heads/__init__.py",
    "chars": 66,
    "preview": "from .fcn_mask_head import FCNMaskHead\n\n__all__ = ['FCNMaskHead']\n"
  },
  {
    "path": "mmdet/models/mask_heads/fcn_mask_head.py",
    "chars": 6283,
    "preview": "import mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nimport torch\nimport torch.nn as nn\n\nfrom ..registry "
  },
  {
    "path": "mmdet/models/necks/__init__.py",
    "chars": 40,
    "preview": "from .fpn import FPN\n\n__all__ = ['FPN']\n"
  },
  {
    "path": "mmdet/models/necks/fpn.py",
    "chars": 4744,
    "preview": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\nfrom ..utils import ConvModule\nf"
  },
  {
    "path": "mmdet/models/registry.py",
    "chars": 1144,
    "preview": "import torch.nn as nn\n\n\nclass Registry(object):\n\n    def __init__(self, name):\n        self._name = name\n        self._m"
  },
  {
    "path": "mmdet/models/roi_extractors/__init__.py",
    "chars": 79,
    "preview": "from .single_level import SingleRoIExtractor\n\n__all__ = ['SingleRoIExtractor']\n"
  },
  {
    "path": "mmdet/models/roi_extractors/single_level.py",
    "chars": 3075,
    "preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom mmdet import ops\nfrom ..registry import ROI_EX"
  },
  {
    "path": "mmdet/models/utils/__init__.py",
    "chars": 339,
    "preview": "from .conv_module import ConvModule\nfrom .norm import build_norm_layer\nfrom .weight_init import (xavier_init, normal_ini"
  },
  {
    "path": "mmdet/models/utils/conv_module.py",
    "chars": 2871,
    "preview": "import warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import kaiming_init, constant_init\n\nfrom .norm import build_norm_la"
  },
  {
    "path": "mmdet/models/utils/norm.py",
    "chars": 1687,
    "preview": "import torch.nn as nn\n\n\nnorm_cfg = {\n    # format: layer_type: (abbreviation, module)\n    'BN': ('bn', nn.BatchNorm2d),\n"
  },
  {
    "path": "mmdet/models/utils/weight_init.py",
    "chars": 1455,
    "preview": "import numpy as np\nimport torch.nn as nn\n\n\ndef xavier_init(module, gain=1, bias=0, distribution='normal'):\n    assert di"
  },
  {
    "path": "mmdet/ops/__init__.py",
    "chars": 675,
    "preview": "from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack,\n                  ModulatedDeformRoIPoolingPack, M"
  },
  {
    "path": "mmdet/ops/dcn/__init__.py",
    "chars": 622,
    "preview": "from .functions.deform_conv import deform_conv, modulated_deform_conv\nfrom .functions.deform_pool import deform_roi_pool"
  },
  {
    "path": "mmdet/ops/dcn/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/dcn/functions/deform_conv.py",
    "chars": 7291,
    "preview": "import torch\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\n\nfrom .. import deform_conv_cu"
  },
  {
    "path": "mmdet/ops/dcn/functions/deform_pool.py",
    "chars": 2370,
    "preview": "import torch\nfrom torch.autograd import Function\n\nfrom .. import deform_pool_cuda\n\n\nclass DeformRoIPoolingFunction(Funct"
  },
  {
    "path": "mmdet/ops/dcn/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/dcn/modules/deform_conv.py",
    "chars": 4714,
    "preview": "import math\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn.modules.utils import _pair\n\nfrom ..functions.deform_conv i"
  },
  {
    "path": "mmdet/ops/dcn/modules/deform_pool.py",
    "chars": 6313,
    "preview": "from torch import nn\n\nfrom ..functions.deform_pool import deform_roi_pooling\n\n\nclass DeformRoIPooling(nn.Module):\n\n    d"
  },
  {
    "path": "mmdet/ops/dcn/setup.py",
    "chars": 469,
    "preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='defor"
  },
  {
    "path": "mmdet/ops/dcn/src/deform_conv_cuda.cpp",
    "chars": 30729,
    "preview": "// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform"
  },
  {
    "path": "mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu",
    "chars": 42171,
    "preview": "/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contribu"
  },
  {
    "path": "mmdet/ops/dcn/src/deform_pool_cuda.cpp",
    "chars": 6826,
    "preview": "// author: Charles Shang\n// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu\n\n// mod"
  },
  {
    "path": "mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu",
    "chars": 16025,
    "preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
  },
  {
    "path": "mmdet/ops/nms/.gitignore",
    "chars": 6,
    "preview": "*.cpp\n"
  },
  {
    "path": "mmdet/ops/nms/Makefile",
    "chars": 124,
    "preview": "PYTHON=${PYTHON:-python}\n\nall:\n\techo \"Compiling nms kernels...\"\n\t$(PYTHON) setup.py build_ext --inplace\n\nclean:\n\trm -f *"
  },
  {
    "path": "mmdet/ops/nms/__init__.py",
    "chars": 70,
    "preview": "from .nms_wrapper import nms, soft_nms\n\n__all__ = ['nms', 'soft_nms']\n"
  },
  {
    "path": "mmdet/ops/nms/cpu_nms.pyx",
    "chars": 2241,
    "preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
  },
  {
    "path": "mmdet/ops/nms/cpu_soft_nms.pyx",
    "chars": 3942,
    "preview": "# ----------------------------------------------------------\n# Soft-NMS: Improving Object Detection With One Line of Cod"
  },
  {
    "path": "mmdet/ops/nms/gpu_nms.hpp",
    "chars": 180,
    "preview": "void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,\n          int boxes_dim, float nms_overla"
  },
  {
    "path": "mmdet/ops/nms/gpu_nms.pyx",
    "chars": 1433,
    "preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
  },
  {
    "path": "mmdet/ops/nms/nms_kernel.cu",
    "chars": 7120,
    "preview": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n//"
  },
  {
    "path": "mmdet/ops/nms/nms_wrapper.py",
    "chars": 1915,
    "preview": "import numpy as np\nimport torch\n\nfrom .gpu_nms import gpu_nms\nfrom .cpu_nms import cpu_nms\nfrom .cpu_soft_nms import cpu"
  },
  {
    "path": "mmdet/ops/nms/setup.py",
    "chars": 2485,
    "preview": "import os.path as osp\nfrom distutils.core import setup, Extension\n\nimport numpy as np\nfrom Cython.Build import cythonize"
  },
  {
    "path": "mmdet/ops/roi_align/__init__.py",
    "chars": 120,
    "preview": "from .functions.roi_align import roi_align\nfrom .modules.roi_align import RoIAlign\n\n__all__ = ['roi_align', 'RoIAlign']\n"
  },
  {
    "path": "mmdet/ops/roi_align/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/roi_align/functions/roi_align.py",
    "chars": 2080,
    "preview": "from torch.autograd import Function, Variable\n\nfrom .. import roi_align_cuda\n\n\nclass RoIAlignFunction(Function):\n\n    @s"
  },
  {
    "path": "mmdet/ops/roi_align/gradcheck.py",
    "chars": 866,
    "preview": "import numpy as np\nimport torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(o"
  },
  {
    "path": "mmdet/ops/roi_align/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/roi_align/modules/roi_align.py",
    "chars": 535,
    "preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_align import RoIAlignFunction\n\n\nclass RoIAlign(Module):\n"
  },
  {
    "path": "mmdet/ops/roi_align/setup.py",
    "chars": 332,
    "preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='roi_a"
  },
  {
    "path": "mmdet/ops/roi_align/src/roi_align_cuda.cpp",
    "chars": 2963,
    "preview": "#include <torch/torch.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIAlignForwardLaucher(const at::Tensor features, cons"
  },
  {
    "path": "mmdet/ops/roi_align/src/roi_align_kernel.cu",
    "chars": 11825,
    "preview": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\nusing namespace at;  // temporal fix for pytorch<=0.4.1 (see #9848"
  },
  {
    "path": "mmdet/ops/roi_pool/__init__.py",
    "chars": 114,
    "preview": "from .functions.roi_pool import roi_pool\nfrom .modules.roi_pool import RoIPool\n\n__all__ = ['roi_pool', 'RoIPool']\n"
  },
  {
    "path": "mmdet/ops/roi_pool/functions/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/roi_pool/functions/roi_pool.py",
    "chars": 1807,
    "preview": "import torch\nfrom torch.autograd import Function\n\nfrom .. import roi_pool_cuda\n\n\nclass RoIPoolFunction(Function):\n\n    @"
  },
  {
    "path": "mmdet/ops/roi_pool/gradcheck.py",
    "chars": 500,
    "preview": "import torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(osp.abspath(osp.join"
  },
  {
    "path": "mmdet/ops/roi_pool/modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "mmdet/ops/roi_pool/modules/roi_pool.py",
    "chars": 399,
    "preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_pool import roi_pool\n\n\nclass RoIPool(Module):\n\n    def _"
  },
  {
    "path": "mmdet/ops/roi_pool/setup.py",
    "chars": 322,
    "preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='roi_p"
  },
  {
    "path": "mmdet/ops/roi_pool/src/roi_pool_cuda.cpp",
    "chars": 2955,
    "preview": "#include <torch/torch.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIPoolForwardLaucher(const at::Tensor features, const"
  },
  {
    "path": "mmdet/ops/roi_pool/src/roi_pool_kernel.cu",
    "chars": 6953,
    "preview": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\nusing namespace at;  // temporal fix for pytorch<=0.4.1 (see #9848"
  },
  {
    "path": "setup.py",
    "chars": 3013,
    "preview": "import os\nimport subprocess\nimport time\nfrom setuptools import find_packages, setup\n\n\ndef readme():\n    with open('READM"
  },
  {
    "path": "tools/coco_eval.py",
    "chars": 794,
    "preview": "from argparse import ArgumentParser\n\nfrom mmdet.core import coco_eval\n\n\ndef main():\n    parser = ArgumentParser(descript"
  },
  {
    "path": "tools/convert_datasets/pascal_voc.py",
    "chars": 4581,
    "preview": "import argparse\nimport os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.cor"
  },
  {
    "path": "tools/dist_train.sh",
    "chars": 159,
    "preview": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname \"$0\")"
  },
  {
    "path": "tools/test.py",
    "chars": 4570,
    "preview": "import argparse\n\nimport torch\nimport mmcv\nfrom mmcv.runner import load_checkpoint, parallel_test, obj_from_dict\nfrom mmc"
  },
  {
    "path": "tools/train.py",
    "chars": 2763,
    "preview": "from __future__ import division\n\nimport argparse\nfrom mmcv import Config\n\nfrom mmdet import __version__\nfrom mmdet.datas"
  },
  {
    "path": "tools/vis_subgraph.py",
    "chars": 2364,
    "preview": "import numpy as np\nimport pickle\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport matplotlib.cm as cm\n\nsns.s"
  },
  {
    "path": "tools/voc_eval.py",
    "chars": 1824,
    "preview": "from argparse import ArgumentParser\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet import datasets\nfrom mmdet.core import e"
  }
]

// ... and 8 more files (download for full content)

About this extraction

This page contains the full source code of the chanyn/Reasoning-RCNN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 179 files (138.1 MB), approximately 175.1k tokens, and a symbol index with 544 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!