Showing preview only (709K chars total). Download the full file or copy to clipboard to get everything.
Repository: chanyn/Reasoning-RCNN
Branch: master
Commit: 9bd7c7ab0ffd
Files: 179
Total size: 138.1 MB
Directory structure:
gitextract_155bakyx/
├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs/
│ ├── ade_faster_rcnn_r101_fpn_1x.py
│ ├── coco_faster_rcnn_r101_fpn_1x.py
│ ├── coco_sgrb_fpn_ms.py
│ ├── hkrm/
│ │ ├── ade_faster_rcnn_r50_fpn_1x.py
│ │ ├── coco_faster_rcnn_r101_fpn_1x.py
│ │ └── vg_faster_rcnn_r101_fpn_1x.py
│ ├── pascal_voc/
│ │ ├── faster_rcnn_r50_fpn_1x_voc0712.py
│ │ ├── ssd300_voc.py
│ │ └── ssd512_voc.py
│ ├── rrcnn/
│ │ ├── ade_reasoning_rcnn_r101_fpn_1x.py
│ │ ├── coco_reasoning_rcnn_r101_fpn_1x.py
│ │ └── vg_reasoning_rcnn_r101_fpn_1x.py
│ ├── vg_faster_rcnn_r101_fpn_1x.py
│ └── vgbig_faster_rcnn_r101_fpn_1x.py
├── mmdet/
│ ├── __init__.py
│ ├── apis/
│ │ ├── __init__.py
│ │ ├── env.py
│ │ ├── inference.py
│ │ └── train.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── anchor/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_generator.py
│ │ │ └── anchor_target.py
│ │ ├── bbox/
│ │ │ ├── __init__.py
│ │ │ ├── assign_sampling.py
│ │ │ ├── assigners/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── assign_result.py
│ │ │ │ ├── base_assigner.py
│ │ │ │ └── max_iou_assigner.py
│ │ │ ├── bbox_target.py
│ │ │ ├── geometry.py
│ │ │ ├── samplers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_sampler.py
│ │ │ │ ├── combined_sampler.py
│ │ │ │ ├── instance_balanced_pos_sampler.py
│ │ │ │ ├── iou_balanced_neg_sampler.py
│ │ │ │ ├── ohem_sampler.py
│ │ │ │ ├── pseudo_sampler.py
│ │ │ │ ├── random_sampler.py
│ │ │ │ ├── random_sampler_fixnum.py
│ │ │ │ └── sampling_result.py
│ │ │ └── transforms.py
│ │ ├── evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_overlaps.py
│ │ │ ├── class_names.py
│ │ │ ├── coco_utils.py
│ │ │ ├── eval_hooks.py
│ │ │ ├── mean_ap.py
│ │ │ └── recall.py
│ │ ├── loss/
│ │ │ ├── __init__.py
│ │ │ └── losses.py
│ │ ├── mask/
│ │ │ ├── __init__.py
│ │ │ ├── mask_target.py
│ │ │ └── utils.py
│ │ ├── post_processing/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_nms.py
│ │ │ └── merge_augs.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── dist_utils.py
│ │ └── misc.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── coco.py
│ │ ├── concat_dataset.py
│ │ ├── custom.py
│ │ ├── extra_aug.py
│ │ ├── loader/
│ │ │ ├── __init__.py
│ │ │ ├── build_loader.py
│ │ │ └── sampler.py
│ │ ├── repeat_dataset.py
│ │ ├── transforms.py
│ │ ├── utils.py
│ │ ├── voc.py
│ │ └── xml_style.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── anchor_heads/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_head.py
│ │ │ ├── retina_head.py
│ │ │ ├── rpn_head.py
│ │ │ └── ssd_head.py
│ │ ├── backbones/
│ │ │ ├── __init__.py
│ │ │ ├── resnet.py
│ │ │ ├── resnext.py
│ │ │ └── ssd_vgg.py
│ │ ├── bbox_heads/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_head.py
│ │ │ ├── convfc_bbox_head.py
│ │ │ ├── convfc_bbox_head_enhanced.py
│ │ │ └── graph_bbox_head.py
│ │ ├── builder.py
│ │ ├── detectors/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── cascade_rcnn.py
│ │ │ ├── fast_rcnn.py
│ │ │ ├── faster_rcnn.py
│ │ │ ├── hkrm_rcnn.py
│ │ │ ├── mask_rcnn.py
│ │ │ ├── reasoning_rcnn.py
│ │ │ ├── retinanet.py
│ │ │ ├── rpn.py
│ │ │ ├── sgrn.py
│ │ │ ├── single_stage.py
│ │ │ ├── test_mixins.py
│ │ │ └── two_stage.py
│ │ ├── mask_heads/
│ │ │ ├── __init__.py
│ │ │ └── fcn_mask_head.py
│ │ ├── necks/
│ │ │ ├── __init__.py
│ │ │ └── fpn.py
│ │ ├── registry.py
│ │ ├── roi_extractors/
│ │ │ ├── __init__.py
│ │ │ └── single_level.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── conv_module.py
│ │ ├── norm.py
│ │ └── weight_init.py
│ └── ops/
│ ├── __init__.py
│ ├── dcn/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv.py
│ │ │ └── deform_pool.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv.py
│ │ │ └── deform_pool.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── deform_conv_cuda.cpp
│ │ ├── deform_conv_cuda_kernel.cu
│ │ ├── deform_pool_cuda.cpp
│ │ └── deform_pool_cuda_kernel.cu
│ ├── nms/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── cpu_nms.pyx
│ │ ├── cpu_soft_nms.pyx
│ │ ├── gpu_nms.hpp
│ │ ├── gpu_nms.pyx
│ │ ├── nms_kernel.cu
│ │ ├── nms_wrapper.py
│ │ └── setup.py
│ ├── roi_align/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── gradcheck.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── roi_align_cuda.cpp
│ │ └── roi_align_kernel.cu
│ └── roi_pool/
│ ├── __init__.py
│ ├── functions/
│ │ ├── __init__.py
│ │ └── roi_pool.py
│ ├── gradcheck.py
│ ├── modules/
│ │ ├── __init__.py
│ │ └── roi_pool.py
│ ├── setup.py
│ └── src/
│ ├── roi_pool_cuda.cpp
│ └── roi_pool_kernel.cu
├── setup.py
└── tools/
├── coco_eval.py
├── convert_datasets/
│ └── pascal_voc.py
├── dist_train.sh
├── graph/
│ ├── new_COCO_graph_a.pkl
│ ├── new_COCO_graph_r.pkl
│ ├── new_ade_graph_a.pkl
│ ├── new_ade_graph_r.pkl
│ ├── new_vg_big_graph_a.pkl
│ ├── new_vg_big_graph_r.pkl
│ ├── new_vg_graph_a.pkl
│ └── new_vg_graph_r.pkl
├── test.py
├── train.py
├── vis_subgraph.py
└── voc_eval.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
tools/work_dirs/
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# cython generated cpp
mmdet/ops/nms/*.cpp
mmdet/version.py
data
.vscode
.idea
================================================
FILE: .travis.yml
================================================
dist: trusty
language: python
install:
- pip install flake8
python:
- "3.5"
- "3.6"
script:
- flake8
================================================
FILE: INSTALL.md
================================================
## Installation
### Requirements
- Linux (tested on Ubuntu 16.04 and CentOS 7.2)
- Python 3.4+
- PyTorch 0.4.1
- Cython
- [mmcv](https://github.com/open-mmlab/mmcv)
### Install mmdetection
a. Install PyTorch 0.4.1 and torchvision following the [official instructions](https://pytorch.org/).
b. Clone the mmdetection repository.
```shell
git clone https://github.com/open-mmlab/mmdetection.git
```
c. Compile cuda extensions.
```shell
cd mmdetection
pip install cython # or "conda install cython" if you prefer conda
./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments
```
d. Install mmdetection (other dependencies will be installed automatically).
```shell
python(3) setup.py install # add --user if you want to install it locally
# or "pip install ."
```
Note: You need to run the last step each time you pull updates from github.
The git commit id will be written to the version number and also saved in trained models.
### Prepare COCO dataset.
It is recommended to symlink the dataset root to `$MMDETECTION/data`.
```
mmdetection
├── mmdet
├── tools
├── configs
├── data
│ ├── coco
│ │ ├── annotations
│ │ ├── train2017
│ │ ├── val2017
│ │ ├── test2017
│ ├── VOCdevkit
│ │ ├── VOC2007
│ │ ├── VOC2012
```
### Scripts
Just for reference, [Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
a script for setting up mmdetection with conda.
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MODEL_ZOO.md
================================================
# Benchmark and Model Zoo
## Environment
### Hardware
- 8 NVIDIA Tesla V100 GPUs
- Intel Xeon 4114 CPU @ 2.20GHz
### Software environment
- Python 3.6 / 3.7
- PyTorch 0.4.1
- CUDA 9.0.176
- CUDNN 7.0.4
- NCCL 2.1.15
## Common settings
- All baselines were trained using 8 GPUs with a batch size of 16 (2 images per GPU).
- All models were trained on `coco_2017_train`, and tested on the `coco_2017_val`.
- We use distributed training and BN layer stats are fixed.
- We adopt the same training schedules as Detectron. 1x indicates 12 epochs and 2x indicates 24 epochs, which corresponds to slightly less iterations than Detectron and the difference can be ignored.
- All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo.
- We report the training GPU memory as the maximum value of `torch.cuda.max_memory_cached()`
for all 8 GPUs. Note that this value is usually less than what `nvidia-smi` shows, but
closer to the actual requirements.
- We report the inference time as the overall time including data loading,
network forwarding and post processing.
- The training memory and time of 2x schedule is simply copied from 1x.
It should be very close to the actual memory and time.
## Baselines
We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More models with different backbones will be added to the model zoo.
### RPN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR1000 | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe | 1x | 4.5 | 0.379 | 14.4 | 58.2 | - |
| R-50-FPN | pytorch | 1x | 4.8 | 0.407 | 14.5 | 57.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth) |
| R-50-FPN | pytorch | 2x | 4.8 | 0.407 | 14.5 | 57.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth) |
| R-101-FPN | caffe | 1x | 7.4 | 0.513 | 11.1 | 59.4 | - |
| R-101-FPN | pytorch | 1x | 8.0 | 0.552 | 11.1 | 58.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_1x_20181129-f50da4bd.pth) |
| R-101-FPN | pytorch | 2x | 8.0 | 0.552 | 11.1 | 59.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_2x_20181129-e42c6c9a.pth) |
| X-101-32x4d-FPN | pytorch |1x | 9.9 | 0.691 | 8.3 | 59.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_1x_20181218-7e379d26.pth)
| X-101-32x4d-FPN | pytorch |2x | 9.9 | 0.691 | 8.3 | 59.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_2x_20181218-0510af40.pth)
| X-101-64x4d-FPN | pytorch |1x | 14.6 | 1.032 | 6.2 | 59.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_1x_20181218-c1a24f1f.pth)
| X-101-64x4d-FPN | pytorch |2x | 14.6 | 1.032 | 6.2 | 60.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_2x_20181218-c22bdd70.pth)
### Faster R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe | 1x | 4.9 | 0.525 | 10.0 | 36.7 | - |
| R-50-FPN | pytorch | 1x | 5.1 | 0.554 | 9.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth) |
| R-50-FPN | pytorch | 2x | 5.1 | 0.554 | 9.9 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth) |
| R-101-FPN | caffe | 1x | 7.4 | 0.663 | 8.4 | 38.8 | - |
| R-101-FPN | pytorch | 1x | 8.0 | 0.698 | 8.3 | 38.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth) |
| R-101-FPN | pytorch | 2x | 8.0 | 0.698 | 8.3 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_2x_20181129-73e7ade7.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 9.9 | 0.842 | 7.0 | 40.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_1x_20181218-ad81c133.pth)
| X-101-32x4d-FPN | pytorch | 2x| 9.9 | 0.842 | 7.0 | 40.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_2x_20181218-0ed58946.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1 | 1.181 | 5.2 | 41.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_1x_20181218-c9c69c8f.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.1 | 1.181 | 5.2 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_2x_20181218-fe94f9b8.pth)
### Mask R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe | 1x | 5.9 | 0.658 | 7.7 | 37.5 | 34.4 | - |
| R-50-FPN | pytorch | 1x | 5.8 | 0.690 | 7.7 | 37.3 | 34.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) |
| R-50-FPN | pytorch | 2x | 5.8 | 0.690 | 7.7 | 38.6 | 35.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth) |
| R-101-FPN | caffe | 1x | 8.8 | 0.791 | 7.0 | 39.9 | 36.1 | - |
| R-101-FPN | pytorch | 1x | 9.1 | 0.825 | 6.7 | 39.4 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_1x_20181129-34ad1961.pth) |
| R-101-FPN | pytorch | 2x | 9.1 | 0.825 | 6.7 | 40.4 | 36.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_2x_20181129-a254bdfc.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.9 | 0.972 | 5.8 | 41.2 | 37.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_1x_20181218-44e635cc.pth)
| X-101-32x4d-FPN | pytorch | 2x| 10.9 | 0.972 | 5.8 | 41.4 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_2x_20181218-f023dffa.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1 | 1.302 | 4.7 | 42.2 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_1x_20181218-cb159987.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.1 | 1.302 | 4.7 | 42.0 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_2x_20181218-ea936e44.pth)
### Fast R-CNN (with pre-computed proposals)
| Backbone | Style | Type | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe | Faster | 1x | 3.5 | 0.348 | 14.6 | 36.6 | - | - |
| R-50-FPN | pytorch | Faster | 1x | 4.0 | 0.375 | 14.5 | 35.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth) |
| R-50-FPN | pytorch | Faster | 2x | 4.0 | 0.375 | 14.5 | 37.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth) |
| R-101-FPN| caffe | Faster | 1x | 7.1 | 0.484 | 11.9 | 38.4 | - | - |
| R-101-FPN| pytorch | Faster | 1x | 7.6 | 0.540 | 11.8 | 38.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_1x_20181129-ffaa2eb0.pth) |
| R-101-FPN| pytorch | Faster | 2x | 7.6 | 0.540 | 11.8 | 38.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_2x_20181129-9dba92ce.pth) |
| R-50-FPN | caffe | Mask | 1x | 5.4 | 0.473 | 10.7 | 37.3 | 34.5 | - |
| R-50-FPN | pytorch | Mask | 1x | 5.3 | 0.504 | 10.6 | 36.8 | 34.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth) |
| R-50-FPN | pytorch | Mask | 2x | 5.3 | 0.504 | 10.6 | 37.9 | 34.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) |
| R-101-FPN| caffe | Mask | 1x | 8.6 | 0.607 | 9.5 | 39.4 | 36.1 | - |
| R-101-FPN| pytorch | Mask | 1x | 9.0 | 0.656 | 9.3 | 38.9 | 35.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_1x_20181129-2273fa9b.pth) |
| R-101-FPN| pytorch | Mask | 2x | 9.0 | 0.656 | 9.3 | 39.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_2x_20181129-bf63ec5e.pth) |
### RetinaNet
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe | 1x | 6.7 | 0.468 | 9.4 | 35.8 | - |
| R-50-FPN | pytorch | 1x | 6.9 | 0.496 | 9.1 | 35.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-3d3c2142.pth) |
| R-50-FPN | pytorch | 2x | 6.9 | 0.496 | 9.1 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_2x_20181125-e0dbec97.pth) |
| R-101-FPN | caffe | 1x | 9.2 | 0.614 | 8.2 | 37.8 | - |
| R-101-FPN | pytorch | 1x | 9.6 | 0.643 | 8.1 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_1x_20181129-f738a02f.pth) |
| R-101-FPN | pytorch | 2x | 9.6 | 0.643 | 8.1 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_2x_20181129-f654534b.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.8 | 0.792 | 6.7 | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_1x_20181218-c140fb82.pth)
| X-101-32x4d-FPN | pytorch | 2x| 10.8 | 0.792 | 6.7 | 39.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_2x_20181218-605dcd0a.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.6 | 1.128 | 5.3 | 40.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_1x_20181218-2f6f778b.pth)
| X-101-64x4d-FPN | pytorch | 2x| 14.6 | 1.128 | 5.3 | 39.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_2x_20181218-2f598dc5.pth)
### Cascade R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50-FPN | caffe | 1x | 5.0 | 0.592 | 8.1 | 40.3 | - |
| R-50-FPN | pytorch | 1x | 5.5 | 0.622 | 8.0 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_1x_20181123-b1987c4a.pth) |
| R-50-FPN | pytorch | 20e | 5.5 | 0.622 | 8.0 | 41.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_20e_20181123-db483a09.pth) |
| R-101-FPN | caffe | 1x | 8.5 | 0.731 | 7.0 | 42.2 | - |
| R-101-FPN | pytorch | 1x | 8.7 | 0.766 | 6.9 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_1x_20181129-d64ebac7.pth) |
| R-101-FPN | pytorch | 20e | 8.7 | 0.766 | 6.9 | 42.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_20e_20181129-b46dcede.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 10.6 | 0.902 | 5.7 | 43.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_1x_20181218-941c0925.pth)
| X-101-32x4d-FPN | pytorch |20e| 10.6 | 0.902 | 5.7 | 44.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_2x_20181218-28f73c4c.pth)
| X-101-64x4d-FPN | pytorch | 1x| 14.1 | 1.251 | 4.6 | 44.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_1x_20181218-e2dc376a.pth)
| X-101-64x4d-FPN | pytorch |20e| 14.1 | 1.251 | 4.6 | 44.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_2x_20181218-5add321e.pth)
### Cascade Mask R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:--------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | caffe | 1x | 7.5 | 0.880 | 5.8 | 41.0 | 35.6 | - |
| R-50-FPN | pytorch | 1x | 7.6 | 0.910 | 5.7 | 41.3 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) |
| R-50-FPN | pytorch | 20e | 7.6 | 0.910 | 5.7 | 42.4 | 36.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_20e_20181123-6e0c9713.pth) |
| R-101-FPN | caffe | 1x | 10.5 | 1.024 | 5.3 | 43.1 | 37.3 | - |
| R-101-FPN | pytorch | 1x | 10.9 | 1.055 | 5.2 | 42.7 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_1x_20181129-64f00602.pth) |
| R-101-FPN | pytorch | 20e | 10.9 | 1.055 | 5.2 | 43.4 | 37.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_20e_20181129-cb85151d.pth) |
| X-101-32x4d-FPN | pytorch | 1x| 12.67 | 1.181 | 4.2 | 44.4 | 38.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_1x_20181218-1d944c89.pth)
| X-101-32x4d-FPN | pytorch |20e| 12.67 | 1.181 | 4.2 | 44.9 | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_20e_20181218-761a3473.pth)
| X-101-64x4d-FPN | pytorch | 1x| 10.87 | 1.125 | 3.6 | 45.5 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_1x_20181218-85953a91.pth)
| X-101-64x4d-FPN | pytorch |20e| 10.87 | 1.125 | 3.6 | 45.8 | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_20e_20181218-630773a7.pth)
**Notes:**
- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs.
- Cascade Mask R-CNN with X-101-64x4d-FPN was trained using 16 GPUs with a batch size of 16 (1 image per GPU).
### SSD
| Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:----:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| VGG16 | 300 | caffe | 120e | 3.5 | 0.286 | 22.9 / 29.2 | 25.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) |
| VGG16 | 512 | caffe | 120e | 6.3 | 0.458 | 17.3 / 21.2 | 29.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_coco_vgg16_caffe_120e_20181221-d48b0be8.pth) |
### SSD (PASCAL VOC)
| Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:--------:|:----:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| VGG16 | 300 | caffe | 240e | 1.2 | 0.189 | 40.1 / 58.0 | 77.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20181221-2f05dd40.pth) |
| VGG16 | 512 | caffe | 240e | 2.9 | 0.261 | 28.1 / 36.2 | 80.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20181221-7652ee18.pth) |
**Notes:**
- `cudnn.benchmark` is set as `True` for SSD training and testing.
- Inference time is reported for batch size = 1 and batch size = 8.
- The speed difference between VOC and COCO is caused by model parameters and nms.
### Group Normalization (GN)
| Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.9 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
| R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.2 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.6 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.7 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
| R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
| R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |
**Notes:**
- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk).
- The `3x` schedule is epoch [28, 34, 36].
- The memory is measured with `torch.cuda.max_memory_allocated()` instead of `torch.cuda.max_memory_cached()`. We will update the memory usage of other models in the future.
### Deformable Convolution v2
| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:---------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 3.9 | 0.594 | 10.2 | 40.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-e41688c9.pth) |
| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 3.7 | 0.598 | 10.0 | 40.3 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-1b768045.pth) |
| R-50-FPN | Faster | pytorch | - | dpool | 1x | 4.6 | 0.714 | 8.7 | 37.9 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dpool_r50_fpn_1x_20190125-f4fc1d70.pth) |
| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.2 | 0.769 | 8.2 | 38.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdpool_r50_fpn_1x_20190125-473d0f3d.pth) |
| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 5.8 | 0.811 | 8.0 | 42.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-a7e31b65.pth) |
| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.1 | 1.126 | 6.6 | 43.5 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x_20190201-6d46376f.pth) |
| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 0.712 | 7.7 | 41.1 | 37.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-4f94ff79.pth) |
| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 0.712 | 7.7 | 41.4 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-c5601dc3.pth) |
| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 0.939 | 6.5 | 43.2 | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-decb6db5.pth) |
| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.4 | 0.660 | 7.6 | 44.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth) |
| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.3 | 0.881 | 6.8 | 45.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-aaa877cc.pth) |
| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 6.6 | 0.942 | 5.7 | 44.5 | 38.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-09d8a443.pth) |
| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.5 | 1.156 | 5.1 | 45.8 | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-0d62c190.pth) |
**Notes:**
- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling.
- The memory is measured with `torch.cuda.max_memory_allocated()`. The batch size is 16 (2 images per GPU).
- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster.
## Comparison with Detectron
We compare mmdetection with [Detectron](https://github.com/facebookresearch/Detectron)
and [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch),
a third-party port of Detectron to Pytorch. The backbone used is R-50-FPN.
In general, mmdetection has 3 advantages over Detectron.
- **Higher performance** (especially in terms of mask AP)
- **Faster training speed**
- **Memory efficient**
### Performance
Detectron and Detectron.pytorch use caffe-style ResNet as the backbone.
In order to utilize the PyTorch model zoo, we use pytorch-style ResNet in our experiments.
In the meanwhile, we train models with caffe-style ResNet in 1x experiments for comparison.
We find that pytorch-style ResNet usually converges slower than caffe-style ResNet,
thus leading to slightly lower results in 1x schedule, but the final results
of 2x schedule is higher.
We report results using both caffe-style (weights converted from
[here](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#imagenet-pretrained-models))
and pytorch-style (weights from the official model zoo) ResNet backbone,
indicated as *pytorch-style results* / *caffe-style results*.
<table>
<tr>
<th>Type</th>
<th>Lr schd</th>
<th>Detectron</th>
<th>Detectron.pytorch</th>
<th>mmdetection</th>
</tr>
<tr>
<td rowspan="2">RPN</td>
<td>1x</td>
<td>57.2</td>
<td>-</td>
<td>57.1 / 58.2</td>
</tr>
<tr>
<td>2x</td>
<td>-</td>
<td>-</td>
<td>57.6 / -</td>
</tr>
<tr>
<td rowspan="2">Faster R-CNN</td>
<td>1x</td>
<td>36.7</td>
<td>37.1</td>
<td>36.4 / 36.7</td>
</tr>
<tr>
<td>2x</td>
<td>37.9</td>
<td>-</td>
<td>37.7 / -</td>
</tr>
<tr>
<td rowspan="2">Mask R-CNN</td>
<td>1x</td>
<td>37.7 & 33.9</td>
<td>37.7 & 33.7</td>
<td>37.3 & 34.2 / 37.5 & 34.4</td>
</tr>
<tr>
<td>2x</td>
<td>38.6 & 34.5</td>
<td>-</td>
<td>38.6 & 35.1 / -</td>
</tr>
<tr>
<td rowspan="2">Fast R-CNN</td>
<td>1x</td>
<td>36.4</td>
<td>-</td>
<td>35.8 / 36.6</td>
</tr>
<tr>
<td>2x</td>
<td>36.8</td>
<td>-</td>
<td>37.1 / -</td>
</tr>
<tr>
<td rowspan="2">Fast R-CNN (w/mask)</td>
<td>1x</td>
<td>37.3 & 33.7</td>
<td>-</td>
<td>36.8 & 34.1 / 37.3 & 34.5</td>
</tr>
<tr>
<td>2x</td>
<td>37.7 & 34.0</td>
<td>-</td>
<td>37.9 & 34.8 / -</td>
</tr>
</table>
### Training Speed
The training speed is measured in s/iter. The lower, the better.
<table>
<tr>
<th>Type</th>
<th>Detectron (P100<sup>1</sup>)</th>
<th>Detectron.pytorch (XP<sup>2</sup>)</th>
<th>mmdetection<sup>3</sup> (V100<sup>4</sup> / XP)</th>
</tr>
<tr>
<td>RPN</td>
<td>0.416</td>
<td>-</td>
<td>0.407 / 0.413</td>
</tr>
<tr>
<td>Faster R-CNN</td>
<td>0.544</td>
<td>1.015</td>
<td>0.554 / 0.579</td>
</tr>
<tr>
<td>Mask R-CNN</td>
<td>0.889</td>
<td>1.435</td>
<td>0.690 / 0.732</td>
</tr>
<tr>
<td>Fast R-CNN</td>
<td>0.285</td>
<td>-</td>
<td>0.375 / 0.398</td>
</tr>
<tr>
<td>Fast R-CNN (w/mask)</td>
<td>0.377</td>
<td>-</td>
<td>0.504 / 0.574</td>
</tr>
</table>
\*1. Detectron reports the speed on Facebook's Big Basin servers (P100),
on our V100 servers it is slower so we use the official reported values.
\*2. Detectron.pytorch does not report the runtime and we encountered some issue to
run it on V100, so we report the speed on TITAN XP.
\*3. The speed of pytorch-style ResNet is approximately 5% slower than caffe-style,
and we report the pytorch-style results here.
\*4. We also run the models on a DGX-1 server (P100) and the speed is almost the same as our V100 servers.
### Inference Speed
The inference speed is measured with fps (img/s) on a single GPU. The higher, the better.
<table>
<tr>
<th>Type</th>
<th>Detectron (P100)</th>
<th>Detectron.pytorch (XP)</th>
<th>mmdetection (V100 / XP)</th>
</tr>
<tr>
<td>RPN</td>
<td>12.5</td>
<td>-</td>
<td>14.5 / 15.4</td>
</tr>
<tr>
<td>Faster R-CNN</td>
<td>10.3</td>
<td></td>
<td>9.9 / 9.8</td>
</tr>
<tr>
<td>Mask R-CNN</td>
<td>8.5</td>
<td></td>
<td>7.7 / 7.4</td>
</tr>
<tr>
<td>Fast R-CNN</td>
<td>12.5</td>
<td></td>
<td>14.5 / 14.1</td>
</tr>
<tr>
<td>Fast R-CNN (w/mask)</td>
<td>9.9</td>
<td></td>
<td>10.6 / 10.3</td>
</tr>
</table>
### Training memory
We perform various tests and there is no doubt that mmdetection is more memory
efficient than Detectron, and the main cause is the deep learning framework itself, not our efforts.
Besides, Caffe2 and PyTorch have different apis to obtain memory usage
whose implementation is not exactly the same.
`nvidia-smi` shows a larger memory usage for both detectron and mmdetection, e.g.,
we observe a much higher memory usage when we train Mask R-CNN with 2 images per GPU using detectron (10.6G) and mmdetection (9.3G), which is obviously more than actually required.
> With mmdetection, we can train R-50 FPN Mask R-CNN with **4** images per GPU (TITAN XP, 12G),
which is a promising result.
================================================
FILE: README.md
================================================
# Environments
- pytorch 0.3.0/0.4.1
- an early (legacy) version of mmdetection
# Reasoning-RCNN
Reasoning-RCNN: Unifying Adaptive Global Reasoning into Large-scale Object Detection (CVPR2019 Oral)
```
# core files
configs/rrcnn/*
mmdet/models/detectors/reasoning_rcnn.py
mmdet/models/bbox_heads/graph_bbox_head.py
```
# SGRN
Spatial-Aware Graph Relation Network for Large-Scale Object Detection (CVPR2019)
```
# core files
configs/coco_sgrb_fpn_ms.py
mmdet/models/detectors/sgrn.py
mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py
```
================================================
FILE: TECHNICAL_DETAILS.md
================================================
## Overview
In this section, we will introduce the main units of training a detector:
data loading, model and iteration pipeline.
## Data loading
Following typical conventions, we use `Dataset` and `DataLoader` for data loading
with multiple workers. `Dataset` returns a dict of data items corresponding
to the arguments of models' forward method.
Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
data of different size.
See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
## Model
In mmdetection, model components are basically categorized as 4 types.
- backbone: usually a FCN network to extract feature maps, e.g., ResNet.
- neck: the part between backbones and heads, e.g., FPN, ASPP.
- head: the part for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
We also implement some general detection pipelines with the above components,
such as `SingleStageDetector` and `TwoStageDetector`.
### Build a model with basic components
Following some basic pipelines (e.g., two-stage detectors), the model structure
can be customized through config files with little effort.
If we want to implement some new components, e.g., the path aggregation
FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
1. create a new file in `mmdet/models/necks/pafpn.py`.
```python
class PAFPN(nn.Module):
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=0,
end_level=-1,
add_extra_convs=False):
pass
def forward(self, inputs):
# implementation is ignored
pass
```
2. modify the config file from
```python
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
to
```python
neck=dict(
type='PAFPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
We will release more components (backbones, necks, heads) for research purpose.
### Write a new model
To write a new detection pipeline, you need to inherit from `BaseDetector`,
which defines the following abstract methods.
- `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
- `forward_train()`: forward method of the training mode
- `simple_test()`: single scale testing without augmentation
- `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
[TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
is a good example which shows how to do that.
## Iteration pipeline
We adopt distributed training for both single machine and multiple machines.
Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
Each process keeps an isolated model, data loader, and optimizer.
Model parameters are only synchronized once at the beginning.
After a forward and backward pass, gradients will be allreduced among all GPUs,
and the optimizer will update model parameters.
Since the gradients are allreduced, the model parameter stays the same for all processes after the iteration.
================================================
FILE: compile.sh
================================================
#!/usr/bin/env bash
# Build all native extension ops (roi_align, roi_pool, nms, dcn) in place.
# Usage: PYTHON=python3 ./compile.sh   (defaults to `python`)
#
# Fix: abort on the first failure. Without `set -e`, a failed `cd` (e.g. when
# run from the wrong working directory) would let the remaining build steps
# run against whatever directory the shell happens to be in.
set -e

PYTHON=${PYTHON:-"python"}

echo "Building roi align op..."
cd mmdet/ops/roi_align
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace

echo "Building roi pool op..."
cd ../roi_pool
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace

echo "Building nms op..."
cd ../nms
make clean
make PYTHON=${PYTHON}

echo "Building dcn..."
cd ../dcn
if [ -d "build" ]; then
    rm -r build
fi
$PYTHON setup.py build_ext --inplace
================================================
FILE: configs/ade_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN (ResNet-101 + FPN) fine-tuned on the ADE detection data used by
# this repo; the bbox head predicts 446 classes.
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN output level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=446,  # ADE class count used throughout this repo's ADE configs
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,  # samples drawn per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the bbox head
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
# ADE annotations here are stored in COCO-style json, hence CocoDataset.
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/ADE_new/'  # machine-specific; adjust locally
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad H/W to multiples of 32 for the FPN
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',  # evaluation runs on the val split
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # lr decay at epochs 8 and 11 ("1x" schedule)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# NOTE(review): work_dir is shared verbatim with the coco config in this repo;
# checkpoints from different runs may collide — verify before launching both.
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/coco_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Faster R-CNN baseline: ResNet-101 + FPN on COCO 2017 (80 classes + background).
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN output level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # 80 COCO classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,  # samples drawn per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSamplerFixnum',  # fixed-count variant, unlike the ade config
            num=512,  # RoIs sampled per image for the bbox head
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'  # machine-specific; adjust locally
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        img_scale=[(1333,600),(1333,1000)],  # multi-scale training range
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad H/W to multiples of 32 for the FPN
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fix: was 'val2017/' — missing the 'images/' segment used by both the
        # train and test entries; made consistent with them.
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])  # lr decay at epochs 8 and 11 ("1x" schedule)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/coco_sgrb_fpn_ms.py
================================================
# model settings
# SGRN config: a three-stage detector (RPN + two cascaded bbox heads) on COCO.
# The second bbox head (ConvFCRoIHeadEnhance) consumes extra 256-channel
# "enhance" features — presumably produced by the graph_convolution module
# below; see mmdet/models/detectors/sgrn.py to confirm.
model = dict(
    type='ThreeStageGraphDetector',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # Two heads: a plain shared-FC head for the first rcnn stage, then an
    # "enhanced" head for the second stage.
    bbox_head=[dict(
        type='SharedFCRoIHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # 80 COCO classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False),
    dict(
        type='ConvFCRoIHeadEnhance',
        enhance_channels=256,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False)
    ],
    graph_convolution=dict(
        latent_graph_channel=256,
        n_kernels_gc=8,
        n_graph_node=512,
        neigh_size=32)
    )
# model training and testing settings
# NOTE: this config uses the older flat train_cfg layout (no assigner/sampler
# sub-dicts), unlike the faster-rcnn configs in this repo.
train_cfg = dict(
    rpn=dict(
        pos_fraction=0.5,
        pos_balance_sampling=False,
        neg_pos_ub=256,
        allowed_border=0,
        crowd_thr=1.1,
        anchor_batch_size=256,
        pos_iou_thr=0.7,
        neg_iou_thr=0.3,
        neg_balance_thr=0,
        min_pos_iou=0.3,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        crowd_thr=1.1,
        roi_batch_size=512,
        add_gt_as_proposals=False,
        pos_fraction=0.25,
        pos_balance_sampling=False,
        neg_pos_ub=512,
        neg_balance_thr=0,
        min_pos_iou=0.5,
        pos_weight=-1,
        debug=False),
    # Second rcnn stage uses stricter IoU thresholds (0.6 vs 0.5).
    rcnn2=dict(
        pos_iou_thr=0.6,
        neg_iou_thr=0.6,
        crowd_thr=1.1,
        roi_batch_size=512,
        add_gt_as_proposals=False,
        pos_fraction=0.25,
        pos_balance_sampling=False,
        neg_pos_ub=512,
        neg_balance_thr=0,
        min_pos_iou=0.5,
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(score_thr=0.001, max_per_img=150, nms_thr=0.55))
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/xuhang/data/detection_data/COCO2017/'  # machine-specific; adjust locally
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=1,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=[(1333, 600),(1333, 1000)],  # multi-scale training range
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# Low lr (0.001) and a short schedule: this run fine-tunes from a transferred
# checkpoint (see load_from below) rather than training from ImageNet weights.
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[4])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 5
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
#load_from = None
#resume_from = './exps/coco_three_stage_graph_fpn_ms/epoch_12.pth'
# NOTE(review): the path spells 'domian' — likely matching an on-disk
# directory with the same typo; verify before "fixing" the spelling.
load_from = './tools/transfer_domian/model/vg_transfer_coco.pth'
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/hkrm/ade_faster_rcnn_r50_fpn_1x.py
================================================
# Faster R-CNN R-50-FPN on ADE (num_classes=446) with the HKRM graph-enhanced
# bbox head (GraphBBoxHead).
# model settings
model = dict(
    type='HKRMRCNN',
    pretrained='modelzoo://resnet50',
    # Pre-computed knowledge graphs: attribute ("_a") and relationship ("_r").
    # NOTE(review): absolute machine-specific paths — adjust per environment.
    adja_gt='/home/cyan/code/mmdetection/tools/graph/new_ade_graph_a.pkl',
    adjr_gt='/home/cyan/code/mmdetection/tools/graph/new_ade_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=446,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        # Conv depths of the attribute / relationship / spatial branches.
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True,))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        # Fixed-count sampler: always returns exactly `num` RoIs per image.
        sampler=dict(
            type='RandomSamplerFixnum',
            num=256,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'  # ADE annotations stored in COCO json format
data_root = '/home/cyan/data/Detection/ADE_new/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# NOTE(review): directory name says r101 but this config uses a ResNet-50
# backbone — confirm, or checkpoints from both configs will collide here.
work_dir = './work_dirs/hkrm_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/hkrm/coco_faster_rcnn_r101_fpn_1x.py
================================================
# Faster R-CNN R-101-FPN on COCO (num_classes=81) with the HKRM graph-enhanced
# bbox head (GraphBBoxHead).
# model settings
model = dict(
    type='HKRMRCNN',
    # NOTE(review): no `pretrained` key here and load_from=None below, so the
    # backbone starts from random weights — confirm this is intended.
    # NOTE(review): these are the *ADE* knowledge graphs although this is a
    # COCO config — verify the intended COCO graph files.
    adja_gt='./graph/new_ade_graph_a.pkl',
    adjr_gt='./graph/new_ade_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=81,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        # Conv depths of the attribute / relationship / spatial branches.
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True,))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        # Fixed-count sampler: always returns exactly `num` RoIs per image.
        sampler=dict(
            type='RandomSamplerFixnum',
            num=256,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # Multi-scale training: two (max_long_side, short_side) candidates.
        img_scale=[(1333,600),(1333,1000)],
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fixed: was 'val2017/' — train and test both read from the 'images/'
        # subdirectory, so val must as well for the paths to resolve.
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        # NOTE(review): test uses (800, 800) while val uses (1333, 800) —
        # confirm the smaller test scale is deliberate.
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hkrm_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/hkrm/vg_faster_rcnn_r101_fpn_1x.py
================================================
# Faster R-CNN R-101-FPN on Visual Genome (num_classes=1001) with the HKRM
# graph-enhanced bbox head (GraphBBoxHead).
# model settings
model = dict(
    type='HKRMRCNN',
    #pretrained='modelzoo://resnet101',
    # Pre-computed VG knowledge graphs: attribute ("_a") and relationship ("_r").
    adja_gt='./graph/new_vg_graph_a.pkl',
    adjr_gt='./graph/new_vg_graph_r.pkl',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='GraphBBoxHead',
        roi_feat_size=7,
        num_shared_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=1001,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,
        # Conv depths of the attribute / relationship / spatial branches.
        num_attr_conv=4,
        num_rela_conv=4,
        num_spat_conv=2,
        with_attr=True,
        with_rela=True,
        with_spat=True, ))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        # Fixed-count sampler: always returns exactly `num` RoIs per image.
        sampler=dict(
            type='RandomSamplerFixnum',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        # NOTE(review): proposals capped at 512 at test time here, while the
        # sibling configs use 2000 — confirm this is deliberate.
        max_num=512,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'  # VG annotations stored in COCO json format
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): short side 200 is far smaller than the val scale of
        # 800 — possible typo; confirm the intended training resolution.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        # NOTE(review): test paths ('annotations/val.json', 'VG/') differ from
        # the val paths above ('val.json', 'val/') — verify which layout the
        # data on disk actually uses.
        ann_file=data_root + 'annotations/val.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hkrm_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py
================================================
# Standard Faster R-CNN R-50-FPN config for PASCAL VOC 07+12 trainval,
# evaluated on VOC07 test (num_classes=21: 20 classes + background).
# model settings
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=21,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',  # to avoid reloading datasets frequently
        times=3,
        dataset=dict(
            type=dataset_type,
            # Train on the union of VOC2007 and VOC2012 trainval.
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(1000, 600),
            img_norm_cfg=img_norm_cfg,
            size_divisor=32,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=True,
            with_label=True)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(1000, 600),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(1000, 600),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
# Epoch counts below are in RepeatDataset units (each "epoch" = 3 passes).
lr_config = dict(policy='step', step=[3])  # actual epoch = 3 * 3 = 9
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 4  # actual epoch = 4 * 3 = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x_voc0712'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/pascal_voc/ssd300_voc.py
================================================
# SSD300 with a Caffe-pretrained VGG-16 backbone on PASCAL VOC 07+12.
# model settings
input_size = 300
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,  # SSD attaches its head directly to the backbone features
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256),
        num_classes=21,
        anchor_strides=(8, 16, 32, 64, 100, 300),
        basesize_ratio_range=(0.2, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
cudnn_benchmark = True  # fixed input size, so cudnn autotuning pays off
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,  # hard-negative mining ratio
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# model training and testing settings
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# std=1: mean subtraction only (Caffe-style VGG preprocessing).
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
data = dict(
    imgs_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(300, 300),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            # SSD-specific photometric + geometric augmentation pipeline.
            extra_aug=dict(
                photo_metric_distortion=dict(
                    brightness_delta=32,
                    contrast_range=(0.5, 1.5),
                    saturation_range=(0.5, 1.5),
                    hue_delta=18),
                expand=dict(
                    mean=img_norm_cfg['mean'],
                    to_rgb=img_norm_cfg['to_rgb'],
                    ratio_range=(1, 4)),
                random_crop=dict(
                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),
            resize_keep_ratio=False)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(300, 300),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()  # no gradient clipping for SSD
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 20])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd300_voc'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/pascal_voc/ssd512_voc.py
================================================
# SSD512 with a Caffe-pretrained VGG-16 backbone on PASCAL VOC 07+12.
# Same structure as ssd300_voc.py but with a 512x512 input and one extra
# feature level (7 anchor strides instead of 6).
# model settings
input_size = 512
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,  # SSD attaches its head directly to the backbone features
    bbox_head=dict(
        type='SSDHead',
        input_size=input_size,
        in_channels=(512, 1024, 512, 256, 256, 256, 256),
        num_classes=21,
        anchor_strides=(8, 16, 32, 64, 128, 256, 512),
        basesize_ratio_range=(0.15, 0.9),
        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2)))
cudnn_benchmark = True  # fixed input size, so cudnn autotuning pays off
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,  # hard-negative mining ratio
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)
# model training and testing settings
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
# std=1: mean subtraction only (Caffe-style VGG preprocessing).
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
data = dict(
    imgs_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=10,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            img_scale=(512, 512),
            img_norm_cfg=img_norm_cfg,
            size_divisor=None,
            flip_ratio=0.5,
            with_mask=False,
            with_crowd=False,
            with_label=True,
            test_mode=False,
            # SSD-specific photometric + geometric augmentation pipeline.
            extra_aug=dict(
                photo_metric_distortion=dict(
                    brightness_delta=32,
                    contrast_range=(0.5, 1.5),
                    saturation_range=(0.5, 1.5),
                    hue_delta=18),
                expand=dict(
                    mean=img_norm_cfg['mean'],
                    to_rgb=img_norm_cfg['to_rgb'],
                    ratio_range=(1, 4)),
                random_crop=dict(
                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),
            resize_keep_ratio=False)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(512, 512),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        img_scale=(512, 512),
        img_norm_cfg=img_norm_cfg,
        size_divisor=None,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True,
        resize_keep_ratio=False))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
optimizer_config = dict()  # no gradient clipping for SSD
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[16, 20])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/ssd512_voc'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/rrcnn/ade_reasoning_rcnn_r101_fpn_1x.py
================================================
# Two-stage (cascade-style) Reasoning R-CNN, R-101-FPN, on ADE (446 classes).
# Stage 2 consumes graph-reasoned features on top of stage-1 RoI features.
# model settings
model = dict(
    type='ReasoningRCNN',
    num_stages=2,
    # Relationship knowledge graph used by the reasoning module.
    adj_gt='./graph/new_ade_graph_r.pkl',
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per stage; stage 2 uses tighter regression stds.
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            num_classes=446,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # 1280 = 1024 + graph_out_channels (256): stage 2 consumes the
            # graph-enhanced features — TODO confirm against ReasoningRCNN.
            in_channels=1280,
            roi_feat_size=1,
            num_classes=446,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # One rcnn config per stage; stage 2 uses stricter IoU thresholds (0.6).
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    keep_all_stages=True)
# dataset settings
dataset_type = 'CocoDataset'  # ADE annotations stored in COCO json format
data_root = '/home/cyan/data/Detection/ADE_new/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): short side 200 is far smaller than the val scale of
        # 800 — possible typo; confirm the intended training resolution.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        # NOTE(review): test uses (800, 800) while val uses (1333, 800) —
        # confirm the smaller test scale is deliberate.
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Initialize from a plain FPN detector trained on the same dataset.
load_from = './work_dirs/ade_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/rrcnn/coco_reasoning_rcnn_r101_fpn_1x.py
================================================
# Two-stage (cascade-style) Reasoning R-CNN, R-101-FPN, on COCO (81 classes).
# Stage 2 consumes graph-reasoned features on top of stage-1 RoI features.
# model settings
model = dict(
    type='ReasoningRCNN',
    num_stages=2,
    adj_gt='./graph/new_COCO_graph_r.pkl', # relation graph: './graph/new_ade_graph_r.pkl'
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per stage; stage 2 uses tighter regression stds.
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # 1280 = 1024 + graph_out_channels (256): stage 2 consumes the
            # graph-enhanced features — TODO confirm against ReasoningRCNN.
            in_channels=1280,
            roi_feat_size=1,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # One rcnn config per stage; stage 2 uses stricter IoU thresholds (0.6).
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    keep_all_stages=True)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/coco2017/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # NOTE(review): short side 200 is far smaller than the val scale of
        # 800 — possible typo; confirm the intended training resolution.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        # Fixed: trailing slash added for consistency with train/val prefixes.
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Initialize from a plain FPN detector trained on the same dataset.
load_from = './work_dirs/coco_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/rrcnn/vg_reasoning_rcnn_r101_fpn_1x.py
================================================
# model settings
# NOTE(review): this file is named vg_* but several entries below (graph
# pickle, annotation file names) look copied from the COCO config -- each is
# flagged inline; confirm before training on Visual Genome.
model = dict(
    type='ReasoningRCNN',
    num_stages=2,
    # NOTE(review): a COCO relation graph in a VG config -- presumably a VG
    # graph pickle is expected here; confirm.
    adj_gt='./graph/new_COCO_graph_r.pkl',
    graph_out_channels=256,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # Two cascade-style heads, one per reasoning stage; the second stage uses
    # tighter regression stds, as in Cascade R-CNN-style refinement.
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            in_channels=1024,
            roi_feat_size=1,
            # presumably 3000 VG classes + 1 background -- confirm
            num_classes=3001,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=False,
            # wider input: stage-1 features concatenated with graph features
            # (1024 + graph_out_channels) -- TODO confirm against the model
            in_channels=1280,
            roi_feat_size=1,
            num_classes=3001,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # Per-stage RCNN configs: IoU thresholds increase (0.5 -> 0.6) so the
    # second stage trains on higher-quality proposals.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSamplerFixnum',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    # Loss of the second (refinement) stage is down-weighted.
    stage_loss_weights=[1, 0.5])
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.6,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    # keep per-stage outputs at test time instead of only the last stage
    keep_all_stages=True)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        # NOTE(review): COCO-style annotation/image names under the VG root
        # -- confirm these files actually exist for VG.
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'images/train2017/',
        # NOTE(review): short side 200 at train time vs 800 for val --
        # (1333, 200) looks like a typo for (1333, 800); confirm.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'images/val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/val_big.json',
        # NOTE(review): no trailing '/' here unlike the other prefixes --
        # confirm image paths join correctly.
        img_prefix=data_root + 'VG',
        img_scale=(800, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rrcnn_r101_fpn_1x'
# Warm-start from a VG-pretrained Faster R-CNN baseline checkpoint.
load_from = './work_dirs/vg_fpn_r101/pretrained_model.pth'
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/vg_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
# Baseline Faster R-CNN (R-101 + FPN) on Visual Genome, used to pretrain the
# backbone/RPN for the Reasoning-RCNN configs.
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        # presumably 1000 VG classes + 1 background -- confirm
        num_classes=1001,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.json',
        img_prefix=data_root + 'train/',
        # NOTE(review): short side 200 at train time vs 800 for val --
        # (1333, 200) looks like a typo for (1333, 800); confirm.
        img_scale=(1333, 200),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.json',
        img_prefix=data_root + 'val/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        # NOTE(review): test uses 'annotations/val.json' while val uses
        # 'val.json' at the root, and a different img_prefix -- confirm
        # which layout is correct.
        ann_file=data_root + 'annotations/val.json',
        img_prefix=data_root + 'VG/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: configs/vgbig_faster_rcnn_r101_fpn_1x.py
================================================
# model settings
model = dict(
type='FasterRCNN',
pretrained='modelzoo://resnet101',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1001,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = '/home/cyan/data/Detection/vg/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/train_big.json',
img_prefix=data_root + 'VG/',
img_scale=(1333, 200),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/val_big.json',
img_prefix=data_root + 'VG/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/val_big.json',
img_prefix=data_root + 'VG/',
img_scale=(1333, 400),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=True,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdet/__init__.py
================================================
from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
================================================
FILE: mmdet/apis/__init__.py
================================================
from .env import init_dist, get_root_logger, set_random_seed
from .train import train_detector
from .inference import inference_detector, show_result
__all__ = [
'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
'inference_detector', 'show_result'
]
================================================
FILE: mmdet/apis/env.py
================================================
import logging
import os
import random
import numpy as np
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from mmcv.runner import get_dist_info
def init_dist(launcher, backend='nccl', **kwargs):
    """Initialize the distributed environment for the given launcher.

    Args:
        launcher (str): One of 'pytorch', 'mpi' or 'slurm'.
        backend (str): torch.distributed backend name.
        **kwargs: Forwarded to the launcher-specific initializer.

    Raises:
        ValueError: If ``launcher`` is not a supported type.
    """
    # Pick a start method once, if none has been chosen yet for this process.
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher not in ('pytorch', 'mpi', 'slurm'):
        raise ValueError('Invalid launcher type: {}'.format(launcher))
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
    else:
        _init_dist_slurm(backend, **kwargs)
def _init_dist_pytorch(backend, **kwargs):
    """Set up distributed training launched via torch.distributed.launch.

    Reads the process rank from the RANK environment variable and pins this
    process to one of the visible GPUs before joining the process group.
    """
    # TODO: use local_rank instead of rank % num_gpus
    proc_rank = int(os.environ['RANK'])
    gpu_count = torch.cuda.device_count()
    torch.cuda.set_device(proc_rank % gpu_count)
    dist.init_process_group(backend=backend, **kwargs)
def _init_dist_mpi(backend, **kwargs):
    """Placeholder for MPI-launched distributed init (not implemented)."""
    raise NotImplementedError
def _init_dist_slurm(backend, **kwargs):
    """Placeholder for SLURM-launched distributed init (not implemented)."""
    raise NotImplementedError
def set_random_seed(seed):
    """Seed every RNG used during training.

    Covers python's ``random``, numpy, and torch (CPU and all GPUs).

    Args:
        seed (int): The seed value.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed_all):
        seed_fn(seed)
def get_root_logger(log_level=logging.INFO):
    """Return the root logger, configuring it on first use.

    Non-master ranks are raised to ERROR level so that only rank 0 emits
    regular training logs.
    """
    root = logging.getLogger()
    if not root.hasHandlers():
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(levelname)s - %(message)s')
    rank, _ = get_dist_info()
    if rank != 0:
        root.setLevel('ERROR')
    return root
================================================
FILE: mmdet/apis/inference.py
================================================
import mmcv
import numpy as np
import torch
from mmdet.datasets import to_tensor
from mmdet.datasets.transforms import ImageTransform
from mmdet.core import get_classes
def _prepare_data(img, img_transform, cfg, device):
    """Transform a raw image into the batched dict the detector expects."""
    original_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img, scale=cfg.data.test.img_scale)
    batched = to_tensor(img).to(device).unsqueeze(0)
    meta = dict(
        ori_shape=original_shape,
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=False)
    return dict(img=[batched], img_meta=[[meta]])
def _inference_single(model, img, img_transform, cfg, device):
    """Run the detector on one image (file path or ndarray), no gradients."""
    data = _prepare_data(mmcv.imread(img), img_transform, cfg, device)
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)
def _inference_generator(model, imgs, img_transform, cfg, device):
    """Lazily yield one detection result per input image."""
    yield from (_inference_single(model, image, img_transform, cfg, device)
                for image in imgs)
def inference_detector(model, imgs, cfg, device='cuda:0'):
    """Detect objects in one image or a list of images.

    Returns a single result when ``imgs`` is a single image, otherwise a
    generator yielding one result per image.
    """
    transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    model = model.to(device)
    model.eval()
    single_input = not isinstance(imgs, list)
    if single_input:
        return _inference_single(model, imgs, transform, cfg, device)
    return _inference_generator(model, imgs, transform, cfg, device)
def show_result(img, result, dataset='coco', score_thr=0.3):
    """Draw detection boxes scoring above ``score_thr`` on the image.

    Args:
        img (str | ndarray): The image (or its path).
        result (list[ndarray]): Per-class arrays of detected bboxes.
        dataset (str): Dataset name used to look up class names.
        score_thr (float): Minimum score for a box to be drawn.
    """
    class_names = get_classes(dataset)
    label_parts = [
        np.full(cls_bboxes.shape[0], cls_idx, dtype=np.int32)
        for cls_idx, cls_bboxes in enumerate(result)
    ]
    all_labels = np.concatenate(label_parts)
    all_bboxes = np.vstack(result)
    canvas = mmcv.imread(img).copy()
    mmcv.imshow_det_bboxes(
        canvas,
        all_bboxes,
        all_labels,
        class_names=class_names,
        score_thr=score_thr)
================================================
FILE: mmdet/apis/train.py
================================================
from __future__ import division
from collections import OrderedDict
import torch
from mmcv.runner import Runner, DistSamplerSeedHook
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmdet.core import (DistOptimizerHook, DistEvalmAPHook,
CocoDistEvalRecallHook, CocoDistEvalmAPHook)
from mmdet.datasets import build_dataloader
from mmdet.models import RPN
from .env import get_root_logger
def parse_losses(losses):
    """Reduce raw model losses to one scalar total plus loggable floats.

    Args:
        losses (dict): Maps each loss name to a Tensor or list of Tensors.

    Returns:
        tuple: ``(loss, log_vars)`` where ``loss`` is the tensor sum of
        every entry whose name contains 'loss', and ``log_vars`` maps each
        name (plus the aggregate 'loss') to a python float.

    Raises:
        TypeError: If a value is neither a Tensor nor a list of Tensors.
    """
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
        elif isinstance(value, list):
            log_vars[name] = sum(item.mean() for item in value)
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(name))
    # Only entries whose key mentions 'loss' contribute to backprop.
    loss = sum(v for k, v in log_vars.items() if 'loss' in k)
    log_vars['loss'] = loss
    for key in log_vars:
        log_vars[key] = log_vars[key].item()
    return loss, log_vars
def batch_processor(model, data, train_mode):
    """Runner callback: forward one batch and package losses for logging."""
    loss, log_vars = parse_losses(model(**data))
    return dict(
        loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   logger=None):
    """Entry point dispatching to distributed or single-node training."""
    if logger is None:
        logger = get_root_logger(cfg.log_level)
    # start training
    trainer = _dist_train if distributed else _non_dist_train
    trainer(model, dataset, cfg, validate=validate)
def _dist_train(model, dataset, cfg, validate=False):
    """Train with one process per GPU via MMDistributedDataParallel."""
    # prepare data loaders
    loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            dist=True)
    ]
    # wrap the model for synchronized multi-process training
    ddp_model = MMDistributedDataParallel(model.cuda())
    runner = Runner(ddp_model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    # distributed training needs the gradient-averaging optimizer hook
    runner.register_training_hooks(cfg.lr_config,
                                   DistOptimizerHook(**cfg.optimizer_config),
                                   cfg.checkpoint_config, cfg.log_config)
    runner.register_hook(DistSamplerSeedHook())
    # register eval hooks
    if validate:
        if isinstance(ddp_model.module, RPN):
            # TODO: implement recall hooks for other datasets
            runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
        elif cfg.data.val.type == 'CocoDataset':
            runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
        else:
            runner.register_hook(DistEvalmAPHook(cfg.data.val))
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_train(model, dataset, cfg, validate=False):
    """Train on a single node with MMDataParallel over ``cfg.gpus`` GPUs.

    Note: ``validate`` is accepted for interface parity with _dist_train but
    no eval hook is registered in the non-distributed path.
    """
    # prepare data loaders
    loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            cfg.gpus,
            dist=False)
    ]
    # wrap the model for single-process multi-GPU training
    dp_model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    runner = Runner(dp_model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(loaders, cfg.workflow, cfg.total_epochs)
================================================
FILE: mmdet/core/__init__.py
================================================
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .mask import * # noqa: F401, F403
from .loss import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
================================================
FILE: mmdet/core/anchor/__init__.py
================================================
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target
__all__ = ['AnchorGenerator', 'anchor_target']
================================================
FILE: mmdet/core/anchor/anchor_generator.py
================================================
import torch
class AnchorGenerator(object):
    """Generate anchors for a single feature level.

    A fixed set of base anchors is built once from ``base_size``, ``scales``
    and ``ratios``; ``grid_anchors`` then translates them across every
    location of a feature map.
    """

    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        self.base_size = base_size
        self.scales = torch.Tensor(scales)
        self.ratios = torch.Tensor(ratios)
        self.scale_major = scale_major
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_base_anchors(self):
        """Number of anchors placed at each feature-map location."""
        return self.base_anchors.size(0)

    def gen_base_anchors(self):
        """Return the (A, 4) base anchors centered on one cell."""
        w = h = self.base_size
        if self.ctr is not None:
            x_ctr, y_ctr = self.ctr
        else:
            x_ctr = 0.5 * (w - 1)
            y_ctr = 0.5 * (h - 1)
        # aspect ratio r scales height by sqrt(r) and width by 1/sqrt(r),
        # keeping the anchor area constant
        h_ratios = torch.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
            hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
        else:
            ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
            hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
        half_ws = 0.5 * (ws - 1)
        half_hs = 0.5 * (hs - 1)
        corners = [
            x_ctr - half_ws, y_ctr - half_hs,
            x_ctr + half_ws, y_ctr + half_hs
        ]
        return torch.stack(corners, dim=-1).round()

    def _meshgrid(self, x, y, row_major=True):
        """Return the cartesian product of x and y as two flat tensors."""
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        return (xx, yy) if row_major else (yy, xx)

    def grid_anchors(self, featmap_size, stride=16, device='cuda'):
        """Tile the base anchors over every feature-map location."""
        base = self.base_anchors.to(device)
        feat_h, feat_w = featmap_size
        shift_x = torch.arange(0, feat_w, device=device) * stride
        shift_y = torch.arange(0, feat_h, device=device) * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy],
                             dim=-1).type_as(base)
        # (K, 1, 4) shifts + (1, A, 4) anchors -> (K, A, 4) -> (K*A, 4).
        # The first A rows belong to location (0, 0), then (0, 1), ...
        return (base[None, :, :] + shifts[:, None, :]).view(-1, 4)

    def valid_flags(self, featmap_size, valid_size, device='cuda'):
        """Flag which grid anchors lie inside the un-padded image region."""
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        # replicate per-location flags across all base anchors
        return valid[:, None].expand(
            valid.size(0), self.num_base_anchors).contiguous().view(-1)
================================================
FILE: mmdet/core/anchor/anchor_target.py
================================================
import torch
from ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta
from ..utils import multi_apply
def anchor_target(anchor_list,
                  valid_flag_list,
                  gt_bboxes_list,
                  img_metas,
                  target_means,
                  target_stds,
                  cfg,
                  gt_labels_list=None,
                  label_channels=1,
                  sampling=True,
                  unmap_outputs=True):
    """Compute regression and classification targets for anchors.

    NOTE: ``anchor_list`` and ``valid_flag_list`` are modified in place --
    each image's per-level tensors are concatenated into a single tensor.

    Args:
        anchor_list (list[list]): Multi level anchors of each image.
        valid_flag_list (list[list]): Multi level valid flags of each image.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        target_means (Iterable): Mean value of regression targets.
        target_stds (Iterable): Std value of regression targets.
        cfg (dict): RPN train configs.
        gt_labels_list (list[Tensor], optional): GT labels per image; when
            None, positives get the binary foreground label 1.
        label_channels (int): Channels to expand labels to (one-hot).
        sampling (bool): Whether to subsample anchors with cfg's sampler.
        unmap_outputs (bool): Whether to map targets back onto the full
            (pre-filtering) anchor set.

    Returns:
        tuple: (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, num_total_pos, num_total_neg), lists indexed
            by feature level; None when some image has no valid anchors.
    """
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs
    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        anchor_list[i] = torch.cat(anchor_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])
    # compute targets for each image
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
     pos_inds_list, neg_inds_list) = multi_apply(
         anchor_target_single,
         anchor_list,
         valid_flag_list,
         gt_bboxes_list,
         gt_labels_list,
         img_metas,
         target_means=target_means,
         target_stds=target_stds,
         cfg=cfg,
         label_channels=label_channels,
         sampling=sampling,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    # counts are clamped to >= 1 per image -- presumably to avoid a zero
    # normalizer in the loss; confirm at the call sites
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
    return (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, num_total_pos, num_total_neg)
def images_to_levels(target, num_level_anchors):
    """Convert per-image targets to per-level targets.

    [target_img0, target_img1] -> [target_level0, target_level1, ...]

    Note: ``squeeze(0)`` drops the image dimension only when the batch
    contains a single image.
    """
    stacked = torch.stack(target, 0)
    level_targets = []
    begin = 0
    for count in num_level_anchors:
        level_targets.append(stacked[:, begin:begin + count].squeeze(0))
        begin += count
    return level_targets
def anchor_target_single(flat_anchors,
                         valid_flags,
                         gt_bboxes,
                         gt_labels,
                         img_meta,
                         target_means,
                         target_stds,
                         cfg,
                         label_channels=1,
                         sampling=True,
                         unmap_outputs=True):
    """Compute classification and regression targets for one image.

    Returns:
        tuple: (labels, label_weights, bbox_targets, bbox_weights,
            pos_inds, neg_inds); all six are None when no anchor lies
            inside the allowed image border.
    """
    # keep only anchors that are valid and within the image border
    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                       img_meta['img_shape'][:2],
                                       cfg.allowed_border)
    if not inside_flags.any():
        return (None, ) * 6
    # assign gt and sample anchors
    anchors = flat_anchors[inside_flags, :]
    if sampling:
        assign_result, sampling_result = assign_and_sample(
            anchors, gt_bboxes, None, None, cfg)
    else:
        # no subsampling: assign everything, then wrap in a pseudo sampler
        bbox_assigner = build_assigner(cfg.assigner)
        assign_result = bbox_assigner.assign(anchors, gt_bboxes, None,
                                             gt_labels)
        bbox_sampler = PseudoSampler()
        sampling_result = bbox_sampler.sample(assign_result, anchors,
                                              gt_bboxes)
    num_valid_anchors = anchors.shape[0]
    bbox_targets = torch.zeros_like(anchors)
    bbox_weights = torch.zeros_like(anchors)
    labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
    pos_inds = sampling_result.pos_inds
    neg_inds = sampling_result.neg_inds
    if len(pos_inds) > 0:
        # regression targets are normalized deltas w.r.t. the matched gt
        pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,
                                      sampling_result.pos_gt_bboxes,
                                      target_means, target_stds)
        bbox_targets[pos_inds, :] = pos_bbox_targets
        bbox_weights[pos_inds, :] = 1.0
        if gt_labels is None:
            # RPN-style binary classification: every positive is class 1
            labels[pos_inds] = 1
        else:
            labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
        if cfg.pos_weight <= 0:
            label_weights[pos_inds] = 1.0
        else:
            label_weights[pos_inds] = cfg.pos_weight
    if len(neg_inds) > 0:
        label_weights[neg_inds] = 1.0
    # map up to original set of anchors
    if unmap_outputs:
        num_total_anchors = flat_anchors.size(0)
        labels = unmap(labels, num_total_anchors, inside_flags)
        label_weights = unmap(label_weights, num_total_anchors, inside_flags)
        if label_channels > 1:
            labels, label_weights = expand_binary_labels(
                labels, label_weights, label_channels)
        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
            neg_inds)
def expand_binary_labels(labels, label_weights, label_channels):
    """Expand integer class labels into one-hot binary targets.

    A label ``c >= 1`` (1-based foreground class) sets column ``c - 1``;
    background rows (label 0) stay all-zero. Weights are broadcast to one
    value per row across all channels.
    """
    bin_labels = labels.new_full((labels.size(0), label_channels), 0)
    fg_inds = torch.nonzero(labels >= 1).squeeze()
    if fg_inds.numel() > 0:
        bin_labels[fg_inds, labels[fg_inds] - 1] = 1
    bin_label_weights = label_weights.view(-1, 1).expand(
        label_weights.size(0), label_channels)
    return bin_labels, bin_label_weights
def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
                        allowed_border=0):
    """Mark anchors that are valid and lie within the image bounds.

    A negative ``allowed_border`` disables the border check entirely and
    returns ``valid_flags`` unchanged.
    """
    img_h, img_w = img_shape[:2]
    if allowed_border < 0:
        return valid_flags
    return (valid_flags &
            (flat_anchors[:, 0] >= -allowed_border) &
            (flat_anchors[:, 1] >= -allowed_border) &
            (flat_anchors[:, 2] < img_w + allowed_border) &
            (flat_anchors[:, 3] < img_h + allowed_border))
def unmap(data, count, inds, fill=0):
    """Scatter ``data`` back into a tensor with ``count`` leading entries.

    Positions listed in ``inds`` receive ``data`` (element-wise for 1-D
    input, row-wise otherwise); every other position holds ``fill``.
    """
    if data.dim() == 1:
        out = data.new_full((count, ), fill)
        out[inds] = data
        return out
    out = data.new_full((count, ) + data.size()[1:], fill)
    out[inds, :] = data
    return out
================================================
FILE: mmdet/core/bbox/__init__.py
================================================
from .geometry import bbox_overlaps
from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult
from .samplers import (BaseSampler, PseudoSampler, RandomSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
CombinedSampler, SamplingResult, RandomSamplerFixnum)
from .assign_sampling import build_assigner, build_sampler, assign_and_sample
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target
__all__ = [
'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target',
'RandomSamplerFixnum'
]
================================================
FILE: mmdet/core/bbox/assign_sampling.py
================================================
import mmcv
from . import assigners, samplers
def build_assigner(cfg, **kwargs):
    """Build a bbox assigner from a config dict, or pass an instance through.

    Args:
        cfg (dict | BaseAssigner): Assigner config, or an already-built
            assigner instance (returned as-is).
        **kwargs: Default constructor arguments.

    Returns:
        BaseAssigner: The assigner.

    Raises:
        TypeError: If ``cfg`` is neither a dict nor a BaseAssigner.
    """
    if isinstance(cfg, assigners.BaseAssigner):
        return cfg
    elif isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, assigners, default_args=kwargs)
    else:
        # Fixed copy-paste from build_sampler: the message said "sampler".
        raise TypeError('Invalid type {} for building an assigner'.format(
            type(cfg)))
def build_sampler(cfg, **kwargs):
    """Build a bbox sampler from a config dict, or pass an instance through.

    Raises:
        TypeError: If ``cfg`` is neither a dict nor a BaseSampler.
    """
    if isinstance(cfg, samplers.BaseSampler):
        return cfg
    if isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, samplers, default_args=kwargs)
    raise TypeError('Invalid type {} for building a sampler'.format(
        type(cfg)))
def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Assign gts to bboxes, then sample positives/negatives per ``cfg``."""
    assigner = build_assigner(cfg.assigner)
    assign_result = assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
                                    gt_labels)
    sampler = build_sampler(cfg.sampler)
    sampling_result = sampler.sample(assign_result, bboxes, gt_bboxes,
                                     gt_labels)
    return assign_result, sampling_result
================================================
FILE: mmdet/core/bbox/assigners/__init__.py
================================================
from .base_assigner import BaseAssigner
from .max_iou_assigner import MaxIoUAssigner
from .assign_result import AssignResult
__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
================================================
FILE: mmdet/core/bbox/assigners/assign_result.py
================================================
import torch
class AssignResult(object):
    """Result of assigning ground-truth boxes to proposals.

    Attributes:
        num_gts (int): Number of ground-truth boxes.
        gt_inds (Tensor): Per-bbox assigned gt index (1-based; 0 means
            negative, -1 means ignore).
        max_overlaps (Tensor): Per-bbox overlap with its best gt.
        labels (Tensor, optional): Per-bbox class label, when gt labels
            were supplied to the assigner.
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        self.num_gts = num_gts
        self.gt_inds = gt_inds
        self.max_overlaps = max_overlaps
        self.labels = labels

    def add_gt_(self, gt_labels):
        """Prepend the gts themselves as proposals with perfect overlap."""
        gt_self_inds = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
        self.gt_inds = torch.cat([gt_self_inds, self.gt_inds])
        perfect = self.max_overlaps.new_ones(self.num_gts)
        self.max_overlaps = torch.cat([perfect, self.max_overlaps])
        if self.labels is not None:
            self.labels = torch.cat([gt_labels, self.labels])
FILE: mmdet/core/bbox/assigners/base_assigner.py
================================================
from abc import ABCMeta, abstractmethod
class BaseAssigner(metaclass=ABCMeta):
    """Abstract interface for bbox assigners.

    Subclasses implement ``assign``, matching each proposal to a ground
    truth box, to background, or to ignore, and return an AssignResult.
    """

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        pass
================================================
FILE: mmdet/core/bbox/assigners/max_iou_assigner.py
================================================
import torch
from .base_assigner import BaseAssigner
from .assign_result import AssignResult
from ..geometry import bbox_overlaps
class MaxIoUAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposals will be assigned with `-1`, `0`, or a positive integer
indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
This method assign a gt bbox to every bbox (proposal/anchor), each bbox
will be assigned with -1, 0, or a positive number. -1 means don't care,
0 means negative sample, positive number is the index (1-based) of
assigned gt.
The assignment is done in following steps, the order matters.
1. assign every bbox to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
raise ValueError('No gt or bboxes')
bboxes = bboxes[:, :4]
overlaps = bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
ignore_overlaps = bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
ignore_bboxes_inds = torch.nonzero(
ignore_max_overlaps > self.ignore_iof_thr).squeeze()
if ignore_bboxes_inds.numel() > 0:
overlaps[ignore_bboxes_inds[:, 0], :] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Each bbox ends up with -1 (ignore), 0 (negative) or a 1-based gt
        index. The four steps below run in this exact order; later steps
        overwrite earlier ones.

        Args:
            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
                shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
            :obj:`AssignResult`: The assign result.
        """
        if overlaps.numel() == 0:
            raise ValueError('No gt or proposals')
        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
        # 2. assign negative: below
        # neg_iou_thr may be a single float (upper bound) or a
        # (low, high) tuple delimiting the negative IoU interval.
        if isinstance(self.neg_iou_thr, float):
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0
        # 3. assign positive: above positive IoU threshold
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
        # 4. assign fg: for each gt, proposals with highest IoU
        # (may override step 3, so every gt with IoU >= min_pos_iou is
        # guaranteed at least one assigned proposal)
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
                    # assign all proposals tied for this gt's best IoU
                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
        if gt_labels is not None:
            # 0 denotes background; positives copy their gt's label
            assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
            pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
            if pos_inds.numel() > 0:
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None
        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
================================================
FILE: mmdet/core/bbox/bbox_target.py
================================================
import torch
from .transforms import bbox2delta
from ..utils import multi_apply
def bbox_target(pos_bboxes_list,
                neg_bboxes_list,
                pos_gt_bboxes_list,
                pos_gt_labels_list,
                cfg,
                reg_classes=1,
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0],
                concat=True):
    """Compute per-image bbox targets and optionally concatenate them.

    Each *_list argument holds one tensor per image; the work is delegated
    to `bbox_target_single` via `multi_apply`.

    Returns:
        tuple: (labels, label_weights, bbox_targets, bbox_weights), each a
            list of per-image tensors, or a single concatenated tensor when
            `concat` is True.
    """
    per_image = multi_apply(
        bbox_target_single,
        pos_bboxes_list,
        neg_bboxes_list,
        pos_gt_bboxes_list,
        pos_gt_labels_list,
        cfg=cfg,
        reg_classes=reg_classes,
        target_means=target_means,
        target_stds=target_stds)
    if concat:
        per_image = tuple(torch.cat(res, 0) for res in per_image)
    labels, label_weights, bbox_targets, bbox_weights = per_image
    return labels, label_weights, bbox_targets, bbox_weights
def bbox_target_single(pos_bboxes,
                       neg_bboxes,
                       pos_gt_bboxes,
                       pos_gt_labels,
                       cfg,
                       reg_classes=1,
                       target_means=[.0, .0, .0, .0],
                       target_stds=[1.0, 1.0, 1.0, 1.0]):
    """Build cls/reg targets for one image's sampled proposals.

    Positives occupy the first `num_pos` rows of every output, negatives the
    remaining rows. When `reg_classes > 1` the regression targets are
    scattered into per-class columns via `expand_target`.
    """
    num_pos, num_neg = pos_bboxes.size(0), neg_bboxes.size(0)
    num_samples = num_pos + num_neg

    labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
    label_weights = pos_bboxes.new_zeros(num_samples)
    bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
    bbox_weights = pos_bboxes.new_zeros(num_samples, 4)

    if num_pos > 0:
        labels[:num_pos] = pos_gt_labels
        # cfg.pos_weight <= 0 means "use the default weight of 1"
        label_weights[:num_pos] = cfg.pos_weight if cfg.pos_weight > 0 else 1.0
        bbox_targets[:num_pos, :] = bbox2delta(pos_bboxes, pos_gt_bboxes,
                                               target_means, target_stds)
        bbox_weights[:num_pos, :] = 1
    if num_neg > 0:
        label_weights[-num_neg:] = 1.0
    if reg_classes > 1:
        bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights,
                                                   labels, reg_classes)
    return labels, label_weights, bbox_targets, bbox_weights
def expand_target(bbox_targets, bbox_weights, labels, num_classes):
    """Scatter class-agnostic (n, 4) targets into class-specific
    (n, 4 * num_classes) columns: row i's targets land in the 4-column slot
    of its label; background rows (label 0) stay all-zero."""
    num_rois = bbox_targets.size(0)
    bbox_targets_expand = bbox_targets.new_zeros((num_rois, 4 * num_classes))
    bbox_weights_expand = bbox_weights.new_zeros((num_rois, 4 * num_classes))
    fg_inds = torch.nonzero(labels > 0).squeeze(-1)
    for ind in fg_inds:
        start = labels[ind] * 4
        bbox_targets_expand[ind, start:start + 4] = bbox_targets[ind]
        bbox_weights_expand[ind, start:start + 4] = bbox_weights[ind]
    return bbox_targets_expand, bbox_weights_expand
================================================
FILE: mmdet/core/bbox/geometry.py
================================================
import torch
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
    """Calculate overlap between two set of bboxes.

    If ``is_aligned`` is ``False``, then calculate the ious between each bbox
    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
    bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (m, 4)
        bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
            must be equal.
        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).

    Returns:
        ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
    """
    assert mode in ['iou', 'iof']
    rows, cols = bboxes1.size(0), bboxes2.size(0)
    if is_aligned:
        assert rows == cols
    if rows * cols == 0:
        return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)

    # NOTE: the legacy "+ 1" convention treats coordinates as inclusive
    # pixel indices, so a box's width is x2 - x1 + 1.
    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
        bboxes1[:, 3] - bboxes1[:, 1] + 1)

    if is_aligned:
        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
        wh = (rb - lt + 1).clamp(min=0)
        overlap = wh[:, 0] * wh[:, 1]
        denom = area1
        if mode == 'iou':
            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
                bboxes2[:, 3] - bboxes2[:, 1] + 1)
            denom = area1 + area2 - overlap
    else:
        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
        wh = (rb - lt + 1).clamp(min=0)
        overlap = wh[:, :, 0] * wh[:, :, 1]
        denom = area1[:, None]
        if mode == 'iou':
            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
                bboxes2[:, 3] - bboxes2[:, 1] + 1)
            denom = area1[:, None] + area2 - overlap
    return overlap / denom
================================================
FILE: mmdet/core/bbox/samplers/__init__.py
================================================
from .base_sampler import BaseSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .combined_sampler import CombinedSampler
from .ohem_sampler import OHEMSampler
from .sampling_result import SamplingResult
from .random_sampler_fixnum import RandomSamplerFixnum
__all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult', 'RandomSamplerFixnum'
]
================================================
FILE: mmdet/core/bbox/samplers/base_sampler.py
================================================
from abc import ABCMeta, abstractmethod
import torch
from .sampling_result import SamplingResult
class BaseSampler(metaclass=ABCMeta):
    """Abstract base class for positive/negative proposal samplers."""

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        self.num = num
        self.pos_fraction = pos_fraction
        self.neg_pos_ub = neg_pos_ub
        self.add_gt_as_proposals = add_gt_as_proposals
        # A sampler acts as its own pos/neg sub-sampler by default;
        # subclasses (e.g. a combined sampler) may replace these.
        self.pos_sampler = self
        self.neg_sampler = self

    @abstractmethod
    def _sample_pos(self, assign_result, num_expected, **kwargs):
        pass

    @abstractmethod
    def _sample_neg(self, assign_result, num_expected, **kwargs):
        pass

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               **kwargs):
        """Sample positive and negative bboxes.

        This is a simple implementation of bbox sampling given candidates,
        assigning results and ground truth bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        bboxes = bboxes[:, :4]
        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
        if self.add_gt_as_proposals:
            # Prepend gts so they can be sampled as (flagged) positives.
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
            gt_flags = torch.cat([gt_ones, gt_flags])

        n_pos_wanted = int(self.num * self.pos_fraction)
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, n_pos_wanted, bboxes=bboxes, **kwargs)
        # Sampled indices occasionally contain duplicates
        # (may be a bug of PyTorch), hence the unique().
        pos_inds = pos_inds.unique()
        n_pos = pos_inds.numel()

        n_neg_wanted = self.num - n_pos
        if self.neg_pos_ub >= 0:
            neg_cap = int(self.neg_pos_ub * max(1, n_pos))
            if n_neg_wanted > neg_cap:
                n_neg_wanted = neg_cap
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, n_neg_wanted, bboxes=bboxes, **kwargs)
        neg_inds = neg_inds.unique()

        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)
================================================
FILE: mmdet/core/bbox/samplers/combined_sampler.py
================================================
from .base_sampler import BaseSampler
from ..assign_sampling import build_sampler
class CombinedSampler(BaseSampler):
    """Sampler that delegates to independently configured pos/neg samplers."""

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        super(CombinedSampler, self).__init__(**kwargs)
        # build_sampler instantiates each sub-sampler from its config dict
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        """Sampling is delegated to `self.pos_sampler`."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sampling is delegated to `self.neg_sampler`."""
        raise NotImplementedError
================================================
FILE: mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
================================================
import numpy as np
import torch
from .random_sampler import RandomSampler
class InstanceBalancedPosSampler(RandomSampler):
    """Positive sampler that balances samples across gt instances."""

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample positives so that each gt contributes roughly equally.

        1. compute a per-gt quota of ceil-ish num_expected / num_gts
        2. sample at most that many positives per gt
        3. top up from the remaining positives if still short

        Args:
            assign_result (:obj:`AssignResult`): assignment to sample from.
            num_expected (int): number of positives to sample.

        Returns:
            Tensor: sampled positive indices.
        """
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
        num_gts = len(unique_gt_inds)
        num_per_gt = int(round(num_expected / float(num_gts)) + 1)
        sampled_inds = []
        for i in unique_gt_inds:
            inds = torch.nonzero(assign_result.gt_inds == i.item())
            if inds.numel() == 0:
                continue
            inds = inds.squeeze(1)
            if len(inds) > num_per_gt:
                inds = self.random_choice(inds, num_per_gt)
            sampled_inds.append(inds)
        sampled_inds = torch.cat(sampled_inds)
        if len(sampled_inds) < num_expected:
            num_extra = num_expected - len(sampled_inds)
            # Compare by *value*, not tensor identity: torch.Tensor hashes
            # by object id, so the old `set(pos_inds.cpu()) -
            # set(sampled_inds.cpu())` built sets of scalar tensors whose
            # difference never removed already-sampled indices and could
            # re-sample duplicates. Going through numpy fixes that.
            extra_inds = np.array(
                list(
                    set(pos_inds.cpu().numpy()) -
                    set(sampled_inds.cpu().numpy())))
            if len(extra_inds) > num_extra:
                extra_inds = self.random_choice(extra_inds, num_extra)
            extra_inds = torch.from_numpy(extra_inds).to(
                assign_result.gt_inds.device).long()
            sampled_inds = torch.cat([sampled_inds, extra_inds])
        elif len(sampled_inds) > num_expected:
            sampled_inds = self.random_choice(sampled_inds, num_expected)
        return sampled_inds
================================================
FILE: mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
================================================
import numpy as np
import torch
from .random_sampler import RandomSampler
class IoUBalancedNegSampler(RandomSampler):
    """Negative sampler balancing hard and easy negatives by IoU.

    Negatives with max IoU >= `hard_thr` are "hard", the rest are "easy";
    `hard_fraction` of the requested samples are drawn from the hard set.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 hard_thr=0.1,
                 hard_fraction=0.5,
                 **kwargs):
        super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
                                                    **kwargs)
        assert hard_thr > 0
        assert 0 < hard_fraction < 1
        self.hard_thr = hard_thr
        self.hard_fraction = hard_fraction

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negatives, mixing hard and easy ones per `hard_fraction`."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        max_overlaps = assign_result.max_overlaps.cpu().numpy()
        # balance sampling for negative samples
        neg_set = set(neg_inds.cpu().numpy())
        easy_set = set(
            np.where(
                np.logical_and(max_overlaps >= 0,
                               max_overlaps < self.hard_thr))[0])
        hard_set = set(np.where(max_overlaps >= self.hard_thr)[0])
        easy_neg_inds = list(easy_set & neg_set)
        hard_neg_inds = list(hard_set & neg_set)

        num_expected_hard = int(num_expected * self.hard_fraction)
        if len(hard_neg_inds) > num_expected_hard:
            sampled_hard_inds = self.random_choice(hard_neg_inds,
                                                   num_expected_hard)
        else:
            # np.int (a builtin-int alias) was removed in NumPy >= 1.24;
            # use an explicit integer dtype instead.
            sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int64)
        num_expected_easy = num_expected - len(sampled_hard_inds)
        if len(easy_neg_inds) > num_expected_easy:
            sampled_easy_inds = self.random_choice(easy_neg_inds,
                                                   num_expected_easy)
        else:
            sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int64)
        sampled_inds = np.concatenate((sampled_easy_inds, sampled_hard_inds))
        if len(sampled_inds) < num_expected:
            # top up with whatever negatives were not picked yet
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(neg_set - set(sampled_inds)))
            if len(extra_inds) > num_extra:
                extra_inds = self.random_choice(extra_inds, num_extra)
            sampled_inds = np.concatenate((sampled_inds, extra_inds))
        sampled_inds = torch.from_numpy(sampled_inds).long().to(
            assign_result.gt_inds.device)
        return sampled_inds
================================================
FILE: mmdet/core/bbox/samplers/ohem_sampler.py
================================================
import torch
from .base_sampler import BaseSampler
from ..transforms import bbox2roi
class OHEMSampler(BaseSampler):
    """Online Hard Example Mining sampler.

    When there are more candidates than requested, keeps the ones with the
    largest classification loss, computed with the detector's own bbox head
    (`context` supplies `bbox_roi_extractor` and `bbox_head`).
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 context,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                          add_gt_as_proposals)
        self.bbox_roi_extractor = context.bbox_roi_extractor
        self.bbox_head = context.bbox_head

    def hard_mining(self, inds, num_expected, bboxes, labels, feats):
        """Return the `num_expected` candidates with the highest cls loss."""
        with torch.no_grad():
            rois = bbox2roi([bboxes])
            roi_feats = self.bbox_roi_extractor(
                feats[:self.bbox_roi_extractor.num_inputs], rois)
            cls_score, _ = self.bbox_head(roi_feats)
            # per-sample (unreduced) classification loss
            losses = self.bbox_head.loss(
                cls_score=cls_score,
                bbox_pred=None,
                labels=labels,
                label_weights=cls_score.new_ones(cls_score.size(0)),
                bbox_targets=None,
                bbox_weights=None,
                reduce=False)['loss_cls']
            _, hardest = losses.topk(num_expected)
        return inds[hardest]

    def _sample_pos(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        """Sample the hardest positives (all of them if few enough)."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
                                assign_result.labels[pos_inds], feats)

    def _sample_neg(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        """Sample the hardest negatives (all of them if few enough)."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
                                assign_result.labels[neg_inds], feats)
================================================
FILE: mmdet/core/bbox/samplers/pseudo_sampler.py
================================================
import torch
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult
class PseudoSampler(BaseSampler):
    """A no-op sampler: keeps every assigned positive and negative."""

    def __init__(self, **kwargs):
        # Intentionally skip BaseSampler.__init__: no sampling params needed.
        pass

    def _sample_pos(self, **kwargs):
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Wrap all assigned pos/neg proposals into a SamplingResult."""
        gt_inds = assign_result.gt_inds
        pos_inds = torch.nonzero(gt_inds > 0).squeeze(-1).unique()
        neg_inds = torch.nonzero(gt_inds == 0).squeeze(-1).unique()
        # no gt boxes are injected, so nothing is flagged as gt
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)
================================================
FILE: mmdet/core/bbox/samplers/random_sampler.py
================================================
import numpy as np
import torch
from .base_sampler import BaseSampler
class RandomSampler(BaseSampler):
    """Sampler that picks positives and negatives uniformly at random."""

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                            add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Randomly select `num` elements from `gallery` without replacement.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        # permutation(n) shuffles arange(n), matching the original behavior
        perm = np.random.permutation(len(gallery))[:num]
        if not isinstance(gallery, np.ndarray):
            perm = torch.from_numpy(perm).long().to(gallery.device)
        return gallery[perm]

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            return pos_inds
        return self.random_choice(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        return self.random_choice(neg_inds, num_expected)
================================================
FILE: mmdet/core/bbox/samplers/random_sampler_fixnum.py
================================================
import numpy as np
import torch
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult
class RandomSamplerFixnum(BaseSampler):
    """Random sampler that returns a *fixed* number of samples.

    Unlike :class:`RandomSampler`, when there are fewer candidates than
    requested the candidate indices are repeated (plus a random remainder)
    so the requested count is met exactly.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(RandomSamplerFixnum, self).__init__(num, pos_fraction,
                                                  neg_pos_ub,
                                                  add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Random select some elements from the gallery.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        cands = np.arange(len(gallery))
        np.random.shuffle(cands)
        rand_inds = cands[:num]
        if not isinstance(gallery, np.ndarray):
            rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
        return gallery[rand_inds]

    @staticmethod
    def _sample_fixed(inds, num_expected):
        """Return exactly `num_expected` indices drawn from `inds`,
        repeating the candidates when there are not enough of them."""
        if inds.numel() == 0:
            # No candidates: return an empty 1-D index tensor instead of
            # crashing with ZeroDivisionError on `num_expected // 0`; the
            # caller then takes more samples from the other set.
            return inds.view(-1)
        if inds.numel() <= num_expected:
            n_repeat = num_expected // inds.numel()
            remainder = RandomSamplerFixnum.random_choice(
                inds, num_expected % inds.numel())
            return torch.cat((inds.repeat(n_repeat), remainder))
        return RandomSamplerFixnum.random_choice(inds, num_expected)

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample exactly `num_expected` positives (with repetition if
        there are fewer positive candidates)."""
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        return self._sample_fixed(pos_inds, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample exactly `num_expected` negatives (with repetition if
        there are fewer negative candidates)."""
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        return self._sample_fixed(neg_inds, num_expected)

    def sample(self,
               assign_result,
               bboxes,
               gt_bboxes,
               gt_labels=None,
               has_roi_score=False,
               **kwargs):
        """Sample a fixed total of `self.num` positive + negative bboxes.

        Args:
            assign_result (:obj:`AssignResult`): Bbox assigning results.
            bboxes (Tensor): Boxes to be sampled from.
            gt_bboxes (Tensor): Ground truth bboxes.
            gt_labels (Tensor, optional): Class labels of ground truth bboxes.
            has_roi_score (bool): if True, proposals carry a 5th score
                column; gts are padded with a dummy score of 1 so the two
                can be concatenated.

        Returns:
            :obj:`SamplingResult`: Sampling result.
        """
        if has_roi_score:
            gt_bboxes_new = gt_bboxes.new_ones((gt_bboxes.shape[0], 5))
            gt_bboxes_new[:, :4] = gt_bboxes
            gt_bboxes = gt_bboxes_new
        else:
            bboxes = bboxes[:, :4]
        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
        if self.add_gt_as_proposals:
            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
            assign_result.add_gt_(gt_labels)
            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
            gt_flags = torch.cat([gt_ones, gt_flags])
        num_expected_pos = int(self.num * self.pos_fraction)
        # NOTE: indices are intentionally NOT deduplicated here --
        # repetitions are what keeps the sampled count fixed.
        pos_inds = self.pos_sampler._sample_pos(
            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
        num_sampled_pos = pos_inds.numel()
        num_expected_neg = self.num - num_sampled_pos
        if self.neg_pos_ub >= 0:
            _pos = max(1, num_sampled_pos)
            neg_upper_bound = int(self.neg_pos_ub * _pos)
            if num_expected_neg > neg_upper_bound:
                num_expected_neg = neg_upper_bound
        neg_inds = self.neg_sampler._sample_neg(
            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)
================================================
FILE: mmdet/core/bbox/samplers/sampling_result.py
================================================
import torch
class SamplingResult(object):
    """Container for the outcome of bbox sampling.

    Slices the sampled positive/negative boxes, their gt assignments and
    labels out of the full candidate set.
    """

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        self.pos_inds = pos_inds
        self.neg_inds = neg_inds
        self.pos_bboxes = bboxes[pos_inds]
        self.neg_bboxes = bboxes[neg_inds]
        self.pos_is_gt = gt_flags[pos_inds]
        self.num_gts = gt_bboxes.shape[0]
        # convert the 1-based assignment to 0-based gt indices
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
        self.pos_gt_labels = (assign_result.labels[pos_inds]
                              if assign_result.labels is not None else None)

    @property
    def bboxes(self):
        """All sampled boxes, positives first."""
        return torch.cat([self.pos_bboxes, self.neg_bboxes])
================================================
FILE: mmdet/core/bbox/transforms.py
================================================
import mmcv
import numpy as np
import torch
def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
    """Encode gt boxes as normalized (dx, dy, dw, dh) deltas w.r.t.
    proposals (inverse of `delta2bbox`). Uses the legacy "+ 1" width/height
    convention."""
    assert proposals.size() == gt.size()
    proposals = proposals.float()
    gt = gt.float()

    # centers and sizes of proposals / gts, computed vectorized over (x, y)
    pxy = (proposals[..., :2] + proposals[..., 2:]) * 0.5
    pwh = proposals[..., 2:] - proposals[..., :2] + 1.0
    gxy = (gt[..., :2] + gt[..., 2:]) * 0.5
    gwh = gt[..., 2:] - gt[..., :2] + 1.0

    dxy = (gxy - pxy) / pwh
    dwh = torch.log(gwh / pwh)
    deltas = torch.cat([dxy, dwh], dim=-1)

    # normalize by the target means/stds
    mean_t = deltas.new_tensor(means).unsqueeze(0)
    std_t = deltas.new_tensor(stds).unsqueeze(0)
    return (deltas - mean_t) / std_t
def delta2bbox(rois,
               deltas,
               means=[0, 0, 0, 0],
               stds=[1, 1, 1, 1],
               max_shape=None,
               wh_ratio_clip=16 / 1000):
    """Decode (dx, dy, dw, dh) deltas into boxes relative to `rois`
    (inverse of `bbox2delta`).

    Args:
        rois (Tensor): shape (n, 4), reference boxes.
        deltas (Tensor): shape (n, 4 * k), k sets of deltas per roi.
        means/stds (list[float]): de-normalization applied to `deltas`.
        max_shape (tuple, optional): (h, w) to clamp boxes into the image.
        wh_ratio_clip (float): clamps dw/dh to avoid exp() overflow.

    Returns:
        Tensor: decoded boxes, same shape as `deltas`.
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Plain arithmetic instead of torch.addcmul(px, 1, pw, dx): the
    # positional `value` overload of addcmul is deprecated/removed in
    # recent PyTorch; the math is identical.
    gx = px + pw * dx
    gy = py + ph * dy
    x1 = gx - gw * 0.5 + 0.5
    y1 = gy - gh * 0.5 + 0.5
    x2 = gx + gw * 0.5 - 0.5
    y2 = gy + gh * 0.5 - 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes
def bbox_flip(bboxes, img_shape):
    """Flip bboxes horizontally.

    Args:
        bboxes(Tensor or ndarray): Shape (..., 4*k)
        img_shape(tuple): Image shape.

    Returns:
        Same type as `bboxes`: Flipped bboxes.
    """
    if isinstance(bboxes, torch.Tensor):
        assert bboxes.shape[-1] % 4 == 0
        img_w = img_shape[1]
        flipped = bboxes.clone()
        # mirror each 4-tuple: new x1 comes from old x2 and vice versa
        flipped[:, 0::4] = img_w - bboxes[:, 2::4] - 1
        flipped[:, 2::4] = img_w - bboxes[:, 0::4] - 1
        return flipped
    elif isinstance(bboxes, np.ndarray):
        return mmcv.bbox_flip(bboxes, img_shape)
def bbox_mapping(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from the original image scale to testing scale."""
    scaled = bboxes * scale_factor
    return bbox_flip(scaled, img_shape) if flip else scaled
def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from testing scale back to the original image scale."""
    if flip:
        bboxes = bbox_flip(bboxes, img_shape)
    return bboxes / scale_factor
def bbox2roi(bbox_list):
    """Convert a list of bboxes to roi format.

    Args:
        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
            of images.

    Returns:
        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
    """
    rois_list = []
    for img_id, bboxes in enumerate(bbox_list):
        if bboxes.size(0) == 0:
            rois_list.append(bboxes.new_zeros((0, 5)))
            continue
        # prepend the image index as the first column
        img_col = bboxes.new_full((bboxes.size(0), 1), img_id)
        rois_list.append(torch.cat([img_col, bboxes[:, :4]], dim=-1))
    return torch.cat(rois_list, 0)
def roi2bbox(rois):
    """Split (n, 5) rois back into per-image bbox tensors, keyed by the
    image indices actually present in column 0 (sorted ascending)."""
    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
    return [rois[rois[:, 0] == img_id.item(), 1:] for img_id in img_ids]
def bbox2result(bboxes, labels, num_classes):
"""Convert detection results to a list of numpy arrays.
Args:
bboxes (Tensor): shape (n, 5)
labels (Tensor): shape (n, )
num_classes (int): class number, including background class
Returns:
list(ndarray): bbox results of each class
"""
if bboxes.shape[0] == 0:
return [
np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)
]
else:
bboxes = bboxes.cpu().numpy()
labels = labels.cpu().numpy()
return [bboxes[labels == i, :] for i in range(num_classes - 1)]
================================================
FILE: mmdet/core/evaluation/__init__.py
================================================
from .class_names import (voc_classes, imagenet_det_classes,
imagenet_vid_classes, coco_classes, dataset_aliases,
get_classes)
from .coco_utils import coco_eval, fast_eval_recall, results2json
from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook,
CocoDistEvalmAPHook)
from .mean_ap import average_precision, eval_map, print_map_summary
from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
plot_iou_recall)
__all__ = [
'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
'plot_num_recall', 'plot_iou_recall'
]
================================================
FILE: mmdet/core/evaluation/bbox_overlaps.py
================================================
import numpy as np
def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """
    assert mode in ['iou', 'iof']
    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows, cols = bboxes1.shape[0], bboxes2.shape[0]
    if rows * cols == 0:
        return np.zeros((rows, cols), dtype=np.float32)
    # loop over the smaller set and broadcast over the bigger one;
    # the result is transposed back at the end when swapped
    swapped = rows > cols
    if swapped:
        bboxes1, bboxes2 = bboxes2, bboxes1
    ious = np.zeros((bboxes1.shape[0], bboxes2.shape[0]), dtype=np.float32)
    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
        bboxes1[:, 3] - bboxes1[:, 1] + 1)
    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
        bboxes2[:, 3] - bboxes2[:, 1] + 1)
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
            y_end - y_start + 1, 0)
        if mode == 'iou':
            denom = area1[i] + area2 - overlap
        else:
            # iof divides by the *foreground* (bboxes1) area, which sits
            # in area2 when the operands were swapped
            denom = area2 if swapped else area1[i]
        ious[i, :] = overlap / denom
    return ious.T if swapped else ious
================================================
FILE: mmdet/core/evaluation/class_names.py
================================================
import mmcv
def voc_classes():
    """Class names of PASCAL VOC (20 foreground categories)."""
    return ('aeroplane bicycle bird boat bottle bus car cat chair cow '
            'diningtable dog horse motorbike person pottedplant sheep sofa '
            'train tvmonitor').split()
def imagenet_det_classes():
    """Class names of the ImageNet DET (ILSVRC detection) dataset,
    200 foreground categories."""
    return [
        'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',
        'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',
        'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',
        'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',
        'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',
        'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',
        'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',
        'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',
        'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',
        'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',
        'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',
        'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',
        'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',
        'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',
        'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',
        'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',
        'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',
        'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',
        'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',
        'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',
        'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',
        'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',
        'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',
        'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',
        'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
        'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
        'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
        'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
        'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
        'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
        'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
        'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
        'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
        'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
        'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
        'whale', 'wine_bottle', 'zebra'
    ]
def imagenet_vid_classes():
    """Return the 30 class names of the ImageNet VID dataset, in label order."""
    names = ('airplane antelope bear bicycle bird bus car cattle dog '
             'domestic_cat elephant fox giant_panda hamster horse lion '
             'lizard monkey motorcycle rabbit red_panda sheep snake '
             'squirrel tiger train turtle watercraft whale zebra')
    return names.split()
def coco_classes():
    """Return the 80 class names of the MS COCO dataset, in label order."""
    names = ('person bicycle car motorcycle airplane bus train truck boat '
             'traffic_light fire_hydrant stop_sign parking_meter bench bird '
             'cat dog horse sheep cow elephant bear zebra giraffe backpack '
             'umbrella handbag tie suitcase frisbee skis snowboard '
             'sports_ball kite baseball_bat baseball_glove skateboard '
             'surfboard tennis_racket bottle wine_glass cup fork knife spoon '
             'bowl banana apple sandwich orange broccoli carrot hot_dog '
             'pizza donut cake chair couch potted_plant bed dining_table '
             'toilet tv laptop mouse remote keyboard cell_phone microwave '
             'oven toaster sink refrigerator book clock vase scissors '
             'teddy_bear hair_drier toothbrush')
    return names.split()
# Maps each canonical dataset name to the aliases accepted by get_classes().
# The canonical key + '_classes' is also the name of the loader function that
# returns that dataset's class list.
dataset_aliases = {
    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
    'coco': ['coco', 'mscoco', 'ms_coco']
}
def get_classes(dataset):
    """Get class names of a dataset.

    Args:
        dataset (str): dataset name or one of its aliases as listed in
            ``dataset_aliases`` (e.g. 'coco', 'mscoco', 'voc07').

    Returns:
        list[str]: class names of the dataset, in label order.

    Raises:
        ValueError: if the dataset name is not a known alias.
        TypeError: if ``dataset`` is not a string.
    """
    alias2name = {
        alias: name
        for name, aliases in dataset_aliases.items()
        for alias in aliases
    }
    if not mmcv.is_str(dataset):
        raise TypeError('dataset must a str, but got {}'.format(type(dataset)))
    if dataset not in alias2name:
        raise ValueError('Unrecognized dataset: {}'.format(dataset))
    # Look the module-level loader function up by name instead of eval()-ing
    # a constructed string -- same behavior, no eval anti-pattern.
    return globals()[alias2name[dataset] + '_classes']()
================================================
FILE: mmdet/core/evaluation/coco_utils.py
================================================
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .recall import eval_recalls
def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
    """Evaluate detection results with the official COCO toolkit.

    Args:
        result_file (str): path to the results file; a .pkl for
            'proposal_fast', otherwise a .json in COCO format.
        result_types (list[str]): any of 'proposal', 'proposal_fast',
            'bbox', 'segm', 'keypoints'.
        coco (COCO | str): a COCO api object or a path to an annotation file.
        max_dets (tuple[int]): proposal numbers used for recall evaluation.
    """
    for res_type in result_types:
        assert res_type in [
            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
        ]

    if mmcv.is_str(coco):
        coco = COCO(coco)
    assert isinstance(coco, COCO)

    # Fast-path: recall-only evaluation computed in-process from a .pkl.
    if result_types == ['proposal_fast']:
        ar = fast_eval_recall(result_file, coco, np.array(max_dets))
        for num, recall in zip(max_dets, ar):
            print('AR@{}\t= {:.4f}'.format(num, recall))
        return

    assert result_file.endswith('.json')
    predictions = coco.loadRes(result_file)
    image_ids = coco.getImgIds()
    for res_type in result_types:
        # Proposals are scored class-agnostically with the bbox IoU metric.
        iou_type = 'bbox' if res_type == 'proposal' else res_type
        evaluator = COCOeval(coco, predictions, iou_type)
        evaluator.params.imgIds = image_ids
        if res_type == 'proposal':
            evaluator.params.useCats = 0
            evaluator.params.maxDets = list(max_dets)
        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()
def fast_eval_recall(results,
                     coco,
                     max_dets,
                     iou_thrs=None):
    """Compute average recall of proposals against COCO ground truth.

    Args:
        results (list[np.ndarray] | str): per-image proposal arrays, or the
            path of a .pkl file containing them.
        coco (COCO): COCO api object holding the ground-truth annotations.
        max_dets (np.ndarray): proposal numbers to evaluate recall at.
        iou_thrs (np.ndarray, optional): IoU thresholds. Defaults to
            ``np.arange(0.5, 0.96, 0.05)``.

    Returns:
        np.ndarray: average recall (over IoU thresholds) per proposal number.
    """
    # BUG FIX: the default was a module-load-time np.arange(...) ndarray --
    # a mutable default shared across calls; build it per call instead.
    if iou_thrs is None:
        iou_thrs = np.arange(0.5, 0.96, 0.05)
    if mmcv.is_str(results):
        assert results.endswith('.pkl')
        results = mmcv.load(results)
    elif not isinstance(results, list):
        raise TypeError(
            'results must be a list of numpy arrays or a filename, not {}'.
            format(type(results)))

    gt_bboxes = []
    for img_id in coco.getImgIds():
        ann_info = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        boxes = []
        for ann in ann_info:
            # Skip crowd/ignore annotations; they do not count for recall.
            if ann.get('ignore', False) or ann['iscrowd']:
                continue
            x1, y1, w, h = ann['bbox']
            # COCO boxes are [x, y, w, h]; convert to inclusive [x1,y1,x2,y2].
            boxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
        if boxes:
            gt_bboxes.append(np.array(boxes, dtype=np.float32))
        else:
            gt_bboxes.append(np.zeros((0, 4)))

    recalls = eval_recalls(
        gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
    # Average over the IoU-threshold axis.
    return recalls.mean(axis=1)
def xyxy2xywh(bbox):
    """Convert an inclusive [x1, y1, x2, y2] box to COCO [x, y, w, h]."""
    x1, y1, x2, y2 = bbox.tolist()
    return [x1, y1, x2 - x1 + 1, y2 - y1 + 1]
def proposal2json(dataset, results):
    """Convert per-image proposal arrays to COCO json detection records.

    Proposals are class-agnostic, so category_id is fixed to 1.
    """
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        # Each row is [x1, y1, x2, y2, score].
        for bbox in results[idx]:
            json_results.append(
                dict(
                    image_id=img_id,
                    bbox=xyxy2xywh(bbox),
                    score=float(bbox[4]),
                    category_id=1))
    return json_results
def det2json(dataset, results):
    """Convert per-class detection results to COCO json detection records."""
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        # results[idx] is a list of per-class arrays of [x1,y1,x2,y2,score].
        for label, bboxes in enumerate(results[idx]):
            cat_id = dataset.cat_ids[label]
            for bbox in bboxes:
                json_results.append(
                    dict(
                        image_id=img_id,
                        bbox=xyxy2xywh(bbox),
                        score=float(bbox[4]),
                        category_id=cat_id))
    return json_results
def segm2json(dataset, results):
    """Convert (det, seg) result tuples to COCO json segmentation records."""
    json_results = []
    for idx in range(len(dataset)):
        img_id = dataset.img_ids[idx]
        det, seg = results[idx]
        for label in range(len(det)):
            bboxes = det[label]
            segms = seg[label]
            cat_id = dataset.cat_ids[label]
            for i, bbox in enumerate(bboxes):
                segm = segms[i]
                # RLE 'counts' arrives as bytes; json needs str. The decode
                # is done in place on the input, as before.
                segm['counts'] = segm['counts'].decode()
                json_results.append(
                    dict(
                        image_id=img_id,
                        bbox=xyxy2xywh(bbox),
                        score=float(bbox[4]),
                        category_id=cat_id,
                        segmentation=segm))
    return json_results
def results2json(dataset, results, out_file):
    """Dispatch on the result element type and dump COCO json to out_file.

    list -> per-class detections, tuple -> (det, seg) pairs,
    ndarray -> class-agnostic proposals.
    """
    first = results[0]
    if isinstance(first, list):
        converted = det2json(dataset, results)
    elif isinstance(first, tuple):
        converted = segm2json(dataset, results)
    elif isinstance(first, np.ndarray):
        converted = proposal2json(dataset, results)
    else:
        raise TypeError('invalid type of results')
    mmcv.dump(converted, out_file)
================================================
FILE: mmdet/core/evaluation/eval_hooks.py
================================================
import os
import os.path as osp
import shutil
import time
import mmcv
import numpy as np
import torch
from mmcv.runner import Hook, obj_from_dict
from mmcv.parallel import scatter, collate
from pycocotools.cocoeval import COCOeval
from torch.utils.data import Dataset
from .coco_utils import results2json, fast_eval_recall
from .mean_ap import eval_map
from mmdet import datasets
class DistEvalHook(Hook):
    """Distributed evaluation hook.

    Every ``interval`` epochs, each rank runs inference on its rank-strided
    share of ``dataset``. Non-zero ranks dump their partial result lists to
    temp pickle files in the work dir; rank 0 merges them and calls
    :meth:`evaluate` (implemented by subclasses) on the full result list.
    Synchronization is done with a file-based barrier.
    """

    def __init__(self, dataset, interval=1):
        if isinstance(dataset, Dataset):
            self.dataset = dataset
        elif isinstance(dataset, dict):
            # Build the dataset from a config dict, forcing test mode.
            self.dataset = obj_from_dict(dataset, datasets,
                                         {'test_mode': True})
        else:
            raise TypeError(
                'dataset must be a Dataset object or a dict, not {}'.format(
                    type(dataset)))
        self.interval = interval
        self.lock_dir = None  # set in before_run

    def _barrier(self, rank, world_size):
        """Due to some issues with `torch.distributed.barrier()`, we have to
        implement this ugly barrier function.
        """
        if rank == 0:
            # Wait until every other rank has dropped its lock file ...
            for i in range(1, world_size):
                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
                while not (osp.exists(tmp)):
                    time.sleep(1)
            # ... then release them all by removing the files.
            for i in range(1, world_size):
                tmp = osp.join(self.lock_dir, '{}.pkl'.format(i))
                os.remove(tmp)
        else:
            # Announce arrival, then wait for rank 0 to remove the file.
            tmp = osp.join(self.lock_dir, '{}.pkl'.format(rank))
            mmcv.dump([], tmp)
            while osp.exists(tmp):
                time.sleep(1)

    def before_run(self, runner):
        # Rank 0 (re)creates a clean lock directory for the barrier files.
        self.lock_dir = osp.join(runner.work_dir, '.lock_map_hook')
        if runner.rank == 0:
            if osp.exists(self.lock_dir):
                shutil.rmtree(self.lock_dir)
            mmcv.mkdir_or_exist(self.lock_dir)

    def after_run(self, runner):
        if runner.rank == 0:
            shutil.rmtree(self.lock_dir)

    def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()
        # Full-length list; each rank fills only its strided indices.
        results = [None for _ in range(len(self.dataset))]
        prog_bar = mmcv.ProgressBar(len(self.dataset))
        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(
                collate([data], samples_per_gpu=1),
                [torch.cuda.current_device()])[0]
            # compute output
            with torch.no_grad():
                result = runner.model(
                    return_loss=False, rescale=True, **data_gpu)
            results[idx] = result
            # One bar tick per image processed across all ranks.
            batch_size = runner.world_size
            for _ in range(batch_size):
                prog_bar.update()
        if runner.rank == 0:
            print('\n')
            self._barrier(runner.rank, runner.world_size)
            # Merge the partial results dumped by the other ranks.
            for i in range(1, runner.world_size):
                tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
                tmp_results = mmcv.load(tmp_file)
                for idx in range(i, len(results), runner.world_size):
                    results[idx] = tmp_results[idx]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        else:
            tmp_file = osp.join(runner.work_dir,
                                'temp_{}.pkl'.format(runner.rank))
            mmcv.dump(results, tmp_file)
            self._barrier(runner.rank, runner.world_size)
        self._barrier(runner.rank, runner.world_size)

    def evaluate(self, runner, results):
        """Evaluate the merged results; must be overridden by subclasses.

        BUG FIX: this base method previously took no arguments, so the
        ``self.evaluate(runner, results)`` call above raised TypeError
        instead of the intended NotImplementedError when no subclass
        override existed.
        """
        raise NotImplementedError
class DistEvalmAPHook(DistEvalHook):
def evaluate(self, runner, results):
gt_bboxes = []
gt_labels = []
gt_ignore = [] if self.dataset.with_crowd else None
for i in range(len(self.dataset)):
ann = self.dataset.get_ann_info(i)
bboxes = ann['bboxes']
labels = ann['labels']
if gt_ignore is not None:
ignore = np.concatenate([
np.zeros(bboxes.shape[0], dtype=np.bool),
np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)
])
gt_ignore.append(ignore)
bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
labels = np.concatenate([labels, ann['labels_ignore']])
gt_bboxes.append(bboxes)
gt_labels.append(labels)
# If the dataset is VOC2007, then use 11 points mAP evaluation.
if hasattr(self.dataset, 'year') and self.dataset.year == 2007:
ds_name = 'voc07'
else:
ds_name = self.dataset.CLASSES
mean_ap, eval_results = eval_map(
results,
gt_bboxes,
gt_labels,
gt_ignore=gt_ignore,
scale_ranges=None,
iou_thr=0.5,
dataset=ds_name,
print_summary=True)
runne
gitextract_155bakyx/
├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs/
│ ├── ade_faster_rcnn_r101_fpn_1x.py
│ ├── coco_faster_rcnn_r101_fpn_1x.py
│ ├── coco_sgrb_fpn_ms.py
│ ├── hkrm/
│ │ ├── ade_faster_rcnn_r50_fpn_1x.py
│ │ ├── coco_faster_rcnn_r101_fpn_1x.py
│ │ └── vg_faster_rcnn_r101_fpn_1x.py
│ ├── pascal_voc/
│ │ ├── faster_rcnn_r50_fpn_1x_voc0712.py
│ │ ├── ssd300_voc.py
│ │ └── ssd512_voc.py
│ ├── rrcnn/
│ │ ├── ade_reasoning_rcnn_r101_fpn_1x.py
│ │ ├── coco_reasoning_rcnn_r101_fpn_1x.py
│ │ └── vg_reasoning_rcnn_r101_fpn_1x.py
│ ├── vg_faster_rcnn_r101_fpn_1x.py
│ └── vgbig_faster_rcnn_r101_fpn_1x.py
├── mmdet/
│ ├── __init__.py
│ ├── apis/
│ │ ├── __init__.py
│ │ ├── env.py
│ │ ├── inference.py
│ │ └── train.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── anchor/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_generator.py
│ │ │ └── anchor_target.py
│ │ ├── bbox/
│ │ │ ├── __init__.py
│ │ │ ├── assign_sampling.py
│ │ │ ├── assigners/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── assign_result.py
│ │ │ │ ├── base_assigner.py
│ │ │ │ └── max_iou_assigner.py
│ │ │ ├── bbox_target.py
│ │ │ ├── geometry.py
│ │ │ ├── samplers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_sampler.py
│ │ │ │ ├── combined_sampler.py
│ │ │ │ ├── instance_balanced_pos_sampler.py
│ │ │ │ ├── iou_balanced_neg_sampler.py
│ │ │ │ ├── ohem_sampler.py
│ │ │ │ ├── pseudo_sampler.py
│ │ │ │ ├── random_sampler.py
│ │ │ │ ├── random_sampler_fixnum.py
│ │ │ │ └── sampling_result.py
│ │ │ └── transforms.py
│ │ ├── evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_overlaps.py
│ │ │ ├── class_names.py
│ │ │ ├── coco_utils.py
│ │ │ ├── eval_hooks.py
│ │ │ ├── mean_ap.py
│ │ │ └── recall.py
│ │ ├── loss/
│ │ │ ├── __init__.py
│ │ │ └── losses.py
│ │ ├── mask/
│ │ │ ├── __init__.py
│ │ │ ├── mask_target.py
│ │ │ └── utils.py
│ │ ├── post_processing/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_nms.py
│ │ │ └── merge_augs.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── dist_utils.py
│ │ └── misc.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── coco.py
│ │ ├── concat_dataset.py
│ │ ├── custom.py
│ │ ├── extra_aug.py
│ │ ├── loader/
│ │ │ ├── __init__.py
│ │ │ ├── build_loader.py
│ │ │ └── sampler.py
│ │ ├── repeat_dataset.py
│ │ ├── transforms.py
│ │ ├── utils.py
│ │ ├── voc.py
│ │ └── xml_style.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── anchor_heads/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_head.py
│ │ │ ├── retina_head.py
│ │ │ ├── rpn_head.py
│ │ │ └── ssd_head.py
│ │ ├── backbones/
│ │ │ ├── __init__.py
│ │ │ ├── resnet.py
│ │ │ ├── resnext.py
│ │ │ └── ssd_vgg.py
│ │ ├── bbox_heads/
│ │ │ ├── __init__.py
│ │ │ ├── bbox_head.py
│ │ │ ├── convfc_bbox_head.py
│ │ │ ├── convfc_bbox_head_enhanced.py
│ │ │ └── graph_bbox_head.py
│ │ ├── builder.py
│ │ ├── detectors/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── cascade_rcnn.py
│ │ │ ├── fast_rcnn.py
│ │ │ ├── faster_rcnn.py
│ │ │ ├── hkrm_rcnn.py
│ │ │ ├── mask_rcnn.py
│ │ │ ├── reasoning_rcnn.py
│ │ │ ├── retinanet.py
│ │ │ ├── rpn.py
│ │ │ ├── sgrn.py
│ │ │ ├── single_stage.py
│ │ │ ├── test_mixins.py
│ │ │ └── two_stage.py
│ │ ├── mask_heads/
│ │ │ ├── __init__.py
│ │ │ └── fcn_mask_head.py
│ │ ├── necks/
│ │ │ ├── __init__.py
│ │ │ └── fpn.py
│ │ ├── registry.py
│ │ ├── roi_extractors/
│ │ │ ├── __init__.py
│ │ │ └── single_level.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── conv_module.py
│ │ ├── norm.py
│ │ └── weight_init.py
│ └── ops/
│ ├── __init__.py
│ ├── dcn/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv.py
│ │ │ └── deform_pool.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv.py
│ │ │ └── deform_pool.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── deform_conv_cuda.cpp
│ │ ├── deform_conv_cuda_kernel.cu
│ │ ├── deform_pool_cuda.cpp
│ │ └── deform_pool_cuda_kernel.cu
│ ├── nms/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── cpu_nms.pyx
│ │ ├── cpu_soft_nms.pyx
│ │ ├── gpu_nms.hpp
│ │ ├── gpu_nms.pyx
│ │ ├── nms_kernel.cu
│ │ ├── nms_wrapper.py
│ │ └── setup.py
│ ├── roi_align/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── gradcheck.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── roi_align_cuda.cpp
│ │ └── roi_align_kernel.cu
│ └── roi_pool/
│ ├── __init__.py
│ ├── functions/
│ │ ├── __init__.py
│ │ └── roi_pool.py
│ ├── gradcheck.py
│ ├── modules/
│ │ ├── __init__.py
│ │ └── roi_pool.py
│ ├── setup.py
│ └── src/
│ ├── roi_pool_cuda.cpp
│ └── roi_pool_kernel.cu
├── setup.py
└── tools/
├── coco_eval.py
├── convert_datasets/
│ └── pascal_voc.py
├── dist_train.sh
├── graph/
│ ├── new_COCO_graph_a.pkl
│ ├── new_COCO_graph_r.pkl
│ ├── new_ade_graph_a.pkl
│ ├── new_ade_graph_r.pkl
│ ├── new_vg_big_graph_a.pkl
│ ├── new_vg_big_graph_r.pkl
│ ├── new_vg_graph_a.pkl
│ └── new_vg_graph_r.pkl
├── test.py
├── train.py
├── vis_subgraph.py
└── voc_eval.py
SYMBOL INDEX (544 symbols across 97 files)
FILE: mmdet/apis/env.py
function init_dist (line 12) | def init_dist(launcher, backend='nccl', **kwargs):
function _init_dist_pytorch (line 25) | def _init_dist_pytorch(backend, **kwargs):
function _init_dist_mpi (line 33) | def _init_dist_mpi(backend, **kwargs):
function _init_dist_slurm (line 37) | def _init_dist_slurm(backend, **kwargs):
function set_random_seed (line 41) | def set_random_seed(seed):
function get_root_logger (line 48) | def get_root_logger(log_level=logging.INFO):
FILE: mmdet/apis/inference.py
function _prepare_data (line 10) | def _prepare_data(img, img_transform, cfg, device):
function _inference_single (line 26) | def _inference_single(model, img, img_transform, cfg, device):
function _inference_generator (line 34) | def _inference_generator(model, imgs, img_transform, cfg, device):
function inference_detector (line 39) | def inference_detector(model, imgs, cfg, device='cuda:0'):
function show_result (line 51) | def show_result(img, result, dataset='coco', score_thr=0.3):
FILE: mmdet/apis/train.py
function parse_losses (line 16) | def parse_losses(losses):
function batch_processor (line 36) | def batch_processor(model, data, train_mode):
function train_detector (line 46) | def train_detector(model,
function _dist_train (line 62) | def _dist_train(model, dataset, cfg, validate=False):
function _non_dist_train (line 99) | def _non_dist_train(model, dataset, cfg, validate=False):
FILE: mmdet/core/anchor/anchor_generator.py
class AnchorGenerator (line 4) | class AnchorGenerator(object):
method __init__ (line 6) | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=No...
method num_base_anchors (line 15) | def num_base_anchors(self):
method gen_base_anchors (line 18) | def gen_base_anchors(self):
method _meshgrid (line 45) | def _meshgrid(self, x, y, row_major=True):
method grid_anchors (line 53) | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
method valid_flags (line 72) | def valid_flags(self, featmap_size, valid_size, device='cuda'):
FILE: mmdet/core/anchor/anchor_target.py
function anchor_target (line 7) | def anchor_target(anchor_list,
function images_to_levels (line 75) | def images_to_levels(target, num_level_anchors):
function anchor_target_single (line 90) | def anchor_target_single(flat_anchors,
function expand_binary_labels (line 160) | def expand_binary_labels(labels, label_weights, label_channels):
function anchor_inside_flags (line 170) | def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
function unmap (line 184) | def unmap(data, count, inds, fill=0):
FILE: mmdet/core/bbox/assign_sampling.py
function build_assigner (line 6) | def build_assigner(cfg, **kwargs):
function build_sampler (line 17) | def build_sampler(cfg, **kwargs):
function assign_and_sample (line 28) | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
FILE: mmdet/core/bbox/assigners/assign_result.py
class AssignResult (line 4) | class AssignResult(object):
method __init__ (line 6) | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
method add_gt_ (line 12) | def add_gt_(self, gt_labels):
FILE: mmdet/core/bbox/assigners/base_assigner.py
class BaseAssigner (line 4) | class BaseAssigner(metaclass=ABCMeta):
method assign (line 7) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...
FILE: mmdet/core/bbox/assigners/max_iou_assigner.py
class MaxIoUAssigner (line 8) | class MaxIoUAssigner(BaseAssigner):
method __init__ (line 31) | def __init__(self,
method assign (line 43) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...
method assign_wrt_overlaps (line 87) | def assign_wrt_overlaps(self, overlaps, gt_labels=None):
FILE: mmdet/core/bbox/bbox_target.py
function bbox_target (line 7) | def bbox_target(pos_bboxes_list,
function bbox_target_single (line 35) | def bbox_target_single(pos_bboxes,
function expand_target (line 67) | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
FILE: mmdet/core/bbox/geometry.py
function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
FILE: mmdet/core/bbox/samplers/base_sampler.py
class BaseSampler (line 8) | class BaseSampler(metaclass=ABCMeta):
method __init__ (line 10) | def __init__(self,
method _sample_pos (line 24) | def _sample_pos(self, assign_result, num_expected, **kwargs):
method _sample_neg (line 28) | def _sample_neg(self, assign_result, num_expected, **kwargs):
method sample (line 31) | def sample(self,
FILE: mmdet/core/bbox/samplers/combined_sampler.py
class CombinedSampler (line 5) | class CombinedSampler(BaseSampler):
method __init__ (line 7) | def __init__(self, pos_sampler, neg_sampler, **kwargs):
method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
method _sample_neg (line 15) | def _sample_neg(self, **kwargs):
FILE: mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
class InstanceBalancedPosSampler (line 7) | class InstanceBalancedPosSampler(RandomSampler):
method _sample_pos (line 9) | def _sample_pos(self, assign_result, num_expected, **kwargs):
FILE: mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
class IoUBalancedNegSampler (line 7) | class IoUBalancedNegSampler(RandomSampler):
method __init__ (line 9) | def __init__(self,
method _sample_neg (line 22) | def _sample_neg(self, assign_result, num_expected, **kwargs):
FILE: mmdet/core/bbox/samplers/ohem_sampler.py
class OHEMSampler (line 7) | class OHEMSampler(BaseSampler):
method __init__ (line 9) | def __init__(self,
method hard_mining (line 21) | def hard_mining(self, inds, num_expected, bboxes, labels, feats):
method _sample_pos (line 38) | def _sample_pos(self,
method _sample_neg (line 54) | def _sample_neg(self,
FILE: mmdet/core/bbox/samplers/pseudo_sampler.py
class PseudoSampler (line 7) | class PseudoSampler(BaseSampler):
method __init__ (line 9) | def __init__(self, **kwargs):
method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
method _sample_neg (line 15) | def _sample_neg(self, **kwargs):
method sample (line 18) | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
FILE: mmdet/core/bbox/samplers/random_sampler.py
class RandomSampler (line 7) | class RandomSampler(BaseSampler):
method __init__ (line 9) | def __init__(self,
method random_choice (line 19) | def random_choice(gallery, num):
method _sample_pos (line 35) | def _sample_pos(self, assign_result, num_expected, **kwargs):
method _sample_neg (line 45) | def _sample_neg(self, assign_result, num_expected, **kwargs):
FILE: mmdet/core/bbox/samplers/random_sampler_fixnum.py
class RandomSamplerFixnum (line 8) | class RandomSamplerFixnum(BaseSampler):
method __init__ (line 10) | def __init__(self,
method random_choice (line 20) | def random_choice(gallery, num):
method _sample_pos (line 46) | def _sample_pos(self, assign_result, num_expected, **kwargs):
method _sample_neg (line 71) | def _sample_neg(self, assign_result, num_expected, **kwargs):
method sample (line 89) | def sample(self,
FILE: mmdet/core/bbox/samplers/sampling_result.py
class SamplingResult (line 4) | class SamplingResult(object):
method __init__ (line 6) | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
method bboxes (line 23) | def bboxes(self):
FILE: mmdet/core/bbox/transforms.py
function bbox2delta (line 6) | def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
function delta2bbox (line 34) | def delta2bbox(rois,
function bbox_flip (line 71) | def bbox_flip(bboxes, img_shape):
function bbox_mapping (line 91) | def bbox_mapping(bboxes, img_shape, scale_factor, flip):
function bbox_mapping_back (line 99) | def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
function bbox2roi (line 106) | def bbox2roi(bbox_list):
function roi2bbox (line 128) | def roi2bbox(rois):
function bbox2result (line 138) | def bbox2result(bboxes, labels, num_classes):
FILE: mmdet/core/evaluation/bbox_overlaps.py
function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
FILE: mmdet/core/evaluation/class_names.py
function voc_classes (line 4) | def voc_classes():
function imagenet_det_classes (line 12) | def imagenet_det_classes():
function imagenet_vid_classes (line 53) | def imagenet_vid_classes():
function coco_classes (line 63) | def coco_classes():
function get_classes (line 89) | def get_classes(dataset):
FILE: mmdet/core/evaluation/coco_utils.py
function coco_eval (line 9) | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
function fast_eval_recall (line 41) | def fast_eval_recall(results,
function xyxy2xywh (line 78) | def xyxy2xywh(bbox):
function proposal2json (line 88) | def proposal2json(dataset, results):
function det2json (line 103) | def det2json(dataset, results):
function segm2json (line 120) | def segm2json(dataset, results):
function results2json (line 140) | def results2json(dataset, results, out_file):
FILE: mmdet/core/evaluation/eval_hooks.py
class DistEvalHook (line 19) | class DistEvalHook(Hook):
method __init__ (line 21) | def __init__(self, dataset, interval=1):
method _barrier (line 34) | def _barrier(self, rank, world_size):
method before_run (line 52) | def before_run(self, runner):
method after_run (line 59) | def after_run(self, runner):
method after_train_epoch (line 63) | def after_train_epoch(self, runner):
method evaluate (line 102) | def evaluate(self):
class DistEvalmAPHook (line 106) | class DistEvalmAPHook(DistEvalHook):
method evaluate (line 108) | def evaluate(self, runner, results):
class CocoDistEvalRecallHook (line 144) | class CocoDistEvalRecallHook(DistEvalHook):
method __init__ (line 146) | def __init__(self,
method evaluate (line 154) | def evaluate(self, runner, results):
class CocoDistEvalmAPHook (line 164) | class CocoDistEvalmAPHook(DistEvalHook):
method evaluate (line 166) | def evaluate(self, runner, results):
FILE: mmdet/core/evaluation/mean_ap.py
function average_precision (line 9) | def average_precision(recalls, precisions, mode='area'):
function tpfp_imagenet (line 56) | def tpfp_imagenet(det_bboxes,
function tpfp_default (line 137) | def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=...
function get_cls_results (line 202) | def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_...
function eval_map (line 220) | def eval_map(det_results,
function print_map_summary (line 332) | def print_map_summary(mean_ap, results, dataset=None):
FILE: mmdet/core/evaluation/recall.py
function _recalls (line 7) | def _recalls(all_ious, proposal_nums, thrs):
function set_recall_param (line 40) | def set_recall_param(proposal_nums, iou_thrs):
function eval_recalls (line 62) | def eval_recalls(gts,
function print_recall_summary (line 105) | def print_recall_summary(recalls,
function plot_num_recall (line 138) | def plot_num_recall(recalls, proposal_nums):
function plot_iou_recall (line 163) | def plot_iou_recall(recalls, iou_thrs):
FILE: mmdet/core/loss/losses.py
function weighted_nll_loss (line 6) | def weighted_nll_loss(pred, label, weight, avg_factor=None):
function weighted_cross_entropy (line 13) | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=...
function weighted_binary_cross_entropy (line 23) | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
function sigmoid_focal_loss (line 31) | def sigmoid_focal_loss(pred,
function weighted_sigmoid_focal_loss (line 46) | def weighted_sigmoid_focal_loss(pred,
function mask_cross_entropy (line 60) | def mask_cross_entropy(pred, target, label):
function smooth_l1_loss (line 68) | def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'):
function weighted_smoothl1 (line 84) | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
function accuracy (line 91) | def accuracy(pred, target, topk=1):
FILE: mmdet/core/mask/mask_target.py
function mask_target (line 6) | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_...
function mask_target_single (line 15) | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
FILE: mmdet/core/mask/utils.py
function split_combined_polys (line 4) | def split_combined_polys(polys, poly_lens, polys_per_mask):
FILE: mmdet/core/post_processing/bbox_nms.py
function multiclass_nms (line 6) | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_n...
FILE: mmdet/core/post_processing/merge_augs.py
function merge_aug_proposals (line 9) | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
function merge_aug_bboxes (line 41) | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
function merge_aug_scores (line 68) | def merge_aug_scores(aug_scores):
function merge_aug_masks (line 76) | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
FILE: mmdet/core/utils/dist_utils.py
function _allreduce_coalesced (line 9) | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
function allreduce_grads (line 31) | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
class DistOptimizerHook (line 44) | class DistOptimizerHook(OptimizerHook):
method __init__ (line 46) | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
method after_train_iter (line 51) | def after_train_iter(self, runner):
FILE: mmdet/core/utils/misc.py
function tensor2imgs (line 8) | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
function multi_apply (line 21) | def multi_apply(func, *args, **kwargs):
function unmap (line 27) | def unmap(data, count, inds, fill=0):
FILE: mmdet/datasets/coco.py
class CocoDataset (line 7) | class CocoDataset(CustomDataset):
method load_annotations (line 24) | def load_annotations(self, ann_file):
method get_ann_info (line 39) | def get_ann_info(self, idx):
method _filter_imgs (line 45) | def _filter_imgs(self, min_size=32):
method _parse_ann_info (line 56) | def _parse_ann_info(self, ann_info, with_mask=True):
FILE: mmdet/datasets/concat_dataset.py
class ConcatDataset (line 5) | class ConcatDataset(_ConcatDataset):
method __init__ (line 15) | def __init__(self, datasets):
FILE: mmdet/datasets/custom.py
class CustomDataset (line 14) | class CustomDataset(Dataset):
method __init__ (line 38) | def __init__(self,
method __len__ (line 114) | def __len__(self):
method load_annotations (line 117) | def load_annotations(self, ann_file):
method load_proposals (line 120) | def load_proposals(self, proposal_file):
method get_ann_info (line 123) | def get_ann_info(self, idx):
method _filter_imgs (line 126) | def _filter_imgs(self, min_size=32):
method _set_group_flag (line 134) | def _set_group_flag(self):
method _rand_another (line 146) | def _rand_another(self, idx):
method __getitem__ (line 150) | def __getitem__(self, idx):
method prepare_train_img (line 160) | def prepare_train_img(self, idx):
method prepare_test_img (line 239) | def prepare_test_img(self, idx):
FILE: mmdet/datasets/extra_aug.py
class PhotoMetricDistortion (line 8) | class PhotoMetricDistortion(object):
method __init__ (line 10) | def __init__(self,
method __call__ (line 20) | def __call__(self, img, boxes, labels):
class Expand (line 67) | class Expand(object):
method __init__ (line 69) | def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
method __call__ (line 76) | def __call__(self, img, boxes, labels):
class RandomCrop (line 92) | class RandomCrop(object):
method __init__ (line 94) | def __init__(self,
method __call__ (line 101) | def __call__(self, img, boxes, labels):
class ExtraAugmentation (line 146) | class ExtraAugmentation(object):
method __init__ (line 148) | def __init__(self,
method __call__ (line 161) | def __call__(self, img, boxes, labels):
FILE: mmdet/datasets/loader/build_loader.py
function build_dataloader (line 15) | def build_dataloader(dataset,
FILE: mmdet/datasets/loader/sampler.py
class GroupSampler (line 11) | class GroupSampler(Sampler):
method __init__ (line 13) | def __init__(self, dataset, samples_per_gpu=1):
method __iter__ (line 24) | def __iter__(self):
method __len__ (line 47) | def __len__(self):
class DistributedGroupSampler (line 51) | class DistributedGroupSampler(Sampler):
method __init__ (line 66) | def __init__(self,
method __iter__ (line 92) | def __iter__(self):
method __len__ (line 128) | def __len__(self):
method set_epoch (line 131) | def set_epoch(self, epoch):
FILE: mmdet/datasets/repeat_dataset.py
class RepeatDataset (line 4) | class RepeatDataset(object):
method __init__ (line 6) | def __init__(self, dataset, times):
method __getitem__ (line 15) | def __getitem__(self, idx):
method __len__ (line 18) | def __len__(self):
FILE: mmdet/datasets/transforms.py
class ImageTransform (line 8) | class ImageTransform(object):
method __init__ (line 18) | def __init__(self,
method __call__ (line 28) | def __call__(self, img, scale, flip=False, keep_ratio=True):
function bbox_flip (line 49) | def bbox_flip(bboxes, img_shape):
class BboxTransform (line 64) | class BboxTransform(object):
method __init__ (line 72) | def __init__(self, max_num_gts=None):
method __call__ (line 75) | def __call__(self, bboxes, img_shape, scale_factor, flip=False):
class MaskTransform (line 90) | class MaskTransform(object):
method __call__ (line 98) | def __call__(self, masks, pad_shape, scale_factor, flip=False):
class Numpy2Tensor (line 112) | class Numpy2Tensor(object):
method __init__ (line 114) | def __init__(self):
method __call__ (line 117) | def __call__(self, *args):
FILE: mmdet/datasets/utils.py
function to_tensor (line 15) | def to_tensor(data):
function random_scale (line 36) | def random_scale(img_scales, mode='range'):
function show_ann (line 70) | def show_ann(coco, img, ann_info):
function get_dataset (line 77) | def get_dataset(data_cfg):
FILE: mmdet/datasets/voc.py
class VOCDataset (line 4) | class VOCDataset(XMLDataset):
method __init__ (line 11) | def __init__(self, **kwargs):
FILE: mmdet/datasets/xml_style.py
class XMLDataset (line 10) | class XMLDataset(CustomDataset):
method __init__ (line 12) | def __init__(self, **kwargs):
method load_annotations (line 16) | def load_annotations(self, ann_file):
method get_ann_info (line 32) | def get_ann_info(self, idx):
FILE: mmdet/models/anchor_heads/anchor_head.py
class AnchorHead (line 16) | class AnchorHead(nn.Module):
method __init__ (line 33) | def __init__(self,
method _init_layers (line 72) | def _init_layers(self):
method init_weights (line 77) | def init_weights(self):
method forward_single (line 81) | def forward_single(self, x):
method forward (line 86) | def forward(self, feats):
method get_anchors (line 89) | def get_anchors(self, featmap_sizes, img_metas):
method loss_single (line 128) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
method loss (line 172) | def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
method get_bboxes (line 210) | def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,
method get_bboxes_single (line 236) | def get_bboxes_single(self,
FILE: mmdet/models/anchor_heads/retina_head.py
class RetinaHead (line 11) | class RetinaHead(AnchorHead):
method __init__ (line 13) | def __init__(self,
method _init_layers (line 34) | def _init_layers(self):
method init_weights (line 52) | def init_weights(self):
method forward_single (line 61) | def forward_single(self, x):
FILE: mmdet/models/anchor_heads/rpn_head.py
class RPNHead (line 13) | class RPNHead(AnchorHead):
method __init__ (line 15) | def __init__(self, in_channels, **kwargs):
method _init_layers (line 18) | def _init_layers(self):
method init_weights (line 25) | def init_weights(self):
method forward_single (line 30) | def forward_single(self, x):
method loss (line 37) | def loss(self, cls_scores, bbox_preds, gt_bboxes, img_metas, cfg):
method get_bboxes_single (line 43) | def get_bboxes_single(self,
FILE: mmdet/models/anchor_heads/ssd_head.py
class SSDHead (line 14) | class SSDHead(AnchorHead):
method __init__ (line 16) | def __init__(self,
method init_weights (line 95) | def init_weights(self):
method forward (line 100) | def forward(self, feats):
method loss_single (line 109) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
method loss (line 133) | def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
FILE: mmdet/models/backbones/resnet.py
function conv3x3 (line 14) | def conv3x3(in_planes, out_planes, stride=1, dilation=1):
class BasicBlock (line 26) | class BasicBlock(nn.Module):
method __init__ (line 29) | def __init__(self,
method norm1 (line 55) | def norm1(self):
method norm2 (line 59) | def norm2(self):
method forward (line 62) | def forward(self, x):
class Bottleneck (line 81) | class Bottleneck(nn.Module):
method __init__ (line 84) | def __init__(self,
method norm1 (line 176) | def norm1(self):
method norm2 (line 180) | def norm2(self):
method norm3 (line 184) | def norm3(self):
method forward (line 187) | def forward(self, x):
function make_res_layer (line 229) | def make_res_layer(block,
class ResNet (line 280) | class ResNet(nn.Module):
method __init__ (line 312) | def __init__(self,
method norm1 (line 380) | def norm1(self):
method _make_stem_layer (line 383) | def _make_stem_layer(self):
method _freeze_stages (line 392) | def _freeze_stages(self):
method init_weights (line 403) | def init_weights(self, pretrained=None):
method forward (line 429) | def forward(self, x):
method train (line 445) | def train(self, mode=True):
FILE: mmdet/models/backbones/resnext.py
class Bottleneck (line 12) | class Bottleneck(_Bottleneck):
method __init__ (line 14) | def __init__(self, *args, groups=1, base_width=4, **kwargs):
function make_res_layer (line 87) | def make_res_layer(block,
class ResNeXt (line 144) | class ResNeXt(ResNet):
method __init__ (line 176) | def __init__(self, groups=1, base_width=4, **kwargs):
FILE: mmdet/models/backbones/ssd_vgg.py
class SSDVGG (line 13) | class SSDVGG(VGG):
method __init__ (line 19) | def __init__(self,
method init_weights (line 55) | def init_weights(self, pretrained=None):
method forward (line 76) | def forward(self, x):
method _make_extra_layers (line 92) | def _make_extra_layers(self, outplanes):
class L2Norm (line 119) | class L2Norm(nn.Module):
method __init__ (line 121) | def __init__(self, n_dims, scale=20., eps=1e-10):
method forward (line 128) | def forward(self, x):
FILE: mmdet/models/bbox_heads/bbox_head.py
class BBoxHead (line 11) | class BBoxHead(nn.Module):
method __init__ (line 15) | def __init__(self,
method init_weights (line 49) | def init_weights(self):
method forward (line 57) | def forward(self, x):
method get_target (line 65) | def get_target(self, sampling_results, gt_bboxes, gt_labels,
method loss (line 83) | def loss(self,
method get_det_bboxes (line 104) | def get_det_bboxes(self,
method refine_bboxes (line 134) | def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
method regress_by_class (line 174) | def regress_by_class(self, rois, label, bbox_pred, img_meta):
FILE: mmdet/models/bbox_heads/convfc_bbox_head.py
class ConvFCBBoxHead (line 9) | class ConvFCBBoxHead(BBoxHead):
method __init__ (line 18) | def __init__(self,
method _add_conv_fc_branch (line 82) | def _add_conv_fc_branch(self,
method init_weights (line 123) | def init_weights(self):
method forward (line 131) | def forward(self, x):
class SharedFCBBoxHead (line 171) | class SharedFCBBoxHead(ConvFCBBoxHead):
method __init__ (line 173) | def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):
FILE: mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py
class ConvFCRoIHeadEnhance (line 8) | class ConvFCRoIHeadEnhance(BBoxHead):
method __init__ (line 17) | def __init__(self,
method _add_conv_fc_branch (line 85) | def _add_conv_fc_branch(self,
method init_weights (line 126) | def init_weights(self):
method forward (line 134) | def forward(self, x, enhanced_feature=None):
FILE: mmdet/models/bbox_heads/graph_bbox_head.py
class GraphBBoxHead (line 10) | class GraphBBoxHead(BBoxHead):
method __init__ (line 19) | def __init__(self,
method _add_conv_fc_branch (line 87) | def _add_conv_fc_branch(self,
method init_weights (line 131) | def init_weights(self):
method forward (line 140) | def forward(self, x, geom_f, bs):
method loss (line 211) | def loss(self, cls_score, bbox_pred, A_pred, A_gt, labels, label_weigh...
method propagate_em (line 232) | def propagate_em(self, x, A, W):
FILE: mmdet/models/builder.py
function _build_module (line 7) | def _build_module(cfg, registry, default_args):
function build (line 26) | def build(cfg, registry, default_args=None):
function build_backbone (line 34) | def build_backbone(cfg):
function build_neck (line 38) | def build_neck(cfg):
function build_roi_extractor (line 42) | def build_roi_extractor(cfg):
function build_head (line 46) | def build_head(cfg):
function build_detector (line 50) | def build_detector(cfg, train_cfg=None, test_cfg=None):
FILE: mmdet/models/detectors/base.py
class BaseDetector (line 12) | class BaseDetector(nn.Module):
method __init__ (line 17) | def __init__(self):
method with_neck (line 21) | def with_neck(self):
method with_bbox (line 25) | def with_bbox(self):
method with_mask (line 29) | def with_mask(self):
method extract_feat (line 33) | def extract_feat(self, imgs):
method extract_feats (line 36) | def extract_feats(self, imgs):
method forward_train (line 42) | def forward_train(self, imgs, img_metas, **kwargs):
method simple_test (line 46) | def simple_test(self, img, img_meta, **kwargs):
method aug_test (line 50) | def aug_test(self, imgs, img_metas, **kwargs):
method init_weights (line 53) | def init_weights(self, pretrained=None):
method forward_test (line 58) | def forward_test(self, imgs, img_metas, **kwargs):
method forward (line 78) | def forward(self, img, img_meta, return_loss=True, **kwargs):
method show_result (line 84) | def show_result(self,
FILE: mmdet/models/detectors/cascade_rcnn.py
class CascadeRCNN (line 15) | class CascadeRCNN(BaseDetector, RPNTestMixin):
method __init__ (line 17) | def __init__(self,
method with_rpn (line 80) | def with_rpn(self):
method init_weights (line 83) | def init_weights(self, pretrained=None):
method extract_feat (line 102) | def extract_feat(self, img):
method forward_train (line 108) | def forward_train(self,
method simple_test (line 190) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 307) | def aug_test(self, img, img_meta, proposals=None, rescale=False):
method show_result (line 310) | def show_result(self, data, result, img_norm_cfg, **kwargs):
FILE: mmdet/models/detectors/fast_rcnn.py
class FastRCNN (line 6) | class FastRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
method forward_test (line 29) | def forward_test(self, imgs, img_metas, proposals, **kwargs):
FILE: mmdet/models/detectors/faster_rcnn.py
class FasterRCNN (line 6) | class FasterRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdet/models/detectors/hkrm_rcnn.py
class HKRMRCNN (line 14) | class HKRMRCNN(BaseDetector, RPNTestMixin, BBoxTestMixin,
method __init__ (line 17) | def __init__(self,
method with_rpn (line 70) | def with_rpn(self):
method init_weights (line 73) | def init_weights(self, pretrained=None):
method extract_feat (line 90) | def extract_feat(self, img):
method forward_train (line 96) | def forward_train(self,
method simple_test (line 217) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 238) | def aug_test(self, imgs, img_metas, rescale=False):
method simple_test_bboxes_hkrm (line 267) | def simple_test_bboxes_hkrm(self,
method aug_test_bboxes_hkrm (line 305) | def aug_test_bboxes_hkrm(self, feats, img_metas, proposal_list, rcnn_t...
FILE: mmdet/models/detectors/mask_rcnn.py
class MaskRCNN (line 6) | class MaskRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdet/models/detectors/reasoning_rcnn.py
class ReasoningRCNN (line 19) | class ReasoningRCNN(BaseDetector, RPNTestMixin):
method __init__ (line 21) | def __init__(self,
method with_rpn (line 131) | def with_rpn(self):
method init_weights (line 134) | def init_weights(self, pretrained=None):
method extract_feat (line 154) | def extract_feat(self, img):
method forward_upper_neck (line 160) | def forward_upper_neck(self, x, stage):
method forward_train (line 167) | def forward_train(self,
method simple_test (line 299) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 469) | def aug_test(self, img, img_meta, proposals=None, rescale=False):
method show_result (line 472) | def show_result(self, data, result, img_norm_cfg, **kwargs):
FILE: mmdet/models/detectors/retinanet.py
class RetinaNet (line 6) | class RetinaNet(SingleStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdet/models/detectors/rpn.py
class RPN (line 11) | class RPN(BaseDetector, RPNTestMixin):
method __init__ (line 13) | def __init__(self,
method init_weights (line 28) | def init_weights(self, pretrained=None):
method extract_feat (line 35) | def extract_feat(self, img):
method forward_train (line 41) | def forward_train(self, img, img_meta, gt_bboxes=None):
method simple_test (line 52) | def simple_test(self, img, img_meta, rescale=False):
method aug_test (line 61) | def aug_test(self, imgs, img_metas, rescale=False):
method show_result (line 74) | def show_result(self, data, result, img_norm_cfg):
FILE: mmdet/models/detectors/sgrn.py
class ThreeStageGraphDetector (line 13) | class ThreeStageGraphDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
method __init__ (line 16) | def __init__(self,
method with_rpn (line 83) | def with_rpn(self):
method init_weights (line 86) | def init_weights(self, pretrained=None):
method extract_feat (line 103) | def extract_feat(self, img):
method forward_train (line 109) | def forward_train(self,
method simple_test (line 299) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method simple_test_bboxes_ms (line 330) | def simple_test_bboxes_ms(self,
method aug_test (line 472) | def aug_test(self, imgs, img_metas, rescale=False):
method _compute_pseudo (line 501) | def _compute_pseudo(self, bb_centre):
method _create_neighbourhood (line 529) | def _create_neighbourhood(self,
method _create_neighbourhood_feat (line 572) | def _create_neighbourhood_feat(self, image, top_ind):
method _create_neighbourhood_pseudo (line 590) | def _create_neighbourhood_pseudo(self, pseudo, top_ind):
class GraphLearner (line 607) | class GraphLearner(nn.Module):
method __init__ (line 608) | def __init__(self, in_feature_dim, combined_feature_dim, dropout=0.5):
method forward (line 632) | def forward(self, graph_nodes):
class NeighbourhoodGraphConvolution (line 658) | class NeighbourhoodGraphConvolution(Module):
method __init__ (line 664) | def __init__(self,
method init_parameters (line 699) | def init_parameters(self):
method forward (line 706) | def forward(self, neighbourhood_features, neighbourhood_pseudo_coord):
method get_gaussian_weights (line 733) | def get_gaussian_weights(self, pseudo_coord):
method convolution (line 760) | def convolution(self, neighbourhood, weights):
FILE: mmdet/models/detectors/single_stage.py
class SingleStageDetector (line 10) | class SingleStageDetector(BaseDetector):
method __init__ (line 12) | def __init__(self,
method init_weights (line 28) | def init_weights(self, pretrained=None):
method extract_feat (line 39) | def extract_feat(self, img):
method forward_train (line 45) | def forward_train(self, img, img_metas, gt_bboxes, gt_labels):
method simple_test (line 52) | def simple_test(self, img, img_meta, rescale=False):
method aug_test (line 63) | def aug_test(self, imgs, img_metas, rescale=False):
FILE: mmdet/models/detectors/test_mixins.py
class RPNTestMixin (line 5) | class RPNTestMixin(object):
method simple_test_rpn (line 7) | def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
method aug_test_rpn (line 13) | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
class BBoxTestMixin (line 28) | class BBoxTestMixin(object):
method simple_test_bboxes (line 30) | def simple_test_bboxes(self,
method aug_test_bboxes (line 53) | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_c...
class MaskTestMixin (line 88) | class MaskTestMixin(object):
method simple_test_mask (line 90) | def simple_test_mask(self,
method aug_test_mask (line 115) | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
FILE: mmdet/models/detectors/two_stage.py
class TwoStageDetector (line 12) | class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
method __init__ (line 15) | def __init__(self,
method with_rpn (line 53) | def with_rpn(self):
method init_weights (line 56) | def init_weights(self, pretrained=None):
method extract_feat (line 74) | def extract_feat(self, img):
method forward_train (line 80) | def forward_train(self,
method simple_test (line 155) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 176) | def aug_test(self, imgs, img_metas, rescale=False):
FILE: mmdet/models/mask_heads/fcn_mask_head.py
class FCNMaskHead (line 13) | class FCNMaskHead(nn.Module):
method __init__ (line 15) | def __init__(self,
method init_weights (line 73) | def init_weights(self):
method forward (line 81) | def forward(self, x):
method get_target (line 91) | def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
method loss (line 100) | def loss(self, mask_pred, mask_targets, labels):
method get_seg_masks (line 110) | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
FILE: mmdet/models/necks/fpn.py
class FPN (line 10) | class FPN(nn.Module):
method __init__ (line 12) | def __init__(self,
method init_weights (line 90) | def init_weights(self):
method forward (line 95) | def forward(self, inputs):
FILE: mmdet/models/registry.py
class Registry (line 4) | class Registry(object):
method __init__ (line 6) | def __init__(self, name):
method name (line 11) | def name(self):
method module_dict (line 15) | def module_dict(self):
method _register_module (line 18) | def _register_module(self, module_class):
method register_module (line 34) | def register_module(self, cls):
FILE: mmdet/models/roi_extractors/single_level.py
class SingleRoIExtractor (line 11) | class SingleRoIExtractor(nn.Module):
method __init__ (line 24) | def __init__(self,
method num_inputs (line 36) | def num_inputs(self):
method init_weights (line 40) | def init_weights(self):
method build_roi_layers (line 43) | def build_roi_layers(self, layer_cfg, featmap_strides):
method map_roi_levels (line 52) | def map_roi_levels(self, rois, num_levels):
method forward (line 73) | def forward(self, feats, rois):
FILE: mmdet/models/utils/conv_module.py
class ConvModule (line 9) | class ConvModule(nn.Module):
method __init__ (line 11) | def __init__(self,
method norm (line 68) | def norm(self):
method init_weights (line 71) | def init_weights(self):
method forward (line 77) | def forward(self, x, activate=True, norm=True):
FILE: mmdet/models/utils/norm.py
function build_norm_layer (line 13) | def build_norm_layer(cfg, num_features, postfix=''):
FILE: mmdet/models/utils/weight_init.py
function xavier_init (line 5) | def xavier_init(module, gain=1, bias=0, distribution='normal'):
function normal_init (line 15) | def normal_init(module, mean=0, std=1, bias=0):
function uniform_init (line 21) | def uniform_init(module, a=0, b=1, bias=0):
function kaiming_init (line 27) | def kaiming_init(module,
function bias_init_with_prob (line 43) | def bias_init_with_prob(prior_prob):
FILE: mmdet/ops/dcn/functions/deform_conv.py
class DeformConvFunction (line 8) | class DeformConvFunction(Function):
method forward (line 11) | def forward(ctx,
method backward (line 55) | def backward(ctx, grad_output):
method _output_size (line 92) | def _output_size(input, weight, padding, dilation, stride):
class ModulatedDeformConvFunction (line 108) | class ModulatedDeformConvFunction(Function):
method forward (line 111) | def forward(ctx,
method backward (line 146) | def backward(ctx, grad_output):
method _infer_shape (line 168) | def _infer_shape(ctx, input, weight):
FILE: mmdet/ops/dcn/functions/deform_pool.py
class DeformRoIPoolingFunction (line 7) | class DeformRoIPoolingFunction(Function):
method forward (line 10) | def forward(ctx,
method backward (line 50) | def backward(ctx, grad_output):
FILE: mmdet/ops/dcn/modules/deform_conv.py
class DeformConv (line 10) | class DeformConv(nn.Module):
method __init__ (line 12) | def __init__(self,
method reset_parameters (line 46) | def reset_parameters(self):
method forward (line 53) | def forward(self, input, offset):
class ModulatedDeformConv (line 59) | class ModulatedDeformConv(nn.Module):
method __init__ (line 61) | def __init__(self,
method reset_parameters (line 91) | def reset_parameters(self):
method forward (line 100) | def forward(self, input, offset, mask):
class ModulatedDeformConvPack (line 106) | class ModulatedDeformConvPack(ModulatedDeformConv):
method __init__ (line 108) | def __init__(self,
method init_offset (line 132) | def init_offset(self):
method forward (line 136) | def forward(self, input):
FILE: mmdet/ops/dcn/modules/deform_pool.py
class DeformRoIPooling (line 6) | class DeformRoIPooling(nn.Module):
method __init__ (line 8) | def __init__(self,
method forward (line 27) | def forward(self, data, rois, offset):
class DeformRoIPoolingPack (line 36) | class DeformRoIPoolingPack(DeformRoIPooling):
method __init__ (line 38) | def __init__(self,
method forward (line 66) | def forward(self, data, rois):
class ModulatedDeformRoIPoolingPack (line 89) | class ModulatedDeformRoIPoolingPack(DeformRoIPooling):
method __init__ (line 91) | def __init__(self,
method forward (line 128) | def forward(self, data, rois):
FILE: mmdet/ops/dcn/src/deform_conv_cuda.cpp
function shape_check (line 62) | void shape_check(at::Tensor input, at::Tensor offset,
function deform_conv_forward_cuda (line 153) | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
function deform_conv_backward_input_cuda (line 249) | int deform_conv_backward_input_cuda(
function deform_conv_backward_parameters_cuda (line 347) | int deform_conv_backward_parameters_cuda(
function modulated_deform_conv_cuda_forward (line 446) | void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor wei...
function modulated_deform_conv_cuda_backward (line 520) | void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor we...
function PYBIND11_MODULE (line 623) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
FILE: mmdet/ops/dcn/src/deform_pool_cuda.cpp
function deform_psroi_pooling_cuda_forward (line 53) | void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
function deform_psroi_pooling_cuda_backward (line 92) | void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
function PYBIND11_MODULE (line 138) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
FILE: mmdet/ops/nms/nms_wrapper.py
function nms (line 9) | def nms(dets, iou_thr, device_id=None):
function soft_nms (line 37) | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
FILE: mmdet/ops/nms/setup.py
function customize_compiler_for_nvcc (line 25) | def customize_compiler_for_nvcc(self):
class custom_build_ext (line 63) | class custom_build_ext(build_ext):
method build_extensions (line 65) | def build_extensions(self):
FILE: mmdet/ops/roi_align/functions/roi_align.py
class RoIAlignFunction (line 6) | class RoIAlignFunction(Function):
method forward (line 9) | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
method backward (line 39) | def backward(ctx, grad_output):
FILE: mmdet/ops/roi_align/modules/roi_align.py
class RoIAlign (line 5) | class RoIAlign(Module):
method __init__ (line 7) | def __init__(self, out_size, spatial_scale, sample_num=0):
method forward (line 14) | def forward(self, features, rois):
FILE: mmdet/ops/roi_align/src/roi_align_cuda.cpp
function roi_align_forward_cuda (line 27) | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
function roi_align_backward_cuda (line 55) | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
function PYBIND11_MODULE (line 82) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdet/ops/roi_pool/functions/roi_pool.py
class RoIPoolFunction (line 7) | class RoIPoolFunction(Function):
method forward (line 10) | def forward(ctx, features, rois, out_size, spatial_scale):
method backward (line 39) | def backward(ctx, grad_output):
FILE: mmdet/ops/roi_pool/modules/roi_pool.py
class RoIPool (line 5) | class RoIPool(Module):
method __init__ (line 7) | def __init__(self, out_size, spatial_scale):
method forward (line 13) | def forward(self, features, rois):
FILE: mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
function roi_pooling_forward_cuda (line 26) | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
function roi_pooling_backward_cuda (line 54) | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
function PYBIND11_MODULE (line 83) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: setup.py
function readme (line 7) | def readme():
function get_git_hash (line 22) | def get_git_hash():
function get_hash (line 48) | def get_hash():
function write_version_py (line 63) | def write_version_py():
function get_version (line 77) | def get_version():
FILE: tools/coco_eval.py
function main (line 6) | def main():
FILE: tools/convert_datasets/pascal_voc.py
function parse_xml (line 13) | def parse_xml(args):
function cvt_annotations (line 67) | def cvt_annotations(devkit_path, years, split, out_file):
function parse_args (line 94) | def parse_args():
function main (line 103) | def main():
FILE: tools/test.py
function single_test (line 14) | def single_test(model, data_loader, show=False):
function _data_func (line 35) | def _data_func(data, device_id):
function parse_args (line 40) | def parse_args():
function main (line 63) | def main():
FILE: tools/train.py
function parse_args (line 14) | def parse_args():
function main (line 42) | def main():
FILE: tools/voc_eval.py
function voc_eval (line 10) | def voc_eval(result_file, dataset, iou_thr=0.5):
function main (line 46) | def main():
Condensed preview — 179 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (705K chars).
[
{
"path": ".gitignore",
"chars": 1300,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\ntools/work_dirs/\n\n# C extensions\n*.so\n\n# Distr"
},
{
"path": ".travis.yml",
"chars": 111,
"preview": "dist: trusty\nlanguage: python\n\ninstall:\n - pip install flake8\n\npython:\n - \"3.5\"\n - \"3.6\"\n\nscript:\n - flake8"
},
{
"path": "INSTALL.md",
"chars": 1464,
"preview": "## Installation\n\n### Requirements\n\n- Linux (tested on Ubuntu 16.04 and CentOS 7.2)\n- Python 3.4+\n- PyTorch 0.4.1\n- Cytho"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "MODEL_ZOO.md",
"chars": 31264,
"preview": "# Benchmark and Model Zoo\n\n## Environment\n\n### Hardware\n\n- 8 NVIDIA Tesla V100 GPUs\n- Intel Xeon 4114 CPU @ 2.20GHz\n\n###"
},
{
"path": "README.md",
"chars": 525,
"preview": "# Environments\n- pytorch 0.3.0/0.4.1\n- oldest mmdetection\n\n\n# Reasoning-RCNN\nReasoning-RCNN: Unifying Adaptive Global Re"
},
{
"path": "TECHNICAL_DETAILS.md",
"chars": 3594,
"preview": "## Overview\n\nIn this section, we will introduce the main units of training a detector:\ndata loading, model and iteration"
},
{
"path": "compile.sh",
"chars": 495,
"preview": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\necho \"Building roi align op...\"\ncd mmdet/ops/roi_align\nif [ -d \"build\" "
},
{
"path": "configs/ade_faster_rcnn_r101_fpn_1x.py",
"chars": 4405,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n "
},
{
"path": "configs/coco_faster_rcnn_r101_fpn_1x.py",
"chars": 4528,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n "
},
{
"path": "configs/coco_sgrb_fpn_ms.py",
"chars": 5173,
"preview": "# model settings\nmodel = dict(\n type='ThreeStageGraphDetector',\n pretrained='modelzoo://resnet101',\n backbone=d"
},
{
"path": "configs/hkrm/ade_faster_rcnn_r50_fpn_1x.py",
"chars": 4702,
"preview": "# model settings\nmodel = dict(\n type='HKRMRCNN',\n pretrained='modelzoo://resnet50',\n adja_gt='/home/cyan/code/m"
},
{
"path": "configs/hkrm/coco_faster_rcnn_r101_fpn_1x.py",
"chars": 4719,
"preview": "# model settings\nmodel = dict(\n type='HKRMRCNN',\n adja_gt= './graph/new_ade_graph_a.pkl',\n adjr_gt= './graph/ne"
},
{
"path": "configs/hkrm/vg_faster_rcnn_r101_fpn_1x.py",
"chars": 4646,
"preview": "# model settings\nmodel = dict(\n type='HKRMRCNN',\n #pretrained='modelzoo://resnet101',\n adja_gt='./graph/new_vg_"
},
{
"path": "configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py",
"chars": 4721,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "configs/pascal_voc/ssd300_voc.py",
"chars": 4023,
"preview": "# model settings\ninput_size = 300\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "configs/pascal_voc/ssd512_voc.py",
"chars": 4042,
"preview": "# model settings\ninput_size = 512\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "configs/rrcnn/ade_reasoning_rcnn_r101_fpn_1x.py",
"chars": 5336,
"preview": "# model settings\nmodel = dict(\n type='ReasoningRCNN',\n num_stages=2,\n adj_gt='./graph/new_ade_graph_r.pkl',\n "
},
{
"path": "configs/rrcnn/coco_reasoning_rcnn_r101_fpn_1x.py",
"chars": 5496,
"preview": "# model settings\nmodel = dict(\n type='ReasoningRCNN',\n num_stages=2,\n adj_gt='./graph/new_COCO_graph_r.pkl', # "
},
{
"path": "configs/rrcnn/vg_reasoning_rcnn_r101_fpn_1x.py",
"chars": 5421,
"preview": "# model settings\nmodel = dict(\n type='ReasoningRCNN',\n num_stages=2,\n adj_gt='./graph/new_COCO_graph_r.pkl',\n "
},
{
"path": "configs/vg_faster_rcnn_r101_fpn_1x.py",
"chars": 4412,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n "
},
{
"path": "configs/vgbig_faster_rcnn_r101_fpn_1x.py",
"chars": 4442,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n "
},
{
"path": "mmdet/__init__.py",
"chars": 92,
"preview": "from .version import __version__, short_version\n\n__all__ = ['__version__', 'short_version']\n"
},
{
"path": "mmdet/apis/__init__.py",
"chars": 278,
"preview": "from .env import init_dist, get_root_logger, set_random_seed\nfrom .train import train_detector\nfrom .inference import in"
},
{
"path": "mmdet/apis/env.py",
"chars": 1514,
"preview": "import logging\nimport os\nimport random\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nimport torch.mu"
},
{
"path": "mmdet/apis/inference.py",
"chars": 1930,
"preview": "import mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.datasets import to_tensor\nfrom mmdet.datasets.transforms import "
},
{
"path": "mmdet/apis/train.py",
"chars": 3964,
"preview": "from __future__ import division\n\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv.runner import Runner, DistS"
},
{
"path": "mmdet/core/__init__.py",
"chars": 300,
"preview": "from .anchor import * # noqa: F401, F403\nfrom .bbox import * # noqa: F401, F403\nfrom .mask import * # noqa: F401, F40"
},
{
"path": "mmdet/core/anchor/__init__.py",
"chars": 135,
"preview": "from .anchor_generator import AnchorGenerator\nfrom .anchor_target import anchor_target\n\n__all__ = ['AnchorGenerator', 'a"
},
{
"path": "mmdet/core/anchor/anchor_generator.py",
"chars": 3117,
"preview": "import torch\n\n\nclass AnchorGenerator(object):\n\n def __init__(self, base_size, scales, ratios, scale_major=True, ctr=N"
},
{
"path": "mmdet/core/anchor/anchor_target.py",
"chars": 7498,
"preview": "import torch\n\nfrom ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta\nfrom ..utils import multi_"
},
{
"path": "mmdet/core/bbox/__init__.py",
"chars": 1046,
"preview": "from .geometry import bbox_overlaps\nfrom .assigners import BaseAssigner, MaxIoUAssigner, AssignResult\nfrom .samplers imp"
},
{
"path": "mmdet/core/bbox/assign_sampling.py",
"chars": 1185,
"preview": "import mmcv\n\nfrom . import assigners, samplers\n\n\ndef build_assigner(cfg, **kwargs):\n if isinstance(cfg, assigners.Bas"
},
{
"path": "mmdet/core/bbox/assigners/__init__.py",
"chars": 187,
"preview": "from .base_assigner import BaseAssigner\nfrom .max_iou_assigner import MaxIoUAssigner\nfrom .assign_result import AssignRe"
},
{
"path": "mmdet/core/bbox/assigners/assign_result.py",
"chars": 664,
"preview": "import torch\n\n\nclass AssignResult(object):\n\n def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):\n "
},
{
"path": "mmdet/core/bbox/assigners/base_assigner.py",
"chars": 195,
"preview": "from abc import ABCMeta, abstractmethod\n\n\nclass BaseAssigner(metaclass=ABCMeta):\n\n @abstractmethod\n def assign(sel"
},
{
"path": "mmdet/core/bbox/assigners/max_iou_assigner.py",
"chars": 6126,
"preview": "import torch\n\nfrom .base_assigner import BaseAssigner\nfrom .assign_result import AssignResult\nfrom ..geometry import bbo"
},
{
"path": "mmdet/core/bbox/bbox_target.py",
"chars": 2974,
"preview": "import torch\n\nfrom .transforms import bbox2delta\nfrom ..utils import multi_apply\n\n\ndef bbox_target(pos_bboxes_list,\n "
},
{
"path": "mmdet/core/bbox/geometry.py",
"chars": 2163,
"preview": "import torch\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):\n \"\"\"Calculate overlap between two s"
},
{
"path": "mmdet/core/bbox/samplers/__init__.py",
"chars": 640,
"preview": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSample"
},
{
"path": "mmdet/core/bbox/samplers/base_sampler.py",
"chars": 2753,
"preview": "from abc import ABCMeta, abstractmethod\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass BaseSampler(m"
},
{
"path": "mmdet/core/bbox/samplers/combined_sampler.py",
"chars": 509,
"preview": "from .base_sampler import BaseSampler\nfrom ..assign_sampling import build_sampler\n\n\nclass CombinedSampler(BaseSampler):\n"
},
{
"path": "mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py",
"chars": 1765,
"preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass InstanceBalancedPosSampler(RandomSamp"
},
{
"path": "mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py",
"chars": 2757,
"preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass IoUBalancedNegSampler(RandomSampler):"
},
{
"path": "mmdet/core/bbox/samplers/ohem_sampler.py",
"chars": 2512,
"preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom ..transforms import bbox2roi\n\n\nclass OHEMSampler(BaseSampler):\n"
},
{
"path": "mmdet/core/bbox/samplers/pseudo_sampler.py",
"chars": 829,
"preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\nclass PseudoSampler(Ba"
},
{
"path": "mmdet/core/bbox/samplers/random_sampler.py",
"chars": 1858,
"preview": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\n\n\nclass RandomSampler(BaseSampler):\n\n def __in"
},
{
"path": "mmdet/core/bbox/samplers/random_sampler_fixnum.py",
"chars": 5756,
"preview": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\ncla"
},
{
"path": "mmdet/core/bbox/samplers/sampling_result.py",
"chars": 790,
"preview": "import torch\n\n\nclass SamplingResult(object):\n\n def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_resul"
},
{
"path": "mmdet/core/bbox/transforms.py",
"chars": 5036,
"preview": "import mmcv\nimport numpy as np\nimport torch\n\n\ndef bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):\n "
},
{
"path": "mmdet/core/evaluation/__init__.py",
"chars": 967,
"preview": "from .class_names import (voc_classes, imagenet_det_classes,\n imagenet_vid_classes, coco_classe"
},
{
"path": "mmdet/core/evaluation/bbox_overlaps.py",
"chars": 1642,
"preview": "import numpy as np\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou'):\n \"\"\"Calculate the ious between each bbox of bbo"
},
{
"path": "mmdet/core/evaluation/class_names.py",
"chars": 5155,
"preview": "import mmcv\n\n\ndef voc_classes():\n return [\n 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'c"
},
{
"path": "mmdet/core/evaluation/coco_utils.py",
"chars": 4807,
"preview": "import mmcv\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\nfrom .recall"
},
{
"path": "mmdet/core/evaluation/eval_hooks.py",
"chars": 6788,
"preview": "import os\nimport os.path as osp\nimport shutil\nimport time\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.runner "
},
{
"path": "mmdet/core/evaluation/mean_ap.py",
"chars": 15761,
"preview": "import mmcv\nimport numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\nfrom .cla"
},
{
"path": "mmdet/core/evaluation/recall.py",
"chars": 5961,
"preview": "import numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\n\n\ndef _recalls(all_io"
},
{
"path": "mmdet/core/loss/__init__.py",
"chars": 508,
"preview": "from .losses import (weighted_nll_loss, weighted_cross_entropy,\n weighted_binary_cross_entropy, sigm"
},
{
"path": "mmdet/core/loss/losses.py",
"chars": 3790,
"preview": "# TODO merge naive and weighted loss.\nimport torch\nimport torch.nn.functional as F\n\n\ndef weighted_nll_loss(pred, label, "
},
{
"path": "mmdet/core/mask/__init__.py",
"chars": 128,
"preview": "from .utils import split_combined_polys\nfrom .mask_target import mask_target\n\n__all__ = ['split_combined_polys', 'mask_t"
},
{
"path": "mmdet/core/mask/mask_target.py",
"chars": 1427,
"preview": "import torch\nimport numpy as np\nimport mmcv\n\n\ndef mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_li"
},
{
"path": "mmdet/core/mask/utils.py",
"chars": 1172,
"preview": "import mmcv\n\n\ndef split_combined_polys(polys, poly_lens, polys_per_mask):\n \"\"\"Split the combined 1-D polys into masks"
},
{
"path": "mmdet/core/post_processing/__init__.py",
"chars": 283,
"preview": "from .bbox_nms import multiclass_nms\nfrom .merge_augs import (merge_aug_proposals, merge_aug_bboxes,\n "
},
{
"path": "mmdet/core/post_processing/bbox_nms.py",
"chars": 1980,
"preview": "import torch\n\nfrom mmdet.ops.nms import nms_wrapper\n\n\ndef multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg,"
},
{
"path": "mmdet/core/post_processing/merge_augs.py",
"chars": 3317,
"preview": "import torch\n\nimport numpy as np\n\nfrom mmdet.ops import nms\nfrom ..bbox import bbox_mapping_back\n\n\ndef merge_aug_proposa"
},
{
"path": "mmdet/core/utils/__init__.py",
"chars": 210,
"preview": "from .dist_utils import allreduce_grads, DistOptimizerHook\nfrom .misc import tensor2imgs, unmap, multi_apply\n\n__all__ = "
},
{
"path": "mmdet/core/utils/dist_utils.py",
"chars": 1941,
"preview": "from collections import OrderedDict\n\nimport torch.distributed as dist\nfrom torch._utils import (_flatten_dense_tensors, "
},
{
"path": "mmdet/core/utils/misc.py",
"chars": 1108,
"preview": "from functools import partial\n\nimport mmcv\nimport numpy as np\nfrom six.moves import map, zip\n\n\ndef tensor2imgs(tensor, m"
},
{
"path": "mmdet/datasets/__init__.py",
"chars": 657,
"preview": "from .custom import CustomDataset\nfrom .xml_style import XMLDataset\nfrom .coco import CocoDataset\nfrom .voc import VOCDa"
},
{
"path": "mmdet/datasets/coco.py",
"chars": 4871,
"preview": "import numpy as np\nfrom pycocotools.coco import COCO\n\nfrom .custom import CustomDataset\n\n\nclass CocoDataset(CustomDatase"
},
{
"path": "mmdet/datasets/concat_dataset.py",
"chars": 698,
"preview": "import numpy as np\nfrom torch.utils.data.dataset import ConcatDataset as _ConcatDataset\n\n\nclass ConcatDataset(_ConcatDat"
},
{
"path": "mmdet/datasets/custom.py",
"chars": 11042,
"preview": "import os.path as osp\n\nimport mmcv\nimport numpy as np\nfrom mmcv.parallel import DataContainer as DC\nfrom torch.utils.dat"
},
{
"path": "mmdet/datasets/extra_aug.py",
"chars": 5634,
"preview": "import mmcv\nimport numpy as np\nfrom numpy import random\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\n\n"
},
{
"path": "mmdet/datasets/loader/__init__.py",
"chars": 183,
"preview": "from .build_loader import build_dataloader\nfrom .sampler import GroupSampler, DistributedGroupSampler\n\n__all__ = [\n '"
},
{
"path": "mmdet/datasets/loader/build_loader.py",
"chars": 1356,
"preview": "from functools import partial\n\nfrom mmcv.runner import get_dist_info\nfrom mmcv.parallel import collate\nfrom torch.utils."
},
{
"path": "mmdet/datasets/loader/sampler.py",
"chars": 4682,
"preview": "from __future__ import division\n\nimport math\nimport torch\nimport numpy as np\n\nfrom torch.distributed import get_world_si"
},
{
"path": "mmdet/datasets/repeat_dataset.py",
"chars": 479,
"preview": "import numpy as np\n\n\nclass RepeatDataset(object):\n\n def __init__(self, dataset, times):\n self.dataset = datase"
},
{
"path": "mmdet/datasets/transforms.py",
"chars": 3723,
"preview": "import mmcv\nimport numpy as np\nimport torch\n\n__all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tenso"
},
{
"path": "mmdet/datasets/utils.py",
"chars": 3683,
"preview": "import copy\nfrom collections import Sequence\n\nimport mmcv\nfrom mmcv.runner import obj_from_dict\nimport torch\n\nimport mat"
},
{
"path": "mmdet/datasets/voc.py",
"chars": 638,
"preview": "from .xml_style import XMLDataset\n\n\nclass VOCDataset(XMLDataset):\n\n CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat'"
},
{
"path": "mmdet/datasets/xml_style.py",
"chars": 2687,
"preview": "import os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom .custom import CustomData"
},
{
"path": "mmdet/models/__init__.py",
"chars": 640,
"preview": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSample"
},
{
"path": "mmdet/models/anchor_heads/__init__.py",
"chars": 194,
"preview": "from .anchor_head import AnchorHead\nfrom .rpn_head import RPNHead\nfrom .retina_head import RetinaHead\nfrom .ssd_head imp"
},
{
"path": "mmdet/models/anchor_heads/anchor_head.py",
"chars": 11539,
"preview": "from __future__ import division\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n"
},
{
"path": "mmdet/models/anchor_heads/retina_head.py",
"chars": 2459,
"preview": "import numpy as np\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n\nfrom .anchor_head import AnchorHead\nfrom ..re"
},
{
"path": "mmdet/models/anchor_heads/rpn_head.py",
"chars": 3870,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core imp"
},
{
"path": "mmdet/models/anchor_heads/ssd_head.py",
"chars": 7420,
"preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\n"
},
{
"path": "mmdet/models/backbones/__init__.py",
"chars": 127,
"preview": "from .resnet import ResNet\nfrom .resnext import ResNeXt\nfrom .ssd_vgg import SSDVGG\n\n__all__ = ['ResNet', 'ResNeXt', 'SS"
},
{
"path": "mmdet/models/backbones/resnet.py",
"chars": 14617,
"preview": "import logging\n\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\n\nfrom mmcv.cnn import constant_init, kaiming_i"
},
{
"path": "mmdet/models/backbones/resnext.py",
"chars": 7229,
"preview": "import math\n\nimport torch.nn as nn\n\nfrom mmdet.ops import DeformConv, ModulatedDeformConv\nfrom .resnet import Bottleneck"
},
{
"path": "mmdet/models/backbones/ssd_vgg.py",
"chars": 4510,
"preview": "import logging\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (VGG, xavier_ini"
},
{
"path": "mmdet/models/bbox_heads/__init__.py",
"chars": 217,
"preview": "from .bbox_head import BBoxHead\nfrom .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead\nfrom .graph_bbox_head imp"
},
{
"path": "mmdet/models/bbox_heads/bbox_head.py",
"chars": 7406,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.core import (delta2bbox, multiclass_nms, "
},
{
"path": "mmdet/models/bbox_heads/convfc_bbox_head.py",
"chars": 7019,
"preview": "import torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HE"
},
{
"path": "mmdet/models/bbox_heads/convfc_bbox_head_enhanced.py",
"chars": 6868,
"preview": "import torch\nimport torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..utils import ConvModule\n\n\nclass ConvFCRoIHead"
},
{
"path": "mmdet/models/bbox_heads/graph_bbox_head.py",
"chars": 9497,
"preview": "import torch.nn as nn\nimport torch\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import Conv"
},
{
"path": "mmdet/models/builder.py",
"chars": 1500,
"preview": "import mmcv\nfrom torch import nn\n\nfrom .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS\n\n\ndef _build_"
},
{
"path": "mmdet/models/detectors/__init__.py",
"chars": 463,
"preview": "from .base import BaseDetector\nfrom .single_stage import SingleStageDetector\nfrom .two_stage import TwoStageDetector\nfro"
},
{
"path": "mmdet/models/detectors/base.py",
"chars": 4354,
"preview": "import logging\nfrom abc import ABCMeta, abstractmethod\n\nimport mmcv\nimport numpy as np\nimport torch.nn as nn\nimport pyco"
},
{
"path": "mmdet/models/detectors/cascade_rcnn.py",
"chars": 13078,
"preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins im"
},
{
"path": "mmdet/models/detectors/fast_rcnn.py",
"chars": 1691,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FastRCNN(Two"
},
{
"path": "mmdet/models/detectors/faster_rcnn.py",
"chars": 704,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FasterRCNN(T"
},
{
"path": "mmdet/models/detectors/hkrm_rcnn.py",
"chars": 14274,
"preview": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin,"
},
{
"path": "mmdet/models/detectors/mask_rcnn.py",
"chars": 849,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass MaskRCNN(Two"
},
{
"path": "mmdet/models/detectors/reasoning_rcnn.py",
"chars": 20528,
"preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins im"
},
{
"path": "mmdet/models/detectors/retinanet.py",
"chars": 488,
"preview": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass Retina"
},
{
"path": "mmdet/models/detectors/rpn.py",
"chars": 3195,
"preview": "import mmcv\n\nfrom mmdet.core import tensor2imgs, bbox_mapping\nfrom .base import BaseDetector\nfrom .test_mixins import RP"
},
{
"path": "mmdet/models/detectors/sgrn.py",
"chars": 31745,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.parameter import Parameter\nfrom torch.n"
},
{
"path": "mmdet/models/detectors/single_stage.py",
"chars": 2167,
"preview": "import torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet"
},
{
"path": "mmdet/models/detectors/test_mixins.py",
"chars": 6167,
"preview": "from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,\n merge_aug_bboxes, merge_aug"
},
{
"path": "mmdet/models/detectors/two_stage.py",
"chars": 7582,
"preview": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin,"
},
{
"path": "mmdet/models/mask_heads/__init__.py",
"chars": 66,
"preview": "from .fcn_mask_head import FCNMaskHead\n\n__all__ = ['FCNMaskHead']\n"
},
{
"path": "mmdet/models/mask_heads/fcn_mask_head.py",
"chars": 6283,
"preview": "import mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nimport torch\nimport torch.nn as nn\n\nfrom ..registry "
},
{
"path": "mmdet/models/necks/__init__.py",
"chars": 40,
"preview": "from .fpn import FPN\n\n__all__ = ['FPN']\n"
},
{
"path": "mmdet/models/necks/fpn.py",
"chars": 4744,
"preview": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\nfrom ..utils import ConvModule\nf"
},
{
"path": "mmdet/models/registry.py",
"chars": 1144,
"preview": "import torch.nn as nn\n\n\nclass Registry(object):\n\n def __init__(self, name):\n self._name = name\n self._m"
},
{
"path": "mmdet/models/roi_extractors/__init__.py",
"chars": 79,
"preview": "from .single_level import SingleRoIExtractor\n\n__all__ = ['SingleRoIExtractor']\n"
},
{
"path": "mmdet/models/roi_extractors/single_level.py",
"chars": 3075,
"preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom mmdet import ops\nfrom ..registry import ROI_EX"
},
{
"path": "mmdet/models/utils/__init__.py",
"chars": 339,
"preview": "from .conv_module import ConvModule\nfrom .norm import build_norm_layer\nfrom .weight_init import (xavier_init, normal_ini"
},
{
"path": "mmdet/models/utils/conv_module.py",
"chars": 2871,
"preview": "import warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import kaiming_init, constant_init\n\nfrom .norm import build_norm_la"
},
{
"path": "mmdet/models/utils/norm.py",
"chars": 1687,
"preview": "import torch.nn as nn\n\n\nnorm_cfg = {\n # format: layer_type: (abbreviation, module)\n 'BN': ('bn', nn.BatchNorm2d),\n"
},
{
"path": "mmdet/models/utils/weight_init.py",
"chars": 1455,
"preview": "import numpy as np\nimport torch.nn as nn\n\n\ndef xavier_init(module, gain=1, bias=0, distribution='normal'):\n assert di"
},
{
"path": "mmdet/ops/__init__.py",
"chars": 675,
"preview": "from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack,\n ModulatedDeformRoIPoolingPack, M"
},
{
"path": "mmdet/ops/dcn/__init__.py",
"chars": 622,
"preview": "from .functions.deform_conv import deform_conv, modulated_deform_conv\nfrom .functions.deform_pool import deform_roi_pool"
},
{
"path": "mmdet/ops/dcn/functions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/dcn/functions/deform_conv.py",
"chars": 7291,
"preview": "import torch\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\n\nfrom .. import deform_conv_cu"
},
{
"path": "mmdet/ops/dcn/functions/deform_pool.py",
"chars": 2370,
"preview": "import torch\nfrom torch.autograd import Function\n\nfrom .. import deform_pool_cuda\n\n\nclass DeformRoIPoolingFunction(Funct"
},
{
"path": "mmdet/ops/dcn/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/dcn/modules/deform_conv.py",
"chars": 4714,
"preview": "import math\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn.modules.utils import _pair\n\nfrom ..functions.deform_conv i"
},
{
"path": "mmdet/ops/dcn/modules/deform_pool.py",
"chars": 6313,
"preview": "from torch import nn\n\nfrom ..functions.deform_pool import deform_roi_pooling\n\n\nclass DeformRoIPooling(nn.Module):\n\n d"
},
{
"path": "mmdet/ops/dcn/setup.py",
"chars": 469,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='defor"
},
{
"path": "mmdet/ops/dcn/src/deform_conv_cuda.cpp",
"chars": 30729,
"preview": "// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform"
},
{
"path": "mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu",
"chars": 42171,
"preview": "/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contribu"
},
{
"path": "mmdet/ops/dcn/src/deform_pool_cuda.cpp",
"chars": 6826,
"preview": "// author: Charles Shang\n// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu\n\n// mod"
},
{
"path": "mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu",
"chars": 16025,
"preview": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psro"
},
{
"path": "mmdet/ops/nms/.gitignore",
"chars": 6,
"preview": "*.cpp\n"
},
{
"path": "mmdet/ops/nms/Makefile",
"chars": 124,
"preview": "PYTHON=${PYTHON:-python}\n\nall:\n\techo \"Compiling nms kernels...\"\n\t$(PYTHON) setup.py build_ext --inplace\n\nclean:\n\trm -f *"
},
{
"path": "mmdet/ops/nms/__init__.py",
"chars": 70,
"preview": "from .nms_wrapper import nms, soft_nms\n\n__all__ = ['nms', 'soft_nms']\n"
},
{
"path": "mmdet/ops/nms/cpu_nms.pyx",
"chars": 2241,
"preview": "# --------------------------------------------------------\n# Fast R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed under "
},
{
"path": "mmdet/ops/nms/cpu_soft_nms.pyx",
"chars": 3942,
"preview": "# ----------------------------------------------------------\n# Soft-NMS: Improving Object Detection With One Line of Cod"
},
{
"path": "mmdet/ops/nms/gpu_nms.hpp",
"chars": 180,
"preview": "void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,\n int boxes_dim, float nms_overla"
},
{
"path": "mmdet/ops/nms/gpu_nms.pyx",
"chars": 1433,
"preview": "# --------------------------------------------------------\n# Faster R-CNN\n# Copyright (c) 2015 Microsoft\n# Licensed unde"
},
{
"path": "mmdet/ops/nms/nms_kernel.cu",
"chars": 7120,
"preview": "// ------------------------------------------------------------------\n// Faster R-CNN\n// Copyright (c) 2015 Microsoft\n//"
},
{
"path": "mmdet/ops/nms/nms_wrapper.py",
"chars": 1915,
"preview": "import numpy as np\nimport torch\n\nfrom .gpu_nms import gpu_nms\nfrom .cpu_nms import cpu_nms\nfrom .cpu_soft_nms import cpu"
},
{
"path": "mmdet/ops/nms/setup.py",
"chars": 2485,
"preview": "import os.path as osp\nfrom distutils.core import setup, Extension\n\nimport numpy as np\nfrom Cython.Build import cythonize"
},
{
"path": "mmdet/ops/roi_align/__init__.py",
"chars": 120,
"preview": "from .functions.roi_align import roi_align\nfrom .modules.roi_align import RoIAlign\n\n__all__ = ['roi_align', 'RoIAlign']\n"
},
{
"path": "mmdet/ops/roi_align/functions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/roi_align/functions/roi_align.py",
"chars": 2080,
"preview": "from torch.autograd import Function, Variable\n\nfrom .. import roi_align_cuda\n\n\nclass RoIAlignFunction(Function):\n\n @s"
},
{
"path": "mmdet/ops/roi_align/gradcheck.py",
"chars": 866,
"preview": "import numpy as np\nimport torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(o"
},
{
"path": "mmdet/ops/roi_align/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/roi_align/modules/roi_align.py",
"chars": 535,
"preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_align import RoIAlignFunction\n\n\nclass RoIAlign(Module):\n"
},
{
"path": "mmdet/ops/roi_align/setup.py",
"chars": 332,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='roi_a"
},
{
"path": "mmdet/ops/roi_align/src/roi_align_cuda.cpp",
"chars": 2963,
"preview": "#include <torch/torch.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIAlignForwardLaucher(const at::Tensor features, cons"
},
{
"path": "mmdet/ops/roi_align/src/roi_align_kernel.cu",
"chars": 11825,
"preview": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\nusing namespace at; // temporal fix for pytorch<=0.4.1 (see #9848"
},
{
"path": "mmdet/ops/roi_pool/__init__.py",
"chars": 114,
"preview": "from .functions.roi_pool import roi_pool\nfrom .modules.roi_pool import RoIPool\n\n__all__ = ['roi_pool', 'RoIPool']\n"
},
{
"path": "mmdet/ops/roi_pool/functions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/roi_pool/functions/roi_pool.py",
"chars": 1807,
"preview": "import torch\nfrom torch.autograd import Function\n\nfrom .. import roi_pool_cuda\n\n\nclass RoIPoolFunction(Function):\n\n @"
},
{
"path": "mmdet/ops/roi_pool/gradcheck.py",
"chars": 500,
"preview": "import torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(osp.abspath(osp.join"
},
{
"path": "mmdet/ops/roi_pool/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdet/ops/roi_pool/modules/roi_pool.py",
"chars": 399,
"preview": "from torch.nn.modules.module import Module\nfrom ..functions.roi_pool import roi_pool\n\n\nclass RoIPool(Module):\n\n def _"
},
{
"path": "mmdet/ops/roi_pool/setup.py",
"chars": 322,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='roi_p"
},
{
"path": "mmdet/ops/roi_pool/src/roi_pool_cuda.cpp",
"chars": 2955,
"preview": "#include <torch/torch.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIPoolForwardLaucher(const at::Tensor features, const"
},
{
"path": "mmdet/ops/roi_pool/src/roi_pool_kernel.cu",
"chars": 6953,
"preview": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\nusing namespace at; // temporal fix for pytorch<=0.4.1 (see #9848"
},
{
"path": "setup.py",
"chars": 3013,
"preview": "import os\nimport subprocess\nimport time\nfrom setuptools import find_packages, setup\n\n\ndef readme():\n with open('READM"
},
{
"path": "tools/coco_eval.py",
"chars": 794,
"preview": "from argparse import ArgumentParser\n\nfrom mmdet.core import coco_eval\n\n\ndef main():\n parser = ArgumentParser(descript"
},
{
"path": "tools/convert_datasets/pascal_voc.py",
"chars": 4581,
"preview": "import argparse\nimport os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.cor"
},
{
"path": "tools/dist_train.sh",
"chars": 159,
"preview": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname \"$0\")"
},
{
"path": "tools/test.py",
"chars": 4570,
"preview": "import argparse\n\nimport torch\nimport mmcv\nfrom mmcv.runner import load_checkpoint, parallel_test, obj_from_dict\nfrom mmc"
},
{
"path": "tools/train.py",
"chars": 2763,
"preview": "from __future__ import division\n\nimport argparse\nfrom mmcv import Config\n\nfrom mmdet import __version__\nfrom mmdet.datas"
},
{
"path": "tools/vis_subgraph.py",
"chars": 2364,
"preview": "import numpy as np\nimport pickle\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport matplotlib.cm as cm\n\nsns.s"
},
{
"path": "tools/voc_eval.py",
"chars": 1824,
"preview": "from argparse import ArgumentParser\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet import datasets\nfrom mmdet.core import e"
}
]
// ... and 8 more files (download for full content)
About this extraction
This page contains the full source code of the chanyn/Reasoning-RCNN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 179 files (138.1 MB), approximately 175.1k tokens, and a symbol index with 544 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub-repo-to-text converter for AI. Built by Nikandr Surkov.