Showing preview only (1,084K chars total). Download the full file or copy to clipboard to get everything.
Repository: amirassov/kaggle-imaterialist
Branch: master
Commit: f1ae37100801
Files: 270
Total size: 1004.0 KB
Directory structure:
gitextract_ymxjag4v/
├── .dockerignore
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── configs/
│ └── htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900.py
├── mmdetection/
│ ├── .travis.yml
│ ├── GETTING_STARTED.md
│ ├── INSTALL.md
│ ├── LICENSE
│ ├── MODEL_ZOO.md
│ ├── README.md
│ ├── TECHNICAL_DETAILS.md
│ ├── compile.sh
│ ├── configs/
│ │ ├── cascade_mask_rcnn_r101_fpn_1x.py
│ │ ├── cascade_mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── cascade_mask_rcnn_r50_fpn_1x.py
│ │ ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── cascade_rcnn_r101_fpn_1x.py
│ │ ├── cascade_rcnn_r50_caffe_c4_1x.py
│ │ ├── cascade_rcnn_r50_fpn_1x.py
│ │ ├── cascade_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── cascade_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── dcn/
│ │ │ ├── README.md
│ │ │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
│ │ │ ├── faster_rcnn_dpool_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_mdpool_r50_fpn_1x.py
│ │ │ └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ ├── fast_mask_rcnn_r101_fpn_1x.py
│ │ ├── fast_mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── fast_mask_rcnn_r50_fpn_1x.py
│ │ ├── fast_rcnn_r101_fpn_1x.py
│ │ ├── fast_rcnn_r50_caffe_c4_1x.py
│ │ ├── fast_rcnn_r50_fpn_1x.py
│ │ ├── faster_rcnn_ohem_r50_fpn_1x.py
│ │ ├── faster_rcnn_r101_fpn_1x.py
│ │ ├── faster_rcnn_r50_caffe_c4_1x.py
│ │ ├── faster_rcnn_r50_fpn_1x.py
│ │ ├── faster_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── faster_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── fcos/
│ │ │ ├── README.md
│ │ │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
│ │ │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
│ │ │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py
│ │ ├── gn/
│ │ │ ├── README.md
│ │ │ ├── mask_rcnn_r101_fpn_gn_2x.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_2x.py
│ │ │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py
│ │ ├── gn+ws/
│ │ │ ├── README.md
│ │ │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py
│ │ │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
│ │ ├── htc/
│ │ │ ├── README.md
│ │ │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
│ │ │ ├── htc_r101_fpn_20e.py
│ │ │ ├── htc_r50_fpn_1x.py
│ │ │ ├── htc_r50_fpn_20e.py
│ │ │ ├── htc_without_semantic_r50_fpn_1x.py
│ │ │ ├── htc_x101_32x4d_fpn_20e_16gpu.py
│ │ │ └── htc_x101_64x4d_fpn_20e_16gpu.py
│ │ ├── mask_rcnn_r101_fpn_1x.py
│ │ ├── mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── mask_rcnn_r50_fpn_1x.py
│ │ ├── mask_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── mask_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── pascal_voc/
│ │ │ ├── faster_rcnn_r50_fpn_1x_voc0712.py
│ │ │ ├── ssd300_voc.py
│ │ │ └── ssd512_voc.py
│ │ ├── retinanet_r101_fpn_1x.py
│ │ ├── retinanet_r50_fpn_1x.py
│ │ ├── retinanet_x101_32x4d_fpn_1x.py
│ │ ├── retinanet_x101_64x4d_fpn_1x.py
│ │ ├── rpn_r101_fpn_1x.py
│ │ ├── rpn_r50_caffe_c4_1x.py
│ │ ├── rpn_r50_fpn_1x.py
│ │ ├── rpn_x101_32x4d_fpn_1x.py
│ │ ├── rpn_x101_64x4d_fpn_1x.py
│ │ ├── ssd300_coco.py
│ │ └── ssd512_coco.py
│ ├── mmdet/
│ │ ├── __init__.py
│ │ ├── apis/
│ │ │ ├── __init__.py
│ │ │ ├── env.py
│ │ │ ├── inference.py
│ │ │ └── train.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── anchor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anchor_generator.py
│ │ │ │ └── anchor_target.py
│ │ │ ├── bbox/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── assign_sampling.py
│ │ │ │ ├── assigners/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── assign_result.py
│ │ │ │ │ ├── base_assigner.py
│ │ │ │ │ └── max_iou_assigner.py
│ │ │ │ ├── bbox_target.py
│ │ │ │ ├── geometry.py
│ │ │ │ ├── samplers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_sampler.py
│ │ │ │ │ ├── combined_sampler.py
│ │ │ │ │ ├── instance_balanced_pos_sampler.py
│ │ │ │ │ ├── iou_balanced_neg_sampler.py
│ │ │ │ │ ├── ohem_sampler.py
│ │ │ │ │ ├── pseudo_sampler.py
│ │ │ │ │ ├── random_sampler.py
│ │ │ │ │ └── sampling_result.py
│ │ │ │ └── transforms.py
│ │ │ ├── evaluation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_overlaps.py
│ │ │ │ ├── class_names.py
│ │ │ │ ├── coco_utils.py
│ │ │ │ ├── eval_hooks.py
│ │ │ │ ├── mean_ap.py
│ │ │ │ └── recall.py
│ │ │ ├── loss/
│ │ │ │ ├── __init__.py
│ │ │ │ └── losses.py
│ │ │ ├── mask/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── mask_target.py
│ │ │ │ └── utils.py
│ │ │ ├── post_processing/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_nms.py
│ │ │ │ └── merge_augs.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── dist_utils.py
│ │ │ └── misc.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── coco.py
│ │ │ ├── concat_dataset.py
│ │ │ ├── custom.py
│ │ │ ├── extra_aug.py
│ │ │ ├── loader/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── build_loader.py
│ │ │ │ └── sampler.py
│ │ │ ├── repeat_dataset.py
│ │ │ ├── transforms.py
│ │ │ ├── utils.py
│ │ │ ├── voc.py
│ │ │ └── xml_style.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anchor_head.py
│ │ │ │ ├── fcos_head.py
│ │ │ │ ├── retina_head.py
│ │ │ │ ├── rpn_head.py
│ │ │ │ └── ssd_head.py
│ │ │ ├── backbones/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── resnet.py
│ │ │ │ ├── resnext.py
│ │ │ │ └── ssd_vgg.py
│ │ │ ├── bbox_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_head.py
│ │ │ │ └── convfc_bbox_head.py
│ │ │ ├── builder.py
│ │ │ ├── detectors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── cascade_rcnn.py
│ │ │ │ ├── ensemble_htc.py
│ │ │ │ ├── fast_rcnn.py
│ │ │ │ ├── faster_rcnn.py
│ │ │ │ ├── fcos.py
│ │ │ │ ├── htc.py
│ │ │ │ ├── mask_rcnn.py
│ │ │ │ ├── retinanet.py
│ │ │ │ ├── rpn.py
│ │ │ │ ├── single_stage.py
│ │ │ │ ├── test_mixins.py
│ │ │ │ └── two_stage.py
│ │ │ ├── mask_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fcn_mask_head.py
│ │ │ │ ├── fused_semantic_head.py
│ │ │ │ └── htc_mask_head.py
│ │ │ ├── necks/
│ │ │ │ ├── __init__.py
│ │ │ │ └── fpn.py
│ │ │ ├── registry.py
│ │ │ ├── roi_extractors/
│ │ │ │ ├── __init__.py
│ │ │ │ └── single_level.py
│ │ │ ├── shared_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ └── res_layer.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── conv_module.py
│ │ │ ├── conv_ws.py
│ │ │ ├── norm.py
│ │ │ ├── scale.py
│ │ │ └── weight_init.py
│ │ └── ops/
│ │ ├── __init__.py
│ │ ├── dcn/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── deform_conv.py
│ │ │ │ └── deform_pool.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── deform_conv.py
│ │ │ │ └── deform_pool.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── deform_conv_cuda.cpp
│ │ │ ├── deform_conv_cuda_kernel.cu
│ │ │ ├── deform_pool_cuda.cpp
│ │ │ └── deform_pool_cuda_kernel.cu
│ │ ├── nms/
│ │ │ ├── __init__.py
│ │ │ ├── nms_wrapper.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── nms_cpu.cpp
│ │ │ ├── nms_cuda.cpp
│ │ │ ├── nms_kernel.cu
│ │ │ └── soft_nms_cpu.pyx
│ │ ├── roi_align/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_align.py
│ │ │ ├── gradcheck.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_align.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── roi_align_cuda.cpp
│ │ │ └── roi_align_kernel.cu
│ │ ├── roi_pool/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_pool.py
│ │ │ ├── gradcheck.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_pool.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── roi_pool_cuda.cpp
│ │ │ └── roi_pool_kernel.cu
│ │ └── sigmoid_focal_loss/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ └── sigmoid_focal_loss.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ └── sigmoid_focal_loss.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── sigmoid_focal_loss.cpp
│ │ └── sigmoid_focal_loss_cuda.cu
│ ├── setup.py
│ └── tools/
│ ├── analyze_logs.py
│ ├── coco_eval.py
│ ├── convert_datasets/
│ │ └── pascal_voc.py
│ ├── dist_test.sh
│ ├── dist_train.sh
│ ├── publish_model.py
│ ├── slurm_test.sh
│ ├── slurm_train.sh
│ ├── test.py
│ ├── test_ensemble.py
│ ├── train.py
│ ├── upgrade_model_version.py
│ └── voc_eval.py
├── scrips/
│ ├── create_mmdetection_test.sh
│ ├── create_mmdetection_train.sh
│ ├── dist_test.sh
│ ├── dist_test_ensemble.sh
│ ├── dist_train.sh
│ ├── prepare_weights.sh
│ └── split.sh
└── src/
├── __init__.py
├── create_mmdetection_test.py
├── create_mmdetection_train.py
├── draw.py
├── eda.py
├── metric.py
├── prune.py
├── rle.py
├── rm_attribute_classes.py
├── split.py
├── submit.py
├── utils.py
└── visualization.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
# custom:
.git/*
data/*
ipynb/*
.idea/*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: .gitignore
================================================
*.ipynb
.idea/
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: Dockerfile
================================================
# Base image: PyTorch 1.1.0 + CUDA 10.0 + cuDNN 7.5. The -devel variant ships
# nvcc, which is needed to compile mmdetection's custom CUDA ops (dcn, nms, ...).
FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel
# System dependencies: build toolchain plus the image/video codec libraries
# required by OpenCV, Pillow and jpeg4py. Apt caches are purged in the same
# layer to keep the image small.
RUN apt-get update && apt-get install -y \
git \
wget \
curl \
cmake \
unzip \
build-essential \
libsm6 \
libxext6 \
libfontconfig1 \
libxrender1 \
libswscale-dev \
libtbb2 \
libtbb-dev \
libjpeg-dev \
libpng-dev \
libtiff-dev \
libjasper-dev \
libavformat-dev \
libpq-dev \
libturbojpeg \
software-properties-common \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Python dependencies for training/inference, augmentation and EDA.
RUN pip install --no-cache-dir \
numpy \
pandas \
PyYAML \
cycler \
dill \
h5py \
imgaug \
matplotlib \
opencv-contrib-python \
Pillow \
scikit-image \
scikit-learn \
scipy \
setuptools \
six \
tqdm \
ipython \
ipdb \
albumentations \
click \
jpeg4py \
addict \
colorama \
torchvision \
iterative-stratification
# cython must be present before pycocotools builds; mmcv is pinned to the
# version this mmdetection snapshot was developed against.
RUN pip install --upgrade --no-cache-dir cython && pip install --no-cache-dir pycocotools==2.0.0 mmcv==0.2.5
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2019 Miras Amir
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
APP_NAME=amirassov/kaggle-imaterialist
CONTAINER_NAME=kaggle-imaterialist

# HELP
.PHONY: help

help: ## This help.
	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)

build: ## Build the container
	nvidia-docker build -t $(APP_NAME) .

run-dgx: ## Run container on the DGX server
	nvidia-docker run \
		-itd \
		--ipc=host \
		--name=$(CONTAINER_NAME) \
		-e DISPLAY=localhost:10.0 \
		-v /tmp/.X11-unix:/tmp/.X11-unix \
		-v /raid/data_share/amirassov/kaggle-imaterialist_data:/data \
		-v /raid/data_share/amirassov/kaggle-imaterialist_dumps:/dumps \
		-v $(shell pwd):/kaggle-imaterialist $(APP_NAME) bash

run-omen: ## Run container on the omen server
	nvidia-docker run \
		-itd \
		--ipc=host \
		--name=$(CONTAINER_NAME) \
		-e DISPLAY=localhost:10.0 \
		-v /tmp/.X11-unix:/tmp/.X11-unix \
		-v /home/videoanalytics/data/kaggle-imaterialist_data:/data \
		-v /home/videoanalytics/data/dumps:/dumps \
		-v $(shell pwd):/kaggle-imaterialist $(APP_NAME) bash

exec: ## Run a bash in a running container
	nvidia-docker exec -it $(CONTAINER_NAME) bash

stop: ## Stop and remove a running container
	docker stop $(CONTAINER_NAME); docker rm $(CONTAINER_NAME)
================================================
FILE: README.md
================================================
# The First Place Solution of [iMaterialist (Fashion) 2019](https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/)

## Solution
My solution is based on the COCO challenge 2018 winners article: https://arxiv.org/abs/1901.07518.
### Model:
[Hybrid Task Cascade with ResNeXt-101-64x4d-FPN backbone](https://github.com/open-mmlab/mmdetection/blob/master/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py). This model has a metric Mask mAP = 43.9 on COCO dataset. This is SOTA for instance segmentation.
### Validation:
For validation, I used 450 training samples split using https://github.com/trent-b/iterative-stratification.
### Preprocessing:
I applied light augmentations from the [albumentations](https://github.com/albu/albumentations) library to the original image. Then I used multi-scale training: in each iteration, the scale of the short edge is randomly sampled
from [600, 1200], and the scale of the long edge is fixed at 1900.

### Training details:
* pre-train from COCO
* optimizer: `SGD(lr=0.03, momentum=0.9, weight_decay=0.0001)`
* batch_size: 16 = 2 images per gpu x 8 gpus Tesla V100
* learning rate scheduler:
```
if iterations < 500:
    lr = warmup(warmup_ratio=1 / 3)
if epochs == 10:
    lr = lr * 0.1
if epochs == 18:
    lr = lr * 0.1
if epochs > 20:
    stop
```
* training time: ~3 days.
### Parameter tuning:
After the 12th epoch with the default parameters, the metric on LB was **0.21913**. Next, I tuned postprocessing thresholds using validation data:
```
rcnn=dict(
    score_thr=0.5,
    nms=dict(type='nms', iou_thr=0.3),
    max_per_img=100,
    mask_thr_binary=0.45
)
```
This improved the metric on LB: **0.21913 -> 0.30011.**
### Test time augmentation:
I use 3 scales as well as horizontal flip at test time and ensemble the results. Testing scales are (1000, 1600), (1200, 1900), (1400, 2200).
I drew a TTA scheme for Mask R-CNN, which is implemented in mmdetection library. For Hybrid Task Cascade R-CNN, I rewrote this code.
This improved the metric on LB: **0.30011 -> 0.31074.**

### Ensemble:
I ensemble the 3 best checkpoints of my model. The ensemble scheme is similar to TTA.
This improved the metric on LB: **0.31074 -> 0.31626.**

### Attributes:
I didn't use attributes at all: they were difficult to predict and the removal of classes with attributes greatly improved the metric.
During the whole competition, I deleted classes with attributes: `{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}` U `{27, 28, 33}`. But two days before the end I read [the discussion](https://www.kaggle.com/c/kaggle-imaterialist-fashion-2019-FGVC6/discussion/94811#latest548137) and added back classes `{27, 28, 33}`.
This improved the metric on LB: **0.31626 -> 0.33511.**
### Postprocessing for masks
My post-processing algorithm to avoid intersections between masks of the same class:
```python
def hard_overlaps_suppression(binary_mask, scores):
    not_overlap_mask = []
    for i in np.argsort(scores)[::-1]:
        current_mask = binary_mask[..., i].copy()
        for mask in not_overlap_mask:
            current_mask = np.bitwise_and(current_mask, np.invert(mask))
        not_overlap_mask.append(current_mask)
    return np.stack(not_overlap_mask, -1)
```
### Small postprocessing:
I deleted objects with an area of less than 20 pixels.
This improved the metric on LB: **0.33511 -> 0.33621.**
## How to run?
### Docker
```bash
make build
make run-[server-name]
make exec
```
### Build mmdetection:
```bash
cd mmdetection
bash compile.sh
python setup.py develop
```
### Prepare pretrained weights:
```bash
bash prepare_weights.sh
```
### Data structure
```
/data/
├── train/
│ └── ...
├── test/
│ └── ...
└── train.csv.zip
/dumps/
└── htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900/
```
Fix the [error](https://www.kaggle.com/c/kaggle-imaterialist-fashion-2019-FGVC6/discussion/91217#latest-529042) in `train.csv.zip`.
### Prepare annotations for mmdetection:
```bash
cd scripts
bash create_mmdetection_train.sh
bash create_mmdetection_test.sh
bash split.sh
```
### Training the model:
```bash
CUDA_VISIBLE_DEVICES=[list of gpus] bash dist_train.sh [config] [gpus] [--validate]
```
#### My best checkpoint:
https://yadi.sk/d/-raqliq_ad6r_Q
### Test the model:
```bash
CUDA_VISIBLE_DEVICES=[list of gpus] bash dist_test_ensemble.sh [config] [gpus]
```
## References
* https://github.com/open-mmlab/mmdetection
================================================
FILE: configs/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900.py
================================================
# ----- model settings -----
# Hybrid Task Cascade (3 cascade stages) with a ResNeXt-101 64x4d backbone,
# an FPN neck, and deformable convolutions on backbone stages c3-c5.
model = dict(
    type='HybridTaskCascade',
    num_stages=3,
    pretrained=None,  # COCO weights are supplied via `load_from` in this file
    interleaved=True,
    mask_info_flow=True,
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch',
        dcn=dict(
            modulated=False,
            groups=64,
            deformable_groups=1,
            fallback_on_stride=False),
        # DCN enabled on the last three backbone stages only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # The three cascade bbox heads are identical apart from target_stds,
    # which tighten stage by stage as proposals become more accurate.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=47,  # presumably foreground classes + background — verify against dataset
            target_means=[0., 0., 0., 0.],
            target_stds=stds,
            reg_class_agnostic=True)
        for stds in ([0.1, 0.1, 0.2, 0.2],
                     [0.05, 0.05, 0.1, 0.1],
                     [0.033, 0.033, 0.067, 0.067])
    ],
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='HTCMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=47))
# ----- model training settings -----
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # The three cascade R-CNN stages are identical except for the IoU
    # threshold, which grows 0.5 -> 0.6 -> 0.7 from stage to stage.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=iou,
                neg_iou_thr=iou,
                min_pos_iou=iou,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)
        for iou in (0.5, 0.6, 0.7)
    ],
    # Later stages contribute less to the total loss.
    stage_loss_weights=[1, 0.5, 0.25])
# ----- model testing settings -----
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    # Post-processing thresholds (score_thr / iou_thr / mask_thr_binary)
    # were tuned on the validation split rather than left at defaults.
    rcnn=dict(
        score_thr=0.5,
        nms=dict(type='nms', iou_thr=0.3),
        max_per_img=100,
        mask_thr_binary=0.45),
    keep_all_stages=False)
# ----- dataset settings -----
dataset_type = 'CustomDataset'
data_root = '/data/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        # NOTE(review): data_root already ends with '/', so this resolves to
        # '/data//data/...'; test below uses no extra prefix — confirm the
        # on-disk layout is really asymmetric like this.
        ann_file=data_root + '/data/train_99_mmdetection.pkl',
        img_prefix=data_root + 'train/',
        # Multi-scale training: short edge sampled from [600, 1200],
        # long edge capped at 1900.
        img_scale=[(600, 1900), (1200, 1900)],
        multiscale_mode='range',
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=False,
        with_label=True,
        # Extra albumentations-style photometric augmentations.
        extra_aug=dict(
            type='Compose',
            transforms=[
                dict(p=0.5, max_h_size=64, type='Cutout'),
                dict(
                    brightness_limit=0.3,
                    contrast_limit=0.3,
                    p=0.5,
                    type='RandomBrightnessContrast'),
                dict(
                    p=0.5,
                    quality_lower=80,
                    quality_upper=99,
                    type='JpegCompression'),
            ],
            p=1.0)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + '/data/val_01_mmdetection.pkl',
        img_prefix=data_root + 'train/',  # val split is carved out of train images
        img_scale=(1200, 1900),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_crowd=False,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'test_mmdetection.pkl',
        img_prefix=data_root + 'test/',
        # TTA: three scales; flip_ratio=1.0 adds the horizontally flipped copy.
        img_scale=[(1000, 1600), (1200, 1900), (1400, 2200)],
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=1.0,
        with_mask=True,
        with_label=False,
        test_mode=True))
# ----- optimizer -----
# lr=0.03 is scaled for the full batch of 16 (2 images/GPU x 8 GPUs).
optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# ----- learning-rate schedule -----
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[10, 18])  # lr is multiplied by 0.1 after epochs 10 and 18
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook')])
# ----- runtime settings -----
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = '/dumps/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900'
# Start from a pruned COCO-pretrained HTC checkpoint.
load_from = '/dumps/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c_prune.pth'
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/.travis.yml
================================================
# Travis CI: lint-only pipeline — runs flake8 on Python 3.5 and 3.6.
dist: trusty
language: python
install:
- pip install flake8
python:
- "3.5"
- "3.6"
script:
- flake8
================================================
FILE: mmdetection/GETTING_STARTED.md
================================================
# Getting Started
This page provides basic tutorials about the usage of mmdetection.
For installation instructions, please see [INSTALL.md](INSTALL.md).
## Inference with pretrained models
We provide testing scripts to evaluate a whole dataset (COCO, PASCAL VOC, etc.),
and also some high-level apis for easier integration to other projects.
### Test a dataset
- [x] single GPU testing
- [x] multiple GPU testing
- [x] visualize detection results
You can use the following commands to test a dataset.
```shell
# single-gpu testing
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
# multi-gpu testing
./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
```
Optional arguments:
- `RESULT_FILE`: Filename of the output results in pickle format. If not specified, the results will not be saved to a file.
- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values are: `proposal_fast`, `proposal`, `bbox`, `segm`, `keypoints`.
- `--show`: If specified, detection results will be plotted on the images and shown in a new window. Only applicable for single GPU testing.
Examples:
Assume that you have already downloaded the checkpoints to `checkpoints/`.
1. Test Faster R-CNN and show the results.
```shell
python tools/test.py configs/faster_rcnn_r50_fpn_1x.py \
checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth \
--show
```
2. Test Mask R-CNN and evaluate the bbox and mask AP.
```shell
python tools/test.py configs/mask_rcnn_r50_fpn_1x.py \
checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \
--out results.pkl --eval bbox segm
```
3. Test Mask R-CNN with 8 GPUs, and evaluate the bbox and mask AP.
```shell
./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x.py \
checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \
8 --out results.pkl --eval bbox segm
```
### High-level APIs for testing images.
Here is an example of building the model and test given images.
```python
from mmdet.apis import init_detector, inference_detector, show_result
config_file = 'configs/faster_rcnn_r50_fpn_1x.py'
checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth'
# build the model from a config file and a checkpoint file
model = init_detector(config_file, checkpoint_file)
# test a single image and show the results
img = 'test.jpg' # or img = mmcv.imread(img), which will only load it once
result = inference_detector(model, img)
show_result(img, result, model.CLASSES)
# test a list of images and write the results to image files
imgs = ['test1.jpg', 'test2.jpg']
for i, result in enumerate(inference_detector(model, imgs, device='cuda:0')):
    show_result(imgs[i], result, model.CLASSES, out_file='result_{}.jpg'.format(i))
```
## Train a model
mmdetection implements distributed training and non-distributed training,
which uses `MMDistributedDataParallel` and `MMDataParallel` respectively.
All outputs (log files and checkpoints) will be saved to the working directory,
which is specified by `work_dir` in the config file.
**\*Important\***: The default learning rate in config files is for 8 GPUs.
If you use less or more than 8 GPUs, you need to set the learning rate proportional
to the GPU num, e.g., 0.01 for 4 GPUs and 0.04 for 16 GPUs.
### Train with a single GPU
```shell
python tools/train.py ${CONFIG_FILE}
```
If you want to specify the working directory in the command, you can add an argument `--work_dir ${YOUR_WORK_DIR}`.
### Train with multiple GPUs
```shell
./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
```
Optional arguments are:
- `--validate` (recommended): Perform evaluation at every k (default=1) epochs during the training.
- `--work_dir ${WORK_DIR}`: Override the working directory specified in the config file.
- `--resume_from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file.
### Train with multiple machines
If you run mmdetection on a cluster managed with [slurm](https://slurm.schedmd.com/), you can just use the script `slurm_train.sh`.
```shell
./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${WORK_DIR} [${GPUS}]
```
Here is an example of using 16 GPUs to train Mask R-CNN on the dev partition.
```shell
./tools/slurm_train.sh dev mask_r50_1x configs/mask_rcnn_r50_fpn_1x.py /nfs/xxxx/mask_rcnn_r50_fpn_1x 16
```
You can check [slurm_train.sh](tools/slurm_train.sh) for full arguments and environment variables.
If you have just multiple machines connected with ethernet, you can refer to
pytorch [launch utility](https://pytorch.org/docs/stable/distributed_deprecated.html#launch-utility).
Usually it is slow if you do not have high speed networking like infiniband.
## How-to
### Use my own datasets
The simplest way is to convert your dataset to existing dataset formats (COCO or PASCAL VOC).
Here we show an example of adding a custom dataset of 5 classes, assuming it is also in COCO format.
In `mmdet/datasets/my_dataset.py`:
```python
from .coco import CocoDataset
class MyDataset(CocoDataset):

    CLASSES = ('a', 'b', 'c', 'd', 'e')
```
In `mmdet/datasets/__init__.py`:
```python
from .my_dataset import MyDataset
```
Then you can use `MyDataset` in config files, with the same API as CocoDataset.
It is also fine if you do not want to convert the annotation format to COCO or PASCAL format.
Actually, we define a simple annotation format and all existing datasets are
processed to be compatible with it, either online or offline.
The annotation of a dataset is a list of dict, each dict corresponds to an image.
There are 3 field `filename` (relative path), `width`, `height` for testing,
and an additional field `ann` for training. `ann` is also a dict containing at least 2 fields:
`bboxes` and `labels`, both of which are numpy arrays. Some datasets may provide
annotations like crowd/difficult/ignored bboxes, we use `bboxes_ignore` and `labels_ignore`
to cover them.
Here is an example.
```
[
{
'filename': 'a.jpg',
'width': 1280,
'height': 720,
'ann': {
'bboxes': <np.ndarray, float32> (n, 4),
'labels': <np.ndarray, float32> (n, ),
'bboxes_ignore': <np.ndarray, float32> (k, 4),
'labels_ignore': <np.ndarray, float32> (k, ) (optional field)
}
},
...
]
```
There are two ways to work with custom datasets.
- online conversion
You can write a new Dataset class inherited from `CustomDataset`, and overwrite two methods
`load_annotations(self, ann_file)` and `get_ann_info(self, idx)`,
like [CocoDataset](mmdet/datasets/coco.py) and [VOCDataset](mmdet/datasets/voc.py).
- offline conversion
You can convert the annotation format to the expected format above and save it to
a pickle or json file, like [pascal_voc.py](tools/convert_datasets/pascal_voc.py).
Then you can simply use `CustomDataset`.
### Develop new components
We basically categorize model components into 4 types.
- backbone: usually a FCN network to extract feature maps, e.g., ResNet, MobileNet.
- neck: the component between backbones and heads, e.g., FPN, PAFPN.
- head: the component for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting RoI features from feature maps, e.g., RoI Align.
Here we show how to develop new components with an example of MobileNet.
1. Create a new file `mmdet/models/backbones/mobilenet.py`.
```python
import torch.nn as nn
from ..registry import BACKBONES
@BACKBONES.register_module
class MobileNet(nn.Module):
def __init__(self, arg1, arg2):
pass
def forward(self, x): # should return a tuple
pass
```
2. Import the module in `mmdet/models/backbones/__init__.py`.
```python
from .mobilenet import MobileNet
```
3. Use it in your config file.
```python
model = dict(
...
backbone=dict(
type='MobileNet',
arg1=xxx,
arg2=xxx),
...
```
For more information on how it works, you can refer to [TECHNICAL_DETAILS.md](TECHNICAL_DETAILS.md) (TODO).
================================================
FILE: mmdetection/INSTALL.md
================================================
## Installation
### Requirements
- Linux
- Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/))
- PyTorch 1.0+ or PyTorch-nightly
- CUDA 9.0+
- NCCL 2+
- GCC 4.9+
- [mmcv](https://github.com/open-mmlab/mmcv)
We have tested the following versions of OS and software:
- OS: Ubuntu 16.04/18.04 and CentOS 7.2
- CUDA: 9.0/9.2/10.0
- NCCL: 2.1.15/2.2.13/2.3.7/2.4.2
- GCC: 4.9/5.3/5.4/7.3
### Install mmdetection
a. Create a conda virtual environment and activate it. Then install Cython.
```shell
conda create -n open-mmlab python=3.7 -y
source activate open-mmlab
conda install cython
```
b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/).
c. Clone the mmdetection repository.
```shell
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
```
d. Compile cuda extensions.
```shell
./compile.sh
```
e. Install mmdetection (other dependencies will be installed automatically).
```shell
python setup.py develop
# or "pip install -e ."
```
Note:
1. It is recommended that you run step e each time you pull updates from GitHub. If the C/CUDA code has been updated, you also need to run step d.
The git commit id will be written to the version number with step e, e.g. 0.6.0+2e7045c. The version will also be saved in trained models.
2. Following the above instructions, mmdetection is installed on `dev` mode, any modifications to the code will take effect without installing it again.
### Prepare COCO dataset.
It is recommended to symlink the dataset root to `$MMDETECTION/data`.
```
mmdetection
├── mmdet
├── tools
├── configs
├── data
│ ├── coco
│ │ ├── annotations
│ │ ├── train2017
│ │ ├── val2017
│ │ ├── test2017
│ ├── VOCdevkit
│ │ ├── VOC2007
│ │ ├── VOC2012
```
### Scripts
[Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
a script for setting up mmdetection with conda.
### Notice
You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently.
If there is more than one mmdetection installation on your machine and you want to switch between them,
please insert the following code into the main file
```python
import os.path as osp
import sys
sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))
```
or run the following command in the terminal of corresponding folder.
```shell
export PYTHONPATH=`pwd`:$PYTHONPATH
```
================================================
FILE: mmdetection/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: mmdetection/MODEL_ZOO.md
================================================
# Benchmark and Model Zoo
## Environment
### Hardware
- 8 NVIDIA Tesla V100 GPUs
- Intel Xeon 4114 CPU @ 2.20GHz
### Software environment
- Python 3.6 / 3.7
- PyTorch Nightly
- CUDA 9.0.176
- CUDNN 7.0.4
- NCCL 2.1.15
## Mirror sites
We use AWS as the main site to host our model zoo, and maintain a mirror on aliyun.
You can replace `https://s3.ap-northeast-2.amazonaws.com/open-mmlab` with `https://open-mmlab.oss-cn-beijing.aliyuncs.com` in model urls.
## Common settings
- All FPN baselines and RPN-C4 baselines were trained using 8 GPU with a batch size of 16 (2 images per GPU). Other C4 baselines were trained using 8 GPU with a batch size of 8 (1 image per GPU).
- All models were trained on `coco_2017_train`, and tested on the `coco_2017_val`.
- We use distributed training and BN layer stats are fixed.
- We adopt the same training schedules as Detectron. 1x indicates 12 epochs and 2x indicates 24 epochs, which corresponds to slightly fewer iterations than Detectron, and the difference can be ignored.
- All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo.
- For fair comparison with other codebases, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` for all 8 GPUs. Note that this value is usually less than what `nvidia-smi` shows.
- We report the inference time as the overall time including data loading, network forwarding and post processing.
## Baselines
More models with different backbones will be added to the model zoo.
### RPN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR1000 | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | 1x | - | - | 20.5 | 51.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_caffe_c4_1x-ea7d3428.pth) |
| R-50-C4 | caffe | 2x | 2.2 | 0.17 | 20.3 | 52.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_caffe_c4_2x-c6d5b958.pth) |
| R-50-C4 | pytorch | 1x | - | - | 20.1 | 50.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_c4_1x-eb38972b.pth) |
| R-50-C4 | pytorch | 2x | - | - | 20.0 | 51.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_c4_2x-3d4c1e14.pth) |
| R-50-FPN | caffe | 1x | 3.3 | 0.253 | 16.9 | 58.2 | - |
| R-50-FPN | pytorch | 1x | 3.5 | 0.276 | 17.7 | 57.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth) |
| R-50-FPN | pytorch | 2x | - | - | - | 57.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth) |
| R-101-FPN | caffe | 1x | 5.2 | 0.379 | 13.9 | 59.4 | - |
| R-101-FPN | pytorch | 1x | 5.4 | 0.396 | 14.4 | 58.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_1x_20181129-f50da4bd.pth) |
| R-101-FPN | pytorch | 2x | - | - | - | 59.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_2x_20181129-e42c6c9a.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 6.6 | 0.589 | 11.8 | 59.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_1x_20181218-7e379d26.pth) |
| X-101-32x4d-FPN | pytorch | 2x | - | - | - | 59.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_2x_20181218-0510af40.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 9.5 | 0.955 | 8.3 | 59.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_1x_20181218-c1a24f1f.pth) |
| X-101-64x4d-FPN | pytorch | 2x | - | - | - | 60.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_2x_20181218-c22bdd70.pth) |
### Faster R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | 1x | - | - | 9.5 | 34.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_caffe_c4_1x-75ecfdfa.pth) |
| R-50-C4 | caffe | 2x | 4.0 | 0.39 | 9.3 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_caffe_c4_2x-71c67f27.pth) |
| R-50-C4 | pytorch | 1x | - | - | 9.3 | 33.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_c4_1x-642cf91f.pth) |
| R-50-C4 | pytorch | 2x | - | - | 9.4 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_c4_2x-6e4fdf4f.pth) |
| R-50-FPN | caffe | 1x | 3.6 | 0.333 | 13.5 | 36.6 | - |
| R-50-FPN | pytorch | 1x | 3.8 | 0.353 | 13.6 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth) |
| R-50-FPN | pytorch | 2x | - | - | - | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth) |
| R-101-FPN | caffe | 1x | 5.5 | 0.465 | 11.5 | 38.8 | - |
| R-101-FPN | pytorch | 1x | 5.7 | 0.474 | 11.9 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth) |
| R-101-FPN | pytorch | 2x | - | - | - | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_2x_20181129-73e7ade7.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 6.9 | 0.672 | 10.3 | 40.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_1x_20181218-ad81c133.pth) |
| X-101-32x4d-FPN | pytorch | 2x | - | - | - | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_2x_20181218-0ed58946.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 9.8 | 1.040 | 7.3 | 41.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_1x_20181218-c9c69c8f.pth) |
| X-101-64x4d-FPN | pytorch | 2x | - | - | - | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_2x_20181218-fe94f9b8.pth) |
### Mask R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | 1x | - | - | 8.1 | 35.9 | 31.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_caffe_c4_1x-02a4ad3b.pth) |
| R-50-C4 | caffe | 2x | 4.2 | 0.43 | 8.1 | 37.9 | 32.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_caffe_c4_2x-d150973a.pth) |
| R-50-C4 | pytorch | 1x | - | - | 7.9 | 35.1 | 31.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_c4_1x-a83bdd40.pth) |
| R-50-C4 | pytorch | 2x | - | - | 8.0 | 37.2 | 32.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_c4_2x-3cf169a9.pth) |
| R-50-FPN | caffe | 1x | 3.8 | 0.430 | 10.2 | 37.4 | 34.3 | - |
| R-50-FPN | pytorch | 1x | 3.9 | 0.453 | 10.6 | 37.3 | 34.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth) |
| R-50-FPN | pytorch | 2x | - | - | - | 38.5 | 35.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth) |
| R-101-FPN | caffe | 1x | 5.7 | 0.534 | 9.4 | 39.9 | 36.1 | - |
| R-101-FPN | pytorch | 1x | 5.8 | 0.571 | 9.5 | 39.4 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_1x_20181129-34ad1961.pth) |
| R-101-FPN | pytorch | 2x | - | - | - | 40.3 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_2x_20181129-a254bdfc.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 7.1 | 0.759 | 8.3 | 41.1 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_1x_20181218-44e635cc.pth) |
| X-101-32x4d-FPN | pytorch | 2x | - | - | - | 41.4 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_2x_20181218-f023dffa.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 10.0 | 1.102 | 6.5 | 42.1 | 38.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_1x_20181218-cb159987.pth) |
| X-101-64x4d-FPN | pytorch | 2x | - | - | - | 42.0 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_2x_20181218-ea936e44.pth) |
### Fast R-CNN (with pre-computed proposals)
| Backbone | Style | Type | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
| :-------: | :-----: | :----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | Faster | 1x | - | - | 6.7 | 35.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_caffe_c4_1x-0ef9a60b.pth) |
| R-50-C4 | caffe | Faster | 2x | 3.8 | 0.34 | 6.6 | 36.4 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_c4_2x-657a9fc6.pth) |
| R-50-C4 | pytorch | Faster | 1x | - | - | 6.3 | 34.2 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_c4_1x-2bc00ca9.pth) |
| R-50-C4 | pytorch | Faster | 2x | - | - | 6.1 | 35.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_caffe_c4_2x-9171d0fc.pth) |
| R-50-FPN | caffe | Faster | 1x | 3.3 | 0.242 | 18.4 | 36.6 | - | - |
| R-50-FPN | pytorch | Faster | 1x | 3.5 | 0.250 | 16.5 | 35.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth) |
| R-50-C4 | caffe | Mask | 1x | - | - | 8.1 | 35.9 | 31.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_caffe_c4_1x-b43f7f3c.pth) |
| R-50-C4 | caffe | Mask | 2x | 4.2 | 0.43 | 8.1 | 37.9 | 32.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_caffe_c4_2x-e3580184.pth) |
| R-50-C4 | pytorch | Mask | 1x | - | - | 7.9 | 35.1 | 31.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_c4_1x-bc7fa8c8.pth) |
| R-50-C4 | pytorch | Mask | 2x | - | - | 8.0 | 37.2 | 32.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) |
| R-50-FPN | pytorch | Faster | 2x | - | - | - | 37.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth) |
| R-101-FPN | caffe | Faster | 1x | 5.2 | 0.355 | 14.4 | 38.6 | - | - |
| R-101-FPN | pytorch | Faster | 1x | 5.4 | 0.388 | 13.2 | 38.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_1x_20181129-ffaa2eb0.pth) |
| R-101-FPN | pytorch | Faster | 2x | - | - | - | 38.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_2x_20181129-9dba92ce.pth) |
| R-50-FPN | caffe | Mask | 1x | 3.4 | 0.328 | 12.8 | 37.3 | 34.5 | - |
| R-50-FPN | pytorch | Mask | 1x | 3.5 | 0.346 | 12.7 | 36.8 | 34.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth) |
| R-50-FPN | pytorch | Mask | 2x | - | - | - | 37.9 | 34.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth) |
| R-101-FPN | caffe | Mask | 1x | 5.2 | 0.429 | 11.2 | 39.4 | 36.1 | - |
| R-101-FPN | pytorch | Mask | 1x | 5.4 | 0.462 | 10.9 | 38.9 | 35.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_1x_20181129-2273fa9b.pth) |
| R-101-FPN | pytorch | Mask | 2x | - | - | - | 39.9 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_2x_20181129-bf63ec5e.pth) |
### RetinaNet
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: |
| R-50-FPN | caffe | 1x | 3.4 | 0.285 | 12.5 | 35.8 | - |
| R-50-FPN | pytorch | 1x | 3.6 | 0.308 | 12.1 | 35.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth) |
| R-50-FPN | pytorch | 2x | - | - | - | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_2x_20181125-8b724df2.pth) |
| R-101-FPN | caffe | 1x | 5.3 | 0.410 | 10.4 | 37.8 | - |
| R-101-FPN | pytorch | 1x | 5.5 | 0.429 | 10.9 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_1x_20181129-f016f384.pth) |
| R-101-FPN | pytorch | 2x | - | - | - | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_2x_20181129-72c14526.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 6.7 | 0.632 | 9.3 | 39.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_1x_20190501-967812ba.pth) |
| X-101-32x4d-FPN | pytorch | 2x | - | - | - | 39.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_2x_20181218-8596452d.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 9.6 | 0.993 | 7.0 | 40.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_1x_20181218-a0a22662.pth) |
| X-101-64x4d-FPN | pytorch | 2x | - | - | - | 39.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_2x_20181218-5e88d045.pth) |
### Cascade R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | 1x | 8.7 | 0.92 | 5.0 | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_caffe_c4_1x-7c85c62b.pth) |
| R-50-FPN | caffe | 1x | 3.9 | 0.464 | 10.9 | 40.5 | - |
| R-50-FPN | pytorch | 1x | 4.1 | 0.455 | 11.9 | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_1x_20190501-3b6211ab.pth) |
| R-50-FPN | pytorch | 20e | - | - | - | 41.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_20e_20181123-db483a09.pth) |
| R-101-FPN | caffe | 1x | 5.8 | 0.569 | 9.6 | 42.4 | - |
| R-101-FPN | pytorch | 1x | 6.0 | 0.584 | 10.3 | 42.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_1x_20181129-d64ebac7.pth) |
| R-101-FPN | pytorch | 20e | - | - | - | 42.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_20e_20181129-b46dcede.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 0.770 | 8.9 | 43.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth) |
| X-101-32x4d-FPN | pytorch | 20e | - | - | - | 44.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_2x_20181218-28f73c4c.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 10.0 | 1.133 | 6.7 | 44.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_1x_20181218-e2dc376a.pth) |
| X-101-64x4d-FPN | pytorch | 20e | - | - | - | 44.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_2x_20181218-5add321e.pth) |
### Cascade Mask R-CNN
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------------------: |
| R-50-C4 | caffe | 1x | 9.1 | 0.99 | 4.5 | 39.3 | 32.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_caffe_c4_1x-f72cc254.pth) |
| R-50-FPN | caffe | 1x | 5.1 | 0.692 | 7.6 | 40.9 | 35.5 | - |
| R-50-FPN | pytorch | 1x | 5.3 | 0.683 | 7.4 | 41.2 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) |
| R-50-FPN | pytorch | 20e | - | - | - | 42.3 | 36.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_20e_20181123-6e0c9713.pth) |
| R-101-FPN | caffe | 1x | 7.0 | 0.803 | 7.2 | 43.1 | 37.2 | - |
| R-101-FPN | pytorch | 1x | 7.2 | 0.807 | 6.8 | 42.6 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_1x_20181129-64f00602.pth) |
| R-101-FPN | pytorch | 20e | - | - | - | 43.3 | 37.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_20e_20181129-cb85151d.pth) |
| X-101-32x4d-FPN | pytorch | 1x | 8.4 | 0.976 | 6.6 | 44.4 | 38.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_1x_20181218-1d944c89.pth) |
| X-101-32x4d-FPN | pytorch | 20e | - | - | - | 44.7 | 38.6 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_20e_20181218-761a3473.pth) |
| X-101-64x4d-FPN | pytorch | 1x | 11.4 | 1.33 | 5.3 | 45.4 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_1x_20190501-827e0a70.pth) |
| X-101-64x4d-FPN | pytorch | 20e | - | - | - | 45.7 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_20e_20181218-630773a7.pth) |
**Notes:**
- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs.
### Hybrid Task Cascade (HTC)
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------: |
| R-50-FPN | pytorch | 1x | 7.4 | 0.936 | 4.1 | 42.1 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |
| R-50-FPN | pytorch | 20e | - | - | - | 43.2 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |
| R-101-FPN | pytorch | 20e | 9.3 | 1.051 | 4.0 | 44.9 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |
| X-101-32x4d-FPN | pytorch | 20e | 5.8 | 0.769 | 3.8 | 46.1 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |
| X-101-64x4d-FPN | pytorch | 20e | 7.5 | 1.120 | 3.5 | 46.9 | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |
**Notes:**
- Please refer to [Hybrid Task Cascade](configs/htc/README.md) for details and a more powerful model (50.7/43.9).
### SSD
| Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
| :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------: |
| VGG16 | 300 | caffe | 120e | 3.5 | 0.256 | 25.9 / 34.6 | 25.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) |
| VGG16 | 512 | caffe | 120e | 7.6 | 0.412 | 20.7 / 25.4 | 29.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_coco_vgg16_caffe_120e_20181221-d48b0be8.pth) |
### SSD (PASCAL VOC)
| Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
| :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: |
| VGG16 | 300 | caffe | 240e | 2.5 | 0.159 | 35.7 / 53.6 | 77.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) |
| VGG16 | 512 | caffe | 240e | 4.3 | 0.214 | 27.5 / 35.9 | 80.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) |
**Notes:**
- `cudnn.benchmark` is set as `True` for SSD training and testing.
- Inference time is reported for batch size = 1 and batch size = 8.
- The speed difference between VOC and COCO is caused by model parameters and nms.
### Group Normalization (GN)
Please refer to [Group Normalization](configs/gn/README.md) for details.
### Weight Standardization
Please refer to [Weight Standardization](configs/gn+ws/README.md) for details.
### Deformable Convolution v2
Please refer to [Deformable Convolutional Networks](configs/dcn/README.md) for details.
## Comparison with Detectron and maskrcnn-benchmark
We compare mmdetection with [Detectron](https://github.com/facebookresearch/Detectron)
and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). The backbone used is R-50-FPN.
In general, mmdetection has 3 advantages over Detectron.
- **Higher performance** (especially in terms of mask AP)
- **Faster training speed**
- **Memory efficient**
### Performance
Detectron and maskrcnn-benchmark use caffe-style ResNet as the backbone.
We report results using both caffe-style (weights converted from
[here](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#imagenet-pretrained-models))
and pytorch-style (weights from the official model zoo) ResNet backbone,
indicated as *pytorch-style results* / *caffe-style results*.
We find that pytorch-style ResNet usually converges slower than caffe-style ResNet,
thus leading to slightly lower results in 1x schedule, but the final results
of 2x schedule is higher.
<table>
<tr>
<th>Type</th>
<th>Lr schd</th>
<th>Detectron</th>
<th>maskrcnn-benchmark</th>
<th>mmdetection</th>
</tr>
<tr>
<td rowspan="2">RPN</td>
<td>1x</td>
<td>57.2</td>
<td>-</td>
<td>57.1 / 58.2</td>
</tr>
<tr>
<td>2x</td>
<td>-</td>
<td>-</td>
<td>57.6 / -</td>
</tr>
<tr>
<td rowspan="2">Faster R-CNN</td>
<td>1x</td>
<td>36.7</td>
<td>36.8</td>
<td>36.4 / 36.6</td>
</tr>
<tr>
<td>2x</td>
<td>37.9</td>
<td>-</td>
<td>37.7 / -</td>
</tr>
<tr>
<td rowspan="2">Mask R-CNN</td>
<td>1x</td>
<td>37.7 & 33.9</td>
<td>37.8 & 34.2</td>
<td>37.3 & 34.2 / 37.4 & 34.3</td>
</tr>
<tr>
<td>2x</td>
<td>38.6 & 34.5</td>
<td>-</td>
<td>38.5 & 35.1 / -</td>
</tr>
<tr>
<td rowspan="2">Fast R-CNN</td>
<td>1x</td>
<td>36.4</td>
<td>-</td>
<td>35.8 / 36.6</td>
</tr>
<tr>
<td>2x</td>
<td>36.8</td>
<td>-</td>
<td>37.1 / -</td>
</tr>
<tr>
<td rowspan="2">Fast R-CNN (w/mask)</td>
<td>1x</td>
<td>37.3 & 33.7</td>
<td>-</td>
<td>36.8 & 34.1 / 37.3 & 34.5</td>
</tr>
<tr>
<td>2x</td>
<td>37.7 & 34.0</td>
<td>-</td>
<td>37.9 & 34.8 / -</td>
</tr>
</table>
### Training Speed
The training speed is measured in s/iter. The lower, the better.
<table>
<tr>
<th>Type</th>
<th>Detectron (P100<sup>1</sup>)</th>
<th>maskrcnn-benchmark (V100)</th>
<th>mmdetection (V100<sup>2</sup>)</th>
</tr>
<tr>
<td>RPN</td>
<td>0.416</td>
<td>-</td>
<td>0.253</td>
</tr>
<tr>
<td>Faster R-CNN</td>
<td>0.544</td>
<td>0.353</td>
<td>0.333</td>
</tr>
<tr>
<td>Mask R-CNN</td>
<td>0.889</td>
<td>0.454</td>
<td>0.430</td>
</tr>
<tr>
<td>Fast R-CNN</td>
<td>0.285</td>
<td>-</td>
<td>0.242</td>
</tr>
<tr>
<td>Fast R-CNN (w/mask)</td>
<td>0.377</td>
<td>-</td>
<td>0.328</td>
</tr>
</table>
\*1. Facebook's Big Basin servers (P100/V100) are slightly faster than the servers we use. mmdetection can also run slightly faster on FB's servers.
\*2. For fair comparison, we list the caffe-style results here.
### Inference Speed
The inference speed is measured in fps (img/s) on a single GPU. The higher, the better.
<table>
<tr>
<th>Type</th>
<th>Detectron (P100)</th>
<th>maskrcnn-benchmark (V100)</th>
<th>mmdetection (V100)</th>
</tr>
<tr>
<td>RPN</td>
<td>12.5</td>
<td>-</td>
<td>16.9</td>
</tr>
<tr>
<td>Faster R-CNN</td>
<td>10.3</td>
<td>7.9</td>
<td>13.5</td>
</tr>
<tr>
<td>Mask R-CNN</td>
<td>8.5</td>
<td>7.7</td>
<td>10.2</td>
</tr>
<tr>
<td>Fast R-CNN</td>
<td>12.5</td>
<td>-</td>
<td>18.4</td>
</tr>
<tr>
<td>Fast R-CNN (w/mask)</td>
<td>9.9</td>
<td>-</td>
<td>12.8</td>
</tr>
</table>
### Training memory
<table>
<tr>
<th>Type</th>
<th>Detectron</th>
<th>maskrcnn-benchmark</th>
<th>mmdetection</th>
</tr>
<tr>
<td>RPN</td>
<td>6.4</td>
<td>-</td>
<td>3.3</td>
</tr>
<tr>
<td>Faster R-CNN</td>
<td>7.2</td>
<td>4.4</td>
<td>3.6</td>
</tr>
<tr>
<td>Mask R-CNN</td>
<td>8.6</td>
<td>5.2</td>
<td>3.8</td>
</tr>
<tr>
<td>Fast R-CNN</td>
<td>6.0</td>
<td>-</td>
<td>3.3</td>
</tr>
<tr>
<td>Fast R-CNN (w/mask)</td>
<td>7.9</td>
<td>-</td>
<td>3.4</td>
</tr>
</table>
There is no doubt that maskrcnn-benchmark and mmdetection are more memory efficient than Detectron,
and the main advantage is PyTorch itself. We also perform some memory optimizations to push it forward.
Note that Caffe2 and PyTorch have different APIs to obtain memory usage, with different implementations.
For all codebases, `nvidia-smi` shows a larger memory usage than the reported number in the above table.
================================================
FILE: mmdetection/README.md
================================================
# mmdetection
## Introduction
The master branch works with **PyTorch 1.1** or higher. If you would like to use PyTorch 0.4.1,
please checkout to the [pytorch-0.4.1](https://github.com/open-mmlab/mmdetection/tree/pytorch-0.4.1) branch.
mmdetection is an open source object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).

### Major features
- **Modular Design**
One can easily construct a customized object detection framework by combining different components.
- **Support of multiple frameworks out of box**
The toolbox directly supports popular detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc.
- **Efficient**
All basic bbox and mask operations run on GPUs now.
The training speed is nearly 2x faster than Detectron and comparable to maskrcnn-benchmark.
- **State of the art**
This was the codebase of the *MMDet* team, who won the [COCO Detection 2018 challenge](http://cocodataset.org/#detection-leaderboard).
Apart from mmdetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research,
which is heavily depended on by this toolbox.
## License
This project is released under the [Apache 2.0 license](LICENSE).
## Updates
v0.6.0 (14/04/2019)
- Up to 30% speedup compared to the model zoo.
- Support both PyTorch stable and nightly version.
- Replace NMS and SigmoidFocalLoss with PyTorch CUDA extensions.
v0.6rc0(06/02/2019)
- Migrate to PyTorch 1.0.
v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models.
- Refactoring for Samplers/Assigners and add OHEM.
- Add VOC dataset and evaluation scripts.
v0.5.4 (27/11/2018)
- Add SingleStageDetector and RetinaNet.
v0.5.3 (26/11/2018)
- Add Cascade R-CNN and Cascade Mask R-CNN.
- Add support for Soft-NMS in config files.
v0.5.2 (21/10/2018)
- Add support for custom datasets.
- Add a script to convert PASCAL VOC annotations to the expected format.
v0.5.1 (20/10/2018)
- Add BBoxAssigner and BBoxSampler, the `train_cfg` field in config files is restructured.
- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.
## Benchmark and model zoo
Supported methods and backbones are shown in the below table.
Results and models are available in the [Model zoo](MODEL_ZOO.md).
| | ResNet | ResNeXt | SENet | VGG |
|--------------------|:--------:|:--------:|:--------:|:--------:|
| RPN | ✓ | ✓ | ☐ | ✗ |
| Fast R-CNN | ✓ | ✓ | ☐ | ✗ |
| Faster R-CNN | ✓ | ✓ | ☐ | ✗ |
| Mask R-CNN | ✓ | ✓ | ☐ | ✗ |
| Cascade R-CNN | ✓ | ✓ | ☐ | ✗ |
| Cascade Mask R-CNN | ✓ | ✓ | ☐ | ✗ |
| SSD | ✗ | ✗ | ✗ | ✓ |
| RetinaNet | ✓ | ✓ | ☐ | ✗ |
| Hybrid Task Cascade| ✓ | ✓ | ☐ | ✗ |
| FCOS | ✓ | ✓ | ☐ | ✗ |
Other features
- [x] DCNv2
- [x] Group Normalization
- [x] Weight Standardization
- [x] OHEM
- [x] Soft-NMS
- [ ] Mixed Precision (FP16) Training (coming soon)
## Installation
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
## Get Started
Please see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of mmdetection.
## Citation
If you use our codebase or models in your research, please cite this project.
We will release a paper or technical report later.
```
@misc{mmdetection2018,
author = {Kai Chen and Jiangmiao Pang and Jiaqi Wang and Yu Xiong and Xiaoxiao Li
and Shuyang Sun and Wansen Feng and Ziwei Liu and Jianping Shi and
Wanli Ouyang and Chen Change Loy and Dahua Lin},
title = {mmdetection},
howpublished = {\url{https://github.com/open-mmlab/mmdetection}},
year = {2018}
}
```
================================================
FILE: mmdetection/TECHNICAL_DETAILS.md
================================================
## Overview
In this section, we will introduce the main units of training a detector:
data loading, model and iteration pipeline.
## Data loading
Following typical conventions, we use `Dataset` and `DataLoader` for data loading
with multiple workers. `Dataset` returns a dict of data items corresponding
to the arguments of the models' forward method.
Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
data of different size.
See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
## Model
In mmdetection, model components are basically categorized as 4 types.
- backbone: usually a FCN network to extract feature maps, e.g., ResNet.
- neck: the part between backbones and heads, e.g., FPN, ASPP.
- head: the part for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
We also implement some general detection pipelines with the above components,
such as `SingleStageDetector` and `TwoStageDetector`.
### Build a model with basic components
Following some basic pipelines (e.g., two-stage detectors), the model structure
can be customized through config files with no pains.
If we want to implement some new components, e.g, the path aggregation
FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
1. create a new file in `mmdet/models/necks/pafpn.py`.
```python
class PAFPN(nn.Module):
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=0,
end_level=-1,
add_extra_convs=False):
pass
def forward(self, inputs):
# implementation is ignored
pass
```
2. modify the config file from
```python
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
to
```python
neck=dict(
type='PAFPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
We will release more components (backbones, necks, heads) for research purpose.
### Write a new model
To write a new detection pipeline, you need to inherit from `BaseDetector`,
which defines the following abstract methods.
- `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
- `forward_train()`: forward method of the training mode
- `simple_test()`: single scale testing without augmentation
- `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
[TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
is a good example which shows how to do that.
## Iteration pipeline
We adopt distributed training for both single machine and multiple machines.
Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
Each process keeps an isolated model, data loader, and optimizer.
Model parameters are only synchronized once at the beginning.
After a forward and backward pass, gradients will be allreduced among all GPUs,
and the optimizer will update model parameters.
Since the gradients are allreduced, the model parameter stays the same for all processes after the iteration.
================================================
FILE: mmdetection/compile.sh
================================================
#!/usr/bin/env bash
# Build all mmdet C++/CUDA extension ops in place.
# Override the interpreter with e.g. `PYTHON=python3 ./compile.sh`.
#
# Fixes over the original: the five copy-pasted build stanzas are folded
# into one helper, and `set -e` makes the script abort on the first failed
# build instead of silently continuing with the remaining ops.
set -e

PYTHON=${PYTHON:-"python"}

# build_op <dir> <message>: announce the build, remove any stale `build`
# directory, then compile the extension in place with setuptools.
# pushd/popd (instead of the original chained `cd ../...`) keeps each op's
# path independent of the previous one.
build_op() {
    local op_dir=$1
    local msg=$2
    echo "${msg}"
    pushd "${op_dir}" > /dev/null
    if [ -d "build" ]; then
        rm -r build
    fi
    $PYTHON setup.py build_ext --inplace
    popd > /dev/null
}

build_op mmdet/ops/roi_align "Building roi align op..."
build_op mmdet/ops/roi_pool "Building roi pool op..."
build_op mmdet/ops/nms "Building nms op..."
build_op mmdet/ops/dcn "Building dcn..."
build_op mmdet/ops/sigmoid_focal_loss "Building sigmoid focal loss op..."
================================================
FILE: mmdetection/configs/cascade_mask_rcnn_r101_fpn_1x.py
================================================
# model settings
# Cascade Mask R-CNN: 3-stage cascade, ResNet-101 + FPN backbone, 1x schedule.
model = dict(
    type='CascadeRCNN',
    num_stages=3,
    pretrained='modelzoo://resnet101',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stage outputs into the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per cascade stage; target_stds tighten stage by stage
    # (0.1 -> 0.05 -> 0.033) so later stages make finer box refinements.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,  # 80 COCO classes + background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True)
    ],
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # One rcnn config per cascade stage; the positive IoU threshold rises
    # per stage (0.5 -> 0.6 -> 0.7) to match each head's refinement role.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])  # later stages contribute less loss
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5),
    keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std normalization (pytorch-style pretrained backbone).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.02 with imgs_per_gpu=2 presumably targets 8 GPUs
# (total batch 16); scale the lr linearly for other setups — TODO confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy: 1x schedule — step the lr down at epochs 8 and 11 of 12.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_mask_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py
================================================
# model settings
# Cascade Mask R-CNN, caffe-style ResNet-50-C4: a single conv4 feature map
# (no FPN); res5 is attached as a shared RoI head (ResLayer).
norm_cfg = dict(type='BN', requires_grad=False)  # BN params frozen (caffe-style)
model = dict(
    type='CascadeRCNN',
    num_stages=3,
    pretrained='open-mmlab://resnet50_caffe',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),  # only the C4 feature map is exposed
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=True,
        style='caffe'),
    # res5 as a shared head applied to RoI features before bbox/mask heads.
    shared_head=dict(
        type='ResLayer',
        depth=50,
        stage=3,
        stride=2,
        dilation=1,
        style='caffe',
        norm_cfg=norm_cfg,
        norm_eval=True),
    rpn_head=dict(
        type='RPNHead',
        in_channels=1024,
        feat_channels=1024,
        anchor_scales=[2, 4, 8, 16, 32],  # multi-scale anchors on one level
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[16],
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=1024,
        featmap_strides=[16]),
    # One bbox head per cascade stage; target_stds tighten stage by stage
    # (0.1 -> 0.05 -> 0.033) so later stages make finer box refinements.
    bbox_head=[
        dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=81,  # 80 COCO classes + background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True),
        dict(
            type='BBoxHead',
            with_avg_pool=True,
            roi_feat_size=7,
            in_channels=2048,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True)
    ],
    # No separate mask RoI extractor: the mask head reuses the shared-head
    # RoI features (hence num_convs=0 and in_channels=2048 below).
    mask_roi_extractor=None,
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=0,
        in_channels=2048,
        conv_out_channels=256,
        num_classes=81))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    # Single-level RPN keeps many more pre-NMS proposals (12000) than the
    # FPN configs, which spread proposals across levels.
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=12000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # One rcnn config per cascade stage; the positive IoU threshold rises
    # per stage (0.5 -> 0.6 -> 0.7) to match each head's refinement role.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=14,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=14,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=14,
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])  # later stages contribute less loss
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=6000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5),
    keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# caffe-style preprocessing: BGR input, mean subtraction only (std = 1).
img_norm_cfg = dict(
    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
data = dict(
    imgs_per_gpu=1,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.01 with imgs_per_gpu=1 presumably targets 8 GPUs
# (total batch 8); scale the lr linearly for other setups — TODO confirm.
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy: 1x schedule — step the lr down at epochs 8 and 11 of 12.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_mask_rcnn_r50_caffe_c4_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_mask_rcnn_r50_fpn_1x.py
================================================
# model settings
# Cascade Mask R-CNN: 3-stage cascade, ResNet-50 + FPN backbone, 1x schedule.
model = dict(
    type='CascadeRCNN',
    num_stages=3,
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stage outputs into the FPN
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per cascade stage; target_stds tighten stage by stage
    # (0.1 -> 0.05 -> 0.033) so later stages make finer box refinements.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,  # 80 COCO classes + background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True)
    ],
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # One rcnn config per cascade stage; the positive IoU threshold rises
    # per stage (0.5 -> 0.6 -> 0.7) to match each head's refinement role.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])  # later stages contribute less loss
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5),
    keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std normalization (pytorch-style pretrained backbone).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.02 with imgs_per_gpu=2 presumably targets 8 GPUs
# (total batch 16); scale the lr linearly for other setups — TODO confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy: 1x schedule — step the lr down at epochs 8 and 11 of 12.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=81))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Fixed copy-paste leftover: this config previously reused
# './work_dirs/cascade_mask_rcnn_r50_fpn_1x', which would overwrite the
# r50 run's checkpoints and logs. Point it at its own directory instead.
work_dir = './work_dirs/cascade_mask_rcnn_x101_32x4d_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=81))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Fixed copy-paste leftover: this config previously reused
# './work_dirs/cascade_mask_rcnn_r50_fpn_1x', which would overwrite the
# r50 run's checkpoints and logs. Point it at its own directory instead.
work_dir = './work_dirs/cascade_mask_rcnn_x101_64x4d_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_rcnn_r101_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='modelzoo://resnet101',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
])
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_rcnn_r50_caffe_c4_1x.py
================================================
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnet50_caffe',
backbone=dict(
type='ResNet',
depth=50,
num_stages=3,
strides=(1, 2, 2),
dilations=(1, 1, 1),
out_indices=(2, ),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
shared_head=dict(
type='ResLayer',
depth=50,
stage=3,
stride=2,
dilation=1,
style='caffe',
norm_cfg=norm_cfg,
norm_eval=True),
rpn_head=dict(
type='RPNHead',
in_channels=1024,
feat_channels=1024,
anchor_scales=[2, 4, 8, 16, 32],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[16],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=1024,
featmap_strides=[16]),
bbox_head=[
dict(
type='BBoxHead',
with_avg_pool=True,
roi_feat_size=7,
in_channels=2048,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='BBoxHead',
with_avg_pool=True,
roi_feat_size=7,
in_channels=2048,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='BBoxHead',
with_avg_pool=True,
roi_feat_size=7,
in_channels=2048,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
])
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=12000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=14,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=14,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=14,
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=6000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
data = dict(
imgs_per_gpu=1,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Renamed from 'cascade_rcnn_r50_c4_1x' so the output directory matches
# the config filename (cascade_rcnn_r50_caffe_c4_1x.py).
work_dir = './work_dirs/cascade_rcnn_r50_caffe_c4_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_rcnn_r50_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='modelzoo://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
])
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_rcnn_x101_32x4d_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=32,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
])
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Fixed copy-paste leftover: this config previously reused
# './work_dirs/cascade_rcnn_r50_fpn_1x', which would overwrite the
# r50 run's checkpoints and logs. Point it at its own directory instead.
work_dir = './work_dirs/cascade_rcnn_x101_32x4d_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/cascade_rcnn_x101_64x4d_fpn_1x.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='open-mmlab://resnext101_64x4d',
backbone=dict(
type='ResNeXt',
depth=101,
groups=64,
base_width=4,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True)
])
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Fixed copy-paste leftover: this config previously reused
# './work_dirs/cascade_rcnn_r50_fpn_1x', which would overwrite the
# r50 run's checkpoints and logs. Point it at its own directory instead.
work_dir = './work_dirs/cascade_rcnn_x101_64x4d_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/README.md
================================================
# Deformable Convolutional Networks
## Introduction
```
@inproceedings{dai2017deformable,
title={Deformable Convolutional Networks},
author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
booktitle={Proceedings of the IEEE international conference on computer vision},
year={2017}
}
@article{zhu2018deformable,
title={Deformable ConvNets v2: More Deformable, Better Results},
author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
journal={arXiv preprint arXiv:1811.11168},
year={2018}
}
```
## Results and Models
| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:---------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 3.9 | 0.594 | 10.2 | 40.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-e41688c9.pth) |
| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 3.7 | 0.598 | 10.0 | 40.2 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-1b768045.pth) |
| R-50-FPN | Faster | pytorch | - | dpool | 1x | 4.6 | 0.714 | 8.7 | 37.8 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dpool_r50_fpn_1x_20190125-f4fc1d70.pth) |
| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.2 | 0.769 | 8.2 | 38.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdpool_r50_fpn_1x_20190125-473d0f3d.pth) |
| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 5.8 | 0.811 | 8.0 | 42.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-a7e31b65.pth) |
| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.1 | 1.126 | 6.6 | 43.4 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x_20190201-6d46376f.pth) |
| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 0.712 | 7.7 | 41.1 | 37.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-4f94ff79.pth) |
| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 0.712 | 7.7 | 41.3 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-c5601dc3.pth) |
| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 0.939 | 6.5 | 43.2 | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-decb6db5.pth) |
| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.4 | 0.660 | 7.6 | 44.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth) |
| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.3 | 0.881 | 6.8 | 45.0 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-aaa877cc.pth) |
| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 6.6 | 0.942 | 5.7 | 44.4 | 38.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-09d8a443.pth) |
| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.5 | 1.156 | 5.1 | 45.7 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-0d62c190.pth) |
**Notes:**
- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling.
- The dcn ops are modified from [Deformable-Convolution-V2-PyTorch](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch), which should be more memory-efficient and slightly faster.
- **The memory and train/inference time figures above are outdated.**
================================================
FILE: mmdetection/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
================================================
# Cascade Mask R-CNN, ResNet-50 + FPN backbone, deformable convolutions
# (DCNv1) in ResNet stages c3-c5, 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='CascadeRCNN',
    num_stages=3,  # three cascaded bbox refinement stages
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch',
        dcn=dict(
            modulated=False,  # plain deformable conv (DCNv1), no modulation mask
            deformable_groups=1,
            fallback_on_stride=False),
        # enable dcn in stages c3-c5 only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per cascade stage; target_stds shrink per stage because
    # later stages regress from increasingly accurate proposals.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,  # COCO: 80 object classes + 1 background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True)
    ],
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # Per-stage R-CNN training configs; the positive IoU threshold rises
    # 0.5 -> 0.6 -> 0.7 across the cascade stages.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,  # RoIs sampled per image for the head losses
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,  # side length of the mask training target
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])  # later stages weigh less in the loss
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5),  # threshold for binarizing predicted masks
    keep_all_stages=False)  # report only the final-stage results
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=True,  # load instance masks for the mask head
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=True,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
================================================
# Cascade R-CNN (boxes only), ResNet-50 + FPN backbone, deformable
# convolutions (DCNv1) in ResNet stages c3-c5, 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='CascadeRCNN',
    num_stages=3,  # three cascaded bbox refinement stages
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch',
        dcn=dict(
            modulated=False,  # plain deformable conv (DCNv1), no modulation mask
            deformable_groups=1,
            fallback_on_stride=False),
        # enable dcn in stages c3-c5 only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    # One bbox head per cascade stage; target_stds shrink per stage because
    # later stages regress from increasingly accurate proposals.
    bbox_head=[
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,  # COCO: 80 object classes + 1 background
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.05, 0.05, 0.1, 0.1],
            reg_class_agnostic=True),
        dict(
            type='SharedFCBBoxHead',
            num_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=81,
            target_means=[0., 0., 0., 0.],
            target_stds=[0.033, 0.033, 0.067, 0.067],
            reg_class_agnostic=True)
    ])
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    # Per-stage R-CNN training configs; the positive IoU threshold rises
    # 0.5 -> 0.6 -> 0.7 across the cascade stages.
    rcnn=[
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,  # RoIs sampled per image for the bbox head loss
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.6,
                neg_iou_thr=0.6,
                min_pos_iou=0.6,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False),
        dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.7,
                min_pos_iou=0.7,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)
    ],
    stage_loss_weights=[1, 0.5, 0.25])  # later stages weigh less in the loss
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),
    keep_all_stages=False)  # report only the final-stage results
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=False,  # detection-only model: skip mask annotations
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cascade_rcnn_dconv_c3-c5_r50_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
================================================
# Faster R-CNN, ResNet-50 + FPN backbone, deformable convolutions (DCNv1)
# in ResNet stages c3-c5, 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch',
        dcn=dict(
            modulated=False,  # plain deformable conv (DCNv1), no modulation mask
            deformable_groups=1,
            fallback_on_stride=False),
        # enable dcn in stages c3-c5 only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # COCO: 80 object classes + 1 background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the bbox head loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=False,  # detection-only model: skip mask annotations
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_r50_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
================================================
# Faster R-CNN, ResNeXt-101 (32x4d) + FPN backbone, deformable convolutions
# (DCNv1) in stages c3-c5, 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='FasterRCNN',
    pretrained='open-mmlab://resnext101_32x4d',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,  # ResNeXt cardinality
        base_width=4,  # channels per group (the "4d" in 32x4d)
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch',
        dcn=dict(
            modulated=False,  # plain deformable conv (DCNv1), no modulation mask
            groups=32,  # match the backbone's grouped convolutions
            deformable_groups=1,
            fallback_on_stride=False),
        # enable dcn in stages c3-c5 only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # COCO: 80 object classes + 1 background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the bbox head loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=False,  # detection-only model: skip mask annotations
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py
================================================
# Faster R-CNN, ResNet-50 + FPN backbone with a deformable RoI pooling
# (dpool) RoI layer instead of RoIAlign; 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        # deformable RoI pooling (DCNv1-style, no modulation)
        roi_layer=dict(
            type='DeformRoIPoolingPack',
            out_size=7,
            out_channels=256,
            no_trans=False,  # False: predict per-bin offsets (the deformable part)
            group_size=1,
            trans_std=0.1),  # scaling factor applied to the predicted offsets
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # COCO: 80 object classes + 1 background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the bbox head loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=False,  # detection-only model: skip mask annotations
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_dpool_r50_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
================================================
# Faster R-CNN, ResNet-50 + FPN backbone, modulated deformable convolutions
# (DCNv2) in ResNet stages c3-c5, 1x (12-epoch) schedule on COCO.
# model settings
model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose c2-c5 feature maps to the neck
        frozen_stages=1,  # freeze the stem and the first residual stage
        style='pytorch',
        dcn=dict(
            modulated=True,  # modulated deformable conv (DCNv2)
            deformable_groups=1,
            fallback_on_stride=False),
        # enable dcn in stages c3-c5 only
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN level
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # COCO: 80 object classes + 1 background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables the ignore-region overlap check
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,  # -1: use the default positive-sample loss weight
        smoothl1_beta=1 / 9.0,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the bbox head loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# ImageNet RGB mean/std on the 0-255 pixel scale; to_rgb converts loaded BGR.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # per-GPU batch size
    workers_per_gpu=2,  # dataloader worker processes per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # max (long edge, short edge) for keep-ratio resize
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad so H and W are multiples of the largest FPN stride
        flip_ratio=0.5,  # random horizontal flip probability
        with_mask=False,  # detection-only model: skip mask annotations
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flip augmentation during evaluation
        with_mask=False,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): mmdetection convention is lr=0.02 for 8 GPUs x 2 imgs/GPU;
# scale the learning rate linearly for a different total batch size — confirm.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr / 3
    step=[8, 11])  # decay the lr at epochs 8 and 11
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_r50_fpn_1x'
load_from = None  # optional checkpoint to initialize weights from
resume_from = None  # optional checkpoint to resume training state from
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py
================================================
# model settings
model = dict(
type='FasterRCNN',
pretrained='modelzoo://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='ModulatedDeformRoIPoolingPack',
out_size=7,
out_channels=256,
no_trans=False,
group_size=1,
trans_std=0.1),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Dataloader configuration: 2 images and 2 worker processes per GPU, with
# separate settings for the train / val / test splits.
data = {
    'imgs_per_gpu': 2,
    'workers_per_gpu': 2,
    # Training split: random horizontal flip half the time; boxes and crowd
    # annotations are loaded but masks are not.
    'train': {
        'type': dataset_type,
        'ann_file': data_root + 'annotations/instances_train2017.json',
        'img_prefix': data_root + 'train2017/',
        'img_scale': (1333, 800),
        'img_norm_cfg': img_norm_cfg,
        'size_divisor': 32,
        'flip_ratio': 0.5,
        'with_mask': False,
        'with_crowd': True,
        'with_label': True,
    },
    # Validation split: deterministic (no flipping).
    'val': {
        'type': dataset_type,
        'ann_file': data_root + 'annotations/instances_val2017.json',
        'img_prefix': data_root + 'val2017/',
        'img_scale': (1333, 800),
        'img_norm_cfg': img_norm_cfg,
        'size_divisor': 32,
        'flip_ratio': 0,
        'with_mask': False,
        'with_crowd': True,
        'with_label': True,
    },
    # Test split reuses the val2017 annotations and runs in test mode
    # (no labels loaded).
    'test': {
        'type': dataset_type,
        'ann_file': data_root + 'annotations/instances_val2017.json',
        'img_prefix': data_root + 'val2017/',
        'img_scale': (1333, 800),
        'img_norm_cfg': img_norm_cfg,
        'size_divisor': 32,
        'flip_ratio': 0,
        'with_mask': False,
        'with_label': False,
        'test_mode': True,
    },
}
# optimizer
# SGD optimiser; gradients are clipped to an L2 norm of 35.
optimizer = {'type': 'SGD', 'lr': 0.02, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': {'max_norm': 35, 'norm_type': 2}}
# learning policy
# Step learning-rate schedule with a linear warmup of 500 iterations,
# starting at one third of the base LR; decay steps at 8 and 11.
# NOTE(review): step values are presumably epoch indices — confirm against
# the runner's LR hook.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 500,
    'warmup_ratio': 1.0 / 3,
    'step': [8, 11],
}
# NOTE(review): interval is presumably measured in epochs — confirm against
# the checkpoint hook.
checkpoint_config = {'interval': 1}
# yapf:disable
log_config = {
    'interval': 50,  # log every 50 iterations
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}
# yapf:enable
# runtime settings
# Number of epochs to train for.
total_epochs = 12
# Backend for distributed training; presumably forwarded to
# torch.distributed initialisation — confirm in the launcher code.
dist_params = dict(backend='nccl')
# Logging verbosity level.
log_level = 'INFO'
# Output directory; presumably receives checkpoints and logs — verify
# against the training entry point.
work_dir = './work_dirs/faster_rcnn_mdpool_r50_fpn_1x'
# No initial checkpoint to load weights from.
load_from = None
# No checkpoint to resume training state from.
resume_from = None
# Single workflow phase; NOTE(review): tuple is presumably (mode, epochs)
# as interpreted by the runner — confirm.
workflow = [('train', 1)]
================================================
FILE: mmdetection/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
================================================
# model settings
model = dict(
type='MaskRCNN',
pretrained='modelzoo://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch',
dcn=dict(
modulated=False,
deformable_groups=1,
fallback_on_stride=False),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_l
gitextract_ymxjag4v/
├── .dockerignore
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── configs/
│ └── htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900.py
├── mmdetection/
│ ├── .travis.yml
│ ├── GETTING_STARTED.md
│ ├── INSTALL.md
│ ├── LICENSE
│ ├── MODEL_ZOO.md
│ ├── README.md
│ ├── TECHNICAL_DETAILS.md
│ ├── compile.sh
│ ├── configs/
│ │ ├── cascade_mask_rcnn_r101_fpn_1x.py
│ │ ├── cascade_mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── cascade_mask_rcnn_r50_fpn_1x.py
│ │ ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── cascade_rcnn_r101_fpn_1x.py
│ │ ├── cascade_rcnn_r50_caffe_c4_1x.py
│ │ ├── cascade_rcnn_r50_fpn_1x.py
│ │ ├── cascade_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── cascade_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── dcn/
│ │ │ ├── README.md
│ │ │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
│ │ │ ├── faster_rcnn_dpool_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
│ │ │ ├── faster_rcnn_mdpool_r50_fpn_1x.py
│ │ │ └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ ├── fast_mask_rcnn_r101_fpn_1x.py
│ │ ├── fast_mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── fast_mask_rcnn_r50_fpn_1x.py
│ │ ├── fast_rcnn_r101_fpn_1x.py
│ │ ├── fast_rcnn_r50_caffe_c4_1x.py
│ │ ├── fast_rcnn_r50_fpn_1x.py
│ │ ├── faster_rcnn_ohem_r50_fpn_1x.py
│ │ ├── faster_rcnn_r101_fpn_1x.py
│ │ ├── faster_rcnn_r50_caffe_c4_1x.py
│ │ ├── faster_rcnn_r50_fpn_1x.py
│ │ ├── faster_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── faster_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── fcos/
│ │ │ ├── README.md
│ │ │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
│ │ │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
│ │ │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py
│ │ ├── gn/
│ │ │ ├── README.md
│ │ │ ├── mask_rcnn_r101_fpn_gn_2x.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_2x.py
│ │ │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py
│ │ ├── gn+ws/
│ │ │ ├── README.md
│ │ │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
│ │ │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py
│ │ │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
│ │ ├── htc/
│ │ │ ├── README.md
│ │ │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
│ │ │ ├── htc_r101_fpn_20e.py
│ │ │ ├── htc_r50_fpn_1x.py
│ │ │ ├── htc_r50_fpn_20e.py
│ │ │ ├── htc_without_semantic_r50_fpn_1x.py
│ │ │ ├── htc_x101_32x4d_fpn_20e_16gpu.py
│ │ │ └── htc_x101_64x4d_fpn_20e_16gpu.py
│ │ ├── mask_rcnn_r101_fpn_1x.py
│ │ ├── mask_rcnn_r50_caffe_c4_1x.py
│ │ ├── mask_rcnn_r50_fpn_1x.py
│ │ ├── mask_rcnn_x101_32x4d_fpn_1x.py
│ │ ├── mask_rcnn_x101_64x4d_fpn_1x.py
│ │ ├── pascal_voc/
│ │ │ ├── faster_rcnn_r50_fpn_1x_voc0712.py
│ │ │ ├── ssd300_voc.py
│ │ │ └── ssd512_voc.py
│ │ ├── retinanet_r101_fpn_1x.py
│ │ ├── retinanet_r50_fpn_1x.py
│ │ ├── retinanet_x101_32x4d_fpn_1x.py
│ │ ├── retinanet_x101_64x4d_fpn_1x.py
│ │ ├── rpn_r101_fpn_1x.py
│ │ ├── rpn_r50_caffe_c4_1x.py
│ │ ├── rpn_r50_fpn_1x.py
│ │ ├── rpn_x101_32x4d_fpn_1x.py
│ │ ├── rpn_x101_64x4d_fpn_1x.py
│ │ ├── ssd300_coco.py
│ │ └── ssd512_coco.py
│ ├── mmdet/
│ │ ├── __init__.py
│ │ ├── apis/
│ │ │ ├── __init__.py
│ │ │ ├── env.py
│ │ │ ├── inference.py
│ │ │ └── train.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── anchor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anchor_generator.py
│ │ │ │ └── anchor_target.py
│ │ │ ├── bbox/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── assign_sampling.py
│ │ │ │ ├── assigners/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── assign_result.py
│ │ │ │ │ ├── base_assigner.py
│ │ │ │ │ └── max_iou_assigner.py
│ │ │ │ ├── bbox_target.py
│ │ │ │ ├── geometry.py
│ │ │ │ ├── samplers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_sampler.py
│ │ │ │ │ ├── combined_sampler.py
│ │ │ │ │ ├── instance_balanced_pos_sampler.py
│ │ │ │ │ ├── iou_balanced_neg_sampler.py
│ │ │ │ │ ├── ohem_sampler.py
│ │ │ │ │ ├── pseudo_sampler.py
│ │ │ │ │ ├── random_sampler.py
│ │ │ │ │ └── sampling_result.py
│ │ │ │ └── transforms.py
│ │ │ ├── evaluation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_overlaps.py
│ │ │ │ ├── class_names.py
│ │ │ │ ├── coco_utils.py
│ │ │ │ ├── eval_hooks.py
│ │ │ │ ├── mean_ap.py
│ │ │ │ └── recall.py
│ │ │ ├── loss/
│ │ │ │ ├── __init__.py
│ │ │ │ └── losses.py
│ │ │ ├── mask/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── mask_target.py
│ │ │ │ └── utils.py
│ │ │ ├── post_processing/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_nms.py
│ │ │ │ └── merge_augs.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── dist_utils.py
│ │ │ └── misc.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── coco.py
│ │ │ ├── concat_dataset.py
│ │ │ ├── custom.py
│ │ │ ├── extra_aug.py
│ │ │ ├── loader/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── build_loader.py
│ │ │ │ └── sampler.py
│ │ │ ├── repeat_dataset.py
│ │ │ ├── transforms.py
│ │ │ ├── utils.py
│ │ │ ├── voc.py
│ │ │ └── xml_style.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anchor_head.py
│ │ │ │ ├── fcos_head.py
│ │ │ │ ├── retina_head.py
│ │ │ │ ├── rpn_head.py
│ │ │ │ └── ssd_head.py
│ │ │ ├── backbones/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── resnet.py
│ │ │ │ ├── resnext.py
│ │ │ │ └── ssd_vgg.py
│ │ │ ├── bbox_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bbox_head.py
│ │ │ │ └── convfc_bbox_head.py
│ │ │ ├── builder.py
│ │ │ ├── detectors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── cascade_rcnn.py
│ │ │ │ ├── ensemble_htc.py
│ │ │ │ ├── fast_rcnn.py
│ │ │ │ ├── faster_rcnn.py
│ │ │ │ ├── fcos.py
│ │ │ │ ├── htc.py
│ │ │ │ ├── mask_rcnn.py
│ │ │ │ ├── retinanet.py
│ │ │ │ ├── rpn.py
│ │ │ │ ├── single_stage.py
│ │ │ │ ├── test_mixins.py
│ │ │ │ └── two_stage.py
│ │ │ ├── mask_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fcn_mask_head.py
│ │ │ │ ├── fused_semantic_head.py
│ │ │ │ └── htc_mask_head.py
│ │ │ ├── necks/
│ │ │ │ ├── __init__.py
│ │ │ │ └── fpn.py
│ │ │ ├── registry.py
│ │ │ ├── roi_extractors/
│ │ │ │ ├── __init__.py
│ │ │ │ └── single_level.py
│ │ │ ├── shared_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ └── res_layer.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── conv_module.py
│ │ │ ├── conv_ws.py
│ │ │ ├── norm.py
│ │ │ ├── scale.py
│ │ │ └── weight_init.py
│ │ └── ops/
│ │ ├── __init__.py
│ │ ├── dcn/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── deform_conv.py
│ │ │ │ └── deform_pool.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── deform_conv.py
│ │ │ │ └── deform_pool.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── deform_conv_cuda.cpp
│ │ │ ├── deform_conv_cuda_kernel.cu
│ │ │ ├── deform_pool_cuda.cpp
│ │ │ └── deform_pool_cuda_kernel.cu
│ │ ├── nms/
│ │ │ ├── __init__.py
│ │ │ ├── nms_wrapper.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── nms_cpu.cpp
│ │ │ ├── nms_cuda.cpp
│ │ │ ├── nms_kernel.cu
│ │ │ └── soft_nms_cpu.pyx
│ │ ├── roi_align/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_align.py
│ │ │ ├── gradcheck.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_align.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── roi_align_cuda.cpp
│ │ │ └── roi_align_kernel.cu
│ │ ├── roi_pool/
│ │ │ ├── __init__.py
│ │ │ ├── functions/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_pool.py
│ │ │ ├── gradcheck.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ └── roi_pool.py
│ │ │ ├── setup.py
│ │ │ └── src/
│ │ │ ├── roi_pool_cuda.cpp
│ │ │ └── roi_pool_kernel.cu
│ │ └── sigmoid_focal_loss/
│ │ ├── __init__.py
│ │ ├── functions/
│ │ │ ├── __init__.py
│ │ │ └── sigmoid_focal_loss.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ └── sigmoid_focal_loss.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── sigmoid_focal_loss.cpp
│ │ └── sigmoid_focal_loss_cuda.cu
│ ├── setup.py
│ └── tools/
│ ├── analyze_logs.py
│ ├── coco_eval.py
│ ├── convert_datasets/
│ │ └── pascal_voc.py
│ ├── dist_test.sh
│ ├── dist_train.sh
│ ├── publish_model.py
│ ├── slurm_test.sh
│ ├── slurm_train.sh
│ ├── test.py
│ ├── test_ensemble.py
│ ├── train.py
│ ├── upgrade_model_version.py
│ └── voc_eval.py
├── scrips/
│ ├── create_mmdetection_test.sh
│ ├── create_mmdetection_train.sh
│ ├── dist_test.sh
│ ├── dist_test_ensemble.sh
│ ├── dist_train.sh
│ ├── prepare_weights.sh
│ └── split.sh
└── src/
├── __init__.py
├── create_mmdetection_test.py
├── create_mmdetection_train.py
├── draw.py
├── eda.py
├── metric.py
├── prune.py
├── rle.py
├── rm_attribute_classes.py
├── split.py
├── submit.py
├── utils.py
└── visualization.py
SYMBOL INDEX (624 symbols across 121 files)
FILE: mmdetection/mmdet/apis/env.py
function init_dist (line 13) | def init_dist(launcher, backend='nccl', **kwargs):
function _init_dist_pytorch (line 26) | def _init_dist_pytorch(backend, **kwargs):
function _init_dist_mpi (line 34) | def _init_dist_mpi(backend, **kwargs):
function _init_dist_slurm (line 38) | def _init_dist_slurm(backend, port=29500, **kwargs):
function set_random_seed (line 53) | def set_random_seed(seed):
function get_root_logger (line 60) | def get_root_logger(log_level=logging.INFO):
FILE: mmdetection/mmdet/apis/inference.py
function init_detector (line 15) | def init_detector(config, checkpoint=None, device='cuda:0'):
function inference_detector (line 48) | def inference_detector(model, imgs):
function _prepare_data (line 71) | def _prepare_data(img, img_transform, cfg, device):
function _inference_single (line 89) | def _inference_single(model, img, img_transform, device):
function _inference_generator (line 97) | def _inference_generator(model, imgs, img_transform, device):
function show_result (line 103) | def show_result(img, result, class_names, score_thr=0.3, out_file=None):
FILE: mmdetection/mmdet/apis/train.py
function parse_losses (line 18) | def parse_losses(losses):
function batch_processor (line 38) | def batch_processor(model, data, train_mode):
function train_detector (line 48) | def train_detector(model,
function build_optimizer (line 64) | def build_optimizer(model, optimizer_cfg):
function _dist_train (line 134) | def _dist_train(model, dataset, cfg, validate=False):
function _non_dist_train (line 174) | def _non_dist_train(model, dataset, cfg, validate=False):
FILE: mmdetection/mmdet/core/anchor/anchor_generator.py
class AnchorGenerator (line 4) | class AnchorGenerator(object):
method __init__ (line 6) | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=No...
method num_base_anchors (line 15) | def num_base_anchors(self):
method gen_base_anchors (line 18) | def gen_base_anchors(self):
method _meshgrid (line 45) | def _meshgrid(self, x, y, row_major=True):
method grid_anchors (line 53) | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
method valid_flags (line 72) | def valid_flags(self, featmap_size, valid_size, device='cuda'):
FILE: mmdetection/mmdet/core/anchor/anchor_target.py
function anchor_target (line 7) | def anchor_target(anchor_list,
function images_to_levels (line 79) | def images_to_levels(target, num_level_anchors):
function anchor_target_single (line 94) | def anchor_target_single(flat_anchors,
function anchor_inside_flags (line 162) | def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
function unmap (line 176) | def unmap(data, count, inds, fill=0):
FILE: mmdetection/mmdet/core/bbox/assign_sampling.py
function build_assigner (line 6) | def build_assigner(cfg, **kwargs):
function build_sampler (line 17) | def build_sampler(cfg, **kwargs):
function assign_and_sample (line 28) | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
FILE: mmdetection/mmdet/core/bbox/assigners/assign_result.py
class AssignResult (line 4) | class AssignResult(object):
method __init__ (line 6) | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
method add_gt_ (line 12) | def add_gt_(self, gt_labels):
FILE: mmdetection/mmdet/core/bbox/assigners/base_assigner.py
class BaseAssigner (line 4) | class BaseAssigner(metaclass=ABCMeta):
method assign (line 7) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...
FILE: mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py
class MaxIoUAssigner (line 8) | class MaxIoUAssigner(BaseAssigner):
method __init__ (line 33) | def __init__(self,
method assign (line 47) | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=N...
method assign_wrt_overlaps (line 93) | def assign_wrt_overlaps(self, overlaps, gt_labels=None):
FILE: mmdetection/mmdet/core/bbox/bbox_target.py
function bbox_target (line 7) | def bbox_target(pos_bboxes_list,
function bbox_target_single (line 35) | def bbox_target_single(pos_bboxes,
function expand_target (line 64) | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
FILE: mmdetection/mmdet/core/bbox/geometry.py
function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
FILE: mmdetection/mmdet/core/bbox/samplers/base_sampler.py
class BaseSampler (line 8) | class BaseSampler(metaclass=ABCMeta):
method __init__ (line 10) | def __init__(self,
method _sample_pos (line 24) | def _sample_pos(self, assign_result, num_expected, **kwargs):
method _sample_neg (line 28) | def _sample_neg(self, assign_result, num_expected, **kwargs):
method sample (line 31) | def sample(self,
FILE: mmdetection/mmdet/core/bbox/samplers/combined_sampler.py
class CombinedSampler (line 5) | class CombinedSampler(BaseSampler):
method __init__ (line 7) | def __init__(self, pos_sampler, neg_sampler, **kwargs):
method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
method _sample_neg (line 15) | def _sample_neg(self, **kwargs):
FILE: mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py
class InstanceBalancedPosSampler (line 7) | class InstanceBalancedPosSampler(RandomSampler):
method _sample_pos (line 9) | def _sample_pos(self, assign_result, num_expected, **kwargs):
FILE: mmdetection/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py
class IoUBalancedNegSampler (line 7) | class IoUBalancedNegSampler(RandomSampler):
method __init__ (line 9) | def __init__(self,
method _sample_neg (line 22) | def _sample_neg(self, assign_result, num_expected, **kwargs):
FILE: mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py
class OHEMSampler (line 7) | class OHEMSampler(BaseSampler):
method __init__ (line 9) | def __init__(self,
method hard_mining (line 26) | def hard_mining(self, inds, num_expected, bboxes, labels, feats):
method _sample_pos (line 43) | def _sample_pos(self,
method _sample_neg (line 59) | def _sample_neg(self,
FILE: mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py
class PseudoSampler (line 7) | class PseudoSampler(BaseSampler):
method __init__ (line 9) | def __init__(self, **kwargs):
method _sample_pos (line 12) | def _sample_pos(self, **kwargs):
method _sample_neg (line 15) | def _sample_neg(self, **kwargs):
method sample (line 18) | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
FILE: mmdetection/mmdet/core/bbox/samplers/random_sampler.py
class RandomSampler (line 7) | class RandomSampler(BaseSampler):
method __init__ (line 9) | def __init__(self,
method random_choice (line 19) | def random_choice(gallery, num):
method _sample_pos (line 35) | def _sample_pos(self, assign_result, num_expected, **kwargs):
method _sample_neg (line 45) | def _sample_neg(self, assign_result, num_expected, **kwargs):
FILE: mmdetection/mmdet/core/bbox/samplers/sampling_result.py
class SamplingResult (line 4) | class SamplingResult(object):
method __init__ (line 6) | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
method bboxes (line 23) | def bboxes(self):
FILE: mmdetection/mmdet/core/bbox/transforms.py
function bbox2delta (line 6) | def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
function delta2bbox (line 34) | def delta2bbox(rois,
function bbox_flip (line 71) | def bbox_flip(bboxes, img_shape):
function bbox_mapping (line 91) | def bbox_mapping(bboxes, img_shape, scale_factor, flip):
function bbox_mapping_back (line 99) | def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
function bbox2roi (line 106) | def bbox2roi(bbox_list):
function roi2bbox (line 128) | def roi2bbox(rois):
function bbox2result (line 138) | def bbox2result(bboxes, labels, num_classes):
function distance2bbox (line 159) | def distance2bbox(points, distance, max_shape=None):
FILE: mmdetection/mmdet/core/evaluation/bbox_overlaps.py
function bbox_overlaps (line 4) | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
FILE: mmdetection/mmdet/core/evaluation/class_names.py
function voc_classes (line 4) | def voc_classes():
function imagenet_det_classes (line 12) | def imagenet_det_classes():
function imagenet_vid_classes (line 53) | def imagenet_vid_classes():
function coco_classes (line 63) | def coco_classes():
function get_classes (line 89) | def get_classes(dataset):
FILE: mmdetection/mmdet/core/evaluation/coco_utils.py
function coco_eval (line 9) | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
function fast_eval_recall (line 41) | def fast_eval_recall(results,
function xyxy2xywh (line 78) | def xyxy2xywh(bbox):
function proposal2json (line 88) | def proposal2json(dataset, results):
function det2json (line 103) | def det2json(dataset, results):
function segm2json (line 120) | def segm2json(dataset, results):
function results2json (line 140) | def results2json(dataset, results, out_file):
FILE: mmdetection/mmdet/core/evaluation/eval_hooks.py
class DistEvalHook (line 18) | class DistEvalHook(Hook):
method __init__ (line 20) | def __init__(self, dataset, interval=1):
method after_train_epoch (line 32) | def after_train_epoch(self, runner):
method evaluate (line 73) | def evaluate(self):
class DistEvalmAPHook (line 77) | class DistEvalmAPHook(DistEvalHook):
method evaluate (line 79) | def evaluate(self, runner, results):
class CocoDistEvalRecallHook (line 118) | class CocoDistEvalRecallHook(DistEvalHook):
method __init__ (line 120) | def __init__(self,
method evaluate (line 128) | def evaluate(self, runner, results):
class CocoDistEvalmAPHook (line 138) | class CocoDistEvalmAPHook(DistEvalHook):
method evaluate (line 140) | def evaluate(self, runner, results):
FILE: mmdetection/mmdet/core/evaluation/mean_ap.py
function average_precision (line 9) | def average_precision(recalls, precisions, mode='area'):
function tpfp_imagenet (line 56) | def tpfp_imagenet(det_bboxes,
function tpfp_default (line 137) | def tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=...
function get_cls_results (line 202) | def get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_...
function eval_map (line 220) | def eval_map(det_results,
function print_map_summary (line 332) | def print_map_summary(mean_ap, results, dataset=None):
FILE: mmdetection/mmdet/core/evaluation/recall.py
function _recalls (line 7) | def _recalls(all_ious, proposal_nums, thrs):
function set_recall_param (line 40) | def set_recall_param(proposal_nums, iou_thrs):
function eval_recalls (line 62) | def eval_recalls(gts,
function print_recall_summary (line 105) | def print_recall_summary(recalls,
function plot_num_recall (line 138) | def plot_num_recall(recalls, proposal_nums):
function plot_iou_recall (line 163) | def plot_iou_recall(recalls, iou_thrs):
FILE: mmdetection/mmdet/core/loss/losses.py
function weighted_nll_loss (line 9) | def weighted_nll_loss(pred, label, weight, avg_factor=None):
function weighted_cross_entropy (line 16) | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=...
function weighted_binary_cross_entropy (line 26) | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
function py_sigmoid_focal_loss (line 36) | def py_sigmoid_focal_loss(pred,
function weighted_sigmoid_focal_loss (line 59) | def weighted_sigmoid_focal_loss(pred,
function mask_cross_entropy (line 73) | def mask_cross_entropy(pred, target, label):
function smooth_l1_loss (line 81) | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):
function weighted_smoothl1 (line 97) | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
function accuracy (line 104) | def accuracy(pred, target, topk=1):
function _expand_binary_labels (line 123) | def _expand_binary_labels(labels, label_weights, label_channels):
function iou_loss (line 133) | def iou_loss(pred_bboxes, target_bboxes, reduction='mean'):
FILE: mmdetection/mmdet/core/mask/mask_target.py
function mask_target (line 6) | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_...
function mask_target_single (line 15) | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
FILE: mmdetection/mmdet/core/mask/utils.py
function split_combined_polys (line 4) | def split_combined_polys(polys, poly_lens, polys_per_mask):
FILE: mmdetection/mmdet/core/post_processing/bbox_nms.py
function multiclass_nms (line 6) | def multiclass_nms(multi_bboxes,
FILE: mmdetection/mmdet/core/post_processing/merge_augs.py
function merge_aug_proposals (line 9) | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
function merge_aug_bboxes (line 41) | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
function merge_aug_scores (line 68) | def merge_aug_scores(aug_scores):
function merge_aug_masks (line 76) | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
FILE: mmdetection/mmdet/core/utils/dist_utils.py
function _allreduce_coalesced (line 9) | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
function allreduce_grads (line 31) | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
class DistOptimizerHook (line 44) | class DistOptimizerHook(OptimizerHook):
method __init__ (line 46) | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
method after_train_iter (line 51) | def after_train_iter(self, runner):
FILE: mmdetection/mmdet/core/utils/misc.py
function tensor2imgs (line 8) | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
function multi_apply (line 21) | def multi_apply(func, *args, **kwargs):
function unmap (line 27) | def unmap(data, count, inds, fill=0):
FILE: mmdetection/mmdet/datasets/coco.py
class CocoDataset (line 7) | class CocoDataset(CustomDataset):
method load_annotations (line 24) | def load_annotations(self, ann_file):
method get_ann_info (line 39) | def get_ann_info(self, idx):
method _filter_imgs (line 45) | def _filter_imgs(self, min_size=32):
method _parse_ann_info (line 56) | def _parse_ann_info(self, ann_info, with_mask=True):
FILE: mmdetection/mmdet/datasets/concat_dataset.py
class ConcatDataset (line 5) | class ConcatDataset(_ConcatDataset):
method __init__ (line 15) | def __init__(self, datasets):
FILE: mmdetection/mmdet/datasets/custom.py
class CustomDataset (line 15) | class CustomDataset(Dataset):
method __init__ (line 39) | def __init__(self,
method __len__ (line 130) | def __len__(self):
method load_annotations (line 133) | def load_annotations(self, ann_file):
method load_proposals (line 136) | def load_proposals(self, proposal_file):
method get_ann_info (line 139) | def get_ann_info(self, idx):
method _filter_imgs (line 142) | def _filter_imgs(self, min_size=32):
method _set_group_flag (line 150) | def _set_group_flag(self):
method _rand_another (line 162) | def _rand_another(self, idx):
method __getitem__ (line 166) | def __getitem__(self, idx):
method prepare_train_img (line 176) | def prepare_train_img(self, idx):
method prepare_test_img (line 269) | def prepare_test_img(self, idx):
FILE: mmdetection/mmdet/datasets/extra_aug.py
class ExtraAugmentation (line 8) | class ExtraAugmentation(object):
method __init__ (line 10) | def __init__(self, **kwargs):
method transform_from_dict (line 13) | def transform_from_dict(self, **kwargs):
method __call__ (line 21) | def __call__(self, img):
FILE: mmdetection/mmdet/datasets/loader/build_loader.py
function build_dataloader (line 15) | def build_dataloader(dataset,
FILE: mmdetection/mmdet/datasets/loader/sampler.py
class DistributedSampler (line 12) | class DistributedSampler(_DistributedSampler):
method __init__ (line 14) | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
method __iter__ (line 18) | def __iter__(self):
class GroupSampler (line 38) | class GroupSampler(Sampler):
method __init__ (line 40) | def __init__(self, dataset, samples_per_gpu=1):
method __iter__ (line 51) | def __iter__(self):
method __len__ (line 74) | def __len__(self):
class DistributedGroupSampler (line 78) | class DistributedGroupSampler(Sampler):
method __init__ (line 93) | def __init__(self,
method __iter__ (line 119) | def __iter__(self):
method __len__ (line 155) | def __len__(self):
method set_epoch (line 158) | def set_epoch(self, epoch):
FILE: mmdetection/mmdet/datasets/repeat_dataset.py
class RepeatDataset (line 4) | class RepeatDataset(object):
method __init__ (line 6) | def __init__(self, dataset, times):
method __getitem__ (line 15) | def __getitem__(self, idx):
method __len__ (line 18) | def __len__(self):
FILE: mmdetection/mmdet/datasets/transforms.py
class ImageTransform (line 16) | class ImageTransform(object):
method __init__ (line 26) | def __init__(self,
method __call__ (line 36) | def __call__(self, img, scale, flip=False, keep_ratio=True):
function bbox_flip (line 57) | def bbox_flip(bboxes, img_shape):
class BboxTransform (line 72) | class BboxTransform(object):
method __init__ (line 80) | def __init__(self, max_num_gts=None):
method __call__ (line 83) | def __call__(self, bboxes, img_shape, scale_factor, flip=False):
class MaskTransform (line 98) | class MaskTransform(object):
method __call__ (line 106) | def __call__(self, masks, pad_shape, scale_factor, flip=False):
class SegMapTransform (line 120) | class SegMapTransform(object):
method __init__ (line 128) | def __init__(self, size_divisor=None):
method __call__ (line 131) | def __call__(self, img, scale, flip=False, keep_ratio=True):
class Numpy2Tensor (line 143) | class Numpy2Tensor(object):
method __init__ (line 145) | def __init__(self):
method __call__ (line 148) | def __call__(self, *args):
class RandomCropNearBBox (line 155) | class RandomCropNearBBox(A.DualTransform):
method __init__ (line 166) | def __init__(self, max_part_shift=0.3, always_apply=False, p=1.0):
method apply (line 170) | def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
method get_params_dependent_on_targets (line 173) | def get_params_dependent_on_targets(self, params):
method apply_to_bbox (line 190) | def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **pa...
method targets_as_params (line 203) | def targets_as_params(self):
method get_transform_init_args_names (line 206) | def get_transform_init_args_names(self):
FILE: mmdetection/mmdet/datasets/utils.py
function to_tensor (line 15) | def to_tensor(data):
function random_scale (line 36) | def random_scale(img_scales, mode='range'):
function show_ann (line 70) | def show_ann(coco, img, ann_info):
function get_dataset (line 77) | def get_dataset(data_cfg):
function rle_decode (line 119) | def rle_decode(rle, h: int, w: int):
FILE: mmdetection/mmdet/datasets/voc.py
class VOCDataset (line 4) | class VOCDataset(XMLDataset):
method __init__ (line 11) | def __init__(self, **kwargs):
FILE: mmdetection/mmdet/datasets/xml_style.py
class XMLDataset (line 10) | class XMLDataset(CustomDataset):
method __init__ (line 12) | def __init__(self, **kwargs):
method load_annotations (line 16) | def load_annotations(self, ann_file):
method get_ann_info (line 32) | def get_ann_info(self, idx):
FILE: mmdetection/mmdet/models/anchor_heads/anchor_head.py
class AnchorHead (line 16) | class AnchorHead(nn.Module):
method __init__ (line 33) | def __init__(self,
method _init_layers (line 72) | def _init_layers(self):
method init_weights (line 77) | def init_weights(self):
method forward_single (line 81) | def forward_single(self, x):
method forward (line 86) | def forward(self, feats):
method get_anchors (line 89) | def get_anchors(self, featmap_sizes, img_metas):
method loss_single (line 128) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
method loss (line 168) | def loss(self,
method get_bboxes (line 213) | def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,
method get_bboxes_single (line 239) | def get_bboxes_single(self,
FILE: mmdetection/mmdet/models/anchor_heads/fcos_head.py
class FCOSHead (line 15) | class FCOSHead(nn.Module):
method __init__ (line 17) | def __init__(self,
method _init_layers (line 41) | def _init_layers(self):
method init_weights (line 73) | def init_weights(self):
method forward (line 83) | def forward(self, feats):
method forward_single (line 86) | def forward_single(self, x, scale):
method loss (line 101) | def loss(self,
method get_bboxes (line 173) | def get_bboxes(self,
method get_bboxes_single (line 205) | def get_bboxes_single(self,
method get_points (line 254) | def get_points(self, featmap_sizes, dtype, device):
method get_points_single (line 272) | def get_points_single(self, featmap_size, stride, dtype, device):
method fcos_target (line 283) | def fcos_target(self, points, gt_bboxes_list, gt_labels_list):
method fcos_target_single (line 321) | def fcos_target_single(self, gt_bboxes, gt_labels, points, regress_ran...
method centerness_target (line 364) | def centerness_target(self, pos_bbox_targets):
FILE: mmdetection/mmdet/models/anchor_heads/retina_head.py
class RetinaHead (line 11) | class RetinaHead(AnchorHead):
method __init__ (line 13) | def __init__(self,
method _init_layers (line 38) | def _init_layers(self):
method init_weights (line 70) | def init_weights(self):
method forward_single (line 79) | def forward_single(self, x):
FILE: mmdetection/mmdet/models/anchor_heads/rpn_head.py
class RPNHead (line 13) | class RPNHead(AnchorHead):
method __init__ (line 15) | def __init__(self, in_channels, **kwargs):
method _init_layers (line 18) | def _init_layers(self):
method init_weights (line 25) | def init_weights(self):
method forward_single (line 30) | def forward_single(self, x):
method loss (line 37) | def loss(self,
method get_bboxes_single (line 55) | def get_bboxes_single(self,
FILE: mmdetection/mmdet/models/anchor_heads/ssd_head.py
class SSDHead (line 14) | class SSDHead(AnchorHead):
method __init__ (line 16) | def __init__(self,
method init_weights (line 95) | def init_weights(self):
method forward (line 100) | def forward(self, feats):
method loss_single (line 109) | def loss_single(self, cls_score, bbox_pred, labels, label_weights,
method loss (line 133) | def loss(self,
FILE: mmdetection/mmdet/models/backbones/resnet.py
class BasicBlock (line 15) | class BasicBlock(nn.Module):
method __init__ (line 18) | def __init__(self,
method norm1 (line 61) | def norm1(self):
method norm2 (line 65) | def norm2(self):
method forward (line 68) | def forward(self, x):
class Bottleneck (line 87) | class Bottleneck(nn.Module):
method __init__ (line 90) | def __init__(self,
method norm1 (line 191) | def norm1(self):
method norm2 (line 195) | def norm2(self):
method norm3 (line 199) | def norm3(self):
method forward (line 202) | def forward(self, x):
function make_res_layer (line 244) | def make_res_layer(block,
class ResNet (line 299) | class ResNet(nn.Module):
method __init__ (line 331) | def __init__(self,
method norm1 (line 403) | def norm1(self):
method _make_stem_layer (line 406) | def _make_stem_layer(self):
method _freeze_stages (line 420) | def _freeze_stages(self):
method init_weights (line 433) | def init_weights(self, pretrained=None):
method forward (line 459) | def forward(self, x):
method train (line 472) | def train(self, mode=True):
FILE: mmdetection/mmdet/models/backbones/resnext.py
class Bottleneck (line 12) | class Bottleneck(_Bottleneck):
method __init__ (line 14) | def __init__(self, *args, groups=1, base_width=4, **kwargs):
function make_res_layer (line 94) | def make_res_layer(block,
class ResNeXt (line 155) | class ResNeXt(ResNet):
method __init__ (line 187) | def __init__(self, groups=1, base_width=4, **kwargs):
FILE: mmdetection/mmdet/models/backbones/ssd_vgg.py
class SSDVGG (line 13) | class SSDVGG(VGG):
method __init__ (line 19) | def __init__(self,
method init_weights (line 55) | def init_weights(self, pretrained=None):
method forward (line 76) | def forward(self, x):
method _make_extra_layers (line 92) | def _make_extra_layers(self, outplanes):
class L2Norm (line 119) | class L2Norm(nn.Module):
method __init__ (line 121) | def __init__(self, n_dims, scale=20., eps=1e-10):
method forward (line 128) | def forward(self, x):
FILE: mmdetection/mmdet/models/bbox_heads/bbox_head.py
class BBoxHead (line 11) | class BBoxHead(nn.Module):
method __init__ (line 15) | def __init__(self,
method init_weights (line 49) | def init_weights(self):
method forward (line 57) | def forward(self, x):
method get_target (line 65) | def get_target(self, sampling_results, gt_bboxes, gt_labels,
method loss (line 83) | def loss(self,
method get_det_bboxes (line 110) | def get_det_bboxes(self,
method refine_bboxes (line 140) | def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
method regress_by_class (line 180) | def regress_by_class(self, rois, label, bbox_pred, img_meta):
FILE: mmdetection/mmdet/models/bbox_heads/convfc_bbox_head.py
class ConvFCBBoxHead (line 9) | class ConvFCBBoxHead(BBoxHead):
method __init__ (line 18) | def __init__(self,
method _add_conv_fc_branch (line 83) | def _add_conv_fc_branch(self,
method init_weights (line 124) | def init_weights(self):
method forward (line 132) | def forward(self, x):
class SharedFCBBoxHead (line 172) | class SharedFCBBoxHead(ConvFCBBoxHead):
method __init__ (line 174) | def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):
FILE: mmdetection/mmdet/models/builder.py
function _build_module (line 8) | def _build_module(cfg, registry, default_args):
function build (line 27) | def build(cfg, registry, default_args=None):
function build_backbone (line 35) | def build_backbone(cfg):
function build_neck (line 39) | def build_neck(cfg):
function build_roi_extractor (line 43) | def build_roi_extractor(cfg):
function build_shared_head (line 47) | def build_shared_head(cfg):
function build_head (line 51) | def build_head(cfg):
function build_detector (line 55) | def build_detector(cfg, train_cfg=None, test_cfg=None):
FILE: mmdetection/mmdet/models/detectors/base.py
class BaseDetector (line 12) | class BaseDetector(nn.Module):
method __init__ (line 17) | def __init__(self):
method with_neck (line 21) | def with_neck(self):
method with_shared_head (line 25) | def with_shared_head(self):
method with_bbox (line 29) | def with_bbox(self):
method with_mask (line 33) | def with_mask(self):
method extract_feat (line 37) | def extract_feat(self, imgs):
method extract_feats (line 40) | def extract_feats(self, imgs):
method forward_train (line 46) | def forward_train(self, imgs, img_metas, **kwargs):
method simple_test (line 50) | def simple_test(self, img, img_meta, **kwargs):
method aug_test (line 54) | def aug_test(self, imgs, img_metas, **kwargs):
method init_weights (line 57) | def init_weights(self, pretrained=None):
method forward_test (line 62) | def forward_test(self, imgs, img_metas, **kwargs):
method forward (line 82) | def forward(self, img, img_meta, return_loss=True, **kwargs):
method show_result (line 88) | def show_result(self,
FILE: mmdetection/mmdet/models/detectors/cascade_rcnn.py
class CascadeRCNN (line 15) | class CascadeRCNN(BaseDetector, RPNTestMixin):
method __init__ (line 17) | def __init__(self,
method with_rpn (line 89) | def with_rpn(self):
method init_weights (line 92) | def init_weights(self, pretrained=None):
method extract_feat (line 114) | def extract_feat(self, img):
method forward_train (line 120) | def forward_train(self,
method simple_test (line 240) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 364) | def aug_test(self, img, img_meta, proposals=None, rescale=False):
method show_result (line 367) | def show_result(self, data, result, img_norm_cfg, **kwargs):
FILE: mmdetection/mmdet/models/detectors/ensemble_htc.py
class EnsembleHTC (line 7) | class EnsembleHTC(BaseDetector):
method __init__ (line 8) | def __init__(self, models):
method simple_test (line 12) | def simple_test(self, img, img_meta, **kwargs):
method forward_train (line 15) | def forward_train(self, imgs, img_metas, **kwargs):
method extract_feat (line 18) | def extract_feat(self, imgs):
method aug_test (line 21) | def aug_test(self, imgs, img_metas, **kwargs):
FILE: mmdetection/mmdet/models/detectors/fast_rcnn.py
class FastRCNN (line 6) | class FastRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
method forward_test (line 31) | def forward_test(self, imgs, img_metas, proposals, **kwargs):
FILE: mmdetection/mmdet/models/detectors/faster_rcnn.py
class FasterRCNN (line 6) | class FasterRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdetection/mmdet/models/detectors/fcos.py
class FCOS (line 6) | class FCOS(SingleStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdetection/mmdet/models/detectors/htc.py
class HybridTaskCascade (line 14) | class HybridTaskCascade(CascadeRCNN, RPNTestMixin):
method __init__ (line 16) | def __init__(self,
method with_semantic (line 38) | def with_semantic(self):
method _bbox_forward_train (line 44) | def _bbox_forward_train(self,
method _mask_forward_train (line 74) | def _mask_forward_train(self,
method _bbox_forward_test (line 115) | def _bbox_forward_test(self, stage, x, rois, semantic_feat=None):
method _mask_forward_test (line 130) | def _mask_forward_test(self, stage, x, bboxes, semantic_feat=None):
method forward_train (line 158) | def forward_train(self,
method simple_test (line 270) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 395) | def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
FILE: mmdetection/mmdet/models/detectors/mask_rcnn.py
class MaskRCNN (line 6) | class MaskRCNN(TwoStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdetection/mmdet/models/detectors/retinanet.py
class RetinaNet (line 6) | class RetinaNet(SingleStageDetector):
method __init__ (line 8) | def __init__(self,
FILE: mmdetection/mmdet/models/detectors/rpn.py
class RPN (line 11) | class RPN(BaseDetector, RPNTestMixin):
method __init__ (line 13) | def __init__(self,
method init_weights (line 28) | def init_weights(self, pretrained=None):
method extract_feat (line 35) | def extract_feat(self, img):
method forward_train (line 41) | def forward_train(self,
method simple_test (line 57) | def simple_test(self, img, img_meta, rescale=False):
method aug_test (line 66) | def aug_test(self, imgs, img_metas, rescale=False):
method show_result (line 79) | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=...
FILE: mmdetection/mmdet/models/detectors/single_stage.py
class SingleStageDetector (line 10) | class SingleStageDetector(BaseDetector):
method __init__ (line 12) | def __init__(self,
method init_weights (line 28) | def init_weights(self, pretrained=None):
method extract_feat (line 39) | def extract_feat(self, img):
method forward_train (line 45) | def forward_train(self,
method simple_test (line 58) | def simple_test(self, img, img_meta, rescale=False):
method aug_test (line 69) | def aug_test(self, imgs, img_metas, rescale=False):
FILE: mmdetection/mmdet/models/detectors/test_mixins.py
class RPNTestMixin (line 5) | class RPNTestMixin(object):
method simple_test_rpn (line 7) | def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
method aug_test_rpn (line 13) | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
class BBoxTestMixin (line 28) | class BBoxTestMixin(object):
method simple_test_bboxes (line 30) | def simple_test_bboxes(self,
method aug_test_bboxes (line 55) | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_c...
class MaskTestMixin (line 92) | class MaskTestMixin(object):
method simple_test_mask (line 94) | def simple_test_mask(self,
method aug_test_mask (line 121) | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
FILE: mmdetection/mmdet/models/detectors/two_stage.py
class TwoStageDetector (line 12) | class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
method __init__ (line 15) | def __init__(self,
method with_rpn (line 60) | def with_rpn(self):
method init_weights (line 63) | def init_weights(self, pretrained=None):
method extract_feat (line 84) | def extract_feat(self, img):
method forward_train (line 90) | def forward_train(self,
method simple_test (line 192) | def simple_test(self, img, img_meta, proposals=None, rescale=False):
method aug_test (line 213) | def aug_test(self, imgs, img_metas, rescale=False):
FILE: mmdetection/mmdet/models/mask_heads/fcn_mask_head.py
class FCNMaskHead (line 13) | class FCNMaskHead(nn.Module):
method __init__ (line 15) | def __init__(self,
method init_weights (line 79) | def init_weights(self):
method forward (line 87) | def forward(self, x):
method get_target (line 97) | def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
method loss (line 106) | def loss(self, mask_pred, mask_targets, labels):
method get_seg_masks (line 116) | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
FILE: mmdetection/mmdet/models/mask_heads/fused_semantic_head.py
class FusedSemanticHead (line 10) | class FusedSemanticHead(nn.Module):
method __init__ (line 24) | def __init__(self,
method init_weights (line 79) | def init_weights(self):
method forward (line 82) | def forward(self, feats):
method loss (line 101) | def loss(self, mask_pred, labels):
FILE: mmdetection/mmdet/models/mask_heads/htc_mask_head.py
class HTCMaskHead (line 7) | class HTCMaskHead(FCNMaskHead):
method __init__ (line 9) | def __init__(self, *args, **kwargs):
method init_weights (line 18) | def init_weights(self):
method forward (line 22) | def forward(self, x, res_feat=None, return_logits=True, return_feat=Tr...
FILE: mmdetection/mmdet/models/necks/fpn.py
class FPN (line 10) | class FPN(nn.Module):
method __init__ (line 12) | def __init__(self,
method init_weights (line 92) | def init_weights(self):
method forward (line 97) | def forward(self, inputs):
FILE: mmdetection/mmdet/models/registry.py
class Registry (line 4) | class Registry(object):
method __init__ (line 6) | def __init__(self, name):
method name (line 11) | def name(self):
method module_dict (line 15) | def module_dict(self):
method _register_module (line 18) | def _register_module(self, module_class):
method register_module (line 33) | def register_module(self, cls):
FILE: mmdetection/mmdet/models/roi_extractors/single_level.py
class SingleRoIExtractor (line 11) | class SingleRoIExtractor(nn.Module):
method __init__ (line 24) | def __init__(self,
method num_inputs (line 36) | def num_inputs(self):
method init_weights (line 40) | def init_weights(self):
method build_roi_layers (line 43) | def build_roi_layers(self, layer_cfg, featmap_strides):
method map_roi_levels (line 52) | def map_roi_levels(self, rois, num_levels):
method forward (line 73) | def forward(self, feats, rois):
FILE: mmdetection/mmdet/models/shared_heads/res_layer.py
class ResLayer (line 12) | class ResLayer(nn.Module):
method __init__ (line 14) | def __init__(self,
method init_weights (line 46) | def init_weights(self, pretrained=None):
method forward (line 59) | def forward(self, x):
method train (line 64) | def train(self, mode=True):
FILE: mmdetection/mmdet/models/utils/conv_module.py
function build_conv_layer (line 16) | def build_conv_layer(cfg, *args, **kwargs):
class ConvModule (line 44) | class ConvModule(nn.Module):
method __init__ (line 67) | def __init__(self,
method norm (line 139) | def norm(self):
method init_weights (line 142) | def init_weights(self):
method forward (line 148) | def forward(self, x, activate=True, norm=True):
FILE: mmdetection/mmdet/models/utils/conv_ws.py
function conv_ws_2d (line 5) | def conv_ws_2d(input,
class ConvWS2d (line 21) | class ConvWS2d(nn.Conv2d):
method __init__ (line 23) | def __init__(self,
method forward (line 44) | def forward(self, x):
FILE: mmdetection/mmdet/models/utils/norm.py
function build_norm_layer (line 13) | def build_norm_layer(cfg, num_features, postfix=''):
FILE: mmdetection/mmdet/models/utils/scale.py
class Scale (line 5) | class Scale(nn.Module):
method __init__ (line 7) | def __init__(self, scale=1.0):
method forward (line 11) | def forward(self, x):
FILE: mmdetection/mmdet/models/utils/weight_init.py
function xavier_init (line 5) | def xavier_init(module, gain=1, bias=0, distribution='normal'):
function normal_init (line 15) | def normal_init(module, mean=0, std=1, bias=0):
function uniform_init (line 21) | def uniform_init(module, a=0, b=1, bias=0):
function kaiming_init (line 27) | def kaiming_init(module,
function bias_init_with_prob (line 43) | def bias_init_with_prob(prior_prob):
FILE: mmdetection/mmdet/ops/dcn/functions/deform_conv.py
class DeformConvFunction (line 8) | class DeformConvFunction(Function):
method forward (line 11) | def forward(ctx,
method backward (line 55) | def backward(ctx, grad_output):
method _output_size (line 92) | def _output_size(input, weight, padding, dilation, stride):
class ModulatedDeformConvFunction (line 108) | class ModulatedDeformConvFunction(Function):
method forward (line 111) | def forward(ctx,
method backward (line 146) | def backward(ctx, grad_output):
method _infer_shape (line 168) | def _infer_shape(ctx, input, weight):
FILE: mmdetection/mmdet/ops/dcn/functions/deform_pool.py
class DeformRoIPoolingFunction (line 7) | class DeformRoIPoolingFunction(Function):
method forward (line 10) | def forward(ctx,
method backward (line 50) | def backward(ctx, grad_output):
FILE: mmdetection/mmdet/ops/dcn/modules/deform_conv.py
class DeformConv (line 10) | class DeformConv(nn.Module):
method __init__ (line 12) | def __init__(self,
method reset_parameters (line 47) | def reset_parameters(self):
method forward (line 54) | def forward(self, x, offset):
class DeformConvPack (line 59) | class DeformConvPack(DeformConv):
method __init__ (line 61) | def __init__(self, *args, **kwargs):
method init_offset (line 74) | def init_offset(self):
method forward (line 78) | def forward(self, x):
class ModulatedDeformConv (line 84) | class ModulatedDeformConv(nn.Module):
method __init__ (line 86) | def __init__(self,
method reset_parameters (line 116) | def reset_parameters(self):
method forward (line 125) | def forward(self, x, offset, mask):
class ModulatedDeformConvPack (line 131) | class ModulatedDeformConvPack(ModulatedDeformConv):
method __init__ (line 133) | def __init__(self, *args, **kwargs):
method init_offset (line 146) | def init_offset(self):
method forward (line 150) | def forward(self, x):
FILE: mmdetection/mmdet/ops/dcn/modules/deform_pool.py
class DeformRoIPooling (line 6) | class DeformRoIPooling(nn.Module):
method __init__ (line 8) | def __init__(self,
method forward (line 27) | def forward(self, data, rois, offset):
class DeformRoIPoolingPack (line 36) | class DeformRoIPoolingPack(DeformRoIPooling):
method __init__ (line 38) | def __init__(self,
method forward (line 72) | def forward(self, data, rois):
class ModulatedDeformRoIPoolingPack (line 95) | class ModulatedDeformRoIPoolingPack(DeformRoIPooling):
method __init__ (line 97) | def __init__(self,
method forward (line 150) | def forward(self, data, rois):
FILE: mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.cpp
function shape_check (line 61) | void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOu...
function deform_conv_forward_cuda (line 151) | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
function deform_conv_backward_input_cuda (line 260) | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
function deform_conv_backward_parameters_cuda (line 373) | int deform_conv_backward_parameters_cuda(
function modulated_deform_conv_cuda_forward (line 486) | void modulated_deform_conv_cuda_forward(
function modulated_deform_conv_cuda_backward (line 566) | void modulated_deform_conv_cuda_backward(
function PYBIND11_MODULE (line 681) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/dcn/src/deform_pool_cuda.cpp
function deform_psroi_pooling_cuda_forward (line 30) | void deform_psroi_pooling_cuda_forward(
function deform_psroi_pooling_cuda_backward (line 54) | void deform_psroi_pooling_cuda_backward(
function PYBIND11_MODULE (line 81) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/nms/nms_wrapper.py
function nms (line 8) | def nms(dets, iou_thr, device_id=None):
function soft_nms (line 52) | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
FILE: mmdetection/mmdet/ops/nms/setup.py
function customize_compiler_for_nvcc (line 23) | def customize_compiler_for_nvcc(self):
class custom_build_ext (line 60) | class custom_build_ext(build_ext):
method build_extensions (line 62) | def build_extensions(self):
FILE: mmdetection/mmdet/ops/nms/src/nms_cpu.cpp
function nms_cpu_kernel (line 5) | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
function nms (line 61) | at::Tensor nms(const at::Tensor& dets, const float threshold) {
function PYBIND11_MODULE (line 69) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/nms/src/nms_cuda.cpp
function nms (line 8) | at::Tensor nms(const at::Tensor& dets, const float threshold) {
function PYBIND11_MODULE (line 15) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/roi_align/functions/roi_align.py
class RoIAlignFunction (line 6) | class RoIAlignFunction(Function):
method forward (line 9) | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
method backward (line 39) | def backward(ctx, grad_output):
FILE: mmdetection/mmdet/ops/roi_align/modules/roi_align.py
class RoIAlign (line 5) | class RoIAlign(Module):
method __init__ (line 7) | def __init__(self, out_size, spatial_scale, sample_num=0):
method forward (line 14) | def forward(self, features, rois):
FILE: mmdetection/mmdet/ops/roi_align/src/roi_align_cuda.cpp
function roi_align_forward_cuda (line 27) | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
function roi_align_backward_cuda (line 55) | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
function PYBIND11_MODULE (line 82) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/roi_pool/functions/roi_pool.py
class RoIPoolFunction (line 7) | class RoIPoolFunction(Function):
method forward (line 10) | def forward(ctx, features, rois, out_size, spatial_scale):
method backward (line 38) | def backward(ctx, grad_output):
FILE: mmdetection/mmdet/ops/roi_pool/modules/roi_pool.py
class RoIPool (line 5) | class RoIPool(Module):
method __init__ (line 7) | def __init__(self, out_size, spatial_scale):
method forward (line 13) | def forward(self, features, rois):
FILE: mmdetection/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
function roi_pooling_forward_cuda (line 26) | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
function roi_pooling_backward_cuda (line 54) | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
function PYBIND11_MODULE (line 83) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py
class SigmoidFocalLossFunction (line 8) | class SigmoidFocalLossFunction(Function):
method forward (line 11) | def forward(ctx, input, target, gamma=2.0, alpha=0.25, reduction='mean'):
method backward (line 31) | def backward(ctx, d_loss):
FILE: mmdetection/mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py
class SigmoidFocalLoss (line 6) | class SigmoidFocalLoss(nn.Module):
method __init__ (line 8) | def __init__(self, gamma, alpha):
method forward (line 13) | def forward(self, logits, targets):
method __repr__ (line 18) | def __repr__(self):
FILE: mmdetection/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp
function SigmoidFocalLoss_forward (line 17) | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
function SigmoidFocalLoss_backward (line 27) | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
function PYBIND11_MODULE (line 38) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: mmdetection/setup.py
function readme (line 7) | def readme():
function get_git_hash (line 22) | def get_git_hash():
function get_hash (line 48) | def get_hash():
function write_version_py (line 63) | def write_version_py():
function get_version (line 77) | def get_version():
FILE: mmdetection/tools/analyze_logs.py
function cal_train_time (line 10) | def cal_train_time(log_dicts, args):
function plot_curve (line 34) | def plot_curve(log_dicts, args):
function add_plot_parser (line 94) | def add_plot_parser(subparsers):
function add_time_parser (line 122) | def add_time_parser(subparsers):
function parse_args (line 138) | def parse_args():
function load_json_logs (line 148) | def load_json_logs(json_logs):
function main (line 165) | def main():
FILE: mmdetection/tools/coco_eval.py
function main (line 6) | def main():
FILE: mmdetection/tools/convert_datasets/pascal_voc.py
function parse_xml (line 13) | def parse_xml(args):
function cvt_annotations (line 67) | def cvt_annotations(devkit_path, years, split, out_file):
function parse_args (line 94) | def parse_args():
function main (line 103) | def main():
FILE: mmdetection/tools/publish_model.py
function parse_args (line 6) | def parse_args():
function process_checkpoint (line 15) | def process_checkpoint(in_file, out_file):
function main (line 28) | def main():
FILE: mmdetection/tools/test.py
function single_gpu_test (line 18) | def single_gpu_test(model, data_loader, show=False):
function multi_gpu_test (line 37) | def multi_gpu_test(model, data_loader, tmpdir=None):
function collect_results (line 60) | def collect_results(result_part, size, tmpdir=None):
function parse_args (line 103) | def parse_args():
function main (line 128) | def main():
FILE: mmdetection/tools/test_ensemble.py
function single_gpu_test (line 19) | def single_gpu_test(model, data_loader, show=False):
function multi_gpu_test (line 38) | def multi_gpu_test(model, data_loader, tmpdir=None):
function collect_results (line 61) | def collect_results(result_part, size, tmpdir=None):
function parse_args (line 104) | def parse_args():
function main (line 128) | def main():
FILE: mmdetection/tools/train.py
function parse_args (line 14) | def parse_args():
function main (line 42) | def main():
FILE: mmdetection/tools/upgrade_model_version.py
function convert (line 8) | def convert(in_file, out_file):
function main (line 33) | def main():
FILE: mmdetection/tools/voc_eval.py
function voc_eval (line 10) | def voc_eval(result_file, dataset, iou_thr=0.5):
function main (line 46) | def main():
FILE: src/create_mmdetection_test.py
function parse_args (line 12) | def parse_args():
function convert (line 21) | def convert(group: dict, root) -> dict:
function main (line 37) | def main():
FILE: src/create_mmdetection_train.py
function parse_args (line 11) | def parse_args():
function main (line 21) | def main():
FILE: src/draw.py
function parse_args (line 17) | def parse_args():
function get_spaced_colors (line 30) | def get_spaced_colors(n, start_color=(75, 0, 130)):
function put_text (line 42) | def put_text(img, color, text, i, x_shift=10, y_shift=10):
function draw_masks (line 50) | def draw_masks(img, masks, colors, classes):
function get_gt_masks (line 63) | def get_gt_masks(annotation):
function draw (line 70) | def draw(args, root, output, metric_threshold, colors, classes):
function main (line 88) | def main():
FILE: src/eda.py
function parse_args (line 14) | def parse_args():
function draw_masks (line 21) | def draw_masks(img, masks):
function main (line 30) | def main():
FILE: src/metric.py
function precision_at (line 5) | def precision_at(threshold, iou):
function calc_iou (line 14) | def calc_iou(y_true, y_prediction):
function calc_score_per_class (line 42) | def calc_score_per_class(y_true, y_prediction):
FILE: src/prune.py
function parse_args (line 6) | def parse_args():
function main (line 13) | def main():
FILE: src/rle.py
function kaggle_rle_encode (line 7) | def kaggle_rle_encode(mask):
function kaggle_rle_decode (line 15) | def kaggle_rle_decode(rle, h, w):
function coco_rle_encode (line 25) | def coco_rle_encode(mask):
function coco_rle_decode (line 35) | def coco_rle_decode(rle, h, w):
function kaggle2coco (line 39) | def kaggle2coco(kaggle_rle, h, w):
function main (line 60) | def main():
FILE: src/rm_attribute_classes.py
function parse_args (line 7) | def parse_args():
function main (line 14) | def main():
FILE: src/split.py
function parse_args (line 9) | def parse_args():
function main (line 20) | def main():
FILE: src/submit.py
function parse_args (line 14) | def parse_args():
function decode_and_resize (line 24) | def decode_and_resize(
function create_mask (line 41) | def create_mask(args):
function main (line 90) | def main():
FILE: src/utils.py
function group2mmdetection (line 7) | def group2mmdetection(group: dict) -> dict:
function create_labeled_mask (line 32) | def create_labeled_mask(mask):
function check_overlaps (line 36) | def check_overlaps(mask):
function hard_overlaps_suppression (line 41) | def hard_overlaps_suppression(binary_mask, scores):
FILE: src/visualization.py
function draw_bounding_box_on_image (line 9) | def draw_bounding_box_on_image(
function draw_bounding_boxes_on_image_array (line 53) | def draw_bounding_boxes_on_image_array(
function draw_bounding_boxes_on_image (line 61) | def draw_bounding_boxes_on_image(
Condensed preview — 270 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,079K chars).
[
{
"path": ".dockerignore",
"chars": 1246,
"preview": "# custom:\n\n.git/*\ndata/*\nipynb/*\n.idea/*\n\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# "
},
{
"path": ".gitignore",
"chars": 1230,
"preview": "*.ipynb\n\n.idea/\n.DS_Store\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.s"
},
{
"path": "Dockerfile",
"chars": 1054,
"preview": "FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel\n\nRUN apt-get update && apt-get install -y \\\n git \\\n wget \\\n "
},
{
"path": "LICENSE",
"chars": 1067,
"preview": "MIT License\n\nCopyright (c) 2019 Miras Amir\n\nPermission is hereby granted, free of charge, to any person obtaining a copy"
},
{
"path": "Makefile",
"chars": 1207,
"preview": "APP_NAME=amirassov/kaggle-imaterialist\nCONTAINER_NAME=kaggle-imaterialist\n\n# HELP\n.PHONY: help\n\nhelp: ## This help.\n\t@aw"
},
{
"path": "README.md",
"chars": 4553,
"preview": "# The First Place Solution of [iMaterialist (Fashion) 2019](https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/)\n\n"
},
{
"path": "configs/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900.py",
"chars": 7789,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained=None,\n interleaved=True"
},
{
"path": "mmdetection/.travis.yml",
"chars": 111,
"preview": "dist: trusty\nlanguage: python\n\ninstall:\n - pip install flake8\n\npython:\n - \"3.5\"\n - \"3.6\"\n\nscript:\n - flake8"
},
{
"path": "mmdetection/GETTING_STARTED.md",
"chars": 8118,
"preview": "# Getting Started\n\nThis page provides basic tutorials about the usage of mmdetection.\nFor installation instructions, ple"
},
{
"path": "mmdetection/INSTALL.md",
"chars": 2504,
"preview": "## Installation\n\n### Requirements\n\n- Linux\n- Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/))\n- PyT"
},
{
"path": "mmdetection/LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "mmdetection/MODEL_ZOO.md",
"chars": 36585,
"preview": "# Benchmark and Model Zoo\n\n## Environment\n\n### Hardware\n\n- 8 NVIDIA Tesla V100 GPUs\n- Intel Xeon 4114 CPU @ 2.20GHz\n\n###"
},
{
"path": "mmdetection/README.md",
"chars": 4512,
"preview": "\n# mmdetection\n\n## Introduction\n\nThe master branch works with **PyTorch 1.1** or higher. If you would like to use PyTorc"
},
{
"path": "mmdetection/TECHNICAL_DETAILS.md",
"chars": 3594,
"preview": "## Overview\n\nIn this section, we will introduce the main units of training a detector:\ndata loading, model and iteration"
},
{
"path": "mmdetection/compile.sh",
"chars": 689,
"preview": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\necho \"Building roi align op...\"\ncd mmdet/ops/roi_align\nif [ -d \"build\" "
},
{
"path": "mmdetection/configs/cascade_mask_rcnn_r101_fpn_1x.py",
"chars": 6789,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet101',\n back"
},
{
"path": "mmdetection/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py",
"chars": 6742,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,"
},
{
"path": "mmdetection/configs/cascade_mask_rcnn_r50_fpn_1x.py",
"chars": 6786,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n backb"
},
{
"path": "mmdetection/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py",
"chars": 6839,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='open-mmlab://resnext101_32x4d',"
},
{
"path": "mmdetection/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py",
"chars": 6839,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='open-mmlab://resnext101_64x4d',"
},
{
"path": "mmdetection/configs/cascade_rcnn_r101_fpn_1x.py",
"chars": 6315,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet101',\n back"
},
{
"path": "mmdetection/configs/cascade_rcnn_r50_caffe_c4_1x.py",
"chars": 6506,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,"
},
{
"path": "mmdetection/configs/cascade_rcnn_r50_fpn_1x.py",
"chars": 6312,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n backb"
},
{
"path": "mmdetection/configs/cascade_rcnn_x101_32x4d_fpn_1x.py",
"chars": 6365,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='open-mmlab://resnext101_32x4d',"
},
{
"path": "mmdetection/configs/cascade_rcnn_x101_64x4d_fpn_1x.py",
"chars": 6365,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='open-mmlab://resnext101_64x4d',"
},
{
"path": "mmdetection/configs/dcn/README.md",
"chars": 5023,
"preview": "# Deformable Convolutional Networks\n\n# Introduction\n\n```\n@inproceedings{dai2017deformable,\n title={Deformable Convoluti"
},
{
"path": "mmdetection/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py",
"chars": 6967,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n backb"
},
{
"path": "mmdetection/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py",
"chars": 6493,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n backb"
},
{
"path": "mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py",
"chars": 4818,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py",
"chars": 4901,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='open-mmlab://resnext101_32x4d',\n backbone=dict("
},
{
"path": "mmdetection/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py",
"chars": 4777,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py",
"chars": 4818,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py",
"chars": 4787,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py",
"chars": 5110,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n typ"
},
{
"path": "mmdetection/configs/fast_mask_rcnn_r101_fpn_1x.py",
"chars": 4047,
"preview": "# model settings\nmodel = dict(\n type='FastRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n ty"
},
{
"path": "mmdetection/configs/fast_mask_rcnn_r50_caffe_c4_1x.py",
"chars": 4062,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='FastRCNN',\n pretrained='open"
},
{
"path": "mmdetection/configs/fast_mask_rcnn_r50_fpn_1x.py",
"chars": 4044,
"preview": "# model settings\nmodel = dict(\n type='FastRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n typ"
},
{
"path": "mmdetection/configs/fast_rcnn_r101_fpn_1x.py",
"chars": 3628,
"preview": "# model settings\nmodel = dict(\n type='FastRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n ty"
},
{
"path": "mmdetection/configs/fast_rcnn_r50_caffe_c4_1x.py",
"chars": 3812,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='FastRCNN',\n pretrained='open"
},
{
"path": "mmdetection/configs/fast_rcnn_r50_fpn_1x.py",
"chars": 3625,
"preview": "# model settings\nmodel = dict(\n type='FastRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n typ"
},
{
"path": "mmdetection/configs/faster_rcnn_ohem_r50_fpn_1x.py",
"chars": 4635,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/faster_rcnn_r101_fpn_1x.py",
"chars": 4640,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/faster_rcnn_r50_caffe_c4_1x.py",
"chars": 4712,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='FasterRCNN',\n pretrained='op"
},
{
"path": "mmdetection/configs/faster_rcnn_r50_fpn_1x.py",
"chars": 4637,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/faster_rcnn_x101_32x4d_fpn_1x.py",
"chars": 4690,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='open-mmlab://resnext101_32x4d',\n backbone=dict("
},
{
"path": "mmdetection/configs/faster_rcnn_x101_64x4d_fpn_1x.py",
"chars": 4690,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='open-mmlab://resnext101_64x4d',\n backbone=dict("
},
{
"path": "mmdetection/configs/fcos/README.md",
"chars": 1747,
"preview": "# FCOS: Fully Convolutional One-Stage Object Detection\n\n## Introduction\n\n```\n@article{tian2019fcos,\n title={FCOS: Fully"
},
{
"path": "mmdetection/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py",
"chars": 3349,
"preview": "# model settings\nmodel = dict(\n type='FCOS',\n pretrained='open-mmlab://resnet101_caffe',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py",
"chars": 3336,
"preview": "# model settings\nmodel = dict(\n type='FCOS',\n pretrained='open-mmlab://resnext101_64x4d',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py",
"chars": 3281,
"preview": "# model settings\nmodel = dict(\n type='FCOS',\n pretrained='open-mmlab://resnet50_caffe',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/gn/README.md",
"chars": 2210,
"preview": "# Group Normalization\n\n## Introduction\n\n```\n@inproceedings{wu2018group,\n title={Group Normalization},\n author={Wu, Yux"
},
{
"path": "mmdetection/configs/gn/mask_rcnn_r101_fpn_gn_2x.py",
"chars": 5185,
"preview": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n type='MaskRCNN',\n p"
},
{
"path": "mmdetection/configs/gn/mask_rcnn_r50_fpn_gn_2x.py",
"chars": 5182,
"preview": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n type='MaskRCNN',\n p"
},
{
"path": "mmdetection/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py",
"chars": 5190,
"preview": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n type='MaskRCNN',\n p"
},
{
"path": "mmdetection/configs/gn+ws/README.md",
"chars": 4277,
"preview": "# Weight Standardization\n\n## Introduction\n\n```\n@article{weightstandardization,\n author = {Siyuan Qiao and Huiyu Wang"
},
{
"path": "mmdetection/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py",
"chars": 4872,
"preview": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = di"
},
{
"path": "mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py",
"chars": 5328,
"preview": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = di"
},
{
"path": "mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py",
"chars": 5321,
"preview": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = di"
},
{
"path": "mmdetection/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py",
"chars": 5379,
"preview": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = di"
},
{
"path": "mmdetection/configs/htc/README.md",
"chars": 3310,
"preview": "# Hybrid Task Cascade for Instance Segmentation\n\n## Introduction\n\nWe provide config files to reproduce the results in th"
},
{
"path": "mmdetection/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py",
"chars": 7729,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='open-mmlab://resnext101_6"
},
{
"path": "mmdetection/configs/htc/htc_r101_fpn_20e.py",
"chars": 7402,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='modelzoo://resnet101',\n "
},
{
"path": "mmdetection/configs/htc/htc_r50_fpn_1x.py",
"chars": 7397,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n "
},
{
"path": "mmdetection/configs/htc/htc_r50_fpn_20e.py",
"chars": 7399,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n "
},
{
"path": "mmdetection/configs/htc/htc_without_semantic_r50_fpn_1x.py",
"chars": 6843,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='modelzoo://resnet50',\n "
},
{
"path": "mmdetection/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py",
"chars": 7459,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='open-mmlab://resnext101_3"
},
{
"path": "mmdetection/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py",
"chars": 7459,
"preview": "# model settings\nmodel = dict(\n type='HybridTaskCascade',\n num_stages=3,\n pretrained='open-mmlab://resnext101_6"
},
{
"path": "mmdetection/configs/mask_rcnn_r101_fpn_1x.py",
"chars": 4932,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n ty"
},
{
"path": "mmdetection/configs/mask_rcnn_r50_caffe_c4_1x.py",
"chars": 4953,
"preview": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n type='MaskRCNN',\n pretrained='open"
},
{
"path": "mmdetection/configs/mask_rcnn_r50_fpn_1x.py",
"chars": 4929,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n typ"
},
{
"path": "mmdetection/configs/mask_rcnn_x101_32x4d_fpn_1x.py",
"chars": 4982,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='open-mmlab://resnext101_32x4d',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/mask_rcnn_x101_64x4d_fpn_1x.py",
"chars": 4982,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='open-mmlab://resnext101_64x4d',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py",
"chars": 4891,
"preview": "# model settings\nmodel = dict(\n type='FasterRCNN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/pascal_voc/ssd300_voc.py",
"chars": 4023,
"preview": "# model settings\ninput_size = 300\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "mmdetection/configs/pascal_voc/ssd512_voc.py",
"chars": 4042,
"preview": "# model settings\ninput_size = 512\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "mmdetection/configs/retinanet_r101_fpn_1x.py",
"chars": 3265,
"preview": "# model settings\nmodel = dict(\n type='RetinaNet',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n t"
},
{
"path": "mmdetection/configs/retinanet_r50_fpn_1x.py",
"chars": 3262,
"preview": "# model settings\nmodel = dict(\n type='RetinaNet',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n ty"
},
{
"path": "mmdetection/configs/retinanet_x101_32x4d_fpn_1x.py",
"chars": 3315,
"preview": "# model settings\nmodel = dict(\n type='RetinaNet',\n pretrained='open-mmlab://resnext101_32x4d',\n backbone=dict(\n"
},
{
"path": "mmdetection/configs/retinanet_x101_64x4d_fpn_1x.py",
"chars": 3315,
"preview": "# model settings\nmodel = dict(\n type='RetinaNet',\n pretrained='open-mmlab://resnext101_64x4d',\n backbone=dict(\n"
},
{
"path": "mmdetection/configs/rpn_r101_fpn_1x.py",
"chars": 3347,
"preview": "# model settings\nmodel = dict(\n type='RPN',\n pretrained='modelzoo://resnet101',\n backbone=dict(\n type='R"
},
{
"path": "mmdetection/configs/rpn_r50_caffe_c4_1x.py",
"chars": 3373,
"preview": "# model settings\nmodel = dict(\n type='RPN',\n pretrained='open-mmlab://resnet50_caffe',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/rpn_r50_fpn_1x.py",
"chars": 3344,
"preview": "# model settings\nmodel = dict(\n type='RPN',\n pretrained='modelzoo://resnet50',\n backbone=dict(\n type='Re"
},
{
"path": "mmdetection/configs/rpn_x101_32x4d_fpn_1x.py",
"chars": 3398,
"preview": "# model settings\nmodel = dict(\n type='RPN',\n pretrained='open-mmlab://resnext101_32x4d',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/rpn_x101_64x4d_fpn_1x.py",
"chars": 3398,
"preview": "# model settings\nmodel = dict(\n type='RPN',\n pretrained='open-mmlab://resnext101_64x4d',\n backbone=dict(\n "
},
{
"path": "mmdetection/configs/ssd300_coco.py",
"chars": 3904,
"preview": "# model settings\ninput_size = 300\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "mmdetection/configs/ssd512_coco.py",
"chars": 3921,
"preview": "# model settings\ninput_size = 512\nmodel = dict(\n type='SingleStageDetector',\n pretrained='open-mmlab://vgg16_caffe"
},
{
"path": "mmdetection/mmdet/__init__.py",
"chars": 92,
"preview": "from .version import __version__, short_version\n\n__all__ = ['__version__', 'short_version']\n"
},
{
"path": "mmdetection/mmdet/apis/__init__.py",
"chars": 310,
"preview": "from .env import init_dist, get_root_logger, set_random_seed\nfrom .train import train_detector\nfrom .inference import in"
},
{
"path": "mmdetection/mmdet/apis/env.py",
"chars": 2041,
"preview": "import logging\nimport os\nimport random\nimport subprocess\n\nimport numpy as np\nimport torch\nimport torch.distributed as di"
},
{
"path": "mmdetection/mmdet/apis/inference.py",
"chars": 4995,
"preview": "import warnings\n\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as maskUtils\nimport torch\nfrom mmcv.runner impor"
},
{
"path": "mmdetection/mmdet/apis/train.py",
"chars": 7459,
"preview": "from __future__ import division\n\nimport re\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv.runner import Run"
},
{
"path": "mmdetection/mmdet/core/__init__.py",
"chars": 300,
"preview": "from .anchor import * # noqa: F401, F403\nfrom .bbox import * # noqa: F401, F403\nfrom .mask import * # noqa: F401, F40"
},
{
"path": "mmdetection/mmdet/core/anchor/__init__.py",
"chars": 135,
"preview": "from .anchor_generator import AnchorGenerator\nfrom .anchor_target import anchor_target\n\n__all__ = ['AnchorGenerator', 'a"
},
{
"path": "mmdetection/mmdet/core/anchor/anchor_generator.py",
"chars": 3117,
"preview": "import torch\n\n\nclass AnchorGenerator(object):\n\n def __init__(self, base_size, scales, ratios, scale_major=True, ctr=N"
},
{
"path": "mmdetection/mmdet/core/anchor/anchor_target.py",
"chars": 7198,
"preview": "import torch\n\nfrom ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta\nfrom ..utils import multi_"
},
{
"path": "mmdetection/mmdet/core/bbox/__init__.py",
"chars": 1059,
"preview": "from .geometry import bbox_overlaps\nfrom .assigners import BaseAssigner, MaxIoUAssigner, AssignResult\nfrom .samplers imp"
},
{
"path": "mmdetection/mmdet/core/bbox/assign_sampling.py",
"chars": 1185,
"preview": "import mmcv\n\nfrom . import assigners, samplers\n\n\ndef build_assigner(cfg, **kwargs):\n if isinstance(cfg, assigners.Bas"
},
{
"path": "mmdetection/mmdet/core/bbox/assigners/__init__.py",
"chars": 187,
"preview": "from .base_assigner import BaseAssigner\nfrom .max_iou_assigner import MaxIoUAssigner\nfrom .assign_result import AssignRe"
},
{
"path": "mmdetection/mmdet/core/bbox/assigners/assign_result.py",
"chars": 664,
"preview": "import torch\n\n\nclass AssignResult(object):\n\n def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):\n "
},
{
"path": "mmdetection/mmdet/core/bbox/assigners/base_assigner.py",
"chars": 195,
"preview": "from abc import ABCMeta, abstractmethod\n\n\nclass BaseAssigner(metaclass=ABCMeta):\n\n @abstractmethod\n def assign(sel"
},
{
"path": "mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py",
"chars": 6462,
"preview": "import torch\n\nfrom .base_assigner import BaseAssigner\nfrom .assign_result import AssignResult\nfrom ..geometry import bbo"
},
{
"path": "mmdetection/mmdet/core/bbox/bbox_target.py",
"chars": 2799,
"preview": "import torch\n\nfrom .transforms import bbox2delta\nfrom ..utils import multi_apply\n\n\ndef bbox_target(pos_bboxes_list,\n "
},
{
"path": "mmdetection/mmdet/core/bbox/geometry.py",
"chars": 2163,
"preview": "import torch\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):\n \"\"\"Calculate overlap between two s"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/__init__.py",
"chars": 562,
"preview": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSample"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/base_sampler.py",
"chars": 2753,
"preview": "from abc import ABCMeta, abstractmethod\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass BaseSampler(m"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/combined_sampler.py",
"chars": 509,
"preview": "from .base_sampler import BaseSampler\nfrom ..assign_sampling import build_sampler\n\n\nclass CombinedSampler(BaseSampler):\n"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py",
"chars": 1765,
"preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass InstanceBalancedPosSampler(RandomSamp"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py",
"chars": 2757,
"preview": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass IoUBalancedNegSampler(RandomSampler):"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py",
"chars": 2756,
"preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom ..transforms import bbox2roi\n\n\nclass OHEMSampler(BaseSampler):\n"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py",
"chars": 829,
"preview": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\nclass PseudoSampler(Ba"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/random_sampler.py",
"chars": 1858,
"preview": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\n\n\nclass RandomSampler(BaseSampler):\n\n def __in"
},
{
"path": "mmdetection/mmdet/core/bbox/samplers/sampling_result.py",
"chars": 790,
"preview": "import torch\n\n\nclass SamplingResult(object):\n\n def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_resul"
},
{
"path": "mmdetection/mmdet/core/bbox/transforms.py",
"chars": 5849,
"preview": "import mmcv\nimport numpy as np\nimport torch\n\n\ndef bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):\n "
},
{
"path": "mmdetection/mmdet/core/evaluation/__init__.py",
"chars": 967,
"preview": "from .class_names import (voc_classes, imagenet_det_classes,\n imagenet_vid_classes, coco_classe"
},
{
"path": "mmdetection/mmdet/core/evaluation/bbox_overlaps.py",
"chars": 1642,
"preview": "import numpy as np\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou'):\n \"\"\"Calculate the ious between each bbox of bbo"
},
{
"path": "mmdetection/mmdet/core/evaluation/class_names.py",
"chars": 5155,
"preview": "import mmcv\n\n\ndef voc_classes():\n return [\n 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'c"
},
{
"path": "mmdetection/mmdet/core/evaluation/coco_utils.py",
"chars": 4807,
"preview": "import mmcv\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\nfrom .recall"
},
{
"path": "mmdetection/mmdet/core/evaluation/eval_hooks.py",
"chars": 6181,
"preview": "import os\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv."
},
{
"path": "mmdetection/mmdet/core/evaluation/mean_ap.py",
"chars": 15761,
"preview": "import mmcv\nimport numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\nfrom .cla"
},
{
"path": "mmdetection/mmdet/core/evaluation/recall.py",
"chars": 5961,
"preview": "import numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\n\n\ndef _recalls(all_io"
},
{
"path": "mmdetection/mmdet/core/loss/__init__.py",
"chars": 532,
"preview": "from .losses import (\n weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,\n sigmoid_focal_lo"
},
{
"path": "mmdetection/mmdet/core/loss/losses.py",
"chars": 4988,
"preview": "# TODO merge naive and weighted loss.\nimport torch\nimport torch.nn.functional as F\n\nfrom ..bbox import bbox_overlaps\nfro"
},
{
"path": "mmdetection/mmdet/core/mask/__init__.py",
"chars": 128,
"preview": "from .utils import split_combined_polys\nfrom .mask_target import mask_target\n\n__all__ = ['split_combined_polys', 'mask_t"
},
{
"path": "mmdetection/mmdet/core/mask/mask_target.py",
"chars": 1427,
"preview": "import torch\nimport numpy as np\nimport mmcv\n\n\ndef mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_li"
},
{
"path": "mmdetection/mmdet/core/mask/utils.py",
"chars": 1172,
"preview": "import mmcv\n\n\ndef split_combined_polys(polys, poly_lens, polys_per_mask):\n \"\"\"Split the combined 1-D polys into masks"
},
{
"path": "mmdetection/mmdet/core/post_processing/__init__.py",
"chars": 283,
"preview": "from .bbox_nms import multiclass_nms\nfrom .merge_augs import (merge_aug_proposals, merge_aug_bboxes,\n "
},
{
"path": "mmdetection/mmdet/core/post_processing/bbox_nms.py",
"chars": 2350,
"preview": "import torch\n\nfrom mmdet.ops.nms import nms_wrapper\n\n\ndef multiclass_nms(multi_bboxes,\n multi_scores,\n"
},
{
"path": "mmdetection/mmdet/core/post_processing/merge_augs.py",
"chars": 3317,
"preview": "import torch\n\nimport numpy as np\n\nfrom mmdet.ops import nms\nfrom ..bbox import bbox_mapping_back\n\n\ndef merge_aug_proposa"
},
{
"path": "mmdetection/mmdet/core/utils/__init__.py",
"chars": 210,
"preview": "from .dist_utils import allreduce_grads, DistOptimizerHook\nfrom .misc import tensor2imgs, unmap, multi_apply\n\n__all__ = "
},
{
"path": "mmdetection/mmdet/core/utils/dist_utils.py",
"chars": 1941,
"preview": "from collections import OrderedDict\n\nimport torch.distributed as dist\nfrom torch._utils import (_flatten_dense_tensors, "
},
{
"path": "mmdetection/mmdet/core/utils/misc.py",
"chars": 1108,
"preview": "from functools import partial\n\nimport mmcv\nimport numpy as np\nfrom six.moves import map, zip\n\n\ndef tensor2imgs(tensor, m"
},
{
"path": "mmdetection/mmdet/datasets/__init__.py",
"chars": 657,
"preview": "from .custom import CustomDataset\nfrom .xml_style import XMLDataset\nfrom .coco import CocoDataset\nfrom .voc import VOCDa"
},
{
"path": "mmdetection/mmdet/datasets/coco.py",
"chars": 4871,
"preview": "import numpy as np\nfrom pycocotools.coco import COCO\n\nfrom .custom import CustomDataset\n\n\nclass CocoDataset(CustomDatase"
},
{
"path": "mmdetection/mmdet/datasets/concat_dataset.py",
"chars": 698,
"preview": "import numpy as np\nfrom torch.utils.data.dataset import ConcatDataset as _ConcatDataset\n\n\nclass ConcatDataset(_ConcatDat"
},
{
"path": "mmdetection/mmdet/datasets/custom.py",
"chars": 12339,
"preview": "import os.path as osp\n\nimport mmcv\nimport numpy as np\nfrom mmcv.parallel import DataContainer as DC\nfrom torch.utils.dat"
},
{
"path": "mmdetection/mmdet/datasets/extra_aug.py",
"chars": 698,
"preview": "import numpy as np\n\nimport albumentations as A\nfrom mmcv.runner import obj_from_dict\nfrom . import transforms\n\n\nclass Ex"
},
{
"path": "mmdetection/mmdet/datasets/loader/__init__.py",
"chars": 183,
"preview": "from .build_loader import build_dataloader\nfrom .sampler import GroupSampler, DistributedGroupSampler\n\n__all__ = [\n '"
},
{
"path": "mmdetection/mmdet/datasets/loader/build_loader.py",
"chars": 1763,
"preview": "from functools import partial\n\nfrom mmcv.runner import get_dist_info\nfrom mmcv.parallel import collate\nfrom torch.utils."
},
{
"path": "mmdetection/mmdet/datasets/loader/sampler.py",
"chars": 5649,
"preview": "from __future__ import division\n\nimport math\nimport torch\nimport numpy as np\n\nfrom torch.distributed import get_world_si"
},
{
"path": "mmdetection/mmdet/datasets/repeat_dataset.py",
"chars": 479,
"preview": "import numpy as np\n\n\nclass RepeatDataset(object):\n\n def __init__(self, dataset, times):\n self.dataset = datase"
},
{
"path": "mmdetection/mmdet/datasets/transforms.py",
"chars": 6452,
"preview": "import random\n\nimport albumentations as A\nimport albumentations.augmentations.functional as F\nimport mmcv\nimport numpy a"
},
{
"path": "mmdetection/mmdet/datasets/utils.py",
"chars": 3986,
"preview": "import copy\nfrom collections import Sequence\n\nimport mmcv\nfrom mmcv.runner import obj_from_dict\nimport torch\n\nimport mat"
},
{
"path": "mmdetection/mmdet/datasets/voc.py",
"chars": 638,
"preview": "from .xml_style import XMLDataset\n\n\nclass VOCDataset(XMLDataset):\n\n CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat'"
},
{
"path": "mmdetection/mmdet/datasets/xml_style.py",
"chars": 2687,
"preview": "import os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom .custom import CustomData"
},
{
"path": "mmdetection/mmdet/models/__init__.py",
"chars": 825,
"preview": "from .backbones import * # noqa: F401,F403\nfrom .necks import * # noqa: F401,F403\nfrom .roi_extractors import * # noq"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/__init__.py",
"chars": 238,
"preview": "from .anchor_head import AnchorHead\nfrom .fcos_head import FCOSHead\nfrom .retina_head import RetinaHead\nfrom .rpn_head i"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/anchor_head.py",
"chars": 11497,
"preview": "from __future__ import division\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/fcos_head.py",
"chars": 14973,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core imp"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/retina_head.py",
"chars": 2969,
"preview": "import numpy as np\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n\nfrom .anchor_head import AnchorHead\nfrom ..re"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/rpn_head.py",
"chars": 4048,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core imp"
},
{
"path": "mmdetection/mmdet/models/anchor_heads/ssd_head.py",
"chars": 7573,
"preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\n"
},
{
"path": "mmdetection/mmdet/models/backbones/__init__.py",
"chars": 161,
"preview": "from .resnet import ResNet, make_res_layer\nfrom .resnext import ResNeXt\nfrom .ssd_vgg import SSDVGG\n\n__all__ = ['ResNet'"
},
{
"path": "mmdetection/mmdet/models/backbones/resnet.py",
"chars": 15417,
"preview": "import logging\n\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom torch.nn.modules.batchnorm import _BatchN"
},
{
"path": "mmdetection/mmdet/models/backbones/resnext.py",
"chars": 7624,
"preview": "import math\n\nimport torch.nn as nn\n\nfrom mmdet.ops import DeformConv, ModulatedDeformConv\nfrom .resnet import Bottleneck"
},
{
"path": "mmdetection/mmdet/models/backbones/ssd_vgg.py",
"chars": 4510,
"preview": "import logging\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (VGG, xavier_ini"
},
{
"path": "mmdetection/mmdet/models/bbox_heads/__init__.py",
"chars": 157,
"preview": "from .bbox_head import BBoxHead\nfrom .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead\n\n__all__ = ['BBoxHead', '"
},
{
"path": "mmdetection/mmdet/models/bbox_heads/bbox_head.py",
"chars": 7749,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.core import (delta2bbox, multiclass_nms, "
},
{
"path": "mmdetection/mmdet/models/bbox_heads/convfc_bbox_head.py",
"chars": 7039,
"preview": "import torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HE"
},
{
"path": "mmdetection/mmdet/models/builder.py",
"chars": 1605,
"preview": "import mmcv\nfrom torch import nn\n\nfrom .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,\n "
},
{
"path": "mmdetection/mmdet/models/detectors/__init__.py",
"chars": 554,
"preview": "from .base import BaseDetector\nfrom .single_stage import SingleStageDetector\nfrom .two_stage import TwoStageDetector\nfro"
},
{
"path": "mmdetection/mmdet/models/detectors/base.py",
"chars": 4526,
"preview": "import logging\nfrom abc import ABCMeta, abstractmethod\n\nimport mmcv\nimport numpy as np\nimport torch.nn as nn\nimport pyco"
},
{
"path": "mmdetection/mmdet/models/detectors/cascade_rcnn.py",
"chars": 15842,
"preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins im"
},
{
"path": "mmdetection/mmdet/models/detectors/ensemble_htc.py",
"chars": 5661,
"preview": "from torch import nn\nfrom mmdet.core import (bbox2result, bbox_mapping)\nfrom mmdet.core import (bbox2roi, merge_aug_mask"
},
{
"path": "mmdetection/mmdet/models/detectors/fast_rcnn.py",
"chars": 1768,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FastRCNN(Two"
},
{
"path": "mmdetection/mmdet/models/detectors/faster_rcnn.py",
"chars": 781,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FasterRCNN(T"
},
{
"path": "mmdetection/mmdet/models/detectors/fcos.py",
"chars": 473,
"preview": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FCOS(S"
},
{
"path": "mmdetection/mmdet/models/detectors/htc.py",
"chars": 21714,
"preview": "import torch\nimport torch.nn.functional as F\n\nfrom mmdet.core import (bbox2result, build_assigner, build_sampler,\n "
},
{
"path": "mmdetection/mmdet/models/detectors/mask_rcnn.py",
"chars": 926,
"preview": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass MaskRCNN(Two"
},
{
"path": "mmdetection/mmdet/models/detectors/retinanet.py",
"chars": 488,
"preview": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass Retina"
},
{
"path": "mmdetection/mmdet/models/detectors/rpn.py",
"chars": 3381,
"preview": "import mmcv\n\nfrom mmdet.core import tensor2imgs, bbox_mapping\nfrom .base import BaseDetector\nfrom .test_mixins import RP"
},
{
"path": "mmdetection/mmdet/models/detectors/single_stage.py",
"chars": 2348,
"preview": "import torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet"
},
{
"path": "mmdetection/mmdet/models/detectors/test_mixins.py",
"chars": 6547,
"preview": "from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,\n merge_aug_bboxes, merge_aug"
},
{
"path": "mmdetection/mmdet/models/detectors/two_stage.py",
"chars": 9289,
"preview": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin,"
},
{
"path": "mmdetection/mmdet/models/mask_heads/__init__.py",
"chars": 192,
"preview": "from .fcn_mask_head import FCNMaskHead\nfrom .htc_mask_head import HTCMaskHead\nfrom .fused_semantic_head import FusedSema"
},
{
"path": "mmdetection/mmdet/models/mask_heads/fcn_mask_head.py",
"chars": 6611,
"preview": "import mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nimport torch\nimport torch.nn as nn\n\nfrom ..registry "
},
{
"path": "mmdetection/mmdet/models/mask_heads/fused_semantic_head.py",
"chars": 3475,
"preview": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import kaiming_init\n\nfrom ..registry import HEADS\nfr"
},
{
"path": "mmdetection/mmdet/models/mask_heads/htc_mask_head.py",
"chars": 1178,
"preview": "from .fcn_mask_head import FCNMaskHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HEADS.register_mod"
},
{
"path": "mmdetection/mmdet/models/necks/__init__.py",
"chars": 40,
"preview": "from .fpn import FPN\n\n__all__ = ['FPN']\n"
},
{
"path": "mmdetection/mmdet/models/necks/fpn.py",
"chars": 5068,
"preview": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\nfrom ..registry import NECKS\nfro"
},
{
"path": "mmdetection/mmdet/models/registry.py",
"chars": 1168,
"preview": "import torch.nn as nn\n\n\nclass Registry(object):\n\n def __init__(self, name):\n self._name = name\n self._m"
},
{
"path": "mmdetection/mmdet/models/roi_extractors/__init__.py",
"chars": 79,
"preview": "from .single_level import SingleRoIExtractor\n\n__all__ = ['SingleRoIExtractor']\n"
},
{
"path": "mmdetection/mmdet/models/roi_extractors/single_level.py",
"chars": 3075,
"preview": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom mmdet import ops\nfrom ..registry import ROI_EX"
},
{
"path": "mmdetection/mmdet/models/shared_heads/__init__.py",
"chars": 56,
"preview": "from .res_layer import ResLayer\n\n__all__ = ['ResLayer']\n"
},
{
"path": "mmdetection/mmdet/models/shared_heads/res_layer.py",
"chars": 2152,
"preview": "import logging\n\nimport torch.nn as nn\nfrom mmcv.cnn import constant_init, kaiming_init\nfrom mmcv.runner import load_chec"
},
{
"path": "mmdetection/mmdet/models/utils/__init__.py",
"chars": 483,
"preview": "from .conv_ws import conv_ws_2d, ConvWS2d\nfrom .conv_module import build_conv_layer, ConvModule\nfrom .norm import build_"
},
{
"path": "mmdetection/mmdet/models/utils/conv_module.py",
"chars": 5730,
"preview": "import warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import kaiming_init, constant_init\n\nfrom .conv_ws import ConvWS2d\nf"
},
{
"path": "mmdetection/mmdet/models/utils/conv_ws.py",
"chars": 1335,
"preview": "import torch.nn as nn\nimport torch.nn.functional as F\n\n\ndef conv_ws_2d(input,\n weight,\n bias"
},
{
"path": "mmdetection/mmdet/models/utils/norm.py",
"chars": 1685,
"preview": "import torch.nn as nn\n\n\nnorm_cfg = {\n # format: layer_type: (abbreviation, module)\n 'BN': ('bn', nn.BatchNorm2d),\n"
},
{
"path": "mmdetection/mmdet/models/utils/scale.py",
"chars": 266,
"preview": "import torch\nimport torch.nn as nn\n\n\nclass Scale(nn.Module):\n\n def __init__(self, scale=1.0):\n super(Scale, se"
},
{
"path": "mmdetection/mmdet/models/utils/weight_init.py",
"chars": 1455,
"preview": "import numpy as np\nimport torch.nn as nn\n\n\ndef xavier_init(module, gain=1, bias=0, distribution='normal'):\n assert di"
},
{
"path": "mmdetection/mmdet/ops/__init__.py",
"chars": 820,
"preview": "from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv,\n ModulatedDeformConvPack, DeformRoIP"
},
{
"path": "mmdetection/mmdet/ops/dcn/__init__.py",
"chars": 656,
"preview": "from .functions.deform_conv import deform_conv, modulated_deform_conv\nfrom .functions.deform_pool import deform_roi_pool"
},
{
"path": "mmdetection/mmdet/ops/dcn/functions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdetection/mmdet/ops/dcn/functions/deform_conv.py",
"chars": 7291,
"preview": "import torch\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\n\nfrom .. import deform_conv_cu"
},
{
"path": "mmdetection/mmdet/ops/dcn/functions/deform_pool.py",
"chars": 2370,
"preview": "import torch\nfrom torch.autograd import Function\n\nfrom .. import deform_pool_cuda\n\n\nclass DeformRoIPoolingFunction(Funct"
},
{
"path": "mmdetection/mmdet/ops/dcn/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "mmdetection/mmdet/ops/dcn/modules/deform_conv.py",
"chars": 5198,
"preview": "import math\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn.modules.utils import _pair\n\nfrom ..functions.deform_conv i"
},
{
"path": "mmdetection/mmdet/ops/dcn/modules/deform_pool.py",
"chars": 7058,
"preview": "from torch import nn\n\nfrom ..functions.deform_pool import deform_roi_pooling\n\n\nclass DeformRoIPooling(nn.Module):\n\n d"
},
{
"path": "mmdetection/mmdet/ops/dcn/setup.py",
"chars": 469,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='defor"
},
{
"path": "mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.cpp",
"chars": 29235,
"preview": "// modify from\n// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/def"
}
]
// ... and 70 more files (download for full content)
About this extraction
This page contains the full source code of the amirassov/kaggle-imaterialist GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 270 files (1004.0 KB), approximately 280.7k tokens, and a symbol index with 624 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.