[
  {
    "path": ".dockerignore",
    "content": "# custom:\n\n.git/*\ndata/*\nipynb/*\n.idea/*\n\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n"
  },
  {
    "path": ".gitignore",
    "content": "*.ipynb\n\n.idea/\n.DS_Store\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n"
  },
  {
    "path": "Dockerfile",
    "content": "FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel\n\nRUN apt-get update && apt-get install -y \\\n    git \\\n    wget \\\n    curl \\\n    cmake \\\n    unzip \\\n    build-essential \\\n    libsm6 \\\n    libxext6 \\\n    libfontconfig1 \\\n    libxrender1 \\\n    libswscale-dev \\\n    libtbb2 \\\n    libtbb-dev \\\n    libjpeg-dev \\\n    libpng-dev \\\n    libtiff-dev \\\n    libjasper-dev \\\n    libavformat-dev \\\n    libpq-dev \\\n    libturbojpeg \\\n    software-properties-common \\\n    && apt-get clean \\\n    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*\n\nRUN pip install --no-cache-dir \\\n    numpy \\\n    pandas \\\n    PyYAML \\\n    cycler \\\n    dill \\\n    h5py \\\n    imgaug \\\n    matplotlib \\\n    opencv-contrib-python \\\n    Pillow \\\n    scikit-image \\\n    scikit-learn \\\n    scipy \\\n    setuptools \\\n    six \\\n    tqdm \\\n    ipython \\\n    ipdb \\\n    albumentations \\\n    click \\\n    jpeg4py \\\n    addict \\\n    colorama \\\n    torchvision \\\n    iterative-stratification\n\nRUN pip install --upgrade --no-cache-dir cython && pip install --no-cache-dir pycocotools==2.0.0 mmcv==0.2.5\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2019 Miras Amir\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "Makefile",
    "content": "APP_NAME=amirassov/kaggle-imaterialist\nCONTAINER_NAME=kaggle-imaterialist\n\n# HELP\n.PHONY: help\n\nhelp: ## This help.\n\t@awk 'BEGIN (FS = \":.*?## \") /^[a-zA-Z_-]+:.*?## / (printf \"\\033[36m%-30s\\033[0m %s\\n\", $$1, $$2)' $(MAKEFILE_LIST)\n\nbuild:  ## Build the container\n\tnvidia-docker build -t $(APP_NAME) .\n\nrun-dgx: ## Run container in omen\n\tnvidia-docker run \\\n\t\t-itd \\\n\t\t--ipc=host \\\n\t\t--name=$(CONTAINER_NAME) \\\n\t\t-e DISPLAY=localhost:10.0 \\\n\t\t-v /tmp/.X11-unix:/tmp/.X11-unix \\\n\t\t-v /raid/data_share/amirassov/kaggle-imaterialist_data:/data \\\n\t\t-v /raid/data_share/amirassov/kaggle-imaterialist_dumps:/dumps \\\n\t\t-v $(shell pwd):/kaggle-imaterialist $(APP_NAME) bash\n\nrun-omen: ## Run container in omen\n\tnvidia-docker run \\\n\t\t-itd \\\n\t\t--ipc=host \\\n\t\t--name=$(CONTAINER_NAME) \\\n\t\t-e DISPLAY=localhost:10.0 \\\n\t\t-v /tmp/.X11-unix:/tmp/.X11-unix \\\n\t\t-v /home/videoanalytics/data/kaggle-imaterialist_data:/data \\\n\t\t-v /home/videoanalytics/data/dumps:/dumps \\\n\t\t-v $(shell pwd):/kaggle-imaterialist $(APP_NAME) bash\n\nexec: ## Run a bash in a running container\n\tnvidia-docker exec -it $(CONTAINER_NAME) bash\n\nstop: ## Stop and remove a running container\n\tdocker stop $(CONTAINER_NAME); docker rm $(CONTAINER_NAME)\n"
  },
  {
    "path": "README.md",
    "content": "# The First Place Solution of [iMaterialist (Fashion) 2019](https://www.kaggle.com/c/imaterialist-fashion-2019-FGVC6/)\n\n![ensemble](figures/prediction.png)\n\n## Solution\nMy solution is based on the COCO challenge 2018 winners article: https://arxiv.org/abs/1901.07518. \n\n### Model: \n[Hybrid Task Cascade with ResNeXt-101-64x4d-FPN backbone](https://github.com/open-mmlab/mmdetection/blob/master/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py). This model has a metric Mask mAP = 43.9 on COCO dataset. This is SOTA for instance segmentation.\n\n### Validation:\nFor validation, I used 450 training samples splitted using https://github.com/trent-b/iterative-stratification.\n\n### Preprocessing:\nI applied light augmentatios from the [albumentations](https://github.com/albu/albumentations) library to the original image. Then I use multi-scale training: in each iteration, the scale of short edge is randomly sampled\nfrom [600, 1200], and the scale of long edge is fixed as 1900.\n\n![preprocessing](figures/preproc.png)\n\n### Training details:\n* pre-train from COCO\n* optimizer: `SGD(lr=0.03, momentum=0.9, weight_decay=0.0001)`\n* batch_size: 16 = 2 images per gpu x 8 gpus Tesla V100\n* learning rate scheduler:\n```\nif iterations < 500:\n   lr = warmup(warmup_ratio=1 / 3)\nif epochs == 10:\n   lr = lr ∗ 0.1\nif epochs == 18:\n   lr = lr ∗ 0.1\nif epochs > 20:\n   stop\n```\n* training time: ~3 days.\n\n### Parameter tuning:\nAfter the 12th epoch with the default parameters, the metric on LB was **0.21913**. Next, I tuned postprocessing thresholds using validation data:\n```\nrcnn=dict(\n    score_thr=0.5,\n    nms=dict(type='nms', iou_thr=0.3),\n    max_per_img=100,\n    mask_thr_binary=0.45\n)\n```\n\nThis improved the metric on LB: **0.21913 -&gt; 0.30011.**\n\n### Test time augmentation:\nI use 3 scales as well as horizontal flip at test time and ensemble the results. Testing scales are (1000, 1600), (1200, 1900), (1400, 2200). 
\n\nI drew a TTA scheme for Mask R-CNN, which is implemented in the mmdetection library. For Hybrid Task Cascade R-CNN, I rewrote this code. \n\nThis improved the metric on LB: **0.30011 -&gt; 0.31074.**\n\n![TTA](figures/tta.png)\n\n### Ensemble:\nI ensemble the 3 best checkpoints of my model. The ensemble scheme is similar to TTA. \n\nThis improved the metric on LB: **0.31074 -&gt; 0.31626.**\n\n![ensemble](figures/ensemble.png)\n\n### Attributes:\nI didn't use attributes at all: they were difficult to predict and the removal of classes with attributes greatly improved the metric. \n\nDuring the whole competition, I deleted classes with attributes: `{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}` U `{27, 28, 33}`. But two days before the end I read [the discussion](https://www.kaggle.com/c/kaggle-imaterialist-fashion-2019-FGVC6/discussion/94811#latest548137) and added back classes `{27, 28, 33}`. \n\nThis improved the metric on LB: **0.31626 -&gt; 0.33511.**\n\n### Postprocessing for masks\nMy post-processing algorithm for avoiding intersections of masks of the same class:\n```python\ndef hard_overlaps_suppression(binary_mask, scores):\n    not_overlap_mask = []\n    for i in np.argsort(scores)[::-1]:\n        current_mask = binary_mask[..., i].copy()\n        for mask in not_overlap_mask:\n            current_mask = np.bitwise_and(current_mask, np.invert(mask))\n        not_overlap_mask.append(current_mask)\n    return np.stack(not_overlap_mask, -1)\n```\n\n### Small postprocessing:\nI deleted objects with an area of less than 20 pixels. 
\n\nThis improved the metric on LB: **0.33511 -&gt; 0.33621.**\n\n## How to run?\n\n### Docker\n```bash\nmake build\nmake run-[server-name]\nmake exec\n```\n\n### Build mmdetection:\n```bash\ncd mmdetection\nbash compile.sh\npython setup.py develop\n```\n\n### Prepare pretrained weights:\n```bash\nbash prepare_weights.sh\n```\n\n### Data structure\n```\n/data/\n├── train/\n│   └── ...\n├── test/\n│   └── ...\n└── train.csv.zip\n/dumps/\n└── htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900/\n\n```\nFix the [error](https://www.kaggle.com/c/kaggle-imaterialist-fashion-2019-FGVC6/discussion/91217#latest-529042) in `train.csv.zip`.\n\n### Prepare annotations for mmdetection:\n```bash\ncd scripts\nbash create_mmdetection_train.sh\nbash create_mmdetection_test.sh\nbash split.sh\n```\n\n### Training the model:\n```bash\nCUDA_VISIBLE_DEVICES=[list of gpus] bash dist_train.sh [config] [gpus] [--validate] \n```\n\n#### My best checkpoint:\nhttps://yadi.sk/d/-raqliq_ad6r_Q\n\n### Test the model:\n```bash\nCUDA_VISIBLE_DEVICES=[list of gpus] bash dist_test_ensemble.sh [config] [gpus]\n```\n\n\n## References\n* https://github.com/open-mmlab/mmdetection\n"
  },
  {
    "path": "configs/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained=None,\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            groups=64,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=47,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=47,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            
num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=47,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=47))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n   
             neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.5,\n        nms=dict(type='nms', iou_thr=0.3),\n        max_per_img=100,\n        mask_thr_binary=0.45),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CustomDataset'\ndata_root = '/data/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + '/data/train_99_mmdetection.pkl',\n        img_prefix=data_root + 'train/',\n        img_scale=[(600, 1900), (1200, 1900)],\n        multiscale_mode='range',\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=False,\n        with_label=True,\n        extra_aug=dict(\n            type='Compose',\n            
transforms=[\n                dict(\n                    p=0.5,\n                    max_h_size=64,\n                    type='Cutout'\n                ),\n                dict(\n                    brightness_limit=0.3,\n                    contrast_limit=0.3,\n                    p=0.5,\n                    type='RandomBrightnessContrast'\n                ),\n                dict(\n                    p=0.5,\n                    quality_lower=80,\n                    quality_upper=99,\n                    type='JpegCompression'\n                ),\n            ],\n            p=1.0\n        )\n    ),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + '/data/val_01_mmdetection.pkl',\n        img_prefix=data_root + 'train/',\n        img_scale=(1200, 1900),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'test_mmdetection.pkl',\n        img_prefix=data_root + 'test/',\n        img_scale=[(1000, 1600), (1200, 1900), (1400, 2200)],\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=1.0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[10, 18])\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n    ])\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\n\nwork_dir = '/dumps/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e_1200x1900'\nload_from = 
'/dumps/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c_prune.pth'\n\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/.travis.yml",
    "content": "dist: trusty\nlanguage: python\n\ninstall:\n  - pip install flake8\n\npython:\n  - \"3.5\"\n  - \"3.6\"\n\nscript:\n  - flake8"
  },
  {
    "path": "mmdetection/GETTING_STARTED.md",
    "content": "# Getting Started\n\nThis page provides basic tutorials about the usage of mmdetection.\nFor installation instructions, please see [INSTALL.md](INSTALL.md).\n\n## Inference with pretrained models\n\nWe provide testing scripts to evaluate a whole dataset (COCO, PASCAL VOC, etc.),\nand also some high-level apis for easier integration to other projects.\n\n### Test a dataset\n\n- [x] single GPU testing\n- [x] multiple GPU testing\n- [x] visualize detection results\n\nYou can use the following commands to test a dataset.\n\n```shell\n# single-gpu testing\npython tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]\n\n# multi-gpu testing\n./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]\n```\n\nOptional arguments:\n- `RESULT_FILE`: Filename of the output results in pickle format. If not specified, the results will not be saved to a file.\n- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values are: `proposal_fast`, `proposal`, `bbox`, `segm`, `keypoints`.\n- `--show`: If specified, detection results will be ploted on the images and shown in a new window. Only applicable for single GPU testing.\n\nExamples:\n\nAssume that you have already downloaded the checkpoints to `checkpoints/`.\n\n1. Test Faster R-CNN and show the results.\n\n```shell\npython tools/test.py configs/faster_rcnn_r50_fpn_1x.py \\\n    checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth \\\n    --show\n```\n\n2. Test Mask R-CNN and evaluate the bbox and mask AP.\n\n```shell\npython tools/test.py configs/mask_rcnn_r50_fpn_1x.py \\\n    checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \\\n    --out results.pkl --eval bbox segm\n```\n\n3. 
Test Mask R-CNN with 8 GPUs, and evaluate the bbox and mask AP.\n\n```shell\n./tools/dist_test.sh configs/mask_rcnn_r50_fpn_1x.py \\\n    checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth \\\n    8 --out results.pkl --eval bbox segm\n```\n\n### High-level APIs for testing images.\n\nHere is an example of building the model and test given images.\n\n```python\nfrom mmdet.apis import init_detector, inference_detector, show_result\n\nconfig_file = 'configs/faster_rcnn_r50_fpn_1x.py'\ncheckpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth'\n\n# build the model from a config file and a checkpoint file\nmodel = init_detector(config_file, checkpoint_file)\n\n# test a single image and show the results\nimg = 'test.jpg'  # or img = mmcv.imread(img), which will only load it once\nresult = inference_detector(model, img)\nshow_result(img, result, model.CLASSES)\n\n# test a list of images and write the results to image files\nimgs = ['test1.jpg', 'test2.jpg']\nfor i, result in enumerate(inference_detector(model, imgs, device='cuda:0')):\n    show_result(imgs[i], result, model.CLASSES, out_file='result_{}.jpg'.format(i))\n```\n\n\n## Train a model\n\nmmdetection implements distributed training and non-distributed training,\nwhich uses `MMDistributedDataParallel` and `MMDataParallel` respectively.\n\nAll outputs (log files and checkpoints) will be saved to the working directory,\nwhich is specified by `work_dir` in the config file.\n\n**\\*Important\\***: The default learning rate in config files is for 8 GPUs.\nIf you use less or more than 8 GPUs, you need to set the learning rate proportional\nto the GPU num, e.g., 0.01 for 4 GPUs and 0.04 for 16 GPUs.\n\n### Train with a single GPU\n\n```shell\npython tools/train.py ${CONFIG_FILE}\n```\n\nIf you want to specify the working directory in the command, you can add an argument `--work_dir ${YOUR_WORK_DIR}`.\n\n### Train with multiple GPUs\n\n```shell\n./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} 
[optional arguments]\n```\n\nOptional arguments are:\n\n- `--validate` (recommended): Perform evaluation at every k (default=1) epochs during the training.\n- `--work_dir ${WORK_DIR}`: Override the working directory specified in the config file.\n- `--resume_from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file.\n\n### Train with multiple machines\n\nIf you run mmdetection on a cluster managed with [slurm](https://slurm.schedmd.com/), you can just use the script `slurm_train.sh`.\n\n```shell\n./tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} ${WORK_DIR} [${GPUS}]\n```\n\nHere is an example of using 16 GPUs to train Mask R-CNN on the dev partition.\n\n```shell\n./tools/slurm_train.sh dev mask_r50_1x configs/mask_rcnn_r50_fpn_1x.py /nfs/xxxx/mask_rcnn_r50_fpn_1x 16\n```\n\nYou can check [slurm_train.sh](tools/slurm_train.sh) for full arguments and environment variables.\n\nIf you have just multiple machines connected with ethernet, you can refer to\npytorch [launch utility](https://pytorch.org/docs/stable/distributed_deprecated.html#launch-utility).\nUsually it is slow if you do not have high speed networking like infiniband.\n\n\n## How-to\n\n### Use my own datasets\n\nThe simplest way is to convert your dataset to existing dataset formats (COCO or PASCAL VOC).\n\nHere we show an example of adding a custom dataset of 5 classes, assuming it is also in COCO format.\n\nIn `mmdet/datasets/my_dataset.py`:\n\n```python\nfrom .coco import CocoDataset\n\n\nclass MyDataset(CocoDataset):\n\n    CLASSES = ('a', 'b', 'c', 'd', 'e')\n```\n\nIn `mmdet/datasets/__init__.py`:\n\n```python\nfrom .my_dataset import MyDataset\n```\n\nThen you can use `MyDataset` in config files, with the same API as CocoDataset.\n\n\nIt is also fine if you do not want to convert the annotation format to COCO or PASCAL format.\nActually, we define a simple annotation format and all existing datasets are\nprocessed to be compatible with it, either online or offline.\n\nThe 
annotation of a dataset is a list of dict, each dict corresponds to an image.\nThere are 3 field `filename` (relative path), `width`, `height` for testing,\nand an additional field `ann` for training. `ann` is also a dict containing at least 2 fields:\n`bboxes` and `labels`, both of which are numpy arrays. Some datasets may provide\nannotations like crowd/difficult/ignored bboxes, we use `bboxes_ignore` and `labels_ignore`\nto cover them.\n\nHere is an example.\n```\n[\n    {\n        'filename': 'a.jpg',\n        'width': 1280,\n        'height': 720,\n        'ann': {\n            'bboxes': <np.ndarray, float32> (n, 4),\n            'labels': <np.ndarray, float32> (n, ),\n            'bboxes_ignore': <np.ndarray, float32> (k, 4),\n            'labels_ignore': <np.ndarray, float32> (k, ) (optional field)\n        }\n    },\n    ...\n]\n```\n\nThere are two ways to work with custom datasets.\n\n- online conversion\n\n  You can write a new Dataset class inherited from `CustomDataset`, and overwrite two methods\n  `load_annotations(self, ann_file)` and `get_ann_info(self, idx)`,\n  like [CocoDataset](mmdet/datasets/coco.py) and [VOCDataset](mmdet/datasets/voc.py).\n\n- offline conversion\n\n  You can convert the annotation format to the expected format above and save it to\n  a pickle or json file, like [pascal_voc.py](tools/convert_datasets/pascal_voc.py).\n  Then you can simply use `CustomDataset`.\n\n### Develop new components\n\nWe basically categorize model components into 4 types.\n\n- backbone: usually a FCN network to extract feature maps, e.g., ResNet, MobileNet.\n- neck: the component between backbones and heads, e.g., FPN, PAFPN.\n- head: the component for specific tasks, e.g., bbox prediction and mask prediction.\n- roi extractor: the part for extracting RoI features from feature maps, e.g., RoI Align.\n\nHere we show how to develop new components with an example of MobileNet.\n\n1. 
Create a new file `mmdet/models/backbones/mobilenet.py`.\n\n```python\nimport torch.nn as nn\n\nfrom ..registry import BACKBONES\n\n\n@BACKBONES.register\nclass MobileNet(nn.Module):\n\n    def __init__(self, arg1, arg2):\n        pass\n\n    def forward(self, x):  # should return a tuple\n        pass\n```\n\n2. Import the module in `mmdet/models/backbones/__init__.py`.\n\n```python\nfrom .mobilenet import MobileNet\n```\n\n3. Use it in your config file.\n\n```python\nmodel = dict(\n    ...\n    backbone=dict(\n        type='MobileNet',\n        arg1=xxx,\n        arg2=xxx),\n    ...\n```\n\nFor more information on how it works, you can refer to [TECHNICAL_DETAILS.md](TECHNICAL_DETAILS.md) (TODO).\n"
  },
  {
    "path": "mmdetection/INSTALL.md",
    "content": "## Installation\n\n### Requirements\n\n- Linux\n- Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/))\n- PyTorch 1.0+ or PyTorch-nightly\n- CUDA 9.0+\n- NCCL 2+\n- GCC 4.9+\n- [mmcv](https://github.com/open-mmlab/mmcv)\n\nWe have tested the following versions of OS and softwares:\n\n- OS: Ubuntu 16.04/18.04 and CentOS 7.2\n- CUDA: 9.0/9.2/10.0\n- NCCL: 2.1.15/2.2.13/2.3.7/2.4.2\n- GCC: 4.9/5.3/5.4/7.3\n\n### Install mmdetection\n\na. Create a conda virtual environment and activate it. Then install Cython.\n\n```shell\nconda create -n open-mmlab python=3.7 -y\nsource activate open-mmlab\n\nconda install cython\n```\n\nb. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/).\n\nc. Clone the mmdetection repository.\n\n```shell\ngit clone https://github.com/open-mmlab/mmdetection.git\ncd mmdetection\n```\n\nd. Compile cuda extensions.\n\n```shell\n./compile.sh\n```\n\ne. Install mmdetection (other dependencies will be installed automatically).\n\n```shell\npython setup.py develop\n# or \"pip install -e .\"\n```\n\nNote:\n\n1. It is recommended that you run the step e each time you pull some updates from github. If there are some updates of the C/CUDA codes, you also need to run step d.\nThe git commit id will be written to the version number with step e, e.g. 0.6.0+2e7045c. The version will also be saved in trained models.\n\n2. 
Following the above instructions, mmdetection is installed in `dev` mode, so any modifications to the code will take effect without installing it again.\n\n### Prepare COCO dataset.\n\nIt is recommended to symlink the dataset root to `$MMDETECTION/data`.\n\n```\nmmdetection\n├── mmdet\n├── tools\n├── configs\n├── data\n│   ├── coco\n│   │   ├── annotations\n│   │   ├── train2017\n│   │   ├── val2017\n│   │   ├── test2017\n│   ├── VOCdevkit\n│   │   ├── VOC2007\n│   │   ├── VOC2012\n\n```\n\n### Scripts\n[Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is\na script for setting up mmdetection with conda.\n\n### Notice\nYou can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently.\n\nIf there is more than one mmdetection on your machine and you want to use them alternately,\nplease insert the following code into the main file\n```python\nimport os.path as osp\nimport sys\nsys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))\n```\nor run the following command in the terminal of the corresponding folder.\n```shell\nexport PYTHONPATH=`pwd`:$PYTHONPATH\n```\n"
  },
  {
    "path": "mmdetection/LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "mmdetection/MODEL_ZOO.md",
    "content": "# Benchmark and Model Zoo\n\n## Environment\n\n### Hardware\n\n- 8 NVIDIA Tesla V100 GPUs\n- Intel Xeon 4114 CPU @ 2.20GHz\n\n### Software environment\n\n- Python 3.6 / 3.7\n- PyTorch Nightly\n- CUDA 9.0.176\n- CUDNN 7.0.4\n- NCCL 2.1.15\n\n## Mirror sites\n\nWe use AWS as the main site to host our model zoo, and maintain a mirror on aliyun.\nYou can replace `https://s3.ap-northeast-2.amazonaws.com/open-mmlab` with `https://open-mmlab.oss-cn-beijing.aliyuncs.com` in model urls.\n\n## Common settings\n\n- All FPN baselines and RPN-C4 baselines were trained using 8 GPU with a batch size of 16 (2 images per GPU). Other C4 baselines were trained using 8 GPU with a batch size of 8 (1 image per GPU).\n- All models were trained on `coco_2017_train`, and tested on the `coco_2017_val`.\n- We use distributed training and BN layer stats are fixed.\n- We adopt the same training schedules as Detectron. 1x indicates 12 epochs and 2x indicates 24 epochs, which corresponds to slightly less iterations than Detectron and the difference can be ignored.\n- All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo.\n- For fair comparison with other codebases, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` for all 8 GPUs. 
Note that this value is usually less than what `nvidia-smi` shows.\n- We report the inference time as the overall time including data loading, network forwarding and post processing.\n\n\n## Baselines\n\nMore models with different backbones will be added to the model zoo.\n\n### RPN\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR1000 |                                                          Download                                                          |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------: |\n|     R-50-C4     |  caffe  |   1x    |    -     |          -          |      20.5      |  51.1  |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_caffe_c4_1x-ea7d3428.pth)       |\n|     R-50-C4     |  caffe  |   2x    |   2.2    |        0.17         |      20.3      |  52.2  |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_caffe_c4_2x-c6d5b958.pth)       |\n|     R-50-C4     | pytorch |   1x    |    -     |          -          |      20.1      |  50.2  |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_c4_1x-eb38972b.pth)          |\n|     R-50-C4     | pytorch |   2x    |    -     |          -          |      20.0      |  51.1  |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_c4_2x-3d4c1e14.pth)          |\n|    R-50-FPN     |  caffe  |   1x    |   3.3    |        0.253        |      16.9      |  58.2  |                                                             -                                                              |\n|    R-50-FPN     | pytorch |   1x    |   3.5    |        0.276        |      17.7      |  57.1  |    
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_1x_20181010-4a9c0712.pth)     |\n|    R-50-FPN     | pytorch |   2x    |    -     |          -          |       -        |  57.6  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r50_fpn_2x_20181010-88a4a471.pth)     |\n|    R-101-FPN    |  caffe  |   1x    |   5.2    |        0.379        |      13.9      |  59.4  |                                                             -                                                              |\n|    R-101-FPN    | pytorch |   1x    |   5.4    |        0.396        |      14.4      |  58.6  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_1x_20181129-f50da4bd.pth)    |\n|    R-101-FPN    | pytorch |   2x    |    -     |          -          |       -        |  59.1  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_r101_fpn_2x_20181129-e42c6c9a.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   6.6    |        0.589        |      11.8      |  59.4  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_1x_20181218-7e379d26.pth) |\n| X-101-32x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  59.9  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_32x4d_fpn_2x_20181218-0510af40.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   9.5    |        0.955        |      8.3       |  59.8  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_1x_20181218-c1a24f1f.pth) |\n| X-101-64x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  60.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/rpn_x101_64x4d_fpn_2x_20181218-c22bdd70.pth) |\n\n### Faster R-CNN\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | 
Train time (s/iter) | Inf time (fps) | box AP |                                                              Download                                                              |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :--------------------------------------------------------------------------------------------------------------------------------: |\n|     R-50-C4     |  caffe  |   1x    |    -     |          -          |      9.5       |  34.9  |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_caffe_c4_1x-75ecfdfa.pth)       |\n|     R-50-C4     |  caffe  |   2x    |   4.0    |        0.39         |      9.3       |  36.5  |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_caffe_c4_2x-71c67f27.pth)       |\n|     R-50-C4     | pytorch |   1x    |    -     |          -          |      9.3       |  33.9  |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_c4_1x-642cf91f.pth)          |\n|     R-50-C4     | pytorch |   2x    |    -     |          -          |      9.4       |  35.9  |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_c4_2x-6e4fdf4f.pth)          |\n|    R-50-FPN     |  caffe  |   1x    |   3.6    |        0.333        |      13.5      |  36.6  |                                                                 -                                                                  |\n|    R-50-FPN     | pytorch |   1x    |   3.8    |        0.353        |      13.6      |  36.4  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth)     |\n|    R-50-FPN     | pytorch |   2x    |    -     |          -          |       -        |  37.7  |    
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_2x_20181010-443129e1.pth)     |\n|    R-101-FPN    |  caffe  |   1x    |   5.5    |        0.465        |      11.5      |  38.8  |                                                                 -                                                                  |\n|    R-101-FPN    | pytorch |   1x    |   5.7    |        0.474        |      11.9      |  38.5  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_1x_20181129-d1468807.pth)    |\n|    R-101-FPN    | pytorch |   2x    |    -     |          -          |       -        |  39.4  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r101_fpn_2x_20181129-73e7ade7.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   6.9    |        0.672        |      10.3      |  40.1  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_1x_20181218-ad81c133.pth) |\n| X-101-32x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  40.4  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_32x4d_fpn_2x_20181218-0ed58946.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   9.8    |        1.040        |      7.3       |  41.3  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_1x_20181218-c9c69c8f.pth) |\n| X-101-64x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  40.7  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_x101_64x4d_fpn_2x_20181218-fe94f9b8.pth) |\n\n### Mask R-CNN\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP |                                                             Download                                         
                    |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :------------------------------------------------------------------------------------------------------------------------------: |\n|     R-50-C4     |  caffe  |   1x    |    -     |          -          |      8.1       |  35.9  |  31.5   |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_caffe_c4_1x-02a4ad3b.pth)       |\n|     R-50-C4     |  caffe  |   2x    |   4.2    |        0.43         |      8.1       |  37.9  |  32.9   |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_caffe_c4_2x-d150973a.pth)       |\n|     R-50-C4     | pytorch |   1x    |    -     |          -          |      7.9       |  35.1  |  31.2   |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_c4_1x-a83bdd40.pth)          |\n|     R-50-C4     | pytorch |   2x    |    -     |          -          |      8.0       |  37.2  |  32.5   |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_c4_2x-3cf169a9.pth)          |\n|    R-50-FPN     |  caffe  |   1x    |   3.8    |        0.430        |      10.2      |  37.4  |  34.3   |                                                                -                                                                 |\n|    R-50-FPN     | pytorch |   1x    |   3.9    |        0.453        |      10.6      |  37.3  |  34.2   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth)     |\n|    R-50-FPN     | pytorch |   2x    |    -     |          -          |       -        |  38.5  |  35.1   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_2x_20181010-41d35c05.pth)     |\n|    R-101-FPN    |  caffe  |   1x    |   5.7    
|        0.534        |      9.4       |  39.9  |  36.1   |                                                                -                                                                 |\n|    R-101-FPN    | pytorch |   1x    |   5.8    |        0.571        |      9.5       |  39.4  |  35.9   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_1x_20181129-34ad1961.pth)    |\n|    R-101-FPN    | pytorch |   2x    |    -     |          -          |       -        |  40.3  |  36.5   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_2x_20181129-a254bdfc.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   7.1    |        0.759        |      8.3       |  41.1  |  37.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_1x_20181218-44e635cc.pth) |\n| X-101-32x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  41.4  |  37.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_32x4d_fpn_2x_20181218-f023dffa.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   10.0   |        1.102        |      6.5       |  42.1  |  38.0   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_1x_20181218-cb159987.pth) |\n| X-101-64x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  42.0  |  37.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_x101_64x4d_fpn_2x_20181218-ea936e44.pth) |\n\n### Fast R-CNN (with pre-computed proposals)\n\n| Backbone  |  Style  |  Type  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP |                                                            Download                                                             |\n| :-------: | :-----: | :----: | :-----: | :------: | 
:-----------------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------: |\n|  R-50-C4  |  caffe  | Faster |   1x    |    -     |          -          |      6.7       |  35.0  |    -    |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_caffe_c4_1x-0ef9a60b.pth)      |\n|  R-50-C4  |  caffe  | Faster |   2x    |   3.8    |        0.34         |      6.6       |  36.4  |    -    |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_c4_2x-657a9fc6.pth)         |\n|  R-50-C4  | pytorch | Faster |   1x    |    -     |          -          |      6.3       |  34.2  |    -    |         [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_c4_1x-2bc00ca9.pth)         |\n|  R-50-C4  | pytorch | Faster |   2x    |    -     |          -          |      6.1       |  35.8  |    -    |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_caffe_c4_2x-9171d0fc.pth)      |\n| R-50-FPN  |  caffe  | Faster |   1x    |   3.3    |        0.242        |      18.4      |  36.6  |    -    |                                                                -                                                                |\n| R-50-FPN  | pytorch | Faster |   1x    |   3.5    |        0.250        |      16.5      |  35.8  |    -    |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_1x_20181010-08160859.pth)    |\n|  R-50-C4  |  caffe  |  Mask  |   1x    |    -     |          -          |      8.1       |  35.9  |  31.5   |   [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_caffe_c4_1x-b43f7f3c.pth)    |\n|  R-50-C4  |  caffe  |  Mask  |   2x    |   4.2    |        0.43         |      8.1       |  37.9  |  32.9   
|   [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_caffe_c4_2x-e3580184.pth)    |\n|  R-50-C4  | pytorch |  Mask  |   1x    |    -     |          -          |      7.9       |  35.1  |  31.2   |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_c4_1x-bc7fa8c8.pth)       |\n|  R-50-C4  | pytorch |  Mask  |   2x    |    -     |          -          |      8.0       |  37.2  |  32.5   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth)  |\n| R-50-FPN  | pytorch | Faster |   2x    |    -     |          -          |       -        |  37.1  |    -    |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r50_fpn_2x_20181010-d263ada5.pth)    |\n| R-101-FPN |  caffe  | Faster |   1x    |   5.2    |        0.355        |      14.4      |  38.6  |    -    |                                                                -                                                                |\n| R-101-FPN | pytorch | Faster |   1x    |   5.4    |        0.388        |      13.2      |  38.1  |    -    |   [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_1x_20181129-ffaa2eb0.pth)    |\n| R-101-FPN | pytorch | Faster |   2x    |    -     |          -          |       -        |  38.8  |    -    |   [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_rcnn_r101_fpn_2x_20181129-9dba92ce.pth)    |\n| R-50-FPN  |  caffe  |  Mask  |   1x    |   3.4    |        0.328        |      12.8      |  37.3  |  34.5   |                                                                -                                                                |\n| R-50-FPN  | pytorch |  Mask  |   1x    |   3.5    |        0.346        |      12.7      |  36.8  |  34.1   | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_1x_20181010-e030a38f.pth)  |\n| R-50-FPN  | pytorch |  Mask  |   2x    |    -     |          -          |       -        |  37.9  |  34.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r50_fpn_2x_20181010-5048cb03.pth)  |\n| R-101-FPN |  caffe  |  Mask  |   1x    |   5.2    |        0.429        |      11.2      |  39.4  |  36.1   |                                                                -                                                                |\n| R-101-FPN | pytorch |  Mask  |   1x    |   5.4    |        0.462        |      10.9      |  38.9  |  35.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_1x_20181129-2273fa9b.pth) |\n| R-101-FPN | pytorch |  Mask  |   2x    |    -     |          -          |       -        |  39.9  |  36.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fast_mask_rcnn_r101_fpn_2x_20181129-bf63ec5e.pth) |\n\n### RetinaNet\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP |                                                             Download                                                             |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: |\n|    R-50-FPN     |  caffe  |   1x    |   3.4    |        0.285        |      12.5      |  35.8  |                                                                -                                                                 |\n|    R-50-FPN     | pytorch |   1x    |   3.6    |        0.308        |      12.1      |  35.6  |    
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth)     |\n|    R-50-FPN     | pytorch |   2x    |    -     |          -          |       -        |  36.5  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_2x_20181125-8b724df2.pth)     |\n|    R-101-FPN    |  caffe  |   1x    |   5.3    |        0.410        |      10.4      |  37.8  |                                                                -                                                                 |\n|    R-101-FPN    | pytorch |   1x    |   5.5    |        0.429        |      10.9      |  37.7  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_1x_20181129-f016f384.pth)    |\n|    R-101-FPN    | pytorch |   2x    |    -     |          -          |       -        |  38.1  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r101_fpn_2x_20181129-72c14526.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   6.7    |        0.632        |      9.3       |  39.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_1x_20190501-967812ba.pth) |\n| X-101-32x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  39.3  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_32x4d_fpn_2x_20181218-8596452d.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   9.6    |        0.993        |      7.0       |  40.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_1x_20181218-a0a22662.pth) |\n| X-101-64x4d-FPN | pytorch |   2x    |    -     |          -          |       -        |  39.6  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_x101_64x4d_fpn_2x_20181218-5e88d045.pth) |\n\n### Cascade 
R-CNN\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP |                                                              Download                                                               |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :---------------------------------------------------------------------------------------------------------------------------------: |\n|     R-50-C4     |  caffe  |   1x    |   8.7    |        0.92         |      5.0       |  38.7  |      [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_caffe_c4_1x-7c85c62b.pth)       |\n|    R-50-FPN     |  caffe  |   1x    |   3.9    |        0.464        |      10.9      |  40.5  |                                                                  -                                                                  |\n|    R-50-FPN     | pytorch |   1x    |   4.1    |        0.455        |      11.9      |  40.4  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_1x_20190501-3b6211ab.pth)     |\n|    R-50-FPN     | pytorch |   20e   |    -     |          -          |       -        |  41.1  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r50_fpn_20e_20181123-db483a09.pth)    |\n|    R-101-FPN    |  caffe  |   1x    |   5.8    |        0.569        |      9.6       |  42.4  |                                                                  -                                                                  |\n|    R-101-FPN    | pytorch |   1x    |   6.0    |        0.584        |      10.3      |  42.0  |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_1x_20181129-d64ebac7.pth)    |\n|    R-101-FPN    | pytorch |   20e   |    -     |          -          |       -        |  42.5  |   
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_r101_fpn_20e_20181129-b46dcede.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   7.2    |        0.770        |      8.9       |  43.6  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth) |\n| X-101-32x4d-FPN | pytorch |   20e   |    -     |          -          |       -        |  44.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_2x_20181218-28f73c4c.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   10.0   |        1.133        |      6.7       |  44.5  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_1x_20181218-e2dc376a.pth) |\n| X-101-64x4d-FPN | pytorch |   20e   |    -     |          -          |       -        |  44.7  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_64x4d_fpn_2x_20181218-5add321e.pth) |\n\n### Cascade Mask R-CNN\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP |                                                                 Download                                                                  |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :---------------------------------------------------------------------------------------------------------------------------------------: |\n|     R-50-C4     |  caffe  |   1x    |   9.1    |        0.99         |      4.5       |  39.3  |  32.8   |       [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_caffe_c4_1x-f72cc254.pth)       |\n|    R-50-FPN     |  caffe  |   1x    |   5.1    |        0.692        |      7.6       |  40.9  |  35.5   |                                             
                        -                                                                     |\n|    R-50-FPN     | pytorch |   1x    |   5.3    |        0.683        |      7.4       |  41.2  |  35.7   |     [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth)     |\n|    R-50-FPN     | pytorch |   20e   |    -     |          -          |       -        |  42.3  |  36.6   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_20e_20181123-6e0c9713.pth)     |\n|    R-101-FPN    |  caffe  |   1x    |   7.0    |        0.803        |      7.2       |  43.1  |  37.2   |                                                                     -                                                                     |\n|    R-101-FPN    | pytorch |   1x    |   7.2    |        0.807        |      6.8       |  42.6  |  37.0   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_1x_20181129-64f00602.pth)     |\n|    R-101-FPN    | pytorch |   20e   |    -     |          -          |       -        |  43.3  |  37.6   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r101_fpn_20e_20181129-cb85151d.pth)    |\n| X-101-32x4d-FPN | pytorch |   1x    |   8.4    |        0.976        |      6.6       |  44.4  |  38.2   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_1x_20181218-1d944c89.pth)  |\n| X-101-32x4d-FPN | pytorch |   20e   |    -     |          -          |       -        |  44.7  |  38.6   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_32x4d_fpn_20e_20181218-761a3473.pth) |\n| X-101-64x4d-FPN | pytorch |   1x    |   11.4   |        1.33         |      5.3       |  45.4  |  39.1   | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_1x_20190501-827e0a70.pth)  |\n| X-101-64x4d-FPN | pytorch |   20e   |    -     |          -          |       -        |  45.7  |  39.4   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_x101_64x4d_fpn_20e_20181218-630773a7.pth) |\n\n**Notes:**\n\n- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs.\n\n### Hybrid Task Cascade (HTC)\n\n|    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP |                                                            Download                                                             |\n| :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :-----: | :-----------------------------------------------------------------------------------------------------------------------------: |\n|    R-50-FPN     | pytorch |   1x    |   7.4    |        0.936        |      4.1       |  42.1  |  37.3   |     [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth)     |\n|    R-50-FPN     | pytorch |   20e   |    -     |          -          |       -        |  43.2  |  38.1   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth)     |\n|    R-101-FPN    | pytorch |   20e   |   9.3    |        1.051        |      4.0       |  44.9  |  39.4   |    [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth)    |\n| X-101-32x4d-FPN | pytorch |   20e   |   5.8    |        0.769        |      3.8       |  46.1  |  40.3   | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |\n| X-101-64x4d-FPN | pytorch |   20e   |   7.5    |        1.120        |      3.5       |  46.9  |  40.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |\n\n**Notes:**\n\n- Please refer to [Hybrid Task Cascade](configs/htc/README.md) for details and a more powerful model (50.7/43.9).\n\n### SSD\n\n| Backbone | Size  | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP |                                                             Download                                                              |\n| :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :-------------------------------------------------------------------------------------------------------------------------------: |\n|  VGG16   |  300  | caffe |  120e   |   3.5    |        0.256        |  25.9 / 34.6   |  25.7  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) |\n|  VGG16   |  512  | caffe |  120e   |   7.6    |        0.412        |  20.7 / 25.4   |  29.3  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_coco_vgg16_caffe_120e_20181221-d48b0be8.pth) |\n\n### SSD (PASCAL VOC)\n\n| Backbone | Size  | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP |                                                             Download                                                             |\n| :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: |\n|  VGG16   |  300  | caffe |  240e   |   2.5    |        0.159        |  
35.7 / 53.6   |  77.5  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) |\n|  VGG16   |  512  | caffe |  240e   |   4.3    |        0.214        |  27.5 / 35.9   |  80.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) |\n\n**Notes:**\n\n- `cudnn.benchmark` is set as `True` for SSD training and testing.\n- Inference time is reported for batch size = 1 and batch size = 8.\n- The speed difference between VOC and COCO is caused by model parameters and nms.\n\n### Group Normalization (GN)\n\nPlease refer to [Group Normalization](configs/gn/README.md) for details.\n\n### Weight Standardization\n\nPlease refer to [Weight Standardization](configs/gn+ws/README.md) for details.\n\n### Deformable Convolution v2\n\nPlease refer to [Deformable Convolutional Networks](configs/dcn/README.md) for details.\n\n\n## Comparison with Detectron and maskrcnn-benchmark\n\nWe compare mmdetection with [Detectron](https://github.com/facebookresearch/Detectron)\nand [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). 
The backbone used is R-50-FPN.\n\nIn general, mmdetection has 3 advantages over Detectron.\n\n- **Higher performance** (especially in terms of mask AP)\n- **Faster training speed**\n- **Memory efficient**\n\n### Performance\n\nDetectron and maskrcnn-benchmark use caffe-style ResNet as the backbone.\nWe report results using both caffe-style (weights converted from\n[here](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#imagenet-pretrained-models))\nand pytorch-style (weights from the official model zoo) ResNet backbone,\nindicated as *pytorch-style results* / *caffe-style results*.\n\nWe find that pytorch-style ResNet usually converges slower than caffe-style ResNet,\nthus leading to slightly lower results in 1x schedule, but the final results\nof 2x schedule are higher.\n\n<table>\n  <tr>\n    <th>Type</th>\n    <th>Lr schd</th>\n    <th>Detectron</th>\n    <th>maskrcnn-benchmark</th>\n    <th>mmdetection</th>\n  </tr>\n  <tr>\n    <td rowspan=\"2\">RPN</td>\n    <td>1x</td>\n    <td>57.2</td>\n    <td>-</td>\n    <td>57.1 / 58.2</td>\n  </tr>\n  <tr>\n    <td>2x</td>\n    <td>-</td>\n    <td>-</td>\n    <td>57.6 / -</td>\n  </tr>\n  <tr>\n    <td rowspan=\"2\">Faster R-CNN</td>\n    <td>1x</td>\n    <td>36.7</td>\n    <td>36.8</td>\n    <td>36.4 / 36.6</td>\n  </tr>\n  <tr>\n    <td>2x</td>\n    <td>37.9</td>\n    <td>-</td>\n    <td>37.7 / -</td>\n  </tr>\n  <tr>\n    <td rowspan=\"2\">Mask R-CNN</td>\n    <td>1x</td>\n    <td>37.7 &amp; 33.9</td>\n    <td>37.8 &amp; 34.2</td>\n    <td>37.3 &amp; 34.2 / 37.4 &amp; 34.3</td>\n  </tr>\n  <tr>\n    <td>2x</td>\n    <td>38.6 &amp; 34.5</td>\n    <td>-</td>\n    <td>38.5 &amp; 35.1 / -</td>\n  </tr>\n  <tr>\n    <td rowspan=\"2\">Fast R-CNN</td>\n    <td>1x</td>\n    <td>36.4</td>\n    <td>-</td>\n    <td>35.8 / 36.6</td>\n  </tr>\n  <tr>\n    <td>2x</td>\n    <td>36.8</td>\n    <td>-</td>\n    <td>37.1 / -</td>\n  </tr>\n  <tr>\n    <td rowspan=\"2\">Fast R-CNN (w/mask)</td>\n    <td>1x</td>\n   
 <td>37.3 &amp; 33.7</td>\n    <td>-</td>\n    <td>36.8 &amp; 34.1 / 37.3 &amp; 34.5</td>\n  </tr>\n  <tr>\n    <td>2x</td>\n    <td>37.7 &amp; 34.0</td>\n    <td>-</td>\n    <td>37.9 &amp; 34.8 / -</td>\n  </tr>\n</table>\n\n### Training Speed\n\nThe training speed is measured with s/iter. The lower, the better.\n\n<table>\n  <tr>\n    <th>Type</th>\n    <th>Detectron (P100<sup>1</sup>)</th>\n    <th>maskrcnn-benchmark (V100)</th>\n    <th>mmdetection (V100<sup>2</sup>)</th>\n  </tr>\n  <tr>\n    <td>RPN</td>\n    <td>0.416</td>\n    <td>-</td>\n    <td>0.253</td>\n  </tr>\n  <tr>\n    <td>Faster R-CNN</td>\n    <td>0.544</td>\n    <td>0.353</td>\n    <td>0.333</td>\n  </tr>\n  <tr>\n    <td>Mask R-CNN</td>\n    <td>0.889</td>\n    <td>0.454</td>\n    <td>0.430</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN</td>\n    <td>0.285</td>\n    <td>-</td>\n    <td>0.242</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN (w/mask)</td>\n    <td>0.377</td>\n    <td>-</td>\n    <td>0.328</td>\n  </tr>\n</table>\n\n\\*1. Facebook's Big Basin servers (P100/V100) are slightly faster than the servers we use. mmdetection can also run slightly faster on FB's servers.\n\n\\*2. For fair comparison, we list the caffe-style results here.\n\n\n### Inference Speed\n\nThe inference speed is measured with fps (img/s) on a single GPU. 
The higher, the better.\n\n<table>\n  <tr>\n    <th>Type</th>\n    <th>Detectron (P100)</th>\n    <th>maskrcnn-benchmark (V100)</th>\n    <th>mmdetection (V100)</th>\n  </tr>\n  <tr>\n    <td>RPN</td>\n    <td>12.5</td>\n    <td>-</td>\n    <td>16.9</td>\n  </tr>\n  <tr>\n    <td>Faster R-CNN</td>\n    <td>10.3</td>\n    <td>7.9</td>\n    <td>13.5</td>\n  </tr>\n  <tr>\n    <td>Mask R-CNN</td>\n    <td>8.5</td>\n    <td>7.7</td>\n    <td>10.2</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN</td>\n    <td>12.5</td>\n    <td>-</td>\n    <td>18.4</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN (w/mask)</td>\n    <td>9.9</td>\n    <td>-</td>\n    <td>12.8</td>\n  </tr>\n</table>\n\n### Training memory\n\n<table>\n  <tr>\n    <th>Type</th>\n    <th>Detectron</th>\n    <th>maskrcnn-benchmark</th>\n    <th>mmdetection</th>\n  </tr>\n  <tr>\n    <td>RPN</td>\n    <td>6.4</td>\n    <td>-</td>\n    <td>3.3</td>\n  </tr>\n  <tr>\n    <td>Faster R-CNN</td>\n    <td>7.2</td>\n    <td>4.4</td>\n    <td>3.6</td>\n  </tr>\n  <tr>\n    <td>Mask R-CNN</td>\n    <td>8.6</td>\n    <td>5.2</td>\n    <td>3.8</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN</td>\n    <td>6.0</td>\n    <td>-</td>\n    <td>3.3</td>\n  </tr>\n  <tr>\n    <td>Fast R-CNN (w/mask)</td>\n    <td>7.9</td>\n    <td>-</td>\n    <td>3.4</td>\n  </tr>\n</table>\n\nThere is no doubt that maskrcnn-benchmark and mmdetection are more memory efficient than Detectron,\nand the main advantage is PyTorch itself. We also perform some memory optimizations to push it forward.\n\nNote that Caffe2 and PyTorch have different apis to obtain memory usage with different implementations.\nFor all codebases, `nvidia-smi` shows a larger memory usage than the reported number in the above table.\n\n\n\n"
  },
  {
    "path": "mmdetection/README.md",
    "content": "\n# mmdetection\n\n## Introduction\n\nThe master branch works with **PyTorch 1.1** or higher. If you would like to use PyTorch 0.4.1,\nplease checkout to the [pytorch-0.4.1](https://github.com/open-mmlab/mmdetection/tree/pytorch-0.4.1) branch.\n\nmmdetection is an open source object detection toolbox based on PyTorch. It is\na part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).\n\n![demo image](demo/coco_test_12510.jpg)\n\n### Major features\n\n- **Modular Design**\n\n  One can easily construct a customized object detection framework by combining different components.\n\n- **Support of multiple frameworks out of box**\n\n  The toolbox directly supports popular detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc.\n\n- **Efficient**\n\n  All basic bbox and mask operations run on GPUs now.\n  The training speed is nearly 2x faster than Detectron and comparable to maskrcnn-benchmark.\n\n- **State of the art**\n\n  This was the codebase of the *MMDet* team, who won the [COCO Detection 2018 challenge](http://cocodataset.org/#detection-leaderboard).\n\nApart from mmdetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research,\nwhich is heavily depended on by this toolbox.\n\n## License\n\nThis project is released under the [Apache 2.0 license](LICENSE).\n\n## Updates\n\nv0.6.0 (14/04/2019)\n- Up to 30% speedup compared to the model zoo.\n- Support both PyTorch stable and nightly version.\n- Replace NMS and SigmoidFocalLoss with Pytorch CUDA extensions.\n\nv0.6rc0(06/02/2019)\n- Migrate to PyTorch 1.0.\n\nv0.5.7 (06/02/2019)\n- Add support for Deformable ConvNet v2. 
(Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))\n- This is the last release based on PyTorch 0.4.1.\n\nv0.5.6 (17/01/2019)\n- Add support for Group Normalization.\n- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.\n\nv0.5.5 (22/12/2018)\n- Add SSD for COCO and PASCAL VOC.\n- Add ResNeXt backbones and detection models.\n- Refactoring for Samplers/Assigners and add OHEM.\n- Add VOC dataset and evaluation scripts.\n\nv0.5.4 (27/11/2018)\n- Add SingleStageDetector and RetinaNet.\n\nv0.5.3 (26/11/2018)\n- Add Cascade R-CNN and Cascade Mask R-CNN.\n- Add support for Soft-NMS in config files.\n\nv0.5.2 (21/10/2018)\n- Add support for custom datasets.\n- Add a script to convert PASCAL VOC annotations to the expected format.\n\nv0.5.1 (20/10/2018)\n- Add BBoxAssigner and BBoxSampler, the `train_cfg` field in config files is restructured.\n- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.\n\n## Benchmark and model zoo\n\nSupported methods and backbones are shown in the below table.\nResults and models are available in the [Model zoo](MODEL_ZOO.md).\n\n|                    | ResNet   | ResNeXt  | SENet    | VGG      |\n|--------------------|:--------:|:--------:|:--------:|:--------:|\n| RPN                | ✓        | ✓        | ☐        | ✗        |\n| Fast R-CNN         | ✓        | ✓        | ☐        | ✗        |\n| Faster R-CNN       | ✓        | ✓        | ☐        | ✗        |\n| Mask R-CNN         | ✓        | ✓        | ☐        | ✗        |\n| Cascade R-CNN      | ✓        | ✓        | ☐        | ✗        |\n| Cascade Mask R-CNN | ✓        | ✓        | ☐        | ✗        |\n| SSD                | ✗        | ✗        | ✗        | ✓        |\n| RetinaNet          | ✓        | ✓        | ☐        | ✗        |\n| Hybrid Task Cascade| ✓        | ✓        | ☐        | ✗        |\n| FCOS               | ✓        | ✓        | ☐        | ✗        |\n\nOther 
features\n- [x] DCNv2\n- [x] Group Normalization\n- [x] Weight Standardization\n- [x] OHEM\n- [x] Soft-NMS\n- [ ] Mixed Precision (FP16) Training (coming soon)\n\n\n## Installation\n\nPlease refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.\n\n\n## Get Started\n\nPlease see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of mmdetection.\n\n\n## Citation\n\nIf you use our codebase or models in your research, please cite this project.\nWe will release a paper or technical report later.\n\n```\n@misc{mmdetection2018,\n  author =       {Kai Chen and Jiangmiao Pang and Jiaqi Wang and Yu Xiong and Xiaoxiao Li\n                  and Shuyang Sun and Wansen Feng and Ziwei Liu and Jianping Shi and\n                  Wanli Ouyang and Chen Change Loy and Dahua Lin},\n  title =        {mmdetection},\n  howpublished = {\\url{https://github.com/open-mmlab/mmdetection}},\n  year =         {2018}\n}\n```\n"
  },
  {
    "path": "mmdetection/TECHNICAL_DETAILS.md",
    "content": "## Overview\n\nIn this section, we will introduce the main units of training a detector:\ndata loading, model and iteration pipeline.\n\n## Data loading\n\nFollowing typical conventions, we use `Dataset` and `DataLoader` for data loading\nwith multiple workers. `Dataset` returns a dict of data items corresponding\nto the arguments of models' forward method.\nSince the data in object detection may not be the same size (image size, gt bbox size, etc.),\nwe introduce a new `DataContainer` type in `mmcv` to help collect and distribute\ndata of different size.\nSee [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.\n\n## Model\n\nIn mmdetection, model components are basically categorized as 4 types.\n\n- backbone: usually a FCN network to extract feature maps, e.g., ResNet.\n- neck: the part between backbones and heads, e.g., FPN, ASPP.\n- head: the part for specific tasks, e.g., bbox prediction and mask prediction.\n- roi extractor: the part for extracting features from feature maps, e.g., RoI Align.\n\nWe also implement some general detection pipelines with the above components,\nsuch as `SingleStageDetector` and `TwoStageDetector`.\n\n### Build a model with basic components\n\nFollowing some basic pipelines (e.g., two-stage detectors), the model structure\ncan be customized through config files with no pains.\n\nIf we want to implement some new components, e.g., the path aggregation\nFPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.\n\n1. 
create a new file in `mmdet/models/necks/pafpn.py`.\n\n    ```python\n    class PAFPN(nn.Module):\n\n        def __init__(self,\n                    in_channels,\n                    out_channels,\n                    num_outs,\n                    start_level=0,\n                    end_level=-1,\n                    add_extra_convs=False):\n            pass\n        \n        def forward(self, inputs):\n            # implementation is ignored\n            pass\n    ```\n\n2. modify the config file from\n\n    ```python\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5)\n    ```\n\n    to\n\n    ```python\n    neck=dict(\n        type='PAFPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5)\n    ```\n\nWe will release more components (backbones, necks, heads) for research purpose.\n\n### Write a new model\n\nTo write a new detection pipeline, you need to inherit from `BaseDetector`,\nwhich defines the following abstract methods.\n\n- `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).\n- `forward_train()`: forward method of the training mode\n- `simple_test()`: single scale testing without augmentation\n- `aug_test()`: testing with augmentation (multi-scale, flip, etc.)\n\n[TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)\nis a good example which shows how to do that.\n\n## Iteration pipeline\n\nWe adopt distributed training for both single machine and multiple machines.\nSupposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.\n\nEach process keeps an isolated model, data loader, and optimizer.\nModel parameters are only synchronized once at the beginning.\nAfter a forward and backward pass, gradients will be allreduced among all GPUs,\nand the optimizer will update model parameters.\nSince the 
gradients are allreduced, the model parameter stays the same for all processes after the iteration.\n"
  },
  {
    "path": "mmdetection/compile.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\necho \"Building roi align op...\"\ncd mmdet/ops/roi_align\nif [ -d \"build\" ]; then\n    rm -r build\nfi\n$PYTHON setup.py build_ext --inplace\n\necho \"Building roi pool op...\"\ncd ../roi_pool\nif [ -d \"build\" ]; then\n    rm -r build\nfi\n$PYTHON setup.py build_ext --inplace\n\necho \"Building nms op...\"\ncd ../nms\nif [ -d \"build\" ]; then\n    rm -r build\nfi\n$PYTHON setup.py build_ext --inplace\n\necho \"Building dcn...\"\ncd ../dcn\nif [ -d \"build\" ]; then\n    rm -r build\nfi\n$PYTHON setup.py build_ext --inplace\n\necho \"Building sigmoid focal loss op...\"\ncd ../sigmoid_focal_loss\nif [ -d \"build\" ]; then\n    rm -r build\nfi\n$PYTHON setup.py build_ext --inplace\n"
  },
  {
    "path": "mmdetection/configs/cascade_mask_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    
mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                
add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        
type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_mask_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_scales=[2, 4, 8, 16, 32],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[16],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=[\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            
target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=None,\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=0,\n        in_channels=2048,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=12000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            
mask_size=14,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=6000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_mask_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    
mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                
add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        
type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_mask_rcnn_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                
pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        
with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_mask_rcnn_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                
pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        
with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_x101_64x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ])\n# 
model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                
num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning 
policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_scales=[2, 4, 8, 16, 32],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[16],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=[\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=81,\n            
target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ])\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=12000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n  
              neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=6000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        
with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_r50_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ])\n# 
model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                
num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning 
policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_rcnn_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ])\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n   
             type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = 
dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/cascade_rcnn_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ])\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n   
             type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = 
dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_x101_64x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/README.md",
    "content": "# Deformable Convolutional Networks\n\n# Introduction\n\n```\n@inproceedings{dai2017deformable,\n  title={Deformable Convolutional Networks},\n  author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},\n  booktitle={Proceedings of the IEEE international conference on computer vision},\n  year={2017}\n}\n\n@article{zhu2018deformable,\n  title={Deformable ConvNets v2: More Deformable, Better Results},\n  author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},\n  journal={arXiv preprint arXiv:1811.11168},\n  year={2018}\n}\n```\n\n## Results and Models\n\n| Backbone  | Model        | Style   | Conv          | Pool   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |\n|:---------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|\n| R-50-FPN  | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 3.9      | 0.594               | 10.2           | 40.0   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-e41688c9.pth) |\n| R-50-FPN  | Faster       | pytorch | mdconv(c3-c5) | -      | 1x      | 3.7      | 0.598               | 10.0           | 40.2   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-1b768045.pth) |\n| R-50-FPN  | Faster       | pytorch | -             | dpool  | 1x      | 4.6      | 0.714               | 8.7            | 37.8   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dpool_r50_fpn_1x_20190125-f4fc1d70.pth) |\n| R-50-FPN  | Faster       | pytorch | -             | mdpool | 1x      | 5.2      | 0.769               | 8.2            | 38.0   | -       | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_mdpool_r50_fpn_1x_20190125-473d0f3d.pth) |\n| R-101-FPN | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 5.8      | 0.811               | 8.0            | 42.1   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-a7e31b65.pth) |\n| X-101-32x4d-FPN | Faster       | pytorch | dconv(c3-c5)  | -      | 1x      | 7.1      | 1.126               | 6.6            | 43.4   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x_20190201-6d46376f.pth) |\n| R-50-FPN  | Mask         | pytorch | dconv(c3-c5)  | -      | 1x      | 4.5      | 0.712               | 7.7            | 41.1   | 37.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-4f94ff79.pth) |\n| R-50-FPN  | Mask         | pytorch | mdconv(c3-c5) | -      | 1x      | 4.5      | 0.712               | 7.7            | 41.3   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_mdconv_c3-c5_r50_fpn_1x_20190125-c5601dc3.pth) |\n| R-101-FPN | Mask         | pytorch | dconv(c3-c5)  | -      | 1x      | 6.4      | 0.939               | 6.5            | 43.2   | 38.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-decb6db5.pth) |\n| R-50-FPN  | Cascade      | pytorch | dconv(c3-c5)  | -      | 1x      | 4.4      | 0.660               | 7.6            | 44.0   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth) |\n| R-101-FPN | Cascade      | pytorch | dconv(c3-c5)  | -      | 1x      | 6.3      | 0.881               | 6.8            | 45.0   | - 
      | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-aaa877cc.pth) |\n| R-50-FPN  | Cascade Mask | pytorch | dconv(c3-c5)  | -      | 1x      | 6.6      | 0.942               | 5.7            | 44.4   | 38.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-09d8a443.pth) |\n| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5)  | -      | 1x      | 8.5      | 1.156               | 5.1            | 45.7   | 39.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_20190125-0d62c190.pth) |\n\n**Notes:**\n\n- `dconv` and `mdconv` denote (modulated) deformable convolution; `c3-c5` means adding dconv in ResNet stages 3 to 5. `dpool` and `mdpool` denote (modulated) deformable RoI pooling.\n- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster.\n- **Memory and Train/Inf time are outdated.**"
  },
  {
    "path": "mmdetection/configs/dcn/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n       
     num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            
sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n       
     num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ])\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                
neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        
test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/cascade_rcnn_dconv_c3-c5_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        
nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_dconv_c3-c5_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            groups=32,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        
smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n   
 test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/faster_rcnn_dpool_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(\n            type='DeformRoIPoolingPack',\n            out_size=7,\n            out_channels=256,\n            no_trans=False,\n            group_size=1,\n            trans_std=0.1),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        
nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n    
    img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_dpool_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=True,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        
nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_mdconv_c3-c5_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/faster_rcnn_mdpool_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(\n            type='ModulatedDeformRoIPoolingPack',\n            out_size=7,\n            out_channels=256,\n            no_trans=False,\n            group_size=1,\n            trans_std=0.1),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n   
     nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_mdpool_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/dcn/mask_rcnn_dconv_c3-c5_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            
min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n    
    img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_dconv_c3-c5_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_mask_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FastRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 
'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# 
yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_mask_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_mask_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='FastRCNN',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=dict(\n        type='BBoxHead',\n        with_avg_pool=True,\n        roi_feat_size=7,\n        in_channels=2048,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=None,\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=0,\n        in_channels=2048,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=14,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset 
settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # 
dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_mask_rcnn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_mask_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FastRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 
'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# 
yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_mask_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FastRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='FastRCNN',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=dict(\n        type='BBoxHead',\n        with_avg_pool=True,\n        roi_feat_size=7,\n        in_channels=2048,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        
ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_c4_1x_val2017.pkl',\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_rcnn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fast_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FastRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fast_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_ohem_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        
assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='OHEMSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n   
     flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_ohem_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        
assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n 
       flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_scales=[2, 4, 8, 16, 32],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[16],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=dict(\n        type='BBoxHead',\n        with_avg_pool=True,\n        roi_feat_size=7,\n        in_channels=2048,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    
rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=12000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=6000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        
img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        
assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n 
       flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n     
   min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/faster_rcnn_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n     
   min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fcos/README.md",
    "content": "# FCOS: Fully Convolutional One-Stage Object Detection\n\n## Introduction\n\n```\n@article{tian2019fcos,\n  title={FCOS: Fully Convolutional One-Stage Object Detection},\n  author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},\n  journal={arXiv preprint arXiv:1904.01355},\n  year={2019}\n}\n```\n\n## Results and Models\n\n| Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |\n|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|\n| R-50-FPN  | caffe   | 1x      | 6.9      | 0.396               | 13.6           | 36.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_1x-9f253a93.pth) |\n| R-50-FPN  | caffe   | 2x      | -        | -                   | -              | 38.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_fpn_2x-f7329d80.pth) |\n| R-101-FPN | caffe   | 1x      | 10.4     | 0.558               | 11.6           | 39.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_1x-e4889733.pth) |\n| R-101-FPN | caffe   | 2x      | -        | -                   | -              | 40.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_fpn_2x-42e6f62d.pth) |\n| X-101-64x4d-FPN | pytorch | 2x      | 9.7      | 0.892               | 7.0            | 42.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_x101_64x4d_fpn_2x-a36c0872.pth) |\n\n**Notes:**\n- To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models.\n"
  },
  {
    "path": "mmdetection/configs/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py",
    "content": "# model settings\nmodel = dict(\n    type='FCOS',\n    pretrained='open-mmlab://resnet101_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='caffe'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        extra_convs_on_inputs=False,  # use P5\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.01,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='constant',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndevice_ids = range(4)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py",
    "content": "# model settings\nmodel = dict(\n    type='FCOS',\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        extra_convs_on_inputs=False,  # use P5\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.01,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='constant',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndevice_ids = range(8)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py",
    "content": "# model settings\nmodel = dict(\n    type='FCOS',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='caffe'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        extra_convs_on_inputs=False,  # use P5\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        
img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.01,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='constant',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndevice_ids = range(4)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/fcos_r50_caffe_fpn_gn_1x_4gpu'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn/README.md",
    "content": "# Group Normalization\n\n## Introduction\n\n```\n@inproceedings{wu2018group,\n  title={Group Normalization},\n  author={Wu, Yuxin and He, Kaiming},\n  booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},\n  year={2018}\n}\n```\n\n## Results and Models\n\n| Backbone      | model      | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |\n|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|\n| R-50-FPN (d)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.8   | 36.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |\n| R-50-FPN (d)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.1   | 36.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |\n| R-101-FPN (d) | Mask R-CNN | 2x      | 9.9      | 0.970               | 4.8            | 41.5   | 37.0    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |\n| R-101-FPN (d) | Mask R-CNN | 3x      | 9.9      | 0.970               | 4.8            | 41.6   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |\n| R-50-FPN (c)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.7   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |\n| R-50-FPN (c)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.0   | 36.2    | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |\n\n**Notes:**\n- (d) means the pretrained model was converted from Detectron, and (c) means the pretrained model was contributed by [@thangvubk](https://github.com/thangvubk).\n- The `3x` schedule is epoch [28, 34, 36].\n- **The memory and train/inf time figures are outdated.**"
  },
  {
    "path": "mmdetection/configs/gn/mask_rcnn_r101_fpn_gn_2x.py",
    "content": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://detectron/resnet101_gn',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n        norm_cfg=norm_cfg))\n\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            
type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n      
  type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn/mask_rcnn_r50_fpn_gn_2x.py",
    "content": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://detectron/resnet50_gn',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n        norm_cfg=norm_cfg))\n\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n 
           pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n   
     ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_fpn_gn_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn/mask_rcnn_r50_fpn_gn_contrib_2x.py",
    "content": "# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\n\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://contrib/resnet50_gn',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n        norm_cfg=norm_cfg))\n\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n   
         pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n   
     ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn+ws/README.md",
    "content": "# Weight Standardization\n\n## Introduction\n\n```\n@article{weightstandardization,\n  author    = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille},\n  title     = {Weight Standardization},\n  journal   = {arXiv preprint arXiv:1903.10520},\n  year      = {2019},\n}\n```\n\n## Results and Models\n\nFaster R-CNN\n\n| Backbone  | Style   | Normalization | Lr schd | box AP | mask AP | Download |\n|:---------:|:-------:|:-------------:|:-------:|:------:|:-------:|:--------:|\n| R-50-FPN  | pytorch | GN            | 1x      | 37.8   | -       | - |\n| R-50-FPN  | pytorch | GN+WS         | 1x      | 38.9   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/faster_rcnn_r50_fpn_gn_ws_1x_20190418-935d00b6.pth) |\n| R-101-FPN | pytorch | GN            | 1x      | 39.8   | -       | - |\n| R-101-FPN | pytorch | GN+WS         | 1x      | 41.4   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/faster_rcnn_r101_fpn_gn_ws_1x_20190419-728705ec.pth) |\n| X-50-32x4d-FPN | pytorch | GN       | 1x      | 36.5   | -       | - |\n| X-50-32x4d-FPN | pytorch | GN+WS    | 1x      | 39.9   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/faster_rcnn_x50_32x4d_fpn_gn_ws_1x_20190419-4e61072b.pth) |\n| X-101-32x4d-FPN | pytorch | GN      | 1x      | 33.2   | -       | - |\n| X-101-32x4d-FPN | pytorch | GN+WS   | 1x      | 41.8   | -       | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/faster_rcnn_x101_32x4d_fpn_gn_ws_1x_20190419-c78e5583.pth) |\n\nMask R-CNN\n\n| Backbone  | Style   | Normalization | Lr schd | box AP | mask AP | Download |\n|:---------:|:-------:|:-------------:|:-------:|:------:|:-------:|:--------:|\n| R-50-FPN  | pytorch | GN            | 2x      | 39.9   | 36.0    | - |\n| R-50-FPN  | pytorch | GN+WS         | 2x      | 40.3   | 36.2    | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_r50_fpn_gn_ws_2x_20190419-9ec97bbb.pth) |\n| R-101-FPN | pytorch | GN            | 2x      | 41.6   | 37.3    | - |\n| R-101-FPN | pytorch | GN+WS         | 2x      | 42.0   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_r101_fpn_gn_ws_2x_20190419-bc7399a6.pth) |\n| X-50-32x4d-FPN | pytorch | GN       | 2x      | 39.2   | 35.5    | - |\n| X-50-32x4d-FPN | pytorch | GN+WS    | 2x      | 40.7   | 36.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_x50_32x4d_fpn_gn_ws_2x_20190419-2110205e.pth) |\n| X-101-32x4d-FPN | pytorch | GN      | 2x      | 36.4   | 33.1    | - |\n| X-101-32x4d-FPN | pytorch | GN+WS   | 2x      | 42.1   | 37.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x_20190419-7777b15f.pth) |\n| R-50-FPN  | pytorch | GN            | 20-23-24e | 40.6   | 36.6    | - |\n| R-50-FPN  | pytorch | GN+WS         | 20-23-24e | 41.1   | 37.0    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e_20190425-1d9e499e.pth) |\n| R-101-FPN | pytorch | GN            | 20-23-24e | 42.3   | 38.1    | - |\n| R-101-FPN | pytorch | GN+WS         | 20-23-24e | 43.0   | 38.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_r101_fpn_gn_ws_20_23_24e_20190425-66cb3792.pth) |\n| X-50-32x4d-FPN | pytorch | GN       | 20-23-24e | 39.6   | 35.9    | - |\n| X-50-32x4d-FPN | pytorch | GN+WS    | 20-23-24e | 41.9   | 37.7    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_x50_32x4d_fpn_gn_ws_20_23_24e_20190425-d01e2200.pth) |\n| X-101-32x4d-FPN | pytorch | GN      | 20-23-24e | 36.6   | 33.4    | - |\n| X-101-32x4d-FPN | pytorch | GN+WS   | 20-23-24e | 43.4   | 38.7    | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ws/mask_rcnn_x101_32x4d_fpn_gn_ws_20_23_24e_20190425-1ff3e5b2.pth) |\n\nNote:\n\n- GN+WS requires about 5% more memory than GN, and it is only 5% slower than GN.\n- In the paper, a 20-23-24e lr schedule is used instead of 2x.\n- The X-50-GN and X-101-GN pretrained models are also shared by the authors."
  },
  {
    "path": "mmdetection/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws_1x.py",
    "content": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='open-mmlab://jhu/resnet50_gn_ws',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            
add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n  
      type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r50_fpn_gn_ws_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_20_23_24e.py",
    "content": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://jhu/resnet50_gn_ws',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n        conv_cfg=conv_cfg,\n        
norm_cfg=norm_cfg))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[20, 23])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_20_23_24e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws_2x.py",
    "content": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://jhu/resnet50_gn_ws',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n        conv_cfg=conv_cfg,\n        
norm_cfg=norm_cfg))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_fpn_gn_ws_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py",
    "content": "# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://jhu/resnext101_32x4d_gn_ws',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='ConvFCBBoxHead',\n        num_shared_convs=4,\n        num_shared_fcs=1,\n        in_channels=256,\n        conv_out_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False,\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81,\n 
       conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_gn_ws_2x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/README.md",
    "content": "# Hybrid Task Cascade for Instance Segmentation\n\n## Introduction\n\nWe provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518).\n\n```\n@inproceedings{chen2019hybrid,\n  title={Hybrid task cascade for instance segmentation},\n  author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Chen Change Loy and Dahua Lin},\n  booktitle={IEEE Conference on Computer Vision and Pattern Recognition},\n  year={2019}\n}\n```\n\n## Dataset\n\nHTC requires COCO and COCO-stuff dataset for training. You need to download and extract it in the COCO dataset path.\nThe directory should be like this.\n\n```\nmmdetection\n├── mmdet\n├── tools\n├── configs\n├── data\n│   ├── coco\n│   │   ├── annotations\n│   │   ├── train2017\n│   │   ├── val2017\n│   │   ├── test2017\n|   |   ├── stuffthingmaps\n```\n\n## Results and Models\n\nThe results on COCO 2017val is shown in the below table. 
(Results on test-dev are usually slightly higher than on val.)\n\n| Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |\n|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|\n| R-50-FPN  | pytorch | 1x      | 7.4      | 0.936               | 4.1            | 42.1   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |\n| R-50-FPN  | pytorch | 20e     | -        | -                   | -              | 43.2   | 38.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |\n| R-101-FPN | pytorch | 20e     | 9.3      | 1.051               | 4.0            | 44.9   | 39.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |\n| X-101-32x4d-FPN | pytorch |20e| 5.8      | 0.769               | 3.8            | 46.1   | 40.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |\n| X-101-64x4d-FPN | pytorch |20e| 7.5      | 1.120               | 3.5            | 46.9   | 40.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |\n\n- In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC.\n- We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models.\nIf you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01.\n\nWe also provide a powerful HTC model with DCN and multi-scale training. 
No testing augmentation is used.\n\n| Backbone         | Style   | DCN   | training scales | Lr schd | box AP | mask AP | Download |\n|:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:|\n| X-101-64x4d-FPN  | pytorch | c3-c5 | 400~1400        | 20e     | 50.7   | 43.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) |"
  },
  {
    "path": "mmdetection/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_64x4d',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        dcn=dict(\n            modulated=False,\n            groups=64,\n            deformable_groups=1,\n            fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            
type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n           
     neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    
imgs_per_gpu=1,\n    workers_per_gpu=1,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=[(1600, 400), (1600, 1400)],\n        multiscale_mode='range',\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 19])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e'\nload_from = None\nresume_from = 
None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_r101_fpn_20e.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='modelzoo://resnet101',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n          
      add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 19])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_r101_fpn_20e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n          
      add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_r50_fpn_20e.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n          
      add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        
img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 19])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_r50_fpn_20e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_without_semantic_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='modelzoo://resnet50',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.033, 0.033, 0.067, 
0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                
pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        
with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_without_semantic_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_32x4d',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 
0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                
pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=1,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n     
   img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 19])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_x101_32x4d_fpn_20e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/htc/htc_x101_64x4d_fpn_20e_16gpu.py",
    "content": "# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    num_stages=3,\n    pretrained='open-mmlab://resnext101_64x4d',\n    interleaved=True,\n    mask_info_flow=True,\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=[\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.05, 0.05, 0.1, 0.1],\n            reg_class_agnostic=True),\n        dict(\n            type='SharedFCBBoxHead',\n            num_fcs=2,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=81,\n            target_means=[0., 0., 0., 
0.],\n            target_stds=[0.033, 0.033, 0.067, 0.067],\n            reg_class_agnostic=True)\n    ],\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='HTCMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81),\n    semantic_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[8]),\n    semantic_head=dict(\n        type='FusedSemanticHead',\n        num_ins=5,\n        fusion_level=1,\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=183,\n        ignore_label=255,\n        loss_weight=0.2))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=[\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                
pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.6,\n                min_pos_iou=0.6,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.7,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)\n    ],\n    stage_loss_weights=[1, 0.5, 0.25])\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.001,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5),\n    keep_all_stages=False)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=1,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n     
   img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        seg_scale_factor=1 / 8,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True,\n        with_semantic_seg=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 19])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 20\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/htc_x101_64x4d_fpn_20e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/mask_rcnn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            
neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        
with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/mask_rcnn_r50_caffe_c4_1x.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe'),\n    shared_head=dict(\n        type='ResLayer',\n        depth=50,\n        stage=3,\n        stride=2,\n        dilation=1,\n        style='caffe',\n        norm_cfg=norm_cfg,\n        norm_eval=True),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_scales=[2, 4, 8, 16, 32],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[16],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=1024,\n        featmap_strides=[16]),\n    bbox_head=dict(\n        type='BBoxHead',\n        with_avg_pool=True,\n        roi_feat_size=7,\n        in_channels=2048,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=None,\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=0,\n        in_channels=2048,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n   
         neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=12000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=14,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=6000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        
with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/mask_rcnn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            
neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        
with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/mask_rcnn_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n   
         pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/mask_rcnn_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=81,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False),\n    mask_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    mask_head=dict(\n        type='FCNMaskHead',\n        num_convs=4,\n        in_channels=256,\n        conv_out_channels=256,\n        num_classes=81))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n   
         pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        mask_size=28,\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05,\n        nms=dict(type='nms', iou_thr=0.5),\n        max_per_img=100,\n        mask_thr_binary=0.5))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0,\n        with_mask=True,\n        with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/mask_rcnn_x101_64x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True),\n    bbox_roi_extractor=dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32]),\n    bbox_head=dict(\n        type='SharedFCBBoxHead',\n        num_fcs=2,\n        in_channels=256,\n        fc_out_channels=1024,\n        roi_feat_size=7,\n        num_classes=21,\n        target_means=[0., 0., 0., 0.],\n        target_stds=[0.1, 0.1, 0.2, 0.2],\n        reg_class_agnostic=False))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False),\n    rpn_proposal=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        
assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.5,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=512,\n            pos_fraction=0.25,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=True),\n        pos_weight=-1,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_post=1000,\n        max_num=1000,\n        nms_thr=0.7,\n        min_bbox_size=0),\n    rcnn=dict(\n        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)\n    # soft-nms is also supported for rcnn testing\n    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)\n)\n# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',  # to avoid reloading datasets frequently\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=[\n                data_root + 'VOC2007/ImageSets/Main/trainval.txt',\n                data_root + 'VOC2012/ImageSets/Main/trainval.txt'\n            ],\n            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],\n            img_scale=(1000, 600),\n            img_norm_cfg=img_norm_cfg,\n            size_divisor=32,\n            flip_ratio=0.5,\n            with_mask=False,\n            with_crowd=True,\n            with_label=True)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(1000, 600),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        
with_crowd=True,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(1000, 600),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(policy='step', step=[3])  # actual epoch = 3 * 3 = 9\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 4  # actual epoch = 4 * 3 = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/faster_rcnn_r50_fpn_1x_voc0712'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/pascal_voc/ssd300_voc.py",
    "content": "# model settings\ninput_size = 300\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe',\n    backbone=dict(\n        type='SSDVGG',\n        input_size=input_size,\n        depth=16,\n        with_last_pool=False,\n        ceil_mode=True,\n        out_indices=(3, 4),\n        out_feature_indices=(22, 34),\n        l2_norm_scale=20),\n    neck=None,\n    bbox_head=dict(\n        type='SSDHead',\n        input_size=input_size,\n        in_channels=(512, 1024, 512, 256, 256, 256),\n        num_classes=21,\n        anchor_strides=(8, 16, 32, 64, 100, 300),\n        basesize_ratio_range=(0.2, 0.9),\n        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),\n        target_means=(.0, .0, .0, .0),\n        target_stds=(0.1, 0.1, 0.2, 0.2)))\ncudnn_benchmark = True\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.,\n        ignore_iof_thr=-1,\n        gt_max_assign_all=False),\n    smoothl1_beta=1.,\n    allowed_border=-1,\n    pos_weight=-1,\n    neg_pos_ratio=3,\n    debug=False)\ntest_cfg = dict(\n    nms=dict(type='nms', iou_thr=0.45),\n    min_bbox_size=0,\n    score_thr=0.02,\n    max_per_img=200)\n# model training and testing settings\n# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=4,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=10,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=[\n                data_root + 'VOC2007/ImageSets/Main/trainval.txt',\n                data_root + 'VOC2012/ImageSets/Main/trainval.txt'\n            ],\n            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],\n            img_scale=(300, 300),\n            img_norm_cfg=img_norm_cfg,\n            
size_divisor=None,\n            flip_ratio=0.5,\n            with_mask=False,\n            with_crowd=False,\n            with_label=True,\n            test_mode=False,\n            extra_aug=dict(\n                photo_metric_distortion=dict(\n                    brightness_delta=32,\n                    contrast_range=(0.5, 1.5),\n                    saturation_range=(0.5, 1.5),\n                    hue_delta=18),\n                expand=dict(\n                    mean=img_norm_cfg['mean'],\n                    to_rgb=img_norm_cfg['to_rgb'],\n                    ratio_range=(1, 4)),\n                random_crop=dict(\n                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),\n            resize_keep_ratio=False)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(300, 300),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(300, 300),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False))\n# optimizer\noptimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 20])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime 
settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/ssd300_voc'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/pascal_voc/ssd512_voc.py",
    "content": "# model settings\ninput_size = 512\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe',\n    backbone=dict(\n        type='SSDVGG',\n        input_size=input_size,\n        depth=16,\n        with_last_pool=False,\n        ceil_mode=True,\n        out_indices=(3, 4),\n        out_feature_indices=(22, 34),\n        l2_norm_scale=20),\n    neck=None,\n    bbox_head=dict(\n        type='SSDHead',\n        input_size=input_size,\n        in_channels=(512, 1024, 512, 256, 256, 256, 256),\n        num_classes=21,\n        anchor_strides=(8, 16, 32, 64, 128, 256, 512),\n        basesize_ratio_range=(0.15, 0.9),\n        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),\n        target_means=(.0, .0, .0, .0),\n        target_stds=(0.1, 0.1, 0.2, 0.2)))\ncudnn_benchmark = True\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.,\n        ignore_iof_thr=-1,\n        gt_max_assign_all=False),\n    smoothl1_beta=1.,\n    allowed_border=-1,\n    pos_weight=-1,\n    neg_pos_ratio=3,\n    debug=False)\ntest_cfg = dict(\n    nms=dict(type='nms', iou_thr=0.45),\n    min_bbox_size=0,\n    score_thr=0.02,\n    max_per_img=200)\n# model training and testing settings\n# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=4,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=10,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=[\n                data_root + 'VOC2007/ImageSets/Main/trainval.txt',\n                data_root + 'VOC2012/ImageSets/Main/trainval.txt'\n            ],\n            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],\n            img_scale=(512, 512),\n            img_norm_cfg=img_norm_cfg,\n     
       size_divisor=None,\n            flip_ratio=0.5,\n            with_mask=False,\n            with_crowd=False,\n            with_label=True,\n            test_mode=False,\n            extra_aug=dict(\n                photo_metric_distortion=dict(\n                    brightness_delta=32,\n                    contrast_range=(0.5, 1.5),\n                    saturation_range=(0.5, 1.5),\n                    hue_delta=18),\n                expand=dict(\n                    mean=img_norm_cfg['mean'],\n                    to_rgb=img_norm_cfg['to_rgb'],\n                    ratio_range=(1, 4)),\n                random_crop=dict(\n                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),\n            resize_keep_ratio=False)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(512, 512),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        img_scale=(512, 512),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False))\n# optimizer\noptimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 20])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# 
runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/ssd512_voc'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/retinanet_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RetinaNet',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5),\n    bbox_head=dict(\n        type='RetinaHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        octave_base_scale=4,\n        scales_per_octave=3,\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[8, 16, 32, 64, 128],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndevice_ids = range(8)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/retinanet_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/retinanet_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RetinaNet',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5),\n    bbox_head=dict(\n        type='RetinaHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        octave_base_scale=4,\n        scales_per_octave=3,\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[8, 16, 32, 64, 128],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 
'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndevice_ids = range(8)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/retinanet_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/retinanet_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RetinaNet',\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5),\n    bbox_head=dict(\n        type='RetinaHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        octave_base_scale=4,\n        scales_per_octave=3,\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[8, 16, 32, 64, 128],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        
type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndevice_ids = range(8)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/retinanet_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/retinanet_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RetinaNet',\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5),\n    bbox_head=dict(\n        type='RetinaHead',\n        num_classes=81,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        octave_base_scale=4,\n        scales_per_octave=3,\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[8, 16, 32, 64, 128],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0]))\n# training and testing settings\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0,\n        ignore_iof_thr=-1),\n    smoothl1_beta=0.11,\n    gamma=2.0,\n    alpha=0.25,\n    allowed_border=-1,\n    pos_weight=-1,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_thr=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    val=dict(\n        
type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=True),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndevice_ids = range(8)\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/retinanet_x101_64x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/rpn_r101_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    pretrained='modelzoo://resnet101',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        
with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n# runner configs\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/rpn_r101_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/rpn_r50_caffe_c4_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    pretrained='open-mmlab://resnet50_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    neck=None,\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_scales=[2, 4, 8, 16, 32],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[16],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=12000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        
flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n# runner configs\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/rpn_r50_caffe_c4_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/rpn_r50_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    pretrained='modelzoo://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0.5,\n        
with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n# runner configs\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/rpn_r50_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/rpn_x101_32x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n# runner configs\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/rpn_x101_32x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/rpn_x101_64x4d_fpn_1x.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_scales=[8],\n        anchor_ratios=[0.5, 1.0, 2.0],\n        anchor_strides=[4, 8, 16, 32, 64],\n        target_means=[.0, .0, .0, .0],\n        target_stds=[1.0, 1.0, 1.0, 1.0],\n        use_sigmoid_cls=True))\n# model training and testing settings\ntrain_cfg = dict(\n    rpn=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.7,\n            neg_iou_thr=0.3,\n            min_pos_iou=0.3,\n            ignore_iof_thr=-1),\n        sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        allowed_border=0,\n        pos_weight=-1,\n        smoothl1_beta=1 / 9.0,\n        debug=False))\ntest_cfg = dict(\n    rpn=dict(\n        nms_across_levels=False,\n        nms_pre=2000,\n        nms_post=2000,\n        max_num=2000,\n        nms_thr=0.7,\n        min_bbox_size=0))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        
size_divisor=32,\n        flip_ratio=0.5,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_crowd=False,\n        with_label=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(1333, 800),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=32,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n# runner configs\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 12\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/rpn_x101_64x4d_fpn_1x'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/ssd300_coco.py",
    "content": "# model settings\ninput_size = 300\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe',\n    backbone=dict(\n        type='SSDVGG',\n        input_size=input_size,\n        depth=16,\n        with_last_pool=False,\n        ceil_mode=True,\n        out_indices=(3, 4),\n        out_feature_indices=(22, 34),\n        l2_norm_scale=20),\n    neck=None,\n    bbox_head=dict(\n        type='SSDHead',\n        input_size=input_size,\n        in_channels=(512, 1024, 512, 256, 256, 256),\n        num_classes=81,\n        anchor_strides=(8, 16, 32, 64, 100, 300),\n        basesize_ratio_range=(0.15, 0.9),\n        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),\n        target_means=(.0, .0, .0, .0),\n        target_stds=(0.1, 0.1, 0.2, 0.2)))\ncudnn_benchmark = True\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.,\n        ignore_iof_thr=-1,\n        gt_max_assign_all=False),\n    smoothl1_beta=1.,\n    allowed_border=-1,\n    pos_weight=-1,\n    neg_pos_ratio=3,\n    debug=False)\ntest_cfg = dict(\n    nms=dict(type='nms', iou_thr=0.45),\n    min_bbox_size=0,\n    score_thr=0.02,\n    max_per_img=200)\n# model training and testing settings\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            img_scale=(300, 300),\n            img_norm_cfg=img_norm_cfg,\n            size_divisor=None,\n            flip_ratio=0.5,\n            with_mask=False,\n            with_crowd=False,\n            with_label=True,\n 
           test_mode=False,\n            extra_aug=dict(\n                photo_metric_distortion=dict(\n                    brightness_delta=32,\n                    contrast_range=(0.5, 1.5),\n                    saturation_range=(0.5, 1.5),\n                    hue_delta=18),\n                expand=dict(\n                    mean=img_norm_cfg['mean'],\n                    to_rgb=img_norm_cfg['to_rgb'],\n                    ratio_range=(1, 4)),\n                random_crop=dict(\n                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),\n            resize_keep_ratio=False)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(300, 300),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(300, 300),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False))\n# optimizer\noptimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/ssd300_coco'\nload_from = 
None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/configs/ssd512_coco.py",
    "content": "# model settings\ninput_size = 512\nmodel = dict(\n    type='SingleStageDetector',\n    pretrained='open-mmlab://vgg16_caffe',\n    backbone=dict(\n        type='SSDVGG',\n        input_size=input_size,\n        depth=16,\n        with_last_pool=False,\n        ceil_mode=True,\n        out_indices=(3, 4),\n        out_feature_indices=(22, 34),\n        l2_norm_scale=20),\n    neck=None,\n    bbox_head=dict(\n        type='SSDHead',\n        input_size=input_size,\n        in_channels=(512, 1024, 512, 256, 256, 256, 256),\n        num_classes=81,\n        anchor_strides=(8, 16, 32, 64, 128, 256, 512),\n        basesize_ratio_range=(0.1, 0.9),\n        anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]),\n        target_means=(.0, .0, .0, .0),\n        target_stds=(0.1, 0.1, 0.2, 0.2)))\ncudnn_benchmark = True\ntrain_cfg = dict(\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.,\n        ignore_iof_thr=-1,\n        gt_max_assign_all=False),\n    smoothl1_beta=1.,\n    allowed_border=-1,\n    pos_weight=-1,\n    neg_pos_ratio=3,\n    debug=False)\ntest_cfg = dict(\n    nms=dict(type='nms', iou_thr=0.45),\n    min_bbox_size=0,\n    score_thr=0.02,\n    max_per_img=200)\n# model training and testing settings\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ndata = dict(\n    imgs_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            img_scale=(512, 512),\n            img_norm_cfg=img_norm_cfg,\n            size_divisor=None,\n            flip_ratio=0.5,\n            with_mask=False,\n            with_crowd=False,\n            
with_label=True,\n            test_mode=False,\n            extra_aug=dict(\n                photo_metric_distortion=dict(\n                    brightness_delta=32,\n                    contrast_range=(0.5, 1.5),\n                    saturation_range=(0.5, 1.5),\n                    hue_delta=18),\n                expand=dict(\n                    mean=img_norm_cfg['mean'],\n                    to_rgb=img_norm_cfg['to_rgb'],\n                    ratio_range=(1, 4)),\n                random_crop=dict(\n                    min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)),\n            resize_keep_ratio=False)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(512, 512),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        img_scale=(512, 512),\n        img_norm_cfg=img_norm_cfg,\n        size_divisor=None,\n        flip_ratio=0,\n        with_mask=False,\n        with_label=False,\n        test_mode=True,\n        resize_keep_ratio=False))\n# optimizer\noptimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 24\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = 
'./work_dirs/ssd512_coco'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "mmdetection/mmdet/__init__.py",
    "content": "from .version import __version__, short_version\n\n__all__ = ['__version__', 'short_version']\n"
  },
  {
    "path": "mmdetection/mmdet/apis/__init__.py",
    "content": "from .env import init_dist, get_root_logger, set_random_seed\nfrom .train import train_detector\nfrom .inference import init_detector, inference_detector, show_result\n\n__all__ = [\n    'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',\n    'init_detector', 'inference_detector', 'show_result'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/apis/env.py",
    "content": "import logging\nimport os\nimport random\nimport subprocess\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nimport torch.multiprocessing as mp\nfrom mmcv.runner import get_dist_info\n\n\ndef init_dist(launcher, backend='nccl', **kwargs):\n    if mp.get_start_method(allow_none=True) is None:\n        mp.set_start_method('spawn')\n    if launcher == 'pytorch':\n        _init_dist_pytorch(backend, **kwargs)\n    elif launcher == 'mpi':\n        _init_dist_mpi(backend, **kwargs)\n    elif launcher == 'slurm':\n        _init_dist_slurm(backend, **kwargs)\n    else:\n        raise ValueError('Invalid launcher type: {}'.format(launcher))\n\n\ndef _init_dist_pytorch(backend, **kwargs):\n    # TODO: use local_rank instead of rank % num_gpus\n    rank = int(os.environ['RANK'])\n    num_gpus = torch.cuda.device_count()\n    torch.cuda.set_device(rank % num_gpus)\n    dist.init_process_group(backend=backend, **kwargs)\n\n\ndef _init_dist_mpi(backend, **kwargs):\n    raise NotImplementedError\n\n\ndef _init_dist_slurm(backend, port=29500, **kwargs):\n    proc_id = int(os.environ['SLURM_PROCID'])\n    ntasks = int(os.environ['SLURM_NTASKS'])\n    node_list = os.environ['SLURM_NODELIST']\n    num_gpus = torch.cuda.device_count()\n    torch.cuda.set_device(proc_id % num_gpus)\n    addr = subprocess.getoutput(\n        'scontrol show hostname {} | head -n1'.format(node_list))\n    os.environ['MASTER_PORT'] = str(port)\n    os.environ['MASTER_ADDR'] = addr\n    os.environ['WORLD_SIZE'] = str(ntasks)\n    os.environ['RANK'] = str(proc_id)\n    dist.init_process_group(backend=backend)\n\n\ndef set_random_seed(seed):\n    random.seed(seed)\n    np.random.seed(seed)\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n\n\ndef get_root_logger(log_level=logging.INFO):\n    logger = logging.getLogger()\n    if not logger.hasHandlers():\n        logging.basicConfig(\n            format='%(asctime)s - %(levelname)s - %(message)s',\n       
     level=log_level)\n    rank, _ = get_dist_info()\n    if rank != 0:\n        logger.setLevel('ERROR')\n    return logger\n"
  },
  {
    "path": "mmdetection/mmdet/apis/inference.py",
    "content": "import warnings\n\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as maskUtils\nimport torch\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet.core import get_classes\nfrom mmdet.datasets import to_tensor\nfrom mmdet.datasets.transforms import ImageTransform\nfrom mmdet.models import build_detector\n\n\ndef init_detector(config, checkpoint=None, device='cuda:0'):\n    \"\"\"Initialize a detector from config file.\n\n    Args:\n        config (str or :obj:`mmcv.Config`): Config file path or the config\n            object.\n        checkpoint (str, optional): Checkpoint path. If left as None, the model\n            will not load any weights.\n\n    Returns:\n        nn.Module: The constructed detector.\n    \"\"\"\n    if isinstance(config, str):\n        config = mmcv.Config.fromfile(config)\n    elif not isinstance(config, mmcv.Config):\n        raise TypeError('config must be a filename or Config object, '\n                        'but got {}'.format(type(config)))\n    config.model.pretrained = None\n    model = build_detector(config.model, test_cfg=config.test_cfg)\n    if checkpoint is not None:\n        checkpoint = load_checkpoint(model, checkpoint)\n        if 'CLASSES' in checkpoint['meta']:\n            model.CLASSES = checkpoint['meta']['classes']\n        else:\n            warnings.warn('Class names are not saved in the checkpoint\\'s '\n                          'meta data, use COCO classes by default.')\n            model.CLASSES = get_classes('coco')\n    model.cfg = config  # save the config in the model for convenience\n    model.to(device)\n    model.eval()\n    return model\n\n\ndef inference_detector(model, imgs):\n    \"\"\"Inference image(s) with the detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        imgs (str/ndarray or list[str/ndarray]): Either image files or loaded\n            images.\n\n    Returns:\n        If imgs is a list, a generator will be returned, otherwise return the\n 
       detection results directly.\n    \"\"\"\n    cfg = model.cfg\n    img_transform = ImageTransform(\n        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)\n\n    device = next(model.parameters()).device  # model device\n    if not isinstance(imgs, list):\n        return _inference_single(model, imgs, img_transform, device)\n    else:\n        return _inference_generator(model, imgs, img_transform, device)\n\n\ndef _prepare_data(img, img_transform, cfg, device):\n    ori_shape = img.shape\n    img, img_shape, pad_shape, scale_factor = img_transform(\n        img,\n        scale=cfg.data.test.img_scale,\n        keep_ratio=cfg.data.test.get('resize_keep_ratio', True))\n    img = to_tensor(img).to(device).unsqueeze(0)\n    img_meta = [\n        dict(\n            ori_shape=ori_shape,\n            img_shape=img_shape,\n            pad_shape=pad_shape,\n            scale_factor=scale_factor,\n            flip=False)\n    ]\n    return dict(img=[img], img_meta=[img_meta])\n\n\ndef _inference_single(model, img, img_transform, device):\n    img = mmcv.imread(img)\n    data = _prepare_data(img, img_transform, model.cfg, device)\n    with torch.no_grad():\n        result = model(return_loss=False, rescale=True, **data)\n    return result\n\n\ndef _inference_generator(model, imgs, img_transform, device):\n    for img in imgs:\n        yield _inference_single(model, img, img_transform, device)\n\n\n# TODO: merge this method with the one in BaseDetector\ndef show_result(img, result, class_names, score_thr=0.3, out_file=None):\n    \"\"\"Visualize the detection results on the image.\n\n    Args:\n        img (str or np.ndarray): Image filename or loaded image.\n        result (tuple[list] or list): The detection result, can be either\n            (bbox, segm) or just bbox.\n        class_names (list[str] or tuple[str]): A list of class names.\n        score_thr (float): The threshold to visualize the bboxes and masks.\n        out_file (str, optional): If 
specified, the visualization result will\n            be written to the out file instead of shown in a window.\n    \"\"\"\n    assert isinstance(class_names, (tuple, list))\n    img = mmcv.imread(img)\n    if isinstance(result, tuple):\n        bbox_result, segm_result = result\n    else:\n        bbox_result, segm_result = result, None\n    bboxes = np.vstack(bbox_result)\n    # draw segmentation masks\n    if segm_result is not None:\n        segms = mmcv.concat_list(segm_result)\n        inds = np.where(bboxes[:, -1] > score_thr)[0]\n        for i in inds:\n            color_mask = np.random.randint(\n                0, 256, (1, 3), dtype=np.uint8)\n            mask = maskUtils.decode(segms[i]).astype(np.bool)\n            img[mask] = img[mask] * 0.5 + color_mask * 0.5\n    # draw bounding boxes\n    labels = [\n        np.full(bbox.shape[0], i, dtype=np.int32)\n        for i, bbox in enumerate(bbox_result)\n    ]\n    labels = np.concatenate(labels)\n    mmcv.imshow_det_bboxes(\n        img.copy(),\n        bboxes,\n        labels,\n        class_names=class_names,\n        score_thr=score_thr,\n        show=out_file is None,\n        out_file=out_file)\n"
  },
  {
    "path": "mmdetection/mmdet/apis/train.py",
    "content": "from __future__ import division\n\nimport re\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv.runner import Runner, DistSamplerSeedHook, obj_from_dict\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\n\nfrom mmdet import datasets\nfrom mmdet.core import (DistOptimizerHook, DistEvalmAPHook,\n                        CocoDistEvalRecallHook, CocoDistEvalmAPHook)\nfrom mmdet.datasets import build_dataloader\nfrom mmdet.models import RPN\nfrom .env import get_root_logger\n\n\ndef parse_losses(losses):\n    log_vars = OrderedDict()\n    for loss_name, loss_value in losses.items():\n        if isinstance(loss_value, torch.Tensor):\n            log_vars[loss_name] = loss_value.mean()\n        elif isinstance(loss_value, list):\n            log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)\n        else:\n            raise TypeError(\n                '{} is not a tensor or list of tensors'.format(loss_name))\n\n    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)\n\n    log_vars['loss'] = loss\n    for name in log_vars:\n        log_vars[name] = log_vars[name].item()\n\n    return loss, log_vars\n\n\ndef batch_processor(model, data, train_mode):\n    losses = model(**data)\n    loss, log_vars = parse_losses(losses)\n\n    outputs = dict(\n        loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))\n\n    return outputs\n\n\ndef train_detector(model,\n                   dataset,\n                   cfg,\n                   distributed=False,\n                   validate=False,\n                   logger=None):\n    if logger is None:\n        logger = get_root_logger(cfg.log_level)\n\n    # start training\n    if distributed:\n        _dist_train(model, dataset, cfg, validate=validate)\n    else:\n        _non_dist_train(model, dataset, cfg, validate=validate)\n\n\ndef build_optimizer(model, optimizer_cfg):\n    \"\"\"Build optimizer from configs.\n\n    Args:\n        model 
(:obj:`nn.Module`): The model with parameters to be optimized.\n        optimizer_cfg (dict): The config dict of the optimizer.\n            Positional fields are:\n                - type: class name of the optimizer.\n                - lr: base learning rate.\n            Optional fields are:\n                - any arguments of the corresponding optimizer type, e.g.,\n                  weight_decay, momentum, etc.\n                - paramwise_options: a dict with 3 accepted fields\n                  (bias_lr_mult, bias_decay_mult, norm_decay_mult).\n                  `bias_lr_mult` and `bias_decay_mult` will be multiplied to\n                  the lr and weight decay respectively for all bias parameters\n                  (except for the normalization layers), and\n                  `norm_decay_mult` will be multiplied to the weight decay\n                  for all weight and bias parameters of normalization layers.\n\n    Returns:\n        torch.optim.Optimizer: The initialized optimizer.\n    \"\"\"\n    if hasattr(model, 'module'):\n        model = model.module\n\n    optimizer_cfg = optimizer_cfg.copy()\n    paramwise_options = optimizer_cfg.pop('paramwise_options', None)\n    # if no paramwise option is specified, just use the global setting\n    if paramwise_options is None:\n        return obj_from_dict(optimizer_cfg, torch.optim,\n                             dict(params=model.parameters()))\n    else:\n        assert isinstance(paramwise_options, dict)\n        # get base lr and weight decay\n        base_lr = optimizer_cfg['lr']\n        base_wd = optimizer_cfg.get('weight_decay', None)\n        # weight_decay must be explicitly specified if mult is specified\n        if ('bias_decay_mult' in paramwise_options\n                or 'norm_decay_mult' in paramwise_options):\n            assert base_wd is not None\n        # get param-wise options\n        bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.)\n        bias_decay_mult = 
paramwise_options.get('bias_decay_mult', 1.)\n        norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.)\n        # set param-wise lr and weight decay\n        params = []\n        for name, param in model.named_parameters():\n            if not param.requires_grad:\n                continue\n\n            param_group = {'params': [param]}\n            # for norm layers, overwrite the weight decay of weight and bias\n            # TODO: obtain the norm layer prefixes dynamically\n            if re.search(r'(bn|gn)(\\d+)?.(weight|bias)', name):\n                if base_wd is not None:\n                    param_group['weight_decay'] = base_wd * norm_decay_mult\n            # for other layers, overwrite both lr and weight decay of bias\n            elif name.endswith('.bias'):\n                param_group['lr'] = base_lr * bias_lr_mult\n                if base_wd is not None:\n                    param_group['weight_decay'] = base_wd * bias_decay_mult\n            # otherwise use the global settings\n\n            params.append(param_group)\n\n        optimizer_cls = getattr(torch.optim, optimizer_cfg.pop('type'))\n        return optimizer_cls(params, **optimizer_cfg)\n\n\ndef _dist_train(model, dataset, cfg, validate=False):\n    # prepare data loaders\n    data_loaders = [\n        build_dataloader(\n            dataset,\n            cfg.data.imgs_per_gpu,\n            cfg.data.workers_per_gpu,\n            dist=True)\n    ]\n    # put model on gpus\n    model = MMDistributedDataParallel(model.cuda())\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,\n                    cfg.log_level)\n    # register hooks\n    optimizer_config = DistOptimizerHook(**cfg.optimizer_config)\n    runner.register_training_hooks(cfg.lr_config, optimizer_config,\n                                   cfg.checkpoint_config, cfg.log_config)\n    
runner.register_hook(DistSamplerSeedHook())\n    # register eval hooks\n    if validate:\n        val_dataset_cfg = cfg.data.val\n        if isinstance(model.module, RPN):\n            # TODO: implement recall hooks for other datasets\n            runner.register_hook(CocoDistEvalRecallHook(val_dataset_cfg))\n        else:\n            dataset_type = getattr(datasets, val_dataset_cfg.type)\n            if issubclass(dataset_type, datasets.CocoDataset):\n                runner.register_hook(CocoDistEvalmAPHook(val_dataset_cfg))\n            else:\n                runner.register_hook(DistEvalmAPHook(val_dataset_cfg))\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)\n\n\ndef _non_dist_train(model, dataset, cfg, validate=False):\n    # prepare data loaders\n    data_loaders = [\n        build_dataloader(\n            dataset,\n            cfg.data.imgs_per_gpu,\n            cfg.data.workers_per_gpu,\n            cfg.gpus,\n            dist=False)\n    ]\n    # put model on gpus\n    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n    runner = Runner(model, batch_processor, optimizer, cfg.work_dir,\n                    cfg.log_level)\n    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,\n                                   cfg.checkpoint_config, cfg.log_config)\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)\n"
  },
  {
    "path": "mmdetection/mmdet/core/__init__.py",
    "content": "from .anchor import *  # noqa: F401, F403\nfrom .bbox import *  # noqa: F401, F403\nfrom .mask import *  # noqa: F401, F403\nfrom .loss import *  # noqa: F401, F403\nfrom .evaluation import *  # noqa: F401, F403\nfrom .post_processing import *  # noqa: F401, F403\nfrom .utils import *  # noqa: F401, F403\n"
  },
  {
    "path": "mmdetection/mmdet/core/anchor/__init__.py",
    "content": "from .anchor_generator import AnchorGenerator\nfrom .anchor_target import anchor_target\n\n__all__ = ['AnchorGenerator', 'anchor_target']\n"
  },
  {
    "path": "mmdetection/mmdet/core/anchor/anchor_generator.py",
    "content": "import torch\n\n\nclass AnchorGenerator(object):\n\n    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):\n        self.base_size = base_size\n        self.scales = torch.Tensor(scales)\n        self.ratios = torch.Tensor(ratios)\n        self.scale_major = scale_major\n        self.ctr = ctr\n        self.base_anchors = self.gen_base_anchors()\n\n    @property\n    def num_base_anchors(self):\n        return self.base_anchors.size(0)\n\n    def gen_base_anchors(self):\n        w = self.base_size\n        h = self.base_size\n        if self.ctr is None:\n            x_ctr = 0.5 * (w - 1)\n            y_ctr = 0.5 * (h - 1)\n        else:\n            x_ctr, y_ctr = self.ctr\n\n        h_ratios = torch.sqrt(self.ratios)\n        w_ratios = 1 / h_ratios\n        if self.scale_major:\n            ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)\n            hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)\n        else:\n            ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)\n            hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)\n\n        base_anchors = torch.stack(\n            [\n                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),\n                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)\n            ],\n            dim=-1).round()\n\n        return base_anchors\n\n    def _meshgrid(self, x, y, row_major=True):\n        xx = x.repeat(len(y))\n        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)\n        if row_major:\n            return xx, yy\n        else:\n            return yy, xx\n\n    def grid_anchors(self, featmap_size, stride=16, device='cuda'):\n        base_anchors = self.base_anchors.to(device)\n\n        feat_h, feat_w = featmap_size\n        shift_x = torch.arange(0, feat_w, device=device) * stride\n        shift_y = torch.arange(0, feat_h, device=device) * stride\n        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)\n       
 shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)\n        shifts = shifts.type_as(base_anchors)\n        # first feat_w elements correspond to the first row of shifts\n        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get\n        # shifted anchors (K, A, 4), reshape to (K*A, 4)\n\n        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]\n        all_anchors = all_anchors.view(-1, 4)\n        # first A rows correspond to A anchors of (0, 0) in feature map,\n        # then (0, 1), (0, 2), ...\n        return all_anchors\n\n    def valid_flags(self, featmap_size, valid_size, device='cuda'):\n        feat_h, feat_w = featmap_size\n        valid_h, valid_w = valid_size\n        assert valid_h <= feat_h and valid_w <= feat_w\n        valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)\n        valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)\n        valid_x[:valid_w] = 1\n        valid_y[:valid_h] = 1\n        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)\n        valid = valid_xx & valid_yy\n        valid = valid[:, None].expand(\n            valid.size(0), self.num_base_anchors).contiguous().view(-1)\n        return valid\n"
  },
  {
    "path": "mmdetection/mmdet/core/anchor/anchor_target.py",
    "content": "import torch\n\nfrom ..bbox import assign_and_sample, build_assigner, PseudoSampler, bbox2delta\nfrom ..utils import multi_apply\n\n\ndef anchor_target(anchor_list,\n                  valid_flag_list,\n                  gt_bboxes_list,\n                  img_metas,\n                  target_means,\n                  target_stds,\n                  cfg,\n                  gt_bboxes_ignore_list=None,\n                  gt_labels_list=None,\n                  label_channels=1,\n                  sampling=True,\n                  unmap_outputs=True):\n    \"\"\"Compute regression and classification targets for anchors.\n\n    Args:\n        anchor_list (list[list]): Multi level anchors of each image.\n        valid_flag_list (list[list]): Multi level valid flags of each image.\n        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n        img_metas (list[dict]): Meta info of each image.\n        target_means (Iterable): Mean value of regression targets.\n        target_stds (Iterable): Std value of regression targets.\n        cfg (dict): RPN train configs.\n\n    Returns:\n        tuple\n    \"\"\"\n    num_imgs = len(img_metas)\n    assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n    # anchor number of multi levels\n    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n    # concat all level anchors and flags to a single tensor\n    for i in range(num_imgs):\n        assert len(anchor_list[i]) == len(valid_flag_list[i])\n        anchor_list[i] = torch.cat(anchor_list[i])\n        valid_flag_list[i] = torch.cat(valid_flag_list[i])\n\n    # compute targets for each image\n    if gt_bboxes_ignore_list is None:\n        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n    if gt_labels_list is None:\n        gt_labels_list = [None for _ in range(num_imgs)]\n    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,\n     pos_inds_list, neg_inds_list) = multi_apply(\n         
anchor_target_single,\n         anchor_list,\n         valid_flag_list,\n         gt_bboxes_list,\n         gt_bboxes_ignore_list,\n         gt_labels_list,\n         img_metas,\n         target_means=target_means,\n         target_stds=target_stds,\n         cfg=cfg,\n         label_channels=label_channels,\n         sampling=sampling,\n         unmap_outputs=unmap_outputs)\n    # no valid anchors\n    if any([labels is None for labels in all_labels]):\n        return None\n    # sampled anchors of all images\n    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n    # split targets to a list w.r.t. multiple levels\n    labels_list = images_to_levels(all_labels, num_level_anchors)\n    label_weights_list = images_to_levels(all_label_weights, num_level_anchors)\n    bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)\n    bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)\n    return (labels_list, label_weights_list, bbox_targets_list,\n            bbox_weights_list, num_total_pos, num_total_neg)\n\n\ndef images_to_levels(target, num_level_anchors):\n    \"\"\"Convert targets by image to targets by feature level.\n\n    [target_img0, target_img1] -> [target_level0, target_level1, ...]\n    \"\"\"\n    target = torch.stack(target, 0)\n    level_targets = []\n    start = 0\n    for n in num_level_anchors:\n        end = start + n\n        level_targets.append(target[:, start:end].squeeze(0))\n        start = end\n    return level_targets\n\n\ndef anchor_target_single(flat_anchors,\n                         valid_flags,\n                         gt_bboxes,\n                         gt_bboxes_ignore,\n                         gt_labels,\n                         img_meta,\n                         target_means,\n                         target_stds,\n                         cfg,\n                         label_channels=1,\n       
                  sampling=True,\n                         unmap_outputs=True):\n    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                       img_meta['img_shape'][:2],\n                                       cfg.allowed_border)\n    if not inside_flags.any():\n        return (None, ) * 6\n    # assign gt and sample anchors\n    anchors = flat_anchors[inside_flags, :]\n\n    if sampling:\n        assign_result, sampling_result = assign_and_sample(\n            anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)\n    else:\n        bbox_assigner = build_assigner(cfg.assigner)\n        assign_result = bbox_assigner.assign(anchors, gt_bboxes,\n                                             gt_bboxes_ignore, gt_labels)\n        bbox_sampler = PseudoSampler()\n        sampling_result = bbox_sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n    num_valid_anchors = anchors.shape[0]\n    bbox_targets = torch.zeros_like(anchors)\n    bbox_weights = torch.zeros_like(anchors)\n    labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)\n    label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n    pos_inds = sampling_result.pos_inds\n    neg_inds = sampling_result.neg_inds\n    if len(pos_inds) > 0:\n        pos_bbox_targets = bbox2delta(sampling_result.pos_bboxes,\n                                      sampling_result.pos_gt_bboxes,\n                                      target_means, target_stds)\n        bbox_targets[pos_inds, :] = pos_bbox_targets\n        bbox_weights[pos_inds, :] = 1.0\n        if gt_labels is None:\n            labels[pos_inds] = 1\n        else:\n            labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        if cfg.pos_weight <= 0:\n            label_weights[pos_inds] = 1.0\n        else:\n            label_weights[pos_inds] = cfg.pos_weight\n    if len(neg_inds) > 0:\n        label_weights[neg_inds] = 1.0\n\n 
   # map up to original set of anchors\n    if unmap_outputs:\n        num_total_anchors = flat_anchors.size(0)\n        labels = unmap(labels, num_total_anchors, inside_flags)\n        label_weights = unmap(label_weights, num_total_anchors, inside_flags)\n        bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n        bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n    return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n            neg_inds)\n\n\ndef anchor_inside_flags(flat_anchors, valid_flags, img_shape,\n                        allowed_border=0):\n    img_h, img_w = img_shape[:2]\n    if allowed_border >= 0:\n        inside_flags = valid_flags & \\\n            (flat_anchors[:, 0] >= -allowed_border) & \\\n            (flat_anchors[:, 1] >= -allowed_border) & \\\n            (flat_anchors[:, 2] < img_w + allowed_border) & \\\n            (flat_anchors[:, 3] < img_h + allowed_border)\n    else:\n        inside_flags = valid_flags\n    return inside_flags\n\n\ndef unmap(data, count, inds, fill=0):\n    \"\"\" Unmap a subset of item (data) back to the original set of items (of\n    size count) \"\"\"\n    if data.dim() == 1:\n        ret = data.new_full((count, ), fill)\n        ret[inds] = data\n    else:\n        new_size = (count, ) + data.size()[1:]\n        ret = data.new_full(new_size, fill)\n        ret[inds, :] = data\n    return ret\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/__init__.py",
    "content": "from .geometry import bbox_overlaps\nfrom .assigners import BaseAssigner, MaxIoUAssigner, AssignResult\nfrom .samplers import (BaseSampler, PseudoSampler, RandomSampler,\n                       InstanceBalancedPosSampler, IoUBalancedNegSampler,\n                       CombinedSampler, SamplingResult)\nfrom .assign_sampling import build_assigner, build_sampler, assign_and_sample\nfrom .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,\n                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result,\n                         distance2bbox)\nfrom .bbox_target import bbox_target\n\n__all__ = [\n    'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',\n    'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',\n    'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',\n    'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',\n    'distance2bbox', 'bbox_target'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/assign_sampling.py",
    "content": "import mmcv\n\nfrom . import assigners, samplers\n\n\ndef build_assigner(cfg, **kwargs):\n    if isinstance(cfg, assigners.BaseAssigner):\n        return cfg\n    elif isinstance(cfg, dict):\n        return mmcv.runner.obj_from_dict(\n            cfg, assigners, default_args=kwargs)\n    else:\n        raise TypeError('Invalid type {} for building a sampler'.format(\n            type(cfg)))\n\n\ndef build_sampler(cfg, **kwargs):\n    if isinstance(cfg, samplers.BaseSampler):\n        return cfg\n    elif isinstance(cfg, dict):\n        return mmcv.runner.obj_from_dict(\n            cfg, samplers, default_args=kwargs)\n    else:\n        raise TypeError('Invalid type {} for building a sampler'.format(\n            type(cfg)))\n\n\ndef assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):\n    bbox_assigner = build_assigner(cfg.assigner)\n    bbox_sampler = build_sampler(cfg.sampler)\n    assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,\n                                         gt_labels)\n    sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,\n                                          gt_labels)\n    return assign_result, sampling_result\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/assigners/__init__.py",
    "content": "from .base_assigner import BaseAssigner\nfrom .max_iou_assigner import MaxIoUAssigner\nfrom .assign_result import AssignResult\n\n__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/assigners/assign_result.py",
    "content": "import torch\n\n\nclass AssignResult(object):\n\n    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):\n        self.num_gts = num_gts\n        self.gt_inds = gt_inds\n        self.max_overlaps = max_overlaps\n        self.labels = labels\n\n    def add_gt_(self, gt_labels):\n        self_inds = torch.arange(\n            1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)\n        self.gt_inds = torch.cat([self_inds, self.gt_inds])\n        self.max_overlaps = torch.cat(\n            [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])\n        if self.labels is not None:\n            self.labels = torch.cat([gt_labels, self.labels])\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/assigners/base_assigner.py",
    "content": "from abc import ABCMeta, abstractmethod\n\n\nclass BaseAssigner(metaclass=ABCMeta):\n\n    @abstractmethod\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        pass\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py",
    "content": "import torch\n\nfrom .base_assigner import BaseAssigner\nfrom .assign_result import AssignResult\nfrom ..geometry import bbox_overlaps\n\n\nclass MaxIoUAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposal will be assigned `-1`, `0`, or a positive integer\n    indicating the ground truth index.\n\n    - -1: don't care\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        pos_iou_thr (float): IoU threshold for positive bboxes.\n        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.\n        min_pos_iou (float): Minimum iou for a bbox to be considered as a\n            positive bbox. Positive samples can have smaller IoU than\n            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).\n        gt_max_assign_all (bool): Whether to assign all bboxes with the same\n            highest overlap with some gt to that gt.\n        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if\n            `gt_bboxes_ignore` is specified). 
Negative values mean not\n            ignoring any bboxes.\n        ignore_wrt_candidates (bool): Whether to compute the iof between\n            `bboxes` and `gt_bboxes_ignore`, or the contrary.\n    \"\"\"\n\n    def __init__(self,\n                 pos_iou_thr,\n                 neg_iou_thr,\n                 min_pos_iou=.0,\n                 gt_max_assign_all=True,\n                 ignore_iof_thr=-1,\n                 ignore_wrt_candidates=True):\n        self.pos_iou_thr = pos_iou_thr\n        self.neg_iou_thr = neg_iou_thr\n        self.min_pos_iou = min_pos_iou\n        self.gt_max_assign_all = gt_max_assign_all\n        self.ignore_iof_thr = ignore_iof_thr\n        self.ignore_wrt_candidates = ignore_wrt_candidates\n\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        \"\"\"Assign gt to bboxes.\n\n        This method assigns a gt bbox to each bbox (proposal/anchor), each bbox\n        will be assigned with -1, 0, or a positive number. -1 means don't care,\n        0 means negative sample, positive number is the index (1-based) of\n        assigned gt.\n        The assignment is done in the following steps, the order matters.\n\n        1. assign every bbox to -1\n        2. assign proposals whose iou with all gts < neg_iou_thr to 0\n        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,\n           assign it to that gt\n        4. 
for each gt bbox, assign its nearest proposals (may be more than\n           one) to itself\n\n        Args:\n            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:\n            raise ValueError('No gt or bboxes')\n        bboxes = bboxes[:, :4]\n        overlaps = bbox_overlaps(gt_bboxes, bboxes)\n\n        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (\n                gt_bboxes_ignore.numel() > 0):\n            if self.ignore_wrt_candidates:\n                ignore_overlaps = bbox_overlaps(\n                    bboxes, gt_bboxes_ignore, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)\n            else:\n                ignore_overlaps = bbox_overlaps(\n                    gt_bboxes_ignore, bboxes, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)\n            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1\n\n        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)\n        return assign_result\n\n    def assign_wrt_overlaps(self, overlaps, gt_labels=None):\n        \"\"\"Assign w.r.t. 
the overlaps of bboxes with gts.\n\n        Args:\n            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,\n                shape(k, n).\n            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        if overlaps.numel() == 0:\n            raise ValueError('No gt or proposals')\n\n        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = overlaps.new_full(\n            (num_bboxes, ), -1, dtype=torch.long)\n\n        # for each anchor, which gt best overlaps with it\n        # for each anchor, the max iou of all gts\n        max_overlaps, argmax_overlaps = overlaps.max(dim=0)\n        # for each gt, which anchor best overlaps with it\n        # for each gt, the max iou of all proposals\n        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)\n\n        # 2. assign negative: below\n        if isinstance(self.neg_iou_thr, float):\n            assigned_gt_inds[(max_overlaps >= 0)\n                             & (max_overlaps < self.neg_iou_thr)] = 0\n        elif isinstance(self.neg_iou_thr, tuple):\n            assert len(self.neg_iou_thr) == 2\n            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])\n                             & (max_overlaps < self.neg_iou_thr[1])] = 0\n\n        # 3. assign positive: above positive IoU threshold\n        pos_inds = max_overlaps >= self.pos_iou_thr\n        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1\n\n        # 4. 
assign fg: for each gt, proposals with highest IoU\n        for i in range(num_gts):\n            if gt_max_overlaps[i] >= self.min_pos_iou:\n                if self.gt_max_assign_all:\n                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]\n                    assigned_gt_inds[max_iou_inds] = i + 1\n                else:\n                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))\n            pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/bbox_target.py",
    "content": "import torch\n\nfrom .transforms import bbox2delta\nfrom ..utils import multi_apply\n\n\ndef bbox_target(pos_bboxes_list,\n                neg_bboxes_list,\n                pos_gt_bboxes_list,\n                pos_gt_labels_list,\n                cfg,\n                reg_classes=1,\n                target_means=[.0, .0, .0, .0],\n                target_stds=[1.0, 1.0, 1.0, 1.0],\n                concat=True):\n    labels, label_weights, bbox_targets, bbox_weights = multi_apply(\n        bbox_target_single,\n        pos_bboxes_list,\n        neg_bboxes_list,\n        pos_gt_bboxes_list,\n        pos_gt_labels_list,\n        cfg=cfg,\n        reg_classes=reg_classes,\n        target_means=target_means,\n        target_stds=target_stds)\n\n    if concat:\n        labels = torch.cat(labels, 0)\n        label_weights = torch.cat(label_weights, 0)\n        bbox_targets = torch.cat(bbox_targets, 0)\n        bbox_weights = torch.cat(bbox_weights, 0)\n    return labels, label_weights, bbox_targets, bbox_weights\n\n\ndef bbox_target_single(pos_bboxes,\n                       neg_bboxes,\n                       pos_gt_bboxes,\n                       pos_gt_labels,\n                       cfg,\n                       reg_classes=1,\n                       target_means=[.0, .0, .0, .0],\n                       target_stds=[1.0, 1.0, 1.0, 1.0]):\n    num_pos = pos_bboxes.size(0)\n    num_neg = neg_bboxes.size(0)\n    num_samples = num_pos + num_neg\n    labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)\n    label_weights = pos_bboxes.new_zeros(num_samples)\n    bbox_targets = pos_bboxes.new_zeros(num_samples, 4)\n    bbox_weights = pos_bboxes.new_zeros(num_samples, 4)\n    if num_pos > 0:\n        labels[:num_pos] = pos_gt_labels\n        pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight\n        label_weights[:num_pos] = pos_weight\n        pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,\n                              
        target_stds)\n        bbox_targets[:num_pos, :] = pos_bbox_targets\n        bbox_weights[:num_pos, :] = 1\n    if num_neg > 0:\n        label_weights[-num_neg:] = 1.0\n\n    return labels, label_weights, bbox_targets, bbox_weights\n\n\ndef expand_target(bbox_targets, bbox_weights, labels, num_classes):\n    bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),\n                                                  4 * num_classes))\n    bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),\n                                                  4 * num_classes))\n    for i in torch.nonzero(labels > 0).squeeze(-1):\n        start, end = labels[i] * 4, (labels[i] + 1) * 4\n        bbox_targets_expand[i, start:end] = bbox_targets[i, :]\n        bbox_weights_expand[i, start:end] = bbox_weights[i, :]\n    return bbox_targets_expand, bbox_weights_expand\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/geometry.py",
    "content": "import torch\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):\n    \"\"\"Calculate overlap between two set of bboxes.\n\n    If ``is_aligned`` is ``False``, then calculate the ious between each bbox\n    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of\n    bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (Tensor): shape (m, 4)\n        bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n\n            must be equal.\n        mode (str): \"iou\" (intersection over union) or iof (intersection over\n            foreground).\n\n    Returns:\n        ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)\n    \"\"\"\n\n    assert mode in ['iou', 'iof']\n\n    rows = bboxes1.size(0)\n    cols = bboxes2.size(0)\n    if is_aligned:\n        assert rows == cols\n\n    if rows * cols == 0:\n        return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)\n\n    if is_aligned:\n        lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]\n        rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]\n\n        wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]\n        overlap = wh[:, 0] * wh[:, 1]\n        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (\n            bboxes1[:, 3] - bboxes1[:, 1] + 1)\n\n        if mode == 'iou':\n            area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (\n                bboxes2[:, 3] - bboxes2[:, 1] + 1)\n            ious = overlap / (area1 + area2 - overlap)\n        else:\n            ious = overlap / area1\n    else:\n        lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]\n        rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]\n\n        wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]\n        overlap = wh[:, :, 0] * wh[:, :, 1]\n        area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (\n            bboxes1[:, 3] - bboxes1[:, 1] + 1)\n\n        if mode == 'iou':\n         
   area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (\n                bboxes2[:, 3] - bboxes2[:, 1] + 1)\n            ious = overlap / (area1[:, None] + area2 - overlap)\n        else:\n            ious = overlap / (area1[:, None])\n\n    return ious\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/__init__.py",
    "content": "from .base_sampler import BaseSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSampler\nfrom .instance_balanced_pos_sampler import InstanceBalancedPosSampler\nfrom .iou_balanced_neg_sampler import IoUBalancedNegSampler\nfrom .combined_sampler import CombinedSampler\nfrom .ohem_sampler import OHEMSampler\nfrom .sampling_result import SamplingResult\n\n__all__ = [\n    'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'OHEMSampler', 'SamplingResult'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/base_sampler.py",
    "content": "from abc import ABCMeta, abstractmethod\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass BaseSampler(metaclass=ABCMeta):\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 **kwargs):\n        self.num = num\n        self.pos_fraction = pos_fraction\n        self.neg_pos_ub = neg_pos_ub\n        self.add_gt_as_proposals = add_gt_as_proposals\n        self.pos_sampler = self\n        self.neg_sampler = self\n\n    @abstractmethod\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        pass\n\n    @abstractmethod\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        pass\n\n    def sample(self,\n               assign_result,\n               bboxes,\n               gt_bboxes,\n               gt_labels=None,\n               **kwargs):\n        \"\"\"Sample positive and negative bboxes.\n\n        This is a simple implementation of bbox sampling given candidates,\n        assigning results and ground truth bboxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Bbox assigning results.\n            bboxes (Tensor): Boxes to be sampled from.\n            gt_bboxes (Tensor): Ground truth bboxes.\n            gt_labels (Tensor, optional): Class labels of ground truth bboxes.\n\n        Returns:\n            :obj:`SamplingResult`: Sampling result.\n        \"\"\"\n        bboxes = bboxes[:, :4]\n\n        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)\n        if self.add_gt_as_proposals:\n            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)\n            assign_result.add_gt_(gt_labels)\n            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)\n            gt_flags = torch.cat([gt_ones, gt_flags])\n\n        num_expected_pos = int(self.num * self.pos_fraction)\n        pos_inds = 
self.pos_sampler._sample_pos(\n            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)\n        # We found that sampled indices have duplicated items occasionally.\n        # (may be a bug of PyTorch)\n        pos_inds = pos_inds.unique()\n        num_sampled_pos = pos_inds.numel()\n        num_expected_neg = self.num - num_sampled_pos\n        if self.neg_pos_ub >= 0:\n            _pos = max(1, num_sampled_pos)\n            neg_upper_bound = int(self.neg_pos_ub * _pos)\n            if num_expected_neg > neg_upper_bound:\n                num_expected_neg = neg_upper_bound\n        neg_inds = self.neg_sampler._sample_neg(\n            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)\n        neg_inds = neg_inds.unique()\n\n        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                              assign_result, gt_flags)\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/combined_sampler.py",
    "content": "from .base_sampler import BaseSampler\nfrom ..assign_sampling import build_sampler\n\n\nclass CombinedSampler(BaseSampler):\n\n    def __init__(self, pos_sampler, neg_sampler, **kwargs):\n        super(CombinedSampler, self).__init__(**kwargs)\n        self.pos_sampler = build_sampler(pos_sampler, **kwargs)\n        self.neg_sampler = build_sampler(neg_sampler, **kwargs)\n\n    def _sample_pos(self, **kwargs):\n        raise NotImplementedError\n\n    def _sample_neg(self, **kwargs):\n        raise NotImplementedError\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py",
    "content": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass InstanceBalancedPosSampler(RandomSampler):\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            unique_gt_inds = assign_result.gt_inds[pos_inds].unique()\n            num_gts = len(unique_gt_inds)\n            num_per_gt = int(round(num_expected / float(num_gts)) + 1)\n            sampled_inds = []\n            for i in unique_gt_inds:\n                inds = torch.nonzero(assign_result.gt_inds == i.item())\n                if inds.numel() != 0:\n                    inds = inds.squeeze(1)\n                else:\n                    continue\n                if len(inds) > num_per_gt:\n                    inds = self.random_choice(inds, num_per_gt)\n                sampled_inds.append(inds)\n            sampled_inds = torch.cat(sampled_inds)\n            if len(sampled_inds) < num_expected:\n                num_extra = num_expected - len(sampled_inds)\n                extra_inds = np.array(\n                    list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))\n                if len(extra_inds) > num_extra:\n                    extra_inds = self.random_choice(extra_inds, num_extra)\n                extra_inds = torch.from_numpy(extra_inds).to(\n                    assign_result.gt_inds.device).long()\n                sampled_inds = torch.cat([sampled_inds, extra_inds])\n            elif len(sampled_inds) > num_expected:\n                sampled_inds = self.random_choice(sampled_inds, num_expected)\n            return sampled_inds\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py",
    "content": "import numpy as np\nimport torch\n\nfrom .random_sampler import RandomSampler\n\n\nclass IoUBalancedNegSampler(RandomSampler):\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 hard_thr=0.1,\n                 hard_fraction=0.5,\n                 **kwargs):\n        super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,\n                                                    **kwargs)\n        assert hard_thr > 0\n        assert 0 < hard_fraction < 1\n        self.hard_thr = hard_thr\n        self.hard_fraction = hard_fraction\n\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            max_overlaps = assign_result.max_overlaps.cpu().numpy()\n            # balance sampling for negative samples\n            neg_set = set(neg_inds.cpu().numpy())\n            easy_set = set(\n                np.where(\n                    np.logical_and(max_overlaps >= 0,\n                                   max_overlaps < self.hard_thr))[0])\n            hard_set = set(np.where(max_overlaps >= self.hard_thr)[0])\n            easy_neg_inds = list(easy_set & neg_set)\n            hard_neg_inds = list(hard_set & neg_set)\n\n            num_expected_hard = int(num_expected * self.hard_fraction)\n            if len(hard_neg_inds) > num_expected_hard:\n                sampled_hard_inds = self.random_choice(hard_neg_inds,\n                                                       num_expected_hard)\n            else:\n                sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int)\n            num_expected_easy = num_expected - len(sampled_hard_inds)\n            if len(easy_neg_inds) > num_expected_easy:\n                sampled_easy_inds = 
self.random_choice(easy_neg_inds,\n                                                       num_expected_easy)\n            else:\n                sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int)\n            sampled_inds = np.concatenate((sampled_easy_inds,\n                                           sampled_hard_inds))\n            if len(sampled_inds) < num_expected:\n                num_extra = num_expected - len(sampled_inds)\n                extra_inds = np.array(list(neg_set - set(sampled_inds)))\n                if len(extra_inds) > num_extra:\n                    extra_inds = self.random_choice(extra_inds, num_extra)\n                sampled_inds = np.concatenate((sampled_inds, extra_inds))\n            sampled_inds = torch.from_numpy(sampled_inds).long().to(\n                assign_result.gt_inds.device)\n            return sampled_inds\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py",
    "content": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom ..transforms import bbox2roi\n\n\nclass OHEMSampler(BaseSampler):\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 context,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 **kwargs):\n        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,\n                                          add_gt_as_proposals)\n        if not hasattr(context, 'num_stages'):\n            self.bbox_roi_extractor = context.bbox_roi_extractor\n            self.bbox_head = context.bbox_head\n        else:\n            self.bbox_roi_extractor = context.bbox_roi_extractor[\n                context.current_stage]\n            self.bbox_head = context.bbox_head[context.current_stage]\n\n    def hard_mining(self, inds, num_expected, bboxes, labels, feats):\n        with torch.no_grad():\n            rois = bbox2roi([bboxes])\n            bbox_feats = self.bbox_roi_extractor(\n                feats[:self.bbox_roi_extractor.num_inputs], rois)\n            cls_score, _ = self.bbox_head(bbox_feats)\n            loss = self.bbox_head.loss(\n                cls_score=cls_score,\n                bbox_pred=None,\n                labels=labels,\n                label_weights=cls_score.new_ones(cls_score.size(0)),\n                bbox_targets=None,\n                bbox_weights=None,\n                reduce=False)['loss_cls']\n            _, topk_loss_inds = loss.topk(num_expected)\n        return inds[topk_loss_inds]\n\n    def _sample_pos(self,\n                    assign_result,\n                    num_expected,\n                    bboxes=None,\n                    feats=None,\n                    **kwargs):\n        # Sample some hard positive samples\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if 
pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],\n                                    assign_result.labels[pos_inds], feats)\n\n    def _sample_neg(self,\n                    assign_result,\n                    num_expected,\n                    bboxes=None,\n                    feats=None,\n                    **kwargs):\n        # Sample some hard negative samples\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],\n                                    assign_result.labels[neg_inds], feats)\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py",
    "content": "import torch\n\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\nclass PseudoSampler(BaseSampler):\n\n    def __init__(self, **kwargs):\n        pass\n\n    def _sample_pos(self, **kwargs):\n        raise NotImplementedError\n\n    def _sample_neg(self, **kwargs):\n        raise NotImplementedError\n\n    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):\n        pos_inds = torch.nonzero(\n            assign_result.gt_inds > 0).squeeze(-1).unique()\n        neg_inds = torch.nonzero(\n            assign_result.gt_inds == 0).squeeze(-1).unique()\n        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)\n        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                                         assign_result, gt_flags)\n        return sampling_result\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/random_sampler.py",
    "content": "import numpy as np\nimport torch\n\nfrom .base_sampler import BaseSampler\n\n\nclass RandomSampler(BaseSampler):\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 **kwargs):\n        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,\n                                            add_gt_as_proposals)\n\n    @staticmethod\n    def random_choice(gallery, num):\n        \"\"\"Random select some elements from the gallery.\n\n        It seems that Pytorch's implementation is slower than numpy so we use\n        numpy to randperm the indices.\n        \"\"\"\n        assert len(gallery) >= num\n        if isinstance(gallery, list):\n            gallery = np.array(gallery)\n        cands = np.arange(len(gallery))\n        np.random.shuffle(cands)\n        rand_inds = cands[:num]\n        if not isinstance(gallery, np.ndarray):\n            rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)\n        return gallery[rand_inds]\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some positive samples.\"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.random_choice(pos_inds, num_expected)\n\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some negative samples.\"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            return self.random_choice(neg_inds, num_expected)\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/samplers/sampling_result.py",
    "content": "import torch\n\n\nclass SamplingResult(object):\n\n    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,\n                 gt_flags):\n        self.pos_inds = pos_inds\n        self.neg_inds = neg_inds\n        self.pos_bboxes = bboxes[pos_inds]\n        self.neg_bboxes = bboxes[neg_inds]\n        self.pos_is_gt = gt_flags[pos_inds]\n\n        self.num_gts = gt_bboxes.shape[0]\n        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1\n        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]\n        if assign_result.labels is not None:\n            self.pos_gt_labels = assign_result.labels[pos_inds]\n        else:\n            self.pos_gt_labels = None\n\n    @property\n    def bboxes(self):\n        return torch.cat([self.pos_bboxes, self.neg_bboxes])\n"
  },
  {
    "path": "mmdetection/mmdet/core/bbox/transforms.py",
    "content": "import mmcv\nimport numpy as np\nimport torch\n\n\ndef bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):\n    assert proposals.size() == gt.size()\n\n    proposals = proposals.float()\n    gt = gt.float()\n    px = (proposals[..., 0] + proposals[..., 2]) * 0.5\n    py = (proposals[..., 1] + proposals[..., 3]) * 0.5\n    pw = proposals[..., 2] - proposals[..., 0] + 1.0\n    ph = proposals[..., 3] - proposals[..., 1] + 1.0\n\n    gx = (gt[..., 0] + gt[..., 2]) * 0.5\n    gy = (gt[..., 1] + gt[..., 3]) * 0.5\n    gw = gt[..., 2] - gt[..., 0] + 1.0\n    gh = gt[..., 3] - gt[..., 1] + 1.0\n\n    dx = (gx - px) / pw\n    dy = (gy - py) / ph\n    dw = torch.log(gw / pw)\n    dh = torch.log(gh / ph)\n    deltas = torch.stack([dx, dy, dw, dh], dim=-1)\n\n    means = deltas.new_tensor(means).unsqueeze(0)\n    stds = deltas.new_tensor(stds).unsqueeze(0)\n    deltas = deltas.sub_(means).div_(stds)\n\n    return deltas\n\n\ndef delta2bbox(rois,\n               deltas,\n               means=[0, 0, 0, 0],\n               stds=[1, 1, 1, 1],\n               max_shape=None,\n               wh_ratio_clip=16 / 1000):\n    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)\n    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)\n    denorm_deltas = deltas * stds + means\n    dx = denorm_deltas[:, 0::4]\n    dy = denorm_deltas[:, 1::4]\n    dw = denorm_deltas[:, 2::4]\n    dh = denorm_deltas[:, 3::4]\n    max_ratio = np.abs(np.log(wh_ratio_clip))\n    dw = dw.clamp(min=-max_ratio, max=max_ratio)\n    dh = dh.clamp(min=-max_ratio, max=max_ratio)\n    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)\n    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)\n    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)\n    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)\n    gw = pw * dw.exp()\n    gh = ph * dh.exp()\n    gx = torch.addcmul(px, 1, pw, dx)  # gx = px + pw * dx\n    
gy = torch.addcmul(py, 1, ph, dy)  # gy = py + ph * dy\n    x1 = gx - gw * 0.5 + 0.5\n    y1 = gy - gh * 0.5 + 0.5\n    x2 = gx + gw * 0.5 - 0.5\n    y2 = gy + gh * 0.5 - 0.5\n    if max_shape is not None:\n        x1 = x1.clamp(min=0, max=max_shape[1] - 1)\n        y1 = y1.clamp(min=0, max=max_shape[0] - 1)\n        x2 = x2.clamp(min=0, max=max_shape[1] - 1)\n        y2 = y2.clamp(min=0, max=max_shape[0] - 1)\n    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)\n    return bboxes\n\n\ndef bbox_flip(bboxes, img_shape):\n    \"\"\"Flip bboxes horizontally.\n\n    Args:\n        bboxes(Tensor or ndarray): Shape (..., 4*k)\n        img_shape(tuple): Image shape.\n\n    Returns:\n        Same type as `bboxes`: Flipped bboxes.\n    \"\"\"\n    if isinstance(bboxes, torch.Tensor):\n        assert bboxes.shape[-1] % 4 == 0\n        flipped = bboxes.clone()\n        flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4] - 1\n        flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4] - 1\n        return flipped\n    elif isinstance(bboxes, np.ndarray):\n        return mmcv.bbox_flip(bboxes, img_shape)\n\n\ndef bbox_mapping(bboxes, img_shape, scale_factor, flip):\n    \"\"\"Map bboxes from the original image scale to testing scale\"\"\"\n    new_bboxes = bboxes * scale_factor\n    if flip:\n        new_bboxes = bbox_flip(new_bboxes, img_shape)\n    return new_bboxes\n\n\ndef bbox_mapping_back(bboxes, img_shape, scale_factor, flip):\n    \"\"\"Map bboxes from testing scale to original image scale\"\"\"\n    new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes\n    new_bboxes = new_bboxes / scale_factor\n    return new_bboxes\n\n\ndef bbox2roi(bbox_list):\n    \"\"\"Convert a list of bboxes to roi format.\n\n    Args:\n        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch\n            of images.\n\n    Returns:\n        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]\n    \"\"\"\n    rois_list = []\n    for img_id, bboxes in 
enumerate(bbox_list):\n        if bboxes.size(0) > 0:\n            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)\n            rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)\n        else:\n            rois = bboxes.new_zeros((0, 5))\n        rois_list.append(rois)\n    rois = torch.cat(rois_list, 0)\n    return rois\n\n\ndef roi2bbox(rois):\n    bbox_list = []\n    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)\n    for img_id in img_ids:\n        inds = (rois[:, 0] == img_id.item())\n        bbox = rois[inds, 1:]\n        bbox_list.append(bbox)\n    return bbox_list\n\n\ndef bbox2result(bboxes, labels, num_classes):\n    \"\"\"Convert detection results to a list of numpy arrays.\n\n    Args:\n        bboxes (Tensor): shape (n, 5)\n        labels (Tensor): shape (n, )\n        num_classes (int): class number, including background class\n\n    Returns:\n        list(ndarray): bbox results of each class\n    \"\"\"\n    if bboxes.shape[0] == 0:\n        return [\n            np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)\n        ]\n    else:\n        bboxes = bboxes.cpu().numpy()\n        labels = labels.cpu().numpy()\n        return [bboxes[labels == i, :] for i in range(num_classes - 1)]\n\n\ndef distance2bbox(points, distance, max_shape=None):\n    \"\"\"Decode distance prediction to bounding box.\n\n    Args:\n        points (Tensor): Shape (n, 2), [x, y].\n        distance (Tensor): Distance from the given point to 4\n            boundaries (left, top, right, bottom).\n        max_shape (tuple): Shape of the image.\n\n    Returns:\n        Tensor: Decoded bboxes.\n    \"\"\"\n    x1 = points[:, 0] - distance[:, 0]\n    y1 = points[:, 1] - distance[:, 1]\n    x2 = points[:, 0] + distance[:, 2]\n    y2 = points[:, 1] + distance[:, 3]\n    if max_shape is not None:\n        x1 = x1.clamp(min=0, max=max_shape[1] - 1)\n        y1 = y1.clamp(min=0, max=max_shape[0] - 1)\n        x2 = x2.clamp(min=0, max=max_shape[1] - 1)\n    
    y2 = y2.clamp(min=0, max=max_shape[0] - 1)\n    return torch.stack([x1, y1, x2, y2], -1)\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/__init__.py",
    "content": "from .class_names import (voc_classes, imagenet_det_classes,\n                          imagenet_vid_classes, coco_classes, dataset_aliases,\n                          get_classes)\nfrom .coco_utils import coco_eval, fast_eval_recall, results2json\nfrom .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook,\n                         CocoDistEvalmAPHook)\nfrom .mean_ap import average_precision, eval_map, print_map_summary\nfrom .recall import (eval_recalls, print_recall_summary, plot_num_recall,\n                     plot_iou_recall)\n\n__all__ = [\n    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',\n    'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',\n    'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',\n    'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',\n    'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',\n    'plot_num_recall', 'plot_iou_recall'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/bbox_overlaps.py",
    "content": "import numpy as np\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou'):\n    \"\"\"Calculate the ious between each bbox of bboxes1 and bboxes2.\n\n    Args:\n        bboxes1(ndarray): shape (n, 4)\n        bboxes2(ndarray): shape (k, 4)\n        mode(str): iou (intersection over union) or iof (intersection\n            over foreground)\n\n    Returns:\n        ious(ndarray): shape (n, k)\n    \"\"\"\n\n    assert mode in ['iou', 'iof']\n\n    bboxes1 = bboxes1.astype(np.float32)\n    bboxes2 = bboxes2.astype(np.float32)\n    rows = bboxes1.shape[0]\n    cols = bboxes2.shape[0]\n    ious = np.zeros((rows, cols), dtype=np.float32)\n    if rows * cols == 0:\n        return ious\n    exchange = False\n    if bboxes1.shape[0] > bboxes2.shape[0]:\n        bboxes1, bboxes2 = bboxes2, bboxes1\n        ious = np.zeros((cols, rows), dtype=np.float32)\n        exchange = True\n    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (\n        bboxes1[:, 3] - bboxes1[:, 1] + 1)\n    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (\n        bboxes2[:, 3] - bboxes2[:, 1] + 1)\n    for i in range(bboxes1.shape[0]):\n        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])\n        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])\n        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])\n        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])\n        overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(\n            y_end - y_start + 1, 0)\n        if mode == 'iou':\n            union = area1[i] + area2 - overlap\n        else:\n            union = area1[i] if not exchange else area2\n        ious[i, :] = overlap / union\n    if exchange:\n        ious = ious.T\n    return ious\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/class_names.py",
    "content": "import mmcv\n\n\ndef voc_classes():\n    return [\n        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',\n        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',\n        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'\n    ]\n\n\ndef imagenet_det_classes():\n    return [\n        'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',\n        'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',\n        'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',\n        'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',\n        'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',\n        'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',\n        'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',\n        'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',\n        'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',\n        'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',\n        'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',\n        'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',\n        'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',\n        'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',\n        'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',\n        'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',\n        'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',\n        'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',\n        'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',\n        'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',\n        'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',\n        'person', 
'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',\n        'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',\n        'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',\n        'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',\n        'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',\n        'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',\n        'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',\n        'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',\n        'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',\n        'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',\n        'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',\n        'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',\n        'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',\n        'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',\n        'whale', 'wine_bottle', 'zebra'\n    ]\n\n\ndef imagenet_vid_classes():\n    return [\n        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',\n        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',\n        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',\n        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',\n        'watercraft', 'whale', 'zebra'\n    ]\n\n\ndef coco_classes():\n    return [\n        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',\n        'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign',\n        'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',\n        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',\n        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',\n        'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard',\n        'surfboard', 
'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork',\n        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',\n        'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair',\n        'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv',\n        'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',\n        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n        'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'\n    ]\n\n\ndataset_aliases = {\n    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],\n    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],\n    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],\n    'coco': ['coco', 'mscoco', 'ms_coco']\n}\n\n\ndef get_classes(dataset):\n    \"\"\"Get class names of a dataset.\"\"\"\n    alias2name = {}\n    for name, aliases in dataset_aliases.items():\n        for alias in aliases:\n            alias2name[alias] = name\n\n    if mmcv.is_str(dataset):\n        if dataset in alias2name:\n            labels = eval(alias2name[dataset] + '_classes()')\n        else:\n            raise ValueError('Unrecognized dataset: {}'.format(dataset))\n    else:\n        raise TypeError('dataset must be a str, but got {}'.format(type(dataset)))\n    return labels\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/coco_utils.py",
    "content": "import mmcv\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\nfrom .recall import eval_recalls\n\n\ndef coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):\n    for res_type in result_types:\n        assert res_type in [\n            'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'\n        ]\n\n    if mmcv.is_str(coco):\n        coco = COCO(coco)\n    assert isinstance(coco, COCO)\n\n    if result_types == ['proposal_fast']:\n        ar = fast_eval_recall(result_file, coco, np.array(max_dets))\n        for i, num in enumerate(max_dets):\n            print('AR@{}\\t= {:.4f}'.format(num, ar[i]))\n        return\n\n    assert result_file.endswith('.json')\n    coco_dets = coco.loadRes(result_file)\n\n    img_ids = coco.getImgIds()\n    for res_type in result_types:\n        iou_type = 'bbox' if res_type == 'proposal' else res_type\n        cocoEval = COCOeval(coco, coco_dets, iou_type)\n        cocoEval.params.imgIds = img_ids\n        if res_type == 'proposal':\n            cocoEval.params.useCats = 0\n            cocoEval.params.maxDets = list(max_dets)\n        cocoEval.evaluate()\n        cocoEval.accumulate()\n        cocoEval.summarize()\n\n\ndef fast_eval_recall(results,\n                     coco,\n                     max_dets,\n                     iou_thrs=np.arange(0.5, 0.96, 0.05)):\n    if mmcv.is_str(results):\n        assert results.endswith('.pkl')\n        results = mmcv.load(results)\n    elif not isinstance(results, list):\n        raise TypeError(\n            'results must be a list of numpy arrays or a filename, not {}'.\n            format(type(results)))\n\n    gt_bboxes = []\n    img_ids = coco.getImgIds()\n    for i in range(len(img_ids)):\n        ann_ids = coco.getAnnIds(imgIds=img_ids[i])\n        ann_info = coco.loadAnns(ann_ids)\n        if len(ann_info) == 0:\n            gt_bboxes.append(np.zeros((0, 4)))\n            continue\n        
bboxes = []\n        for ann in ann_info:\n            if ann.get('ignore', False) or ann['iscrowd']:\n                continue\n            x1, y1, w, h = ann['bbox']\n            bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])\n        bboxes = np.array(bboxes, dtype=np.float32)\n        if bboxes.shape[0] == 0:\n            bboxes = np.zeros((0, 4))\n        gt_bboxes.append(bboxes)\n\n    recalls = eval_recalls(\n        gt_bboxes, results, max_dets, iou_thrs, print_summary=False)\n    ar = recalls.mean(axis=1)\n    return ar\n\n\ndef xyxy2xywh(bbox):\n    _bbox = bbox.tolist()\n    return [\n        _bbox[0],\n        _bbox[1],\n        _bbox[2] - _bbox[0] + 1,\n        _bbox[3] - _bbox[1] + 1,\n    ]\n\n\ndef proposal2json(dataset, results):\n    json_results = []\n    for idx in range(len(dataset)):\n        img_id = dataset.img_ids[idx]\n        bboxes = results[idx]\n        for i in range(bboxes.shape[0]):\n            data = dict()\n            data['image_id'] = img_id\n            data['bbox'] = xyxy2xywh(bboxes[i])\n            data['score'] = float(bboxes[i][4])\n            data['category_id'] = 1\n            json_results.append(data)\n    return json_results\n\n\ndef det2json(dataset, results):\n    json_results = []\n    for idx in range(len(dataset)):\n        img_id = dataset.img_ids[idx]\n        result = results[idx]\n        for label in range(len(result)):\n            bboxes = result[label]\n            for i in range(bboxes.shape[0]):\n                data = dict()\n                data['image_id'] = img_id\n                data['bbox'] = xyxy2xywh(bboxes[i])\n                data['score'] = float(bboxes[i][4])\n                data['category_id'] = dataset.cat_ids[label]\n                json_results.append(data)\n    return json_results\n\n\ndef segm2json(dataset, results):\n    json_results = []\n    for idx in range(len(dataset)):\n        img_id = dataset.img_ids[idx]\n        det, seg = results[idx]\n        for label in 
range(len(det)):\n            bboxes = det[label]\n            segms = seg[label]\n            for i in range(bboxes.shape[0]):\n                data = dict()\n                data['image_id'] = img_id\n                data['bbox'] = xyxy2xywh(bboxes[i])\n                data['score'] = float(bboxes[i][4])\n                data['category_id'] = dataset.cat_ids[label]\n                segms[i]['counts'] = segms[i]['counts'].decode()\n                data['segmentation'] = segms[i]\n                json_results.append(data)\n    return json_results\n\n\ndef results2json(dataset, results, out_file):\n    if isinstance(results[0], list):\n        json_results = det2json(dataset, results)\n    elif isinstance(results[0], tuple):\n        json_results = segm2json(dataset, results)\n    elif isinstance(results[0], np.ndarray):\n        json_results = proposal2json(dataset, results)\n    else:\n        raise TypeError('invalid type of results')\n    mmcv.dump(json_results, out_file)\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/eval_hooks.py",
    "content": "import os\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import Hook, obj_from_dict\nfrom mmcv.parallel import scatter, collate\nfrom pycocotools.cocoeval import COCOeval\nfrom torch.utils.data import Dataset\n\nfrom .coco_utils import results2json, fast_eval_recall\nfrom .mean_ap import eval_map\nfrom mmdet import datasets\n\n\nclass DistEvalHook(Hook):\n\n    def __init__(self, dataset, interval=1):\n        if isinstance(dataset, Dataset):\n            self.dataset = dataset\n        elif isinstance(dataset, dict):\n            self.dataset = obj_from_dict(dataset, datasets,\n                                         {'test_mode': True})\n        else:\n            raise TypeError(\n                'dataset must be a Dataset object or a dict, not {}'.format(\n                    type(dataset)))\n        self.interval = interval\n\n    def after_train_epoch(self, runner):\n        if not self.every_n_epochs(runner, self.interval):\n            return\n        runner.model.eval()\n        results = [None for _ in range(len(self.dataset))]\n        if runner.rank == 0:\n            prog_bar = mmcv.ProgressBar(len(self.dataset))\n        for idx in range(runner.rank, len(self.dataset), runner.world_size):\n            data = self.dataset[idx]\n            data_gpu = scatter(\n                collate([data], samples_per_gpu=1),\n                [torch.cuda.current_device()])[0]\n\n            # compute output\n            with torch.no_grad():\n                result = runner.model(\n                    return_loss=False, rescale=True, **data_gpu)\n            results[idx] = result\n\n            batch_size = runner.world_size\n            if runner.rank == 0:\n                for _ in range(batch_size):\n                    prog_bar.update()\n\n        if runner.rank == 0:\n            print('\\n')\n            dist.barrier()\n            for i in range(1, 
runner.world_size):\n                tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))\n                tmp_results = mmcv.load(tmp_file)\n                for idx in range(i, len(results), runner.world_size):\n                    results[idx] = tmp_results[idx]\n                os.remove(tmp_file)\n            self.evaluate(runner, results)\n        else:\n            tmp_file = osp.join(runner.work_dir,\n                                'temp_{}.pkl'.format(runner.rank))\n            mmcv.dump(results, tmp_file)\n            dist.barrier()\n        dist.barrier()\n\n    def evaluate(self):\n        raise NotImplementedError\n\n\nclass DistEvalmAPHook(DistEvalHook):\n\n    def evaluate(self, runner, results):\n        gt_bboxes = []\n        gt_labels = []\n        gt_ignore = [] if self.dataset.with_crowd else None\n        for i in range(len(self.dataset)):\n            ann = self.dataset.get_ann_info(i)\n            bboxes = ann['bboxes']\n            labels = ann['labels']\n            if gt_ignore is not None:\n                ignore = np.concatenate([\n                    np.zeros(bboxes.shape[0], dtype=np.bool),\n                    np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)\n                ])\n                gt_ignore.append(ignore)\n                bboxes = np.vstack([bboxes, ann['bboxes_ignore']])\n                labels = np.concatenate([labels, ann['labels_ignore']])\n            gt_bboxes.append(bboxes)\n            gt_labels.append(labels)\n        # If the dataset is VOC2007, then use 11 points mAP evaluation.\n        if hasattr(self.dataset, 'year') and self.dataset.year == 2007:\n            ds_name = 'voc07'\n        else:\n            ds_name = self.dataset.CLASSES\n        bbox_results = []\n        for result in results:\n            bbox_results.append(result[0])\n        mean_ap, eval_results = eval_map(\n            bbox_results,\n            gt_bboxes,\n            gt_labels,\n            gt_ignore=gt_ignore,\n      
      scale_ranges=None,\n            iou_thr=0.5,\n            dataset=ds_name,\n            print_summary=True)\n        runner.log_buffer.output['mAP'] = mean_ap\n        runner.log_buffer.ready = True\n\n\nclass CocoDistEvalRecallHook(DistEvalHook):\n\n    def __init__(self,\n                 dataset,\n                 proposal_nums=(100, 300, 1000),\n                 iou_thrs=np.arange(0.5, 0.96, 0.05)):\n        super(CocoDistEvalRecallHook, self).__init__(dataset)\n        self.proposal_nums = np.array(proposal_nums, dtype=np.int32)\n        self.iou_thrs = np.array(iou_thrs, dtype=np.float32)\n\n    def evaluate(self, runner, results):\n        # the official coco evaluation is too slow, here we use our own\n        # implementation instead, which may get slightly different results\n        ar = fast_eval_recall(results, self.dataset.coco, self.proposal_nums,\n                              self.iou_thrs)\n        for i, num in enumerate(self.proposal_nums):\n            runner.log_buffer.output['AR@{}'.format(num)] = ar[i]\n        runner.log_buffer.ready = True\n\n\nclass CocoDistEvalmAPHook(DistEvalHook):\n\n    def evaluate(self, runner, results):\n        tmp_file = osp.join(runner.work_dir, 'temp_0.json')\n        results2json(self.dataset, results, tmp_file)\n\n        res_types = ['bbox',\n                     'segm'] if runner.model.module.with_mask else ['bbox']\n        cocoGt = self.dataset.coco\n        cocoDt = cocoGt.loadRes(tmp_file)\n        imgIds = cocoGt.getImgIds()\n        for res_type in res_types:\n            iou_type = res_type\n            cocoEval = COCOeval(cocoGt, cocoDt, iou_type)\n            cocoEval.params.imgIds = imgIds\n            cocoEval.evaluate()\n            cocoEval.accumulate()\n            cocoEval.summarize()\n            metrics = ['mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']\n            for i in range(len(metrics)):\n                key = '{}_{}'.format(res_type, metrics[i])\n                val = 
float('{:.3f}'.format(cocoEval.stats[i]))\n                runner.log_buffer.output[key] = val\n            runner.log_buffer.output['{}_mAP_copypaste'.format(res_type)] = (\n                '{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '\n                '{ap[4]:.3f} {ap[5]:.3f}').format(ap=cocoEval.stats[:6])\n        runner.log_buffer.ready = True\n        os.remove(tmp_file)\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/mean_ap.py",
    "content": "import mmcv\nimport numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\nfrom .class_names import get_classes\n\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision (for single or multiple scales).\n\n    Args:\n        recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float or ndarray: calculated average precision\n    \"\"\"\n    no_scale = False\n    if recalls.ndim == 1:\n        no_scale = True\n        recalls = recalls[np.newaxis, :]\n        precisions = precisions[np.newaxis, :]\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = np.zeros(num_scales, dtype=np.float32)\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        for i in range(num_scales):\n            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]\n            ap[i] = np.sum(\n                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])\n    elif mode == '11points':\n        for i in range(num_scales):\n            for thr in np.arange(0, 1 + 1e-3, 0.1):\n                precs = precisions[i, recalls[i, :] >= thr]\n                prec = precs.max() if precs.size > 0 else 0\n                ap[i] += prec\n            ap /= 11\n    else:\n        raise ValueError(\n            
'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    if no_scale:\n        ap = ap[0]\n    return ap\n\n\ndef tpfp_imagenet(det_bboxes,\n                  gt_bboxes,\n                  gt_ignore,\n                  default_iou_thr,\n                  area_ranges=None):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): the detected bbox\n        gt_bboxes (ndarray): ground truth bboxes of this image\n        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not\n        default_iou_thr (float): the iou thresholds for medium and large bboxes\n        area_ranges (list or None): gt bbox area ranges\n\n    Returns:\n        tuple: two arrays (tp, fp) whose elements are 0 and 1\n    \"\"\"\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n    if area_ranges is None:\n        area_ranges = [(None, None)]\n    num_scales = len(area_ranges)\n    # tp and fp are of shape (num_scales, num_gts), each row is tp or fp\n    # of a certain scale.\n    tp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    fp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    if gt_bboxes.shape[0] == 0:\n        if area_ranges == [(None, None)]:\n            fp[...] 
= 1\n        else:\n            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (\n                det_bboxes[:, 3] - det_bboxes[:, 1] + 1)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n        return tp, fp\n    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)\n    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1\n    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1\n    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),\n                          default_iou_thr)\n    # sort all detections by scores in descending order\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n    for k, (min_area, max_area) in enumerate(area_ranges):\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        # if no area range is specified, gt_area_ignore is all False\n        if min_area is None:\n            gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)\n        else:\n            gt_areas = gt_w * gt_h\n            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n        for i in sort_inds:\n            max_iou = -1\n            matched_gt = -1\n            # find best overlapped available gt\n            for j in range(num_gts):\n                # different from PASCAL VOC: allow finding other gts if the\n                # best overlaped ones are already matched by other det bboxes\n                if gt_covered[j]:\n                    continue\n                elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:\n                    max_iou = ious[i, j]\n                    matched_gt = j\n            # there are 4 cases for a det bbox:\n            # 1. it matches a gt, tp = 1, fp = 0\n            # 2. it matches an ignored gt, tp = 0, fp = 0\n            # 3. it matches no gt and within area range, tp = 0, fp = 1\n            # 4. 
it matches no gt but is beyond area range, tp = 0, fp = 0\n            if matched_gt >= 0:\n                gt_covered[matched_gt] = 1\n                if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):\n                    tp[k, i] = 1\n            elif min_area is None:\n                fp[k, i] = 1\n            else:\n                bbox = det_bboxes[i, :4]\n                area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)\n                if area >= min_area and area < max_area:\n                    fp[k, i] = 1\n    return tp, fp\n\n\ndef tpfp_default(det_bboxes, gt_bboxes, gt_ignore, iou_thr, area_ranges=None):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): the detected bbox\n        gt_bboxes (ndarray): ground truth bboxes of this image\n        gt_ignore (ndarray): indicate if gts are ignored for evaluation or not\n        iou_thr (float): the iou thresholds\n\n    Returns:\n        tuple: (tp, fp), two arrays whose elements are 0 and 1\n    \"\"\"\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n    if area_ranges is None:\n        area_ranges = [(None, None)]\n    num_scales = len(area_ranges)\n    # tp and fp are of shape (num_scales, num_gts), each row is tp or fp of\n    # a certain scale\n    tp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    fp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    if gt_bboxes.shape[0] == 0:\n        if area_ranges == [(None, None)]:\n            fp[...] 
= 1\n        else:\n            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0] + 1) * (\n                det_bboxes[:, 3] - det_bboxes[:, 1] + 1)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n        return tp, fp\n    ious = bbox_overlaps(det_bboxes, gt_bboxes)\n    ious_max = ious.max(axis=1)\n    ious_argmax = ious.argmax(axis=1)\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n    for k, (min_area, max_area) in enumerate(area_ranges):\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        # if no area range is specified, gt_area_ignore is all False\n        if min_area is None:\n            gt_area_ignore = np.zeros_like(gt_ignore, dtype=bool)\n        else:\n            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (\n                gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)\n            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n        for i in sort_inds:\n            if ious_max[i] >= iou_thr:\n                matched_gt = ious_argmax[i]\n                if not (gt_ignore[matched_gt] or gt_area_ignore[matched_gt]):\n                    if not gt_covered[matched_gt]:\n                        gt_covered[matched_gt] = True\n                        tp[k, i] = 1\n                    else:\n                        fp[k, i] = 1\n                # otherwise ignore this detected bbox, tp = 0, fp = 0\n            elif min_area is None:\n                fp[k, i] = 1\n            else:\n                bbox = det_bboxes[i, :4]\n                area = (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)\n                if area >= min_area and area < max_area:\n                    fp[k, i] = 1\n    return tp, fp\n\n\ndef get_cls_results(det_results, gt_bboxes, gt_labels, gt_ignore, class_id):\n    \"\"\"Get det results and gt information of a certain class.\"\"\"\n    cls_dets = [det[class_id]\n                for det in det_results]  # det 
bboxes of this class\n    cls_gts = []  # gt bboxes of this class\n    cls_gt_ignore = []\n    for j in range(len(gt_bboxes)):\n        gt_bbox = gt_bboxes[j]\n        cls_inds = (gt_labels[j] == class_id + 1)\n        cls_gt = gt_bbox[cls_inds, :] if gt_bbox.shape[0] > 0 else gt_bbox\n        cls_gts.append(cls_gt)\n        if gt_ignore is None:\n            cls_gt_ignore.append(np.zeros(cls_gt.shape[0], dtype=np.int32))\n        else:\n            cls_gt_ignore.append(gt_ignore[j][cls_inds])\n    return cls_dets, cls_gts, cls_gt_ignore\n\n\ndef eval_map(det_results,\n             gt_bboxes,\n             gt_labels,\n             gt_ignore=None,\n             scale_ranges=None,\n             iou_thr=0.5,\n             dataset=None,\n             print_summary=True):\n    \"\"\"Evaluate mAP of a dataset.\n\n    Args:\n        det_results (list): a list of list, [[cls1_det, cls2_det, ...], ...]\n        gt_bboxes (list): ground truth bboxes of each image, a list of K*4\n            array.\n        gt_labels (list): ground truth labels of each image, a list of K array\n        gt_ignore (list): gt ignore indicators of each image, a list of K array\n        scale_ranges (list, optional): [(min1, max1), (min2, max2), ...]\n        iou_thr (float): IoU threshold\n        dataset (None or str or list): dataset name or dataset classes, there\n            are minor differences in metrics for different datsets, e.g.\n            \"voc07\", \"imagenet_det\", etc.\n        print_summary (bool): whether to print the mAP summary\n\n    Returns:\n        tuple: (mAP, [dict, dict, ...])\n    \"\"\"\n    assert len(det_results) == len(gt_bboxes) == len(gt_labels)\n    if gt_ignore is not None:\n        assert len(gt_ignore) == len(gt_labels)\n        for i in range(len(gt_ignore)):\n            assert len(gt_labels[i]) == len(gt_ignore[i])\n    area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]\n                   if scale_ranges is not None else None)\n    num_scales = 
len(scale_ranges) if scale_ranges is not None else 1\n    eval_results = []\n    num_classes = len(det_results[0])  # positive class num\n    gt_labels = [\n        label if label.ndim == 1 else label[:, 0] for label in gt_labels\n    ]\n    for i in range(num_classes):\n        # get gt and det bboxes of this class\n        cls_dets, cls_gts, cls_gt_ignore = get_cls_results(\n            det_results, gt_bboxes, gt_labels, gt_ignore, i)\n        # calculate tp and fp for each image\n        tpfp_func = (tpfp_imagenet\n                     if dataset in ['det', 'vid'] else tpfp_default)\n        tpfp = [\n            tpfp_func(cls_dets[j], cls_gts[j], cls_gt_ignore[j], iou_thr,\n                      area_ranges) for j in range(len(cls_dets))\n        ]\n        tp, fp = tuple(zip(*tpfp))\n        # calculate gt number of each scale, gts ignored or beyond scale\n        # are not counted\n        num_gts = np.zeros(num_scales, dtype=int)\n        for j, bbox in enumerate(cls_gts):\n            if area_ranges is None:\n                num_gts[0] += np.sum(np.logical_not(cls_gt_ignore[j]))\n            else:\n                gt_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (\n                    bbox[:, 3] - bbox[:, 1] + 1)\n                for k, (min_area, max_area) in enumerate(area_ranges):\n                    num_gts[k] += np.sum(\n                        np.logical_not(cls_gt_ignore[j]) &\n                        (gt_areas >= min_area) & (gt_areas < max_area))\n        # sort all det bboxes by score, also sort tp and fp\n        cls_dets = np.vstack(cls_dets)\n        num_dets = cls_dets.shape[0]\n        sort_inds = np.argsort(-cls_dets[:, -1])\n        tp = np.hstack(tp)[:, sort_inds]\n        fp = np.hstack(fp)[:, sort_inds]\n        # calculate recall and precision with tp and fp\n        tp = np.cumsum(tp, axis=1)\n        fp = np.cumsum(fp, axis=1)\n        eps = np.finfo(np.float32).eps\n        recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)\n        
precisions = tp / np.maximum((tp + fp), eps)\n        # calculate AP\n        if scale_ranges is None:\n            recalls = recalls[0, :]\n            precisions = precisions[0, :]\n            num_gts = num_gts.item()\n        mode = 'area' if dataset != 'voc07' else '11points'\n        ap = average_precision(recalls, precisions, mode)\n        eval_results.append({\n            'num_gts': num_gts,\n            'num_dets': num_dets,\n            'recall': recalls,\n            'precision': precisions,\n            'ap': ap\n        })\n    if scale_ranges is not None:\n        # shape (num_classes, num_scales)\n        all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])\n        all_num_gts = np.vstack(\n            [cls_result['num_gts'] for cls_result in eval_results])\n        mean_ap = [\n            all_ap[all_num_gts[:, i] > 0, i].mean()\n            if np.any(all_num_gts[:, i] > 0) else 0.0\n            for i in range(num_scales)\n        ]\n    else:\n        aps = []\n        for cls_result in eval_results:\n            if cls_result['num_gts'] > 0:\n                aps.append(cls_result['ap'])\n        mean_ap = np.array(aps).mean().item() if aps else 0.0\n    if print_summary:\n        print_map_summary(mean_ap, eval_results, dataset)\n\n    return mean_ap, eval_results\n\n\ndef print_map_summary(mean_ap, results, dataset=None):\n    \"\"\"Print mAP and results of each class.\n\n    Args:\n        mean_ap(float): calculated from `eval_map`\n        results(list): calculated from `eval_map`\n        dataset(None or str or list): dataset name or dataset classes.\n    \"\"\"\n    num_scales = len(results[0]['ap']) if isinstance(results[0]['ap'],\n                                                     np.ndarray) else 1\n    num_classes = len(results)\n\n    recalls = np.zeros((num_scales, num_classes), dtype=np.float32)\n    precisions = np.zeros((num_scales, num_classes), dtype=np.float32)\n    aps = np.zeros((num_scales, num_classes), 
dtype=np.float32)\n    num_gts = np.zeros((num_scales, num_classes), dtype=int)\n    for i, cls_result in enumerate(results):\n        if cls_result['recall'].size > 0:\n            recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]\n            precisions[:, i] = np.array(\n                cls_result['precision'], ndmin=2)[:, -1]\n        aps[:, i] = cls_result['ap']\n        num_gts[:, i] = cls_result['num_gts']\n\n    if dataset is None:\n        label_names = [str(i) for i in range(1, num_classes + 1)]\n    elif mmcv.is_str(dataset):\n        label_names = get_classes(dataset)\n    else:\n        label_names = dataset\n\n    if not isinstance(mean_ap, list):\n        mean_ap = [mean_ap]\n    header = ['class', 'gts', 'dets', 'recall', 'precision', 'ap']\n    for i in range(num_scales):\n        table_data = [header]\n        for j in range(num_classes):\n            row_data = [\n                label_names[j], num_gts[i, j], results[j]['num_dets'],\n                '{:.3f}'.format(recalls[i, j]), '{:.3f}'.format(\n                    precisions[i, j]), '{:.3f}'.format(aps[i, j])\n            ]\n            table_data.append(row_data)\n        table_data.append(['mAP', '', '', '', '', '{:.3f}'.format(mean_ap[i])])\n        table = AsciiTable(table_data)\n        table.inner_footing_row_border = True\n        print(table.table)\n"
  },
  {
    "path": "mmdetection/mmdet/core/evaluation/recall.py",
    "content": "import numpy as np\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\n\n\ndef _recalls(all_ious, proposal_nums, thrs):\n\n    img_num = all_ious.shape[0]\n    total_gt_num = sum([ious.shape[0] for ious in all_ious])\n\n    _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)\n    for k, proposal_num in enumerate(proposal_nums):\n        tmp_ious = np.zeros(0)\n        for i in range(img_num):\n            ious = all_ious[i][:, :proposal_num].copy()\n            gt_ious = np.zeros((ious.shape[0]))\n            if ious.size == 0:\n                tmp_ious = np.hstack((tmp_ious, gt_ious))\n                continue\n            for j in range(ious.shape[0]):\n                gt_max_overlaps = ious.argmax(axis=1)\n                max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]\n                gt_idx = max_ious.argmax()\n                gt_ious[j] = max_ious[gt_idx]\n                box_idx = gt_max_overlaps[gt_idx]\n                ious[gt_idx, :] = -1\n                ious[:, box_idx] = -1\n            tmp_ious = np.hstack((tmp_ious, gt_ious))\n        _ious[k, :] = tmp_ious\n\n    _ious = np.fliplr(np.sort(_ious, axis=1))\n    recalls = np.zeros((proposal_nums.size, thrs.size))\n    for i, thr in enumerate(thrs):\n        recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)\n\n    return recalls\n\n\ndef set_recall_param(proposal_nums, iou_thrs):\n    \"\"\"Check proposal_nums and iou_thrs and set correct format.\n    \"\"\"\n    if isinstance(proposal_nums, list):\n        _proposal_nums = np.array(proposal_nums)\n    elif isinstance(proposal_nums, int):\n        _proposal_nums = np.array([proposal_nums])\n    else:\n        _proposal_nums = proposal_nums\n\n    if iou_thrs is None:\n        _iou_thrs = np.array([0.5])\n    elif isinstance(iou_thrs, list):\n        _iou_thrs = np.array(iou_thrs)\n    elif isinstance(iou_thrs, float):\n        _iou_thrs = 
np.array([iou_thrs])\n    else:\n        _iou_thrs = iou_thrs\n\n    return _proposal_nums, _iou_thrs\n\n\ndef eval_recalls(gts,\n                 proposals,\n                 proposal_nums=None,\n                 iou_thrs=None,\n                 print_summary=True):\n    \"\"\"Calculate recalls.\n\n    Args:\n        gts(list or ndarray): a list of arrays of shape (n, 4)\n        proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5)\n        proposal_nums(int or list of int or ndarray): top N proposals\n        iou_thrs(float or list or ndarray): iou thresholds\n        print_summary(bool): whether to print the recall table\n\n    Returns:\n        ndarray: recalls of different ious and proposal nums\n    \"\"\"\n\n    img_num = len(gts)\n    assert img_num == len(proposals)\n\n    proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)\n\n    all_ious = []\n    for i in range(img_num):\n        if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:\n            scores = proposals[i][:, 4]\n            sort_idx = np.argsort(scores)[::-1]\n            img_proposal = proposals[i][sort_idx, :]\n        else:\n            img_proposal = proposals[i]\n        prop_num = min(img_proposal.shape[0], proposal_nums[-1])\n        if gts[i] is None or gts[i].shape[0] == 0:\n            ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)\n        else:\n            ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4])\n        all_ious.append(ious)\n    all_ious = np.array(all_ious)\n    recalls = _recalls(all_ious, proposal_nums, iou_thrs)\n    if print_summary:\n        print_recall_summary(recalls, proposal_nums, iou_thrs)\n    return recalls\n\n\ndef print_recall_summary(recalls,\n                         proposal_nums,\n                         iou_thrs,\n                         row_idxs=None,\n                         col_idxs=None):\n    \"\"\"Print recalls in a table.\n\n    Args:\n        recalls(ndarray): calculated from `eval_recalls`\n        proposal_nums(ndarray or list): top N 
proposals\n        iou_thrs(ndarray or list): iou thresholds\n        row_idxs(ndarray): which rows(proposal nums) to print\n        col_idxs(ndarray): which cols(iou thresholds) to print\n    \"\"\"\n    proposal_nums = np.array(proposal_nums, dtype=np.int32)\n    iou_thrs = np.array(iou_thrs)\n    if row_idxs is None:\n        row_idxs = np.arange(proposal_nums.size)\n    if col_idxs is None:\n        col_idxs = np.arange(iou_thrs.size)\n    row_header = [''] + iou_thrs[col_idxs].tolist()\n    table_data = [row_header]\n    for i, num in enumerate(proposal_nums[row_idxs]):\n        row = [\n            '{:.3f}'.format(val)\n            for val in recalls[row_idxs[i], col_idxs].tolist()\n        ]\n        row.insert(0, num)\n        table_data.append(row)\n    table = AsciiTable(table_data)\n    print(table.table)\n\n\ndef plot_num_recall(recalls, proposal_nums):\n    \"\"\"Plot Proposal_num-Recalls curve.\n\n    Args:\n        recalls(ndarray or list): shape (k,)\n        proposal_nums(ndarray or list): same shape as `recalls`\n    \"\"\"\n    if isinstance(proposal_nums, np.ndarray):\n        _proposal_nums = proposal_nums.tolist()\n    else:\n        _proposal_nums = proposal_nums\n    if isinstance(recalls, np.ndarray):\n        _recalls = recalls.tolist()\n    else:\n        _recalls = recalls\n\n    import matplotlib.pyplot as plt\n    f = plt.figure()\n    plt.plot([0] + _proposal_nums, [0] + _recalls)\n    plt.xlabel('Proposal num')\n    plt.ylabel('Recall')\n    plt.axis([0, proposal_nums.max(), 0, 1])\n    f.show()\n\n\ndef plot_iou_recall(recalls, iou_thrs):\n    \"\"\"Plot IoU-Recalls curve.\n\n    Args:\n        recalls(ndarray or list): shape (k,)\n        iou_thrs(ndarray or list): same shape as `recalls`\n    \"\"\"\n    if isinstance(iou_thrs, np.ndarray):\n        _iou_thrs = iou_thrs.tolist()\n    else:\n        _iou_thrs = iou_thrs\n    if isinstance(recalls, np.ndarray):\n        _recalls = recalls.tolist()\n    else:\n        _recalls = 
recalls\n\n    import matplotlib.pyplot as plt\n    f = plt.figure()\n    plt.plot(_iou_thrs + [1.0], _recalls + [0.])\n    plt.xlabel('IoU')\n    plt.ylabel('Recall')\n    plt.axis([iou_thrs.min(), 1, 0, 1])\n    f.show()\n"
  },
  {
    "path": "mmdetection/mmdet/core/loss/__init__.py",
    "content": "from .losses import (\n    weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,\n    sigmoid_focal_loss, py_sigmoid_focal_loss, weighted_sigmoid_focal_loss,\n    mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy, iou_loss)\n\n__all__ = [\n    'weighted_nll_loss', 'weighted_cross_entropy',\n    'weighted_binary_cross_entropy', 'sigmoid_focal_loss',\n    'py_sigmoid_focal_loss', 'weighted_sigmoid_focal_loss',\n    'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 'accuracy',\n    'iou_loss'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/loss/losses.py",
    "content": "# TODO merge naive and weighted loss.\nimport torch\nimport torch.nn.functional as F\n\nfrom ..bbox import bbox_overlaps\nfrom ...ops import sigmoid_focal_loss\n\n\ndef weighted_nll_loss(pred, label, weight, avg_factor=None):\n    if avg_factor is None:\n        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)\n    raw = F.nll_loss(pred, label, reduction='none')\n    return torch.sum(raw * weight)[None] / avg_factor\n\n\ndef weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True):\n    if avg_factor is None:\n        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)\n    raw = F.cross_entropy(pred, label, reduction='none')\n    if reduce:\n        return torch.sum(raw * weight)[None] / avg_factor\n    else:\n        return raw * weight / avg_factor\n\n\ndef weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):\n    if pred.dim() != label.dim():\n        label, weight = _expand_binary_labels(label, weight, pred.size(-1))\n    if avg_factor is None:\n        avg_factor = max(torch.sum(weight > 0).float().item(), 1.)\n    return F.binary_cross_entropy_with_logits(\n        pred, label.float(), weight.float(),\n        reduction='sum')[None] / avg_factor\n\n\ndef py_sigmoid_focal_loss(pred,\n                          target,\n                          weight,\n                          gamma=2.0,\n                          alpha=0.25,\n                          reduction='mean'):\n    pred_sigmoid = pred.sigmoid()\n    target = target.type_as(pred)\n    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)\n    weight = (alpha * target + (1 - alpha) * (1 - target)) * weight\n    weight = weight * pt.pow(gamma)\n    loss = F.binary_cross_entropy_with_logits(\n        pred, target, reduction='none') * weight\n    reduction_enum = F._Reduction.get_enum(reduction)\n    # none: 0, mean:1, sum: 2\n    if reduction_enum == 0:\n        return loss\n    elif reduction_enum == 1:\n        return 
loss.mean()\n    elif reduction_enum == 2:\n        return loss.sum()\n\n\ndef weighted_sigmoid_focal_loss(pred,\n                                target,\n                                weight,\n                                gamma=2.0,\n                                alpha=0.25,\n                                avg_factor=None,\n                                num_classes=80):\n    if avg_factor is None:\n        avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6\n    return torch.sum(\n        sigmoid_focal_loss(pred, target, gamma, alpha, 'none') * weight.view(\n            -1, 1))[None] / avg_factor\n\n\ndef mask_cross_entropy(pred, target, label):\n    num_rois = pred.size()[0]\n    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)\n    pred_slice = pred[inds, label].squeeze(1)\n    return F.binary_cross_entropy_with_logits(\n        pred_slice, target, reduction='mean')[None]\n\n\ndef smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):\n    assert beta > 0\n    assert pred.size() == target.size() and target.numel() > 0\n    diff = torch.abs(pred - target)\n    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,\n                       diff - 0.5 * beta)\n    reduction_enum = F._Reduction.get_enum(reduction)\n    # none: 0, mean:1, sum: 2\n    if reduction_enum == 0:\n        return loss\n    elif reduction_enum == 1:\n        return loss.sum() / pred.numel()\n    elif reduction_enum == 2:\n        return loss.sum()\n\n\ndef weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):\n    if avg_factor is None:\n        avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6\n    loss = smooth_l1_loss(pred, target, beta, reduction='none')\n    return torch.sum(loss * weight)[None] / avg_factor\n\n\ndef accuracy(pred, target, topk=1):\n    if isinstance(topk, int):\n        topk = (topk, )\n        return_single = True\n    else:\n        return_single = False\n\n    maxk = max(topk)\n    _, 
pred_label = pred.topk(maxk, 1, True, True)\n    pred_label = pred_label.t()\n    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))\n\n    res = []\n    for k in topk:\n        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)\n        res.append(correct_k.mul_(100.0 / pred.size(0)))\n    return res[0] if return_single else res\n\n\ndef _expand_binary_labels(labels, label_weights, label_channels):\n    bin_labels = labels.new_full((labels.size(0), label_channels), 0)\n    inds = torch.nonzero(labels >= 1).squeeze()\n    if inds.numel() > 0:\n        bin_labels[inds, labels[inds] - 1] = 1\n    bin_label_weights = label_weights.view(-1, 1).expand(\n        label_weights.size(0), label_channels)\n    return bin_labels, bin_label_weights\n\n\ndef iou_loss(pred_bboxes, target_bboxes, reduction='mean'):\n    ious = bbox_overlaps(pred_bboxes, target_bboxes, is_aligned=True)\n    loss = -ious.log()\n\n    reduction_enum = F._Reduction.get_enum(reduction)\n    if reduction_enum == 0:\n        return loss\n    elif reduction_enum == 1:\n        return loss.mean()\n    elif reduction_enum == 2:\n        return loss.sum()\n"
  },
  {
    "path": "mmdetection/mmdet/core/mask/__init__.py",
    "content": "from .utils import split_combined_polys\nfrom .mask_target import mask_target\n\n__all__ = ['split_combined_polys', 'mask_target']\n"
  },
  {
    "path": "mmdetection/mmdet/core/mask/mask_target.py",
    "content": "import torch\nimport numpy as np\nimport mmcv\n\n\ndef mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,\n                cfg):\n    cfg_list = [cfg for _ in range(len(pos_proposals_list))]\n    mask_targets = map(mask_target_single, pos_proposals_list,\n                       pos_assigned_gt_inds_list, gt_masks_list, cfg_list)\n    mask_targets = torch.cat(list(mask_targets))\n    return mask_targets\n\n\ndef mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):\n    mask_size = cfg.mask_size\n    num_pos = pos_proposals.size(0)\n    mask_targets = []\n    if num_pos > 0:\n        proposals_np = pos_proposals.cpu().numpy()\n        pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()\n        for i in range(num_pos):\n            gt_mask = gt_masks[pos_assigned_gt_inds[i]]\n            bbox = proposals_np[i, :].astype(np.int32)\n            x1, y1, x2, y2 = bbox\n            w = np.maximum(x2 - x1 + 1, 1)\n            h = np.maximum(y2 - y1 + 1, 1)\n            # mask is uint8 both before and after resizing\n            target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],\n                                   (mask_size, mask_size))\n            mask_targets.append(target)\n        mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(\n            pos_proposals.device)\n    else:\n        mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))\n    return mask_targets\n"
  },
  {
    "path": "mmdetection/mmdet/core/mask/utils.py",
    "content": "import mmcv\n\n\ndef split_combined_polys(polys, poly_lens, polys_per_mask):\n    \"\"\"Split the combined 1-D polys into masks.\n\n    A mask is represented as a list of polys, and a poly is represented as\n    a 1-D array. In dataset, all masks are concatenated into a single 1-D\n    tensor. Here we need to split the tensor into original representations.\n\n    Args:\n        polys (list): a list (length = image num) of 1-D tensors\n        poly_lens (list): a list (length = image num) of poly length\n        polys_per_mask (list): a list (length = image num) of poly number\n            of each mask\n\n    Returns:\n        list: a list (length = image num) of list (length = mask num) of\n            list (length = poly num) of numpy array\n    \"\"\"\n    mask_polys_list = []\n    for img_id in range(len(polys)):\n        polys_single = polys[img_id]\n        polys_lens_single = poly_lens[img_id].tolist()\n        polys_per_mask_single = polys_per_mask[img_id].tolist()\n\n        split_polys = mmcv.slice_list(polys_single, polys_lens_single)\n        mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)\n        mask_polys_list.append(mask_polys)\n    return mask_polys_list\n"
  },
  {
    "path": "mmdetection/mmdet/core/post_processing/__init__.py",
    "content": "from .bbox_nms import multiclass_nms\nfrom .merge_augs import (merge_aug_proposals, merge_aug_bboxes,\n                         merge_aug_scores, merge_aug_masks)\n\n__all__ = [\n    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',\n    'merge_aug_scores', 'merge_aug_masks'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/post_processing/bbox_nms.py",
    "content": "import torch\n\nfrom mmdet.ops.nms import nms_wrapper\n\n\ndef multiclass_nms(multi_bboxes,\n                   multi_scores,\n                   score_thr,\n                   nms_cfg,\n                   max_num=-1,\n                   score_factors=None):\n    \"\"\"NMS for multi-class bboxes.\n\n    Args:\n        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)\n        multi_scores (Tensor): shape (n, #class)\n        score_thr (float): bbox threshold, bboxes with scores lower than it\n            will not be considered.\n        nms_thr (float): NMS IoU threshold\n        max_num (int): if there are more than max_num bboxes after NMS,\n            only top max_num will be kept.\n        score_factors (Tensor): The factors multiplied to scores before\n            applying NMS\n\n    Returns:\n        tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels\n            are 0-based.\n    \"\"\"\n    num_classes = multi_scores.shape[1]\n    bboxes, labels = [], []\n    nms_cfg_ = nms_cfg.copy()\n    nms_type = nms_cfg_.pop('type', 'nms')\n    nms_op = getattr(nms_wrapper, nms_type)\n    for i in range(1, num_classes):\n        cls_inds = multi_scores[:, i] > score_thr\n        if not cls_inds.any():\n            continue\n        # get bboxes and scores of this class\n        if multi_bboxes.shape[1] == 4:\n            _bboxes = multi_bboxes[cls_inds, :]\n        else:\n            _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]\n        _scores = multi_scores[cls_inds, i]\n        if score_factors is not None:\n            _scores *= score_factors[cls_inds]\n        cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)\n        cls_dets, _ = nms_op(cls_dets, **nms_cfg_)\n        cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),\n                                           i - 1,\n                                           dtype=torch.long)\n        bboxes.append(cls_dets)\n        labels.append(cls_labels)\n    if 
bboxes:\n        bboxes = torch.cat(bboxes)\n        labels = torch.cat(labels)\n        if bboxes.shape[0] > max_num:\n            _, inds = bboxes[:, -1].sort(descending=True)\n            inds = inds[:max_num]\n            bboxes = bboxes[inds]\n            labels = labels[inds]\n    else:\n        bboxes = multi_bboxes.new_zeros((0, 5))\n        labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)\n\n    return bboxes, labels\n"
  },
  {
    "path": "mmdetection/mmdet/core/post_processing/merge_augs.py",
    "content": "import torch\n\nimport numpy as np\n\nfrom mmdet.ops import nms\nfrom ..bbox import bbox_mapping_back\n\n\ndef merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):\n    \"\"\"Merge augmented proposals (multiscale, flip, etc.)\n\n    Args:\n        aug_proposals (list[Tensor]): proposals from different testing\n            schemes, shape (n, 5). Note that they are not rescaled to the\n            original image size.\n        img_metas (list[dict]): image info including \"shape_scale\" and \"flip\".\n        rpn_test_cfg (dict): rpn test config.\n\n    Returns:\n        Tensor: shape (n, 4), proposals corresponding to original image scale.\n    \"\"\"\n    recovered_proposals = []\n    for proposals, img_info in zip(aug_proposals, img_metas):\n        img_shape = img_info['img_shape']\n        scale_factor = img_info['scale_factor']\n        flip = img_info['flip']\n        _proposals = proposals.clone()\n        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,\n                                              scale_factor, flip)\n        recovered_proposals.append(_proposals)\n    aug_proposals = torch.cat(recovered_proposals, dim=0)\n    merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)\n    scores = merged_proposals[:, 4]\n    _, order = scores.sort(0, descending=True)\n    num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])\n    order = order[:num]\n    merged_proposals = merged_proposals[order, :]\n    return merged_proposals\n\n\ndef merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):\n    \"\"\"Merge augmented detection bboxes and scores.\n\n    Args:\n        aug_bboxes (list[Tensor]): shape (n, 4*#class)\n        aug_scores (list[Tensor] or None): shape (n, #class)\n        img_shapes (list[Tensor]): shape (3, ).\n        rcnn_test_cfg (dict): rcnn test config.\n\n    Returns:\n        tuple: (bboxes, scores)\n    \"\"\"\n    recovered_bboxes = []\n    for bboxes, img_info in 
zip(aug_bboxes, img_metas):\n        img_shape = img_info[0]['img_shape']\n        scale_factor = img_info[0]['scale_factor']\n        flip = img_info[0]['flip']\n        bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)\n        recovered_bboxes.append(bboxes)\n    bboxes = torch.stack(recovered_bboxes).mean(dim=0)\n    if aug_scores is None:\n        return bboxes\n    else:\n        scores = torch.stack(aug_scores).mean(dim=0)\n        return bboxes, scores\n\n\ndef merge_aug_scores(aug_scores):\n    \"\"\"Merge augmented bbox scores.\"\"\"\n    if isinstance(aug_scores[0], torch.Tensor):\n        return torch.mean(torch.stack(aug_scores), dim=0)\n    else:\n        return np.mean(aug_scores, axis=0)\n\n\ndef merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):\n    \"\"\"Merge augmented mask prediction.\n\n    Args:\n        aug_masks (list[ndarray]): shape (n, #class, h, w)\n        img_metas (list): image info of each augmentation; the first element\n            of each entry provides the \"flip\" flag.\n        rcnn_test_cfg (dict): rcnn test config.\n        weights (array-like, optional): per-augmentation weights passed to\n            np.average; a plain mean is used when None.\n\n    Returns:\n        ndarray: the merged masks\n    \"\"\"\n    recovered_masks = [\n        mask if not img_info[0]['flip'] else mask[..., ::-1]\n        for mask, img_info in zip(aug_masks, img_metas)\n    ]\n    if weights is None:\n        merged_masks = np.mean(recovered_masks, axis=0)\n    else:\n        merged_masks = np.average(\n            np.array(recovered_masks), axis=0, weights=np.array(weights))\n    return merged_masks\n"
  },
  {
    "path": "mmdetection/mmdet/core/utils/__init__.py",
    "content": "from .dist_utils import allreduce_grads, DistOptimizerHook\nfrom .misc import tensor2imgs, unmap, multi_apply\n\n__all__ = [\n    'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',\n    'multi_apply'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/core/utils/dist_utils.py",
    "content": "from collections import OrderedDict\n\nimport torch.distributed as dist\nfrom torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,\n                          _take_tensors)\nfrom mmcv.runner import OptimizerHook\n\n\ndef _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):\n    if bucket_size_mb > 0:\n        bucket_size_bytes = bucket_size_mb * 1024 * 1024\n        buckets = _take_tensors(tensors, bucket_size_bytes)\n    else:\n        buckets = OrderedDict()\n        for tensor in tensors:\n            tp = tensor.type()\n            if tp not in buckets:\n                buckets[tp] = []\n            buckets[tp].append(tensor)\n        buckets = buckets.values()\n\n    for bucket in buckets:\n        flat_tensors = _flatten_dense_tensors(bucket)\n        dist.all_reduce(flat_tensors)\n        flat_tensors.div_(world_size)\n        for tensor, synced in zip(\n                bucket, _unflatten_dense_tensors(flat_tensors, bucket)):\n            tensor.copy_(synced)\n\n\ndef allreduce_grads(model, coalesce=True, bucket_size_mb=-1):\n    grads = [\n        param.grad.data for param in model.parameters()\n        if param.requires_grad and param.grad is not None\n    ]\n    world_size = dist.get_world_size()\n    if coalesce:\n        _allreduce_coalesced(grads, world_size, bucket_size_mb)\n    else:\n        for tensor in grads:\n            dist.all_reduce(tensor.div_(world_size))\n\n\nclass DistOptimizerHook(OptimizerHook):\n\n    def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):\n        self.grad_clip = grad_clip\n        self.coalesce = coalesce\n        self.bucket_size_mb = bucket_size_mb\n\n    def after_train_iter(self, runner):\n        runner.optimizer.zero_grad()\n        runner.outputs['loss'].backward()\n        allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)\n        if self.grad_clip is not None:\n            self.clip_grads(runner.model.parameters())\n        
runner.optimizer.step()\n"
  },
  {
    "path": "mmdetection/mmdet/core/utils/misc.py",
    "content": "from functools import partial\n\nimport mmcv\nimport numpy as np\nfrom six.moves import map, zip\n\n\ndef tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):\n    num_imgs = tensor.size(0)\n    mean = np.array(mean, dtype=np.float32)\n    std = np.array(std, dtype=np.float32)\n    imgs = []\n    for img_id in range(num_imgs):\n        img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)\n        img = mmcv.imdenormalize(\n            img, mean, std, to_bgr=to_rgb).astype(np.uint8)\n        imgs.append(np.ascontiguousarray(img))\n    return imgs\n\n\ndef multi_apply(func, *args, **kwargs):\n    pfunc = partial(func, **kwargs) if kwargs else func\n    map_results = map(pfunc, *args)\n    return tuple(map(list, zip(*map_results)))\n\n\ndef unmap(data, count, inds, fill=0):\n    \"\"\" Unmap a subset of item (data) back to the original set of items (of\n    size count) \"\"\"\n    if data.dim() == 1:\n        ret = data.new_full((count, ), fill)\n        ret[inds] = data\n    else:\n        new_size = (count, ) + data.size()[1:]\n        ret = data.new_full(new_size, fill)\n        ret[inds, :] = data\n    return ret\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/__init__.py",
    "content": "from .custom import CustomDataset\nfrom .xml_style import XMLDataset\nfrom .coco import CocoDataset\nfrom .voc import VOCDataset\nfrom .loader import GroupSampler, DistributedGroupSampler, build_dataloader\nfrom .utils import to_tensor, random_scale, show_ann, get_dataset\nfrom .concat_dataset import ConcatDataset\nfrom .repeat_dataset import RepeatDataset\nfrom .extra_aug import ExtraAugmentation\n\n__all__ = [\n    'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',\n    'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',\n    'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',\n    'ExtraAugmentation'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/coco.py",
    "content": "import numpy as np\nfrom pycocotools.coco import COCO\n\nfrom .custom import CustomDataset\n\n\nclass CocoDataset(CustomDataset):\n\n    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',\n               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',\n               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',\n               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',\n               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n               'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',\n               'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',\n               'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',\n               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',\n               'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n               'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',\n               'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',\n               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',\n               'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')\n\n    def load_annotations(self, ann_file):\n        self.coco = COCO(ann_file)\n        self.cat_ids = self.coco.getCatIds()\n        self.cat2label = {\n            cat_id: i + 1\n            for i, cat_id in enumerate(self.cat_ids)\n        }\n        self.img_ids = self.coco.getImgIds()\n        img_infos = []\n        for i in self.img_ids:\n            info = self.coco.loadImgs([i])[0]\n            info['filename'] = info['file_name']\n            img_infos.append(info)\n        return img_infos\n\n    def get_ann_info(self, idx):\n        img_id = self.img_infos[idx]['id']\n        ann_ids = self.coco.getAnnIds(imgIds=[img_id])\n        ann_info = self.coco.loadAnns(ann_ids)\n        return self._parse_ann_info(ann_info, self.with_mask)\n\n    def 
_filter_imgs(self, min_size=32):\n        \"\"\"Filter images too small or without ground truths.\"\"\"\n        valid_inds = []\n        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())\n        for i, img_info in enumerate(self.img_infos):\n            if self.img_ids[i] not in ids_with_ann:\n                continue\n            if min(img_info['width'], img_info['height']) >= min_size:\n                valid_inds.append(i)\n        return valid_inds\n\n    def _parse_ann_info(self, ann_info, with_mask=True):\n        \"\"\"Parse bbox and mask annotation.\n\n        Args:\n            ann_info (list[dict]): Annotation info of an image.\n            with_mask (bool): Whether to parse mask annotations.\n\n        Returns:\n            dict: A dict containing the following keys: bboxes, bboxes_ignore,\n                labels, masks, mask_polys, poly_lens.\n        \"\"\"\n        gt_bboxes = []\n        gt_labels = []\n        gt_bboxes_ignore = []\n        # Two formats are provided.\n        # 1. mask: a binary map of the same size of the image.\n        # 2. 
polys: each mask consists of one or several polys, each poly is a\n        # list of float.\n        if with_mask:\n            gt_masks = []\n            gt_mask_polys = []\n            gt_poly_lens = []\n        for i, ann in enumerate(ann_info):\n            if ann.get('ignore', False):\n                continue\n            x1, y1, w, h = ann['bbox']\n            if ann['area'] <= 0 or w < 1 or h < 1:\n                continue\n            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]\n            if ann['iscrowd']:\n                gt_bboxes_ignore.append(bbox)\n            else:\n                gt_bboxes.append(bbox)\n                gt_labels.append(self.cat2label[ann['category_id']])\n            if with_mask:\n                gt_masks.append(self.coco.annToMask(ann))\n                mask_polys = [\n                    p for p in ann['segmentation'] if len(p) >= 6\n                ]  # valid polygons have >= 3 points (6 coordinates)\n                poly_lens = [len(p) for p in mask_polys]\n                gt_mask_polys.append(mask_polys)\n                gt_poly_lens.extend(poly_lens)\n        if gt_bboxes:\n            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)\n            gt_labels = np.array(gt_labels, dtype=np.int64)\n        else:\n            gt_bboxes = np.zeros((0, 4), dtype=np.float32)\n            gt_labels = np.array([], dtype=np.int64)\n\n        if gt_bboxes_ignore:\n            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)\n        else:\n            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)\n\n        ann = dict(\n            bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)\n\n        if with_mask:\n            ann['masks'] = gt_masks\n            # poly format is not used in the current implementation\n            ann['mask_polys'] = gt_mask_polys\n            ann['poly_lens'] = gt_poly_lens\n        return ann\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/concat_dataset.py",
    "content": "import numpy as np\nfrom torch.utils.data.dataset import ConcatDataset as _ConcatDataset\n\n\nclass ConcatDataset(_ConcatDataset):\n    \"\"\"A wrapper of concatenated dataset.\n\n    Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but\n    concat the group flag for image aspect ratio.\n\n    Args:\n        datasets (list[:obj:`Dataset`]): A list of datasets.\n    \"\"\"\n\n    def __init__(self, datasets):\n        super(ConcatDataset, self).__init__(datasets)\n        self.CLASSES = datasets[0].CLASSES\n        if hasattr(datasets[0], 'flag'):\n            flags = []\n            for i in range(0, len(datasets)):\n                flags.append(datasets[i].flag)\n            self.flag = np.concatenate(flags)\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/custom.py",
    "content": "import os.path as osp\n\nimport mmcv\nimport numpy as np\nfrom mmcv.parallel import DataContainer as DC\nfrom torch.utils.data import Dataset\nfrom pycocotools.mask import decode\n\nfrom .transforms import (ImageTransform, BboxTransform, MaskTransform,\n                         SegMapTransform, Numpy2Tensor)\nfrom .utils import to_tensor, random_scale\nfrom .extra_aug import ExtraAugmentation\n\n\nclass CustomDataset(Dataset):\n    \"\"\"Custom dataset for detection.\n\n    Annotation format:\n    [\n        {\n            'filename': 'a.jpg',\n            'width': 1280,\n            'height': 720,\n            'ann': {\n                'bboxes': <np.ndarray> (n, 4),\n                'labels': <np.ndarray> (n, ),\n                'bboxes_ignore': <np.ndarray> (k, 4),\n                'labels_ignore': <np.ndarray> (k, 4) (optional field)\n            }\n        },\n        ...\n    ]\n\n    The `ann` field is optional for testing.\n    \"\"\"\n\n    CLASSES = None\n\n    def __init__(self,\n                 ann_file,\n                 img_prefix,\n                 img_scale,\n                 img_norm_cfg,\n                 multiscale_mode='value',\n                 size_divisor=None,\n                 proposal_file=None,\n                 num_max_proposals=1000,\n                 flip_ratio=0,\n                 with_mask=True,\n                 with_crowd=True,\n                 with_label=True,\n                 with_semantic_seg=False,\n                 seg_prefix=None,\n                 seg_scale_factor=1,\n                 extra_aug=None,\n                 resize_keep_ratio=True,\n                 test_mode=False):\n        # prefix of images path\n        self.img_prefix = img_prefix\n\n        # load annotations (and proposals)\n        self.img_infos = self.load_annotations(ann_file)\n        if proposal_file is not None:\n            self.proposals = self.load_proposals(proposal_file)\n        else:\n            self.proposals = None\n       
 # filter images with no annotation during training\n        if not test_mode:\n            valid_inds = self._filter_imgs()\n            self.img_infos = [self.img_infos[i] for i in valid_inds]\n            if self.proposals is not None:\n                self.proposals = [self.proposals[i] for i in valid_inds]\n\n        # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]\n        self.img_scales = img_scale if isinstance(img_scale,\n                                                  list) else [img_scale]\n        assert mmcv.is_list_of(self.img_scales, tuple)\n        # normalization configs\n        self.img_norm_cfg = img_norm_cfg\n\n        # multi-scale mode (only applicable for multi-scale training)\n        self.multiscale_mode = multiscale_mode\n        assert multiscale_mode in ['value', 'range']\n\n        # max proposals per image\n        self.num_max_proposals = num_max_proposals\n        # flip ratio\n        self.flip_ratio = flip_ratio\n        assert flip_ratio >= 0 and flip_ratio <= 1\n        # padding border to ensure the image size can be divided by\n        # size_divisor (used for FPN)\n        self.size_divisor = size_divisor\n\n        # with mask or not (reserved field, takes no effect)\n        self.with_mask = with_mask\n        # some datasets provide bbox annotations as ignore/crowd/difficult,\n        # if `with_crowd` is True, then these info is returned.\n        self.with_crowd = with_crowd\n        # with label is False for RPN\n        self.with_label = with_label\n        # with semantic segmentation (stuff) annotation or not\n        self.with_seg = with_semantic_seg\n        # prefix of semantic segmentation map path\n        self.seg_prefix = seg_prefix\n        # rescale factor for segmentation maps\n        self.seg_scale_factor = seg_scale_factor\n        # in test mode or not\n        self.test_mode = test_mode\n\n        # set group flag for the sampler\n        if not self.test_mode:\n            
self._set_group_flag()\n        # transforms\n        self.img_transform = ImageTransform(\n            size_divisor=self.size_divisor, **self.img_norm_cfg)\n        self.bbox_transform = BboxTransform()\n        self.mask_transform = MaskTransform()\n        self.seg_transform = SegMapTransform(self.size_divisor)\n        self.numpy2tensor = Numpy2Tensor()\n\n        # if use extra augmentation\n        if extra_aug is not None:\n            self.extra_aug = ExtraAugmentation(**extra_aug)\n        else:\n            self.extra_aug = None\n\n        # image rescale if keep ratio\n        self.resize_keep_ratio = resize_keep_ratio\n\n    def __len__(self):\n        return len(self.img_infos)\n\n    def load_annotations(self, ann_file):\n        return mmcv.load(ann_file)\n\n    def load_proposals(self, proposal_file):\n        return mmcv.load(proposal_file)\n\n    def get_ann_info(self, idx):\n        return self.img_infos[idx]['ann']\n\n    def _filter_imgs(self, min_size=32):\n        \"\"\"Filter images too small.\"\"\"\n        valid_inds = []\n        for i, img_info in enumerate(self.img_infos):\n            if min(img_info['width'], img_info['height']) >= min_size:\n                valid_inds.append(i)\n        return valid_inds\n\n    def _set_group_flag(self):\n        \"\"\"Set flag according to image aspect ratio.\n\n        Images with aspect ratio greater than 1 will be set as group 1,\n        otherwise group 0.\n        \"\"\"\n        self.flag = np.zeros(len(self), dtype=np.uint8)\n        for i in range(len(self)):\n            img_info = self.img_infos[i]\n            if img_info['width'] / img_info['height'] > 1:\n                self.flag[i] = 1\n\n    def _rand_another(self, idx):\n        pool = np.where(self.flag == self.flag[idx])[0]\n        return np.random.choice(pool)\n\n    def __getitem__(self, idx):\n        if self.test_mode:\n            return self.prepare_test_img(idx)\n        while True:\n            data = 
self.prepare_train_img(idx)\n            if data is None:\n                idx = self._rand_another(idx)\n                continue\n            return data\n\n    def prepare_train_img(self, idx):\n        img_info = self.img_infos[idx]\n        # load image\n        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))\n        # load proposals if necessary\n        if self.proposals is not None:\n            proposals = self.proposals[idx][:self.num_max_proposals]\n            # TODO: Handle empty proposals properly. Currently images with\n            # no proposals are just ignored, but they can be used for\n            # training in concept.\n            if len(proposals) == 0:\n                return None\n            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):\n                raise AssertionError(\n                    'proposals should have shapes (n, 4) or (n, 5), '\n                    'but found {}'.format(proposals.shape))\n            if proposals.shape[1] == 5:\n                scores = proposals[:, 4, None]\n                proposals = proposals[:, :4]\n            else:\n                scores = None\n\n        ann = self.get_ann_info(idx)\n        gt_bboxes = ann['bboxes']\n        gt_labels = ann['labels']\n        if self.with_crowd:\n            gt_bboxes_ignore = ann['bboxes_ignore']\n\n        if self.with_mask:\n            gt_masks = decode(ann['masks'])\n            gt_masks = [gt_masks[..., i] for i in range(gt_masks.shape[-1])]\n\n        # extra augmentation\n        if self.extra_aug is not None:\n            img = self.extra_aug(img)\n\n        # skip the image if there is no valid gt bbox\n        if len(gt_bboxes) == 0:\n            return None\n\n        # apply transforms\n        flip = True if np.random.rand() < self.flip_ratio else False\n        # randomly sample a scale\n        img_scale = random_scale(self.img_scales, self.multiscale_mode)\n        img, img_shape, pad_shape, scale_factor = 
self.img_transform(\n            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)\n        img = img.copy()\n        if self.with_seg:\n            gt_seg = mmcv.imread(\n                osp.join(self.seg_prefix, img_info['file_name'].replace(\n                    'jpg', 'png')),\n                flag='unchanged')\n            gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)\n            gt_seg = mmcv.imrescale(\n                gt_seg, self.seg_scale_factor, interpolation='nearest')\n            gt_seg = gt_seg[None, ...]\n        if self.proposals is not None:\n            proposals = self.bbox_transform(proposals, img_shape, scale_factor,\n                                            flip)\n            proposals = np.hstack(\n                [proposals, scores]) if scores is not None else proposals\n        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,\n                                        flip)\n        if self.with_crowd:\n            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,\n                                                   scale_factor, flip)\n        if self.with_mask:\n            gt_masks = self.mask_transform(gt_masks, pad_shape, scale_factor, flip)\n\n        ori_shape = (img_info['height'], img_info['width'], 3)\n        img_meta = dict(\n            ori_shape=ori_shape,\n            img_shape=img_shape,\n            pad_shape=pad_shape,\n            scale_factor=scale_factor,\n            flip=flip)\n\n        data = dict(\n            img=DC(to_tensor(img), stack=True),\n            img_meta=DC(img_meta, cpu_only=True),\n            gt_bboxes=DC(to_tensor(gt_bboxes)))\n        if self.proposals is not None:\n            data['proposals'] = DC(to_tensor(proposals))\n        if self.with_label:\n            data['gt_labels'] = DC(to_tensor(gt_labels))\n        if self.with_crowd:\n            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))\n        if self.with_mask:\n 
           data['gt_masks'] = DC(gt_masks, cpu_only=True)\n        if self.with_seg:\n            data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)\n        return data\n\n    def prepare_test_img(self, idx):\n        \"\"\"Prepare an image for testing (multi-scale and flipping)\"\"\"\n        img_info = self.img_infos[idx]\n        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))\n        if self.proposals is not None:\n            proposal = self.proposals[idx][:self.num_max_proposals]\n            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):\n                raise AssertionError(\n                    'proposals should have shapes (n, 4) or (n, 5), '\n                    'but found {}'.format(proposal.shape))\n        else:\n            proposal = None\n\n        def prepare_single(img, scale, flip, proposal=None):\n            _img, img_shape, pad_shape, scale_factor = self.img_transform(\n                img, scale, flip, keep_ratio=self.resize_keep_ratio)\n            _img = to_tensor(_img)\n            _img_meta = dict(\n                ori_shape=(img_info['height'], img_info['width'], 3),\n                img_shape=img_shape,\n                pad_shape=pad_shape,\n                scale_factor=scale_factor,\n                flip=flip)\n            if proposal is not None:\n                if proposal.shape[1] == 5:\n                    score = proposal[:, 4, None]\n                    proposal = proposal[:, :4]\n                else:\n                    score = None\n                _proposal = self.bbox_transform(proposal, img_shape,\n                                                scale_factor, flip)\n                _proposal = np.hstack(\n                    [_proposal, score]) if score is not None else _proposal\n                _proposal = to_tensor(_proposal)\n            else:\n                _proposal = None\n            return _img, _img_meta, _proposal\n\n        imgs = []\n        img_metas = []\n        
proposals = []\n        for scale in self.img_scales:\n            _img, _img_meta, _proposal = prepare_single(\n                img, scale, False, proposal)\n            imgs.append(_img)\n            img_metas.append(DC(_img_meta, cpu_only=True))\n            proposals.append(_proposal)\n            if self.flip_ratio > 0:\n                _img, _img_meta, _proposal = prepare_single(\n                    img, scale, True, proposal)\n                imgs.append(_img)\n                img_metas.append(DC(_img_meta, cpu_only=True))\n                proposals.append(_proposal)\n        data = dict(img=imgs, img_meta=img_metas)\n        if self.proposals is not None:\n            data['proposals'] = proposals\n        return data\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/extra_aug.py",
    "content": "import numpy as np\n\nimport albumentations as A\nfrom mmcv.runner import obj_from_dict\nfrom . import transforms\n\n\nclass ExtraAugmentation(object):\n\n    def __init__(self, **kwargs):\n        self.transform = self.transform_from_dict(**kwargs)\n\n    def transform_from_dict(self, **kwargs):\n        if 'transforms' in kwargs:\n            kwargs['transforms'] = [self.transform_from_dict(**transform) for transform in kwargs['transforms']]\n        try:\n            return obj_from_dict(kwargs, transforms)\n        except AttributeError:\n            return obj_from_dict(kwargs, A)\n\n    def __call__(self, img):\n        data = self.transform(\n            image=img,\n        )\n        return data['image']\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/loader/__init__.py",
    "content": "from .build_loader import build_dataloader\nfrom .sampler import GroupSampler, DistributedGroupSampler\n\n__all__ = [\n    'GroupSampler', 'DistributedGroupSampler', 'build_dataloader'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/loader/build_loader.py",
    "content": "from functools import partial\n\nfrom mmcv.runner import get_dist_info\nfrom mmcv.parallel import collate\nfrom torch.utils.data import DataLoader\n\nfrom .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler\n\n# https://github.com/pytorch/pytorch/issues/973\nimport resource\nrlimit = resource.getrlimit(resource.RLIMIT_NOFILE)\nresource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))\n\n\ndef build_dataloader(dataset,\n                     imgs_per_gpu,\n                     workers_per_gpu,\n                     num_gpus=1,\n                     dist=True,\n                     **kwargs):\n    shuffle = kwargs.get('shuffle', True)\n    if dist:\n        rank, world_size = get_dist_info()\n        if shuffle:\n            sampler = DistributedGroupSampler(dataset, imgs_per_gpu,\n                                              world_size, rank)\n        else:\n            sampler = DistributedSampler(dataset,\n                                         world_size,\n                                         rank,\n                                         shuffle=False)\n        batch_size = imgs_per_gpu\n        num_workers = workers_per_gpu\n    else:\n        sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None\n        batch_size = num_gpus * imgs_per_gpu\n        num_workers = num_gpus * workers_per_gpu\n\n    data_loader = DataLoader(dataset,\n                             batch_size=batch_size,\n                             sampler=sampler,\n                             num_workers=num_workers,\n                             collate_fn=partial(collate,\n                                                samples_per_gpu=imgs_per_gpu),\n                             pin_memory=False,\n                             **kwargs)\n\n    return data_loader\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/loader/sampler.py",
    "content": "from __future__ import division\n\nimport math\nimport torch\nimport numpy as np\n\nfrom torch.distributed import get_world_size, get_rank\nfrom torch.utils.data import Sampler\nfrom torch.utils.data import DistributedSampler as _DistributedSampler\n\n\nclass DistributedSampler(_DistributedSampler):\n\n    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):\n        super().__init__(dataset, num_replicas=num_replicas, rank=rank)\n        self.shuffle = shuffle\n\n    def __iter__(self):\n        # deterministically shuffle based on epoch\n        if self.shuffle:\n            g = torch.Generator()\n            g.manual_seed(self.epoch)\n            indices = torch.randperm(len(self.dataset), generator=g).tolist()\n        else:\n            indices = torch.arange(len(self.dataset)).tolist()\n\n        # add extra samples to make it evenly divisible\n        indices += indices[:(self.total_size - len(indices))]\n        assert len(indices) == self.total_size\n\n        # subsample\n        indices = indices[self.rank:self.total_size:self.num_replicas]\n        assert len(indices) == self.num_samples\n\n        return iter(indices)\n\n\nclass GroupSampler(Sampler):\n\n    def __init__(self, dataset, samples_per_gpu=1):\n        assert hasattr(dataset, 'flag')\n        self.dataset = dataset\n        self.samples_per_gpu = samples_per_gpu\n        self.flag = dataset.flag.astype(np.int64)\n        self.group_sizes = np.bincount(self.flag)\n        self.num_samples = 0\n        for i, size in enumerate(self.group_sizes):\n            self.num_samples += int(np.ceil(\n                size / self.samples_per_gpu)) * self.samples_per_gpu\n\n    def __iter__(self):\n        indices = []\n        for i, size in enumerate(self.group_sizes):\n            if size == 0:\n                continue\n            indice = np.where(self.flag == i)[0]\n            assert len(indice) == size\n            np.random.shuffle(indice)\n            
num_extra = int(np.ceil(size / self.samples_per_gpu)\n                            ) * self.samples_per_gpu - len(indice)\n            indice = np.concatenate([indice, indice[:num_extra]])\n            indices.append(indice)\n        indices = np.concatenate(indices)\n        indices = [\n            indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]\n            for i in np.random.permutation(\n                range(len(indices) // self.samples_per_gpu))\n        ]\n        indices = np.concatenate(indices)\n        indices = torch.from_numpy(indices).long()\n        assert len(indices) == self.num_samples\n        return iter(indices)\n\n    def __len__(self):\n        return self.num_samples\n\n\nclass DistributedGroupSampler(Sampler):\n    \"\"\"Sampler that restricts data loading to a subset of the dataset.\n    It is especially useful in conjunction with\n    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each\n    process can pass a DistributedSampler instance as a DataLoader sampler,\n    and load a subset of the original dataset that is exclusive to it.\n    .. 
note::\n        Dataset is assumed to be of constant size.\n    Arguments:\n        dataset: Dataset used for sampling.\n        num_replicas (optional): Number of processes participating in\n            distributed training.\n        rank (optional): Rank of the current process within num_replicas.\n    \"\"\"\n\n    def __init__(self,\n                 dataset,\n                 samples_per_gpu=1,\n                 num_replicas=None,\n                 rank=None):\n        if num_replicas is None:\n            num_replicas = get_world_size()\n        if rank is None:\n            rank = get_rank()\n        self.dataset = dataset\n        self.samples_per_gpu = samples_per_gpu\n        self.num_replicas = num_replicas\n        self.rank = rank\n        self.epoch = 0\n\n        assert hasattr(self.dataset, 'flag')\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n\n        self.num_samples = 0\n        for i, j in enumerate(self.group_sizes):\n            self.num_samples += int(\n                math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /\n                          self.num_replicas)) * self.samples_per_gpu\n        self.total_size = self.num_samples * self.num_replicas\n\n    def __iter__(self):\n        # deterministically shuffle based on epoch\n        g = torch.Generator()\n        g.manual_seed(self.epoch)\n\n        indices = []\n        for i, size in enumerate(self.group_sizes):\n            if size > 0:\n                indice = np.where(self.flag == i)[0]\n                assert len(indice) == size\n                indice = indice[list(torch.randperm(int(size),\n                                                    generator=g))].tolist()\n                extra = int(\n                    math.ceil(\n                        size * 1.0 / self.samples_per_gpu / self.num_replicas)\n                ) * self.samples_per_gpu * self.num_replicas - len(indice)\n                indice += indice[:extra]\n    
            indices += indice\n\n        assert len(indices) == self.total_size\n\n        indices = [\n            indices[j] for i in list(\n                torch.randperm(len(indices) // self.samples_per_gpu,\n                               generator=g))\n            for j in range(i * self.samples_per_gpu, (i + 1) *\n                           self.samples_per_gpu)\n        ]\n\n        # subsample\n        offset = self.num_samples * self.rank\n        indices = indices[offset:offset + self.num_samples]\n        assert len(indices) == self.num_samples\n\n        return iter(indices)\n\n    def __len__(self):\n        return self.num_samples\n\n    def set_epoch(self, epoch):\n        self.epoch = epoch\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/repeat_dataset.py",
    "content": "import numpy as np\n\n\nclass RepeatDataset(object):\n\n    def __init__(self, dataset, times):\n        self.dataset = dataset\n        self.times = times\n        self.CLASSES = dataset.CLASSES\n        if hasattr(self.dataset, 'flag'):\n            self.flag = np.tile(self.dataset.flag, times)\n\n        self._ori_len = len(self.dataset)\n\n    def __getitem__(self, idx):\n        return self.dataset[idx % self._ori_len]\n\n    def __len__(self):\n        return self.times * self._ori_len\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/transforms.py",
    "content": "import random\n\nimport albumentations as A\nimport albumentations.augmentations.functional as F\nimport mmcv\nimport numpy as np\nimport torch\nfrom numpy import random\n\n__all__ = [\n    'ImageTransform', 'BboxTransform', 'MaskTransform', 'SegMapTransform',\n    'Numpy2Tensor'\n]\n\n\nclass ImageTransform(object):\n    \"\"\"Preprocess an image.\n\n    1. rescale the image to expected size\n    2. normalize the image\n    3. flip the image (if needed)\n    4. pad the image (if needed)\n    5. transpose to (c, h, w)\n    \"\"\"\n\n    def __init__(self,\n                 mean=(0, 0, 0),\n                 std=(1, 1, 1),\n                 to_rgb=True,\n                 size_divisor=None):\n        self.mean = np.array(mean, dtype=np.float32)\n        self.std = np.array(std, dtype=np.float32)\n        self.to_rgb = to_rgb\n        self.size_divisor = size_divisor\n\n    def __call__(self, img, scale, flip=False, keep_ratio=True):\n        if keep_ratio:\n            img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)\n        else:\n            img, w_scale, h_scale = mmcv.imresize(\n                img, scale, return_scale=True)\n            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],\n                                    dtype=np.float32)\n        img_shape = img.shape\n        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)\n        if flip:\n            img = mmcv.imflip(img)\n        if self.size_divisor is not None:\n            img = mmcv.impad_to_multiple(img, self.size_divisor)\n            pad_shape = img.shape\n        else:\n            pad_shape = img_shape\n        img = img.transpose(2, 0, 1)\n        return img, img_shape, pad_shape, scale_factor\n\n\ndef bbox_flip(bboxes, img_shape):\n    \"\"\"Flip bboxes horizontally.\n\n    Args:\n        bboxes(ndarray): shape (..., 4*k)\n        img_shape(tuple): (height, width)\n    \"\"\"\n    assert bboxes.shape[-1] % 4 == 0\n    w = 
img_shape[1]\n    flipped = bboxes.copy()\n    flipped[..., 0::4] = w - bboxes[..., 2::4] - 1\n    flipped[..., 2::4] = w - bboxes[..., 0::4] - 1\n    return flipped\n\n\nclass BboxTransform(object):\n    \"\"\"Preprocess gt bboxes.\n\n    1. rescale bboxes according to image size\n    2. flip bboxes (if needed)\n    3. pad the first dimension to `max_num_gts`\n    \"\"\"\n\n    def __init__(self, max_num_gts=None):\n        self.max_num_gts = max_num_gts\n\n    def __call__(self, bboxes, img_shape, scale_factor, flip=False):\n        gt_bboxes = bboxes * scale_factor\n        if flip:\n            gt_bboxes = bbox_flip(gt_bboxes, img_shape)\n        gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)\n        gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)\n        if self.max_num_gts is None:\n            return gt_bboxes\n        else:\n            num_gts = gt_bboxes.shape[0]\n            padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)\n            padded_bboxes[:num_gts, :] = gt_bboxes\n            return padded_bboxes\n\n\nclass MaskTransform(object):\n    \"\"\"Preprocess masks.\n\n    1. resize masks to expected size and stack to a single array\n    2. flip the masks (if needed)\n    3. pad the masks (if needed)\n    \"\"\"\n\n    def __call__(self, masks, pad_shape, scale_factor, flip=False):\n        masks = [\n            mmcv.imrescale(mask, scale_factor, interpolation='nearest')\n            for mask in masks\n        ]\n        if flip:\n            masks = [mask[:, ::-1] for mask in masks]\n        padded_masks = [\n            mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks\n        ]\n        padded_masks = np.stack(padded_masks, axis=0)\n        return padded_masks\n\n\nclass SegMapTransform(object):\n    \"\"\"Preprocess semantic segmentation maps.\n\n    1. rescale the segmentation map to expected size\n    3. flip the image (if needed)\n    4. 
pad the image (if needed)\n    \"\"\"\n\n    def __init__(self, size_divisor=None):\n        self.size_divisor = size_divisor\n\n    def __call__(self, img, scale, flip=False, keep_ratio=True):\n        if keep_ratio:\n            img = mmcv.imrescale(img, scale, interpolation='nearest')\n        else:\n            img = mmcv.imresize(img, scale, interpolation='nearest')\n        if flip:\n            img = mmcv.imflip(img)\n        if self.size_divisor is not None:\n            img = mmcv.impad_to_multiple(img, self.size_divisor)\n        return img\n\n\nclass Numpy2Tensor(object):\n\n    def __init__(self):\n        pass\n\n    def __call__(self, *args):\n        if len(args) == 1:\n            return torch.from_numpy(args[0])\n        else:\n            return tuple([torch.from_numpy(np.array(array)) for array in args])\n\n\nclass RandomCropNearBBox(A.DualTransform):\n    \"\"\"Crop bbox from image with random shift by x,y coordinates\n    Args:\n        max_part_shift (float): float value in (0.0, 1.0) range. Default 0.3\n        p (float): probability of applying the transform. 
Default: 1.\n    Targets:\n        image\n    Image types:\n        uint8, float32\n    \"\"\"\n\n    def __init__(self, max_part_shift=0.3, always_apply=False, p=1.0):\n        super(RandomCropNearBBox, self).__init__(always_apply, p)\n        self.max_part_shift = max_part_shift\n\n    def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):\n        return F.clamping_crop(img, x_min, y_min, x_max, y_max)\n\n    def get_params_dependent_on_targets(self, params):\n        bbox = params['cropping_bbox']\n        h_max_shift = int((bbox[3] - bbox[1]) * self.max_part_shift)\n        w_max_shift = int((bbox[2] - bbox[0]) * self.max_part_shift)\n\n        x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)\n        x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)\n\n        y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)\n        y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)\n\n        return {'x_min': x_min,\n                'x_max': x_max,\n                'y_min': y_min,\n                'y_max': y_max\n                }\n\n    def apply_to_bbox(self, bbox, x_min=0, x_max=0, y_min=0, y_max=0, **params):\n        h, w = params['rows'], params['cols']\n        if x_min < 0:\n            x_min = 0\n        if y_min < 0:\n            y_min = 0\n        if y_max >= h:\n            y_max = h - 1\n        if x_max >= w:\n            x_max = w - 1\n        return F.bbox_crop(bbox, x_min, y_min, x_max, y_max, **params)\n\n    @property\n    def targets_as_params(self):\n        return ['cropping_bbox']\n\n    def get_transform_init_args_names(self):\n        return 'max_part_shift',\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/utils.py",
    "content": "import copy\nfrom collections import Sequence\n\nimport mmcv\nfrom mmcv.runner import obj_from_dict\nimport torch\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom .concat_dataset import ConcatDataset\nfrom .repeat_dataset import RepeatDataset\nfrom .. import datasets\n\n\ndef to_tensor(data):\n    \"\"\"Convert objects of various python types to :obj:`torch.Tensor`.\n\n    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,\n    :class:`Sequence`, :class:`int` and :class:`float`.\n    \"\"\"\n    if isinstance(data, torch.Tensor):\n        return data\n    elif isinstance(data, np.ndarray):\n        return torch.from_numpy(data)\n    elif isinstance(data, Sequence) and not mmcv.is_str(data):\n        return torch.tensor(data)\n    elif isinstance(data, int):\n        return torch.LongTensor([data])\n    elif isinstance(data, float):\n        return torch.FloatTensor([data])\n    else:\n        raise TypeError('type {} cannot be converted to tensor.'.format(\n            type(data)))\n\n\ndef random_scale(img_scales, mode='range'):\n    \"\"\"Randomly select a scale from a list of scales or scale ranges.\n\n    Args:\n        img_scales (list[tuple]): Image scale or scale range.\n        mode (str): \"range\" or \"value\".\n\n    Returns:\n        tuple: Sampled image scale.\n    \"\"\"\n    num_scales = len(img_scales)\n    if num_scales == 1:  # fixed scale is specified\n        img_scale = img_scales[0]\n    elif num_scales == 2:  # randomly sample a scale\n        if mode == 'range':\n            img_scale_long = [max(s) for s in img_scales]\n            img_scale_short = [min(s) for s in img_scales]\n            long_edge = np.random.randint(\n                min(img_scale_long),\n                max(img_scale_long) + 1)\n            short_edge = np.random.randint(\n                min(img_scale_short),\n                max(img_scale_short) + 1)\n            img_scale = (long_edge, short_edge)\n        elif mode == 
'value':\n            img_scale = img_scales[np.random.randint(num_scales)]\n    else:\n        if mode != 'value':\n            raise ValueError(\n                'Only \"value\" mode supports more than 2 image scales')\n        img_scale = img_scales[np.random.randint(num_scales)]\n    return img_scale\n\n\ndef show_ann(coco, img, ann_info):\n    plt.imshow(mmcv.bgr2rgb(img))\n    plt.axis('off')\n    coco.showAnns(ann_info)\n    plt.show()\n\n\ndef get_dataset(data_cfg):\n    if data_cfg['type'] == 'RepeatDataset':\n        return RepeatDataset(\n            get_dataset(data_cfg['dataset']), data_cfg['times'])\n\n    if isinstance(data_cfg['ann_file'], (list, tuple)):\n        ann_files = data_cfg['ann_file']\n        num_dset = len(ann_files)\n    else:\n        ann_files = [data_cfg['ann_file']]\n        num_dset = 1\n\n    if 'proposal_file' in data_cfg.keys():\n        if isinstance(data_cfg['proposal_file'], (list, tuple)):\n            proposal_files = data_cfg['proposal_file']\n        else:\n            proposal_files = [data_cfg['proposal_file']]\n    else:\n        proposal_files = [None] * num_dset\n    assert len(proposal_files) == num_dset\n\n    if isinstance(data_cfg['img_prefix'], (list, tuple)):\n        img_prefixes = data_cfg['img_prefix']\n    else:\n        img_prefixes = [data_cfg['img_prefix']] * num_dset\n    assert len(img_prefixes) == num_dset\n\n    dsets = []\n    for i in range(num_dset):\n        data_info = copy.deepcopy(data_cfg)\n        data_info['ann_file'] = ann_files[i]\n        data_info['proposal_file'] = proposal_files[i]\n        data_info['img_prefix'] = img_prefixes[i]\n        dset = obj_from_dict(data_info, datasets)\n        dsets.append(dset)\n    if len(dsets) > 1:\n        dset = ConcatDataset(dsets)\n    else:\n        dset = dsets[0]\n    return dset\n\n\ndef rle_decode(rle, h: int, w: int):\n    s = rle.split()\n    starts, lengths = map(np.asarray, (s[0::2], s[1::2]))\n    starts -= 1\n    ends = starts + 
lengths\n\n    img = np.zeros(h * w, dtype=np.uint8)\n    for start, end in zip(starts, ends):\n        img[start:end] = 1\n    return img.reshape((h, w))\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/voc.py",
    "content": "from .xml_style import XMLDataset\n\n\nclass VOCDataset(XMLDataset):\n\n    CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',\n               'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',\n               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',\n               'tvmonitor')\n\n    def __init__(self, **kwargs):\n        super(VOCDataset, self).__init__(**kwargs)\n        if 'VOC2007' in self.img_prefix:\n            self.year = 2007\n        elif 'VOC2012' in self.img_prefix:\n            self.year = 2012\n        else:\n            raise ValueError('Cannot infer dataset year from img_prefix')\n"
  },
  {
    "path": "mmdetection/mmdet/datasets/xml_style.py",
    "content": "import os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom .custom import CustomDataset\n\n\nclass XMLDataset(CustomDataset):\n\n    def __init__(self, **kwargs):\n        super(XMLDataset, self).__init__(**kwargs)\n        self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}\n\n    def load_annotations(self, ann_file):\n        img_infos = []\n        img_ids = mmcv.list_from_file(ann_file)\n        for img_id in img_ids:\n            filename = 'JPEGImages/{}.jpg'.format(img_id)\n            xml_path = osp.join(self.img_prefix, 'Annotations',\n                                '{}.xml'.format(img_id))\n            tree = ET.parse(xml_path)\n            root = tree.getroot()\n            size = root.find('size')\n            width = int(size.find('width').text)\n            height = int(size.find('height').text)\n            img_infos.append(\n                dict(id=img_id, filename=filename, width=width, height=height))\n        return img_infos\n\n    def get_ann_info(self, idx):\n        img_id = self.img_infos[idx]['id']\n        xml_path = osp.join(self.img_prefix, 'Annotations',\n                            '{}.xml'.format(img_id))\n        tree = ET.parse(xml_path)\n        root = tree.getroot()\n        bboxes = []\n        labels = []\n        bboxes_ignore = []\n        labels_ignore = []\n        for obj in root.findall('object'):\n            name = obj.find('name').text\n            label = self.cat2label[name]\n            difficult = int(obj.find('difficult').text)\n            bnd_box = obj.find('bndbox')\n            bbox = [\n                int(bnd_box.find('xmin').text),\n                int(bnd_box.find('ymin').text),\n                int(bnd_box.find('xmax').text),\n                int(bnd_box.find('ymax').text)\n            ]\n            if difficult:\n                bboxes_ignore.append(bbox)\n                labels_ignore.append(label)\n            else:\n            
    bboxes.append(bbox)\n                labels.append(label)\n        if not bboxes:\n            bboxes = np.zeros((0, 4))\n            labels = np.zeros((0, ))\n        else:\n            bboxes = np.array(bboxes, ndmin=2) - 1\n            labels = np.array(labels)\n        if not bboxes_ignore:\n            bboxes_ignore = np.zeros((0, 4))\n            labels_ignore = np.zeros((0, ))\n        else:\n            bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1\n            labels_ignore = np.array(labels_ignore)\n        ann = dict(\n            bboxes=bboxes.astype(np.float32),\n            labels=labels.astype(np.int64),\n            bboxes_ignore=bboxes_ignore.astype(np.float32),\n            labels_ignore=labels_ignore.astype(np.int64))\n        return ann\n"
  },
  {
    "path": "mmdetection/mmdet/models/__init__.py",
    "content": "from .backbones import *  # noqa: F401,F403\nfrom .necks import *  # noqa: F401,F403\nfrom .roi_extractors import *  # noqa: F401,F403\nfrom .anchor_heads import *  # noqa: F401,F403\nfrom .shared_heads import *  # noqa: F401,F403\nfrom .bbox_heads import *  # noqa: F401,F403\nfrom .mask_heads import *  # noqa: F401,F403\nfrom .detectors import *  # noqa: F401,F403\nfrom .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,\n                       DETECTORS)\nfrom .builder import (build_backbone, build_neck, build_roi_extractor,\n                      build_shared_head, build_head, build_detector)\n\n__all__ = [\n    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS',\n    'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',\n    'build_shared_head', 'build_head', 'build_detector'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/__init__.py",
    "content": "from .anchor_head import AnchorHead\nfrom .fcos_head import FCOSHead\nfrom .retina_head import RetinaHead\nfrom .rpn_head import RPNHead\nfrom .ssd_head import SSDHead\n\n__all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead', 'FCOSHead']\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/anchor_head.py",
    "content": "from __future__ import division\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core import (AnchorGenerator, anchor_target, delta2bbox,\n                        multi_apply, weighted_cross_entropy, weighted_smoothl1,\n                        weighted_binary_cross_entropy,\n                        weighted_sigmoid_focal_loss, multiclass_nms)\nfrom ..registry import HEADS\n\n\n@HEADS.register_module\nclass AnchorHead(nn.Module):\n    \"\"\"Anchor-based head (RPN, RetinaNet, SSD, etc.).\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of channels of the feature map.\n        anchor_scales (Iterable): Anchor scales.\n        anchor_ratios (Iterable): Anchor aspect ratios.\n        anchor_strides (Iterable): Anchor strides.\n        anchor_base_sizes (Iterable): Anchor base sizes.\n        target_means (Iterable): Mean values of regression targets.\n        target_stds (Iterable): Std values of regression targets.\n        use_sigmoid_cls (bool): Whether to use sigmoid loss for\n            classification. 
(softmax by default)\n        cls_focal_loss (bool): Whether to use focal loss for classification.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 anchor_scales=[8, 16, 32],\n                 anchor_ratios=[0.5, 1.0, 2.0],\n                 anchor_strides=[4, 8, 16, 32, 64],\n                 anchor_base_sizes=None,\n                 target_means=(.0, .0, .0, .0),\n                 target_stds=(1.0, 1.0, 1.0, 1.0),\n                 use_sigmoid_cls=False,\n                 cls_focal_loss=False):\n        super(AnchorHead, self).__init__()\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.anchor_scales = anchor_scales\n        self.anchor_ratios = anchor_ratios\n        self.anchor_strides = anchor_strides\n        self.anchor_base_sizes = list(\n            anchor_strides) if anchor_base_sizes is None else anchor_base_sizes\n        self.target_means = target_means\n        self.target_stds = target_stds\n        self.use_sigmoid_cls = use_sigmoid_cls\n        self.cls_focal_loss = cls_focal_loss\n\n        self.anchor_generators = []\n        for anchor_base in self.anchor_base_sizes:\n            self.anchor_generators.append(\n                AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))\n\n        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = self.num_classes - 1\n        else:\n            self.cls_out_channels = self.num_classes\n\n        self._init_layers()\n\n    def _init_layers(self):\n        self.conv_cls = nn.Conv2d(self.feat_channels,\n                                  self.num_anchors * self.cls_out_channels, 1)\n        self.conv_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)\n\n    def init_weights(self):\n        
normal_init(self.conv_cls, std=0.01)\n        normal_init(self.conv_reg, std=0.01)\n\n    def forward_single(self, x):\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        return cls_score, bbox_pred\n\n    def forward(self, feats):\n        return multi_apply(self.forward_single, feats)\n\n    def get_anchors(self, featmap_sizes, img_metas):\n        \"\"\"Get anchors according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n\n        Returns:\n            tuple: anchors of each image, valid flags of each image\n        \"\"\"\n        num_imgs = len(img_metas)\n        num_levels = len(featmap_sizes)\n\n        # since feature map sizes of all images are the same, we only compute\n        # anchors for one time\n        multi_level_anchors = []\n        for i in range(num_levels):\n            anchors = self.anchor_generators[i].grid_anchors(\n                featmap_sizes[i], self.anchor_strides[i])\n            multi_level_anchors.append(anchors)\n        anchor_list = [multi_level_anchors for _ in range(num_imgs)]\n\n        # for each image, we compute valid flags of multi level anchors\n        valid_flag_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_flags = []\n            for i in range(num_levels):\n                anchor_stride = self.anchor_strides[i]\n                feat_h, feat_w = featmap_sizes[i]\n                h, w, _ = img_meta['pad_shape']\n                valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)\n                valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)\n                flags = self.anchor_generators[i].valid_flags(\n                    (feat_h, feat_w), (valid_feat_h, valid_feat_w))\n                multi_level_flags.append(flags)\n            valid_flag_list.append(multi_level_flags)\n\n        return 
anchor_list, valid_flag_list\n\n    def loss_single(self, cls_score, bbox_pred, labels, label_weights,\n                    bbox_targets, bbox_weights, num_total_samples, cfg):\n        # classification loss\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(\n            -1, self.cls_out_channels)\n        if self.use_sigmoid_cls:\n            if self.cls_focal_loss:\n                cls_criterion = weighted_sigmoid_focal_loss\n            else:\n                cls_criterion = weighted_binary_cross_entropy\n        else:\n            if self.cls_focal_loss:\n                raise NotImplementedError\n            else:\n                cls_criterion = weighted_cross_entropy\n        if self.cls_focal_loss:\n            loss_cls = cls_criterion(\n                cls_score,\n                labels,\n                label_weights,\n                gamma=cfg.gamma,\n                alpha=cfg.alpha,\n                avg_factor=num_total_samples)\n        else:\n            loss_cls = cls_criterion(\n                cls_score, labels, label_weights, avg_factor=num_total_samples)\n        # regression loss\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        bbox_weights = bbox_weights.reshape(-1, 4)\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        loss_reg = weighted_smoothl1(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            beta=cfg.smoothl1_beta,\n            avg_factor=num_total_samples)\n        return loss_cls, loss_reg\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             cfg,\n             gt_bboxes_ignore=None):\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == len(self.anchor_generators)\n\n        anchor_list, 
valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas)\n        sampling = False if self.cls_focal_loss else True\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = anchor_target(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            self.target_means,\n            self.target_stds,\n            cfg,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n            sampling=sampling)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        num_total_samples = (num_total_pos if self.cls_focal_loss else\n                             num_total_pos + num_total_neg)\n        losses_cls, losses_reg = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            num_total_samples=num_total_samples,\n            cfg=cfg)\n        return dict(loss_cls=losses_cls, loss_reg=losses_reg)\n\n    def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg,\n                   rescale=False):\n        assert len(cls_scores) == len(bbox_preds)\n        num_levels = len(cls_scores)\n\n        mlvl_anchors = [\n            self.anchor_generators[i].grid_anchors(cls_scores[i].size()[-2:],\n                                                   self.anchor_strides[i])\n            for i in range(num_levels)\n        ]\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                
bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               mlvl_anchors, img_shape,\n                                               scale_factor, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          mlvl_anchors,\n                          img_shape,\n                          scale_factor,\n                          cfg,\n                          rescale=False):\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,\n                                                 mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            cls_score = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    max_scores, _ = scores[:, 1:].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n            bboxes = delta2bbox(anchors, bbox_pred, 
self.target_means,\n                                self.target_stds, img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        if self.use_sigmoid_cls:\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)\n        det_bboxes, det_labels = multiclass_nms(\n            mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img)\n        return det_bboxes, det_labels\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/fcos_head.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core import (sigmoid_focal_loss, iou_loss, multi_apply,\n                        multiclass_nms, distance2bbox)\nfrom ..registry import HEADS\nfrom ..utils import bias_init_with_prob, Scale, ConvModule\n\nINF = 1e8\n\n\n@HEADS.register_module\nclass FCOSHead(nn.Module):\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 stacked_convs=4,\n                 strides=(4, 8, 16, 32, 64),\n                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),\n                                 (512, INF)),\n                 conv_cfg=None,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)):\n        super(FCOSHead, self).__init__()\n\n        self.num_classes = num_classes\n        self.cls_out_channels = num_classes - 1\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.regress_ranges = regress_ranges\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self._init_layers()\n\n    def _init_layers(self):\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.norm_cfg is None))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    
self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.norm_cfg is None))\n        self.fcos_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n        self.fcos_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n        self.fcos_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)\n\n        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])\n\n    def init_weights(self):\n        for m in self.cls_convs:\n            normal_init(m.conv, std=0.01)\n        for m in self.reg_convs:\n            normal_init(m.conv, std=0.01)\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.fcos_cls, std=0.01, bias=bias_cls)\n        normal_init(self.fcos_reg, std=0.01)\n        normal_init(self.fcos_centerness, std=0.01)\n\n    def forward(self, feats):\n        return multi_apply(self.forward_single, feats, self.scales)\n\n    def forward_single(self, x, scale):\n        cls_feat = x\n        reg_feat = x\n\n        for cls_layer in self.cls_convs:\n            cls_feat = cls_layer(cls_feat)\n        cls_score = self.fcos_cls(cls_feat)\n        centerness = self.fcos_centerness(cls_feat)\n\n        for reg_layer in self.reg_convs:\n            reg_feat = reg_layer(reg_feat)\n        # scale the bbox_pred of different level\n        bbox_pred = scale(self.fcos_reg(reg_feat)).exp()\n        return cls_score, bbox_pred, centerness\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             centernesses,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             cfg,\n             gt_bboxes_ignore=None):\n        assert len(cls_scores) == len(bbox_preds) == len(centernesses)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = 
self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                           bbox_preds[0].device)\n        labels, bbox_targets = self.fcos_target(all_level_points, gt_bboxes,\n                                                gt_labels)\n\n        num_imgs = cls_scores[0].size(0)\n        # flatten cls_scores, bbox_preds and centerness\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_centerness = [\n            centerness.permute(0, 2, 3, 1).reshape(-1)\n            for centerness in centernesses\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_centerness = torch.cat(flatten_centerness)\n        flatten_labels = torch.cat(labels)\n        flatten_bbox_targets = torch.cat(bbox_targets)\n        # repeat points to align with bbox_preds\n        flatten_points = torch.cat(\n            [points.repeat(num_imgs, 1) for points in all_level_points])\n\n        pos_inds = flatten_labels.nonzero().reshape(-1)\n        num_pos = len(pos_inds)\n        loss_cls = sigmoid_focal_loss(\n            flatten_cls_scores, flatten_labels, cfg.gamma, cfg.alpha,\n            'none').sum()[None] / (num_pos + num_imgs)  # avoid num_pos is 0\n\n        pos_bbox_preds = flatten_bbox_preds[pos_inds]\n        pos_bbox_targets = flatten_bbox_targets[pos_inds]\n        pos_centerness = flatten_centerness[pos_inds]\n        pos_centerness_targets = self.centerness_target(pos_bbox_targets)\n\n        if num_pos > 0:\n            pos_points = flatten_points[pos_inds]\n            pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds)\n            pos_decoded_target_preds = distance2bbox(pos_points,\n   
                                                  pos_bbox_targets)\n            # centerness weighted iou loss\n            loss_reg = ((iou_loss(\n                pos_decoded_bbox_preds,\n                pos_decoded_target_preds,\n                reduction='none') * pos_centerness_targets).sum() /\n                        pos_centerness_targets.sum())[None]\n            loss_centerness = F.binary_cross_entropy_with_logits(\n                pos_centerness, pos_centerness_targets, reduction='mean')[None]\n        else:\n            loss_reg = pos_bbox_preds.sum()[None]\n            loss_centerness = pos_centerness.sum()[None]\n\n        return dict(\n            loss_cls=loss_cls,\n            loss_reg=loss_reg,\n            loss_centerness=loss_centerness)\n\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   centernesses,\n                   img_metas,\n                   cfg,\n                   rescale=None):\n        assert len(cls_scores) == len(bbox_preds)\n        num_levels = len(cls_scores)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,\n                                      bbox_preds[0].device)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            centerness_pred_list = [\n                centernesses[i][img_id].detach() for i in range(num_levels)\n            ]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            det_bboxes = self.get_bboxes_single(\n                cls_score_list, bbox_pred_list, centerness_pred_list,\n                mlvl_points, 
img_shape, scale_factor, cfg, rescale)\n            result_list.append(det_bboxes)\n        return result_list\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          centernesses,\n                          mlvl_points,\n                          img_shape,\n                          scale_factor,\n                          cfg,\n                          rescale=False):\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_centerness = []\n        for cls_score, bbox_pred, centerness, points in zip(\n                cls_scores, bbox_preds, centernesses, mlvl_points):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()\n\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                max_scores, _ = (scores * centerness[:, None]).max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                points = points[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                centerness = centerness[topk_inds]\n            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_centerness.append(centerness)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n        mlvl_scores = torch.cat([padding, 
mlvl_scores], dim=1)\n        mlvl_centerness = torch.cat(mlvl_centerness)\n        det_bboxes, det_labels = multiclass_nms(\n            mlvl_bboxes,\n            mlvl_scores,\n            cfg.score_thr,\n            cfg.nms,\n            cfg.max_per_img,\n            score_factors=mlvl_centerness)\n        return det_bboxes, det_labels\n\n    def get_points(self, featmap_sizes, dtype, device):\n        \"\"\"Get points according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            dtype (torch.dtype): Type of points.\n            device (torch.device): Device of points.\n\n        Returns:\n            tuple: points of each image.\n        \"\"\"\n        mlvl_points = []\n        for i in range(len(featmap_sizes)):\n            mlvl_points.append(\n                self.get_points_single(featmap_sizes[i], self.strides[i],\n                                       dtype, device))\n        return mlvl_points\n\n    def get_points_single(self, featmap_size, stride, dtype, device):\n        h, w = featmap_size\n        x_range = torch.arange(\n            0, w * stride, stride, dtype=dtype, device=device)\n        y_range = torch.arange(\n            0, h * stride, stride, dtype=dtype, device=device)\n        y, x = torch.meshgrid(y_range, x_range)\n        points = torch.stack(\n            (x.reshape(-1), y.reshape(-1)), dim=-1) + stride // 2\n        return points\n\n    def fcos_target(self, points, gt_bboxes_list, gt_labels_list):\n        assert len(points) == len(self.regress_ranges)\n        num_levels = len(points)\n        # expand regress ranges to align with points\n        expanded_regress_ranges = [\n            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(\n                points[i]) for i in range(num_levels)\n        ]\n        # concat all levels points and regress ranges\n        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)\n        
concat_points = torch.cat(points, dim=0)\n        # get labels and bbox_targets of each image\n        labels_list, bbox_targets_list = multi_apply(\n            self.fcos_target_single,\n            gt_bboxes_list,\n            gt_labels_list,\n            points=concat_points,\n            regress_ranges=concat_regress_ranges)\n\n        # split to per img, per level\n        num_points = [center.size(0) for center in points]\n        labels_list = [labels.split(num_points, 0) for labels in labels_list]\n        bbox_targets_list = [\n            bbox_targets.split(num_points, 0)\n            for bbox_targets in bbox_targets_list\n        ]\n\n        # concat per level image\n        concat_lvl_labels = []\n        concat_lvl_bbox_targets = []\n        for i in range(num_levels):\n            concat_lvl_labels.append(\n                torch.cat([labels[i] for labels in labels_list]))\n            concat_lvl_bbox_targets.append(\n                torch.cat(\n                    [bbox_targets[i] for bbox_targets in bbox_targets_list]))\n        return concat_lvl_labels, concat_lvl_bbox_targets\n\n    def fcos_target_single(self, gt_bboxes, gt_labels, points, regress_ranges):\n        num_points = points.size(0)\n        num_gts = gt_labels.size(0)\n\n        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1) * (\n            gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1)\n        # TODO: figure out why these two are different\n        # areas = areas[None].expand(num_points, num_gts)\n        areas = areas[None].repeat(num_points, 1)\n        regress_ranges = regress_ranges[:, None, :].expand(\n            num_points, num_gts, 2)\n        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)\n        xs, ys = points[:, 0], points[:, 1]\n        xs = xs[:, None].expand(num_points, num_gts)\n        ys = ys[:, None].expand(num_points, num_gts)\n\n        left = xs - gt_bboxes[..., 0]\n        right = gt_bboxes[..., 2] - xs\n        top = ys - gt_bboxes[..., 1]\n        
bottom = gt_bboxes[..., 3] - ys\n        bbox_targets = torch.stack((left, top, right, bottom), -1)\n\n        # condition1: inside a gt bbox\n        inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0\n\n        # condition2: limit the regression range for each location\n        max_regress_distance = bbox_targets.max(-1)[0]\n        inside_regress_range = (\n            max_regress_distance >= regress_ranges[..., 0]) & (\n                max_regress_distance <= regress_ranges[..., 1])\n\n        # if there are still more than one objects for a location,\n        # we choose the one with minimal area\n        areas[inside_gt_bbox_mask == 0] = INF\n        areas[inside_regress_range == 0] = INF\n        min_area, min_area_inds = areas.min(dim=1)\n\n        labels = gt_labels[min_area_inds]\n        labels[min_area == INF] = 0\n        bbox_targets = bbox_targets[range(num_points), min_area_inds]\n\n        return labels, bbox_targets\n\n    def centerness_target(self, pos_bbox_targets):\n        # only calculate pos centerness targets, otherwise there may be nan\n        left_right = pos_bbox_targets[:, [0, 2]]\n        top_bottom = pos_bbox_targets[:, [1, 3]]\n        centerness_targets = (\n            left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * (\n                top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])\n        return torch.sqrt(centerness_targets)\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/retina_head.py",
    "content": "import numpy as np\nimport torch.nn as nn\nfrom mmcv.cnn import normal_init\n\nfrom .anchor_head import AnchorHead\nfrom ..registry import HEADS\nfrom ..utils import bias_init_with_prob, ConvModule\n\n\n@HEADS.register_module\nclass RetinaHead(AnchorHead):\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 octave_base_scale=4,\n                 scales_per_octave=3,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 **kwargs):\n        self.stacked_convs = stacked_convs\n        self.octave_base_scale = octave_base_scale\n        self.scales_per_octave = scales_per_octave\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        octave_scales = np.array(\n            [2**(i / scales_per_octave) for i in range(scales_per_octave)])\n        anchor_scales = octave_scales * octave_base_scale\n        super(RetinaHead, self).__init__(\n            num_classes,\n            in_channels,\n            anchor_scales=anchor_scales,\n            use_sigmoid_cls=True,\n            cls_focal_loss=True,\n            **kwargs)\n\n    def _init_layers(self):\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n            
        conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.retina_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_anchors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.retina_reg = nn.Conv2d(\n            self.feat_channels, self.num_anchors * 4, 3, padding=1)\n\n    def init_weights(self):\n        for m in self.cls_convs:\n            normal_init(m.conv, std=0.01)\n        for m in self.reg_convs:\n            normal_init(m.conv, std=0.01)\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.retina_cls, std=0.01, bias=bias_cls)\n        normal_init(self.retina_reg, std=0.01)\n\n    def forward_single(self, x):\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.retina_cls(cls_feat)\n        bbox_pred = self.retina_reg(reg_feat)\n        return cls_score, bbox_pred\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/rpn_head.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import normal_init\n\nfrom mmdet.core import delta2bbox\nfrom mmdet.ops import nms\nfrom .anchor_head import AnchorHead\nfrom ..registry import HEADS\n\n\n@HEADS.register_module\nclass RPNHead(AnchorHead):\n\n    def __init__(self, in_channels, **kwargs):\n        super(RPNHead, self).__init__(2, in_channels, **kwargs)\n\n    def _init_layers(self):\n        self.rpn_conv = nn.Conv2d(\n            self.in_channels, self.feat_channels, 3, padding=1)\n        self.rpn_cls = nn.Conv2d(self.feat_channels,\n                                 self.num_anchors * self.cls_out_channels, 1)\n        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)\n\n    def init_weights(self):\n        normal_init(self.rpn_conv, std=0.01)\n        normal_init(self.rpn_cls, std=0.01)\n        normal_init(self.rpn_reg, std=0.01)\n\n    def forward_single(self, x):\n        x = self.rpn_conv(x)\n        x = F.relu(x, inplace=True)\n        rpn_cls_score = self.rpn_cls(x)\n        rpn_bbox_pred = self.rpn_reg(x)\n        return rpn_cls_score, rpn_bbox_pred\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             img_metas,\n             cfg,\n             gt_bboxes_ignore=None):\n        losses = super(RPNHead, self).loss(\n            cls_scores,\n            bbox_preds,\n            gt_bboxes,\n            None,\n            img_metas,\n            cfg,\n            gt_bboxes_ignore=gt_bboxes_ignore)\n        return dict(\n            loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg'])\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          mlvl_anchors,\n                          img_shape,\n                          scale_factor,\n                          cfg,\n                          rescale=False):\n        \"\"\"Build the proposal tensor for one image from per-level outputs.\n\n        Each level is filtered (top-k by score, minimum box size), run\n        through NMS and truncated; the levels are then concatenated and\n        reduced according to ``cfg.nms_across_levels``.\n\n        Returns:\n            Tensor: one row per proposal, the decoded box followed by its\n                score in the last column.\n        \"\"\"\n        mlvl_proposals = []\n        for idx in range(len(cls_scores)):\n            rpn_cls_score = cls_scores[idx]\n            rpn_bbox_pred = bbox_preds[idx]\n            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]\n            anchors = mlvl_anchors[idx]\n            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)\n            if self.use_sigmoid_cls:\n                rpn_cls_score = rpn_cls_score.reshape(-1)\n                scores = rpn_cls_score.sigmoid()\n            else:\n                rpn_cls_score = rpn_cls_score.reshape(-1, 2)\n                scores = rpn_cls_score.softmax(dim=1)[:, 1]\n            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:\n                _, topk_inds = scores.topk(cfg.nms_pre)\n                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]\n                anchors = anchors[topk_inds, :]\n                scores = scores[topk_inds]\n            proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means,\n                                   self.target_stds, img_shape)\n            if cfg.min_bbox_size > 0:\n                w = proposals[:, 2] - proposals[:, 0] + 1\n                h = proposals[:, 3] - proposals[:, 1] + 1\n                # view(-1) keeps the index 1-D even when exactly one box\n                # survives; squeeze() would yield a 0-d index there and\n                # collapse `proposals` to a 1-D tensor.\n                valid_inds = torch.nonzero((w >= cfg.min_bbox_size) &\n                                           (h >= cfg.min_bbox_size)).view(-1)\n                proposals = proposals[valid_inds, :]\n                scores = scores[valid_inds]\n            proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)\n            proposals, _ = nms(proposals, cfg.nms_thr)\n            proposals = proposals[:cfg.nms_post, :]\n            mlvl_proposals.append(proposals)\n        proposals = torch.cat(mlvl_proposals, 0)\n        if cfg.nms_across_levels:\n            proposals, _ = nms(proposals, cfg.nms_thr)\n            proposals = proposals[:cfg.max_num, :]\n        else:\n            scores = proposals[:, 4]\n            num = min(cfg.max_num, proposals.shape[0])\n            _, topk_inds = scores.topk(num)\n            proposals = proposals[topk_inds, :]\n        return proposals\n"
  },
  {
    "path": "mmdetection/mmdet/models/anchor_heads/ssd_head.py",
    "content": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\nfrom mmdet.core import (AnchorGenerator, anchor_target, weighted_smoothl1,\n                        multi_apply)\nfrom .anchor_head import AnchorHead\nfrom ..registry import HEADS\n\n\n@HEADS.register_module\nclass SSDHead(AnchorHead):\n\n    def __init__(self,\n                 input_size=300,\n                 num_classes=81,\n                 in_channels=(512, 1024, 512, 256, 256, 256),\n                 anchor_strides=(8, 16, 32, 64, 100, 300),\n                 basesize_ratio_range=(0.1, 0.9),\n                 anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),\n                 target_means=(.0, .0, .0, .0),\n                 target_stds=(1.0, 1.0, 1.0, 1.0)):\n        super(AnchorHead, self).__init__()\n        self.input_size = input_size\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.cls_out_channels = num_classes\n        num_anchors = [len(ratios) * 2 + 2 for ratios in anchor_ratios]\n        reg_convs = []\n        cls_convs = []\n        for i in range(len(in_channels)):\n            reg_convs.append(\n                nn.Conv2d(\n                    in_channels[i],\n                    num_anchors[i] * 4,\n                    kernel_size=3,\n                    padding=1))\n            cls_convs.append(\n                nn.Conv2d(\n                    in_channels[i],\n                    num_anchors[i] * num_classes,\n                    kernel_size=3,\n                    padding=1))\n        self.reg_convs = nn.ModuleList(reg_convs)\n        self.cls_convs = nn.ModuleList(cls_convs)\n\n        min_ratio, max_ratio = basesize_ratio_range\n        min_ratio = int(min_ratio * 100)\n        max_ratio = int(max_ratio * 100)\n        step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))\n        min_sizes = []\n        max_sizes = []\n        for r in 
range(int(min_ratio), int(max_ratio) + 1, step):\n            min_sizes.append(int(input_size * r / 100))\n            max_sizes.append(int(input_size * (r + step) / 100))\n        if input_size == 300:\n            if basesize_ratio_range[0] == 0.15:  # SSD300 COCO\n                min_sizes.insert(0, int(input_size * 7 / 100))\n                max_sizes.insert(0, int(input_size * 15 / 100))\n            elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC\n                min_sizes.insert(0, int(input_size * 10 / 100))\n                max_sizes.insert(0, int(input_size * 20 / 100))\n        elif input_size == 512:\n            if basesize_ratio_range[0] == 0.1:  # SSD512 COCO\n                min_sizes.insert(0, int(input_size * 4 / 100))\n                max_sizes.insert(0, int(input_size * 10 / 100))\n            elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC\n                min_sizes.insert(0, int(input_size * 7 / 100))\n                max_sizes.insert(0, int(input_size * 15 / 100))\n        self.anchor_generators = []\n        self.anchor_strides = anchor_strides\n        for k in range(len(anchor_strides)):\n            base_size = min_sizes[k]\n            stride = anchor_strides[k]\n            ctr = ((stride - 1) / 2., (stride - 1) / 2.)\n            scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]\n            ratios = [1.]\n            for r in anchor_ratios[k]:\n                ratios += [1 / r, r]  # 4 or 6 ratio\n            anchor_generator = AnchorGenerator(\n                base_size, scales, ratios, scale_major=False, ctr=ctr)\n            indices = list(range(len(ratios)))\n            indices.insert(1, len(indices))\n            anchor_generator.base_anchors = torch.index_select(\n                anchor_generator.base_anchors, 0, torch.LongTensor(indices))\n            self.anchor_generators.append(anchor_generator)\n\n        self.target_means = target_means\n        self.target_stds = target_stds\n        self.use_sigmoid_cls = 
False\n        self.cls_focal_loss = False\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                xavier_init(m, distribution='uniform', bias=0)\n\n    def forward(self, feats):\n        cls_scores = []\n        bbox_preds = []\n        for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,\n                                            self.cls_convs):\n            cls_scores.append(cls_conv(feat))\n            bbox_preds.append(reg_conv(feat))\n        return cls_scores, bbox_preds\n\n    def loss_single(self, cls_score, bbox_pred, labels, label_weights,\n                    bbox_targets, bbox_weights, num_total_samples, cfg):\n        loss_cls_all = F.cross_entropy(\n            cls_score, labels, reduction='none') * label_weights\n        pos_inds = (labels > 0).nonzero().view(-1)\n        neg_inds = (labels == 0).nonzero().view(-1)\n\n        num_pos_samples = pos_inds.size(0)\n        num_neg_samples = cfg.neg_pos_ratio * num_pos_samples\n        if num_neg_samples > neg_inds.size(0):\n            num_neg_samples = neg_inds.size(0)\n        topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)\n        loss_cls_pos = loss_cls_all[pos_inds].sum()\n        loss_cls_neg = topk_loss_cls_neg.sum()\n        loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples\n\n        loss_reg = weighted_smoothl1(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            beta=cfg.smoothl1_beta,\n            avg_factor=num_total_samples)\n        return loss_cls[None], loss_reg\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             cfg,\n             gt_bboxes_ignore=None):\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == len(self.anchor_generators)\n\n        anchor_list, valid_flag_list = 
self.get_anchors(\n            featmap_sizes, img_metas)\n        cls_reg_targets = anchor_target(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            self.target_means,\n            self.target_stds,\n            cfg,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=1,\n            sampling=False,\n            unmap_outputs=False)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n\n        num_images = len(img_metas)\n        all_cls_scores = torch.cat([\n            s.permute(0, 2, 3, 1).reshape(\n                num_images, -1, self.cls_out_channels) for s in cls_scores\n        ], 1)\n        all_labels = torch.cat(labels_list, -1).view(num_images, -1)\n        all_label_weights = torch.cat(label_weights_list, -1).view(\n            num_images, -1)\n        all_bbox_preds = torch.cat([\n            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)\n            for b in bbox_preds\n        ], -2)\n        all_bbox_targets = torch.cat(bbox_targets_list, -2).view(\n            num_images, -1, 4)\n        all_bbox_weights = torch.cat(bbox_weights_list, -2).view(\n            num_images, -1, 4)\n\n        losses_cls, losses_reg = multi_apply(\n            self.loss_single,\n            all_cls_scores,\n            all_bbox_preds,\n            all_labels,\n            all_label_weights,\n            all_bbox_targets,\n            all_bbox_weights,\n            num_total_samples=num_total_pos,\n            cfg=cfg)\n        return dict(loss_cls=losses_cls, loss_reg=losses_reg)\n"
  },
  {
    "path": "mmdetection/mmdet/models/backbones/__init__.py",
    "content": "from .resnet import ResNet, make_res_layer\nfrom .resnext import ResNeXt\nfrom .ssd_vgg import SSDVGG\n\n__all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG']\n"
  },
  {
    "path": "mmdetection/mmdet/models/backbones/resnet.py",
    "content": "import logging\n\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmcv.cnn import constant_init, kaiming_init\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet.ops import DeformConv, ModulatedDeformConv\nfrom ..registry import BACKBONES\nfrom ..utils import build_conv_layer, build_norm_layer\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 dilation=1,\n                 downsample=None,\n                 style='pytorch',\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None):\n        super(BasicBlock, self).__init__()\n        assert dcn is None, \"Not implemented yet.\"\n\n        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)\n\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            inplanes,\n            planes,\n            3,\n            stride=stride,\n            padding=dilation,\n            dilation=dilation,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        self.conv2 = build_conv_layer(\n            conv_cfg,\n            planes,\n            planes,\n            3,\n            padding=1,\n            bias=False)\n        self.add_module(self.norm2_name, norm2)\n\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n        self.dilation = dilation\n        assert not with_cp\n\n    @property\n    def norm1(self):\n        return getattr(self, self.norm1_name)\n\n    @property\n    def norm2(self):\n        return getattr(self, self.norm2_name)\n\n    def forward(self, x):\n        identity = x\n\n        out = self.conv1(x)\n        out = self.norm1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.norm2(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 dilation=1,\n                 downsample=None,\n                 style='pytorch',\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None):\n        \"\"\"Bottleneck block for ResNet.\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer,\n        if it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottleneck, self).__init__()\n        assert style in ['pytorch', 'caffe']\n        assert dcn is None or isinstance(dcn, dict)\n        self.inplanes = inplanes\n        self.planes = planes\n        self.stride = stride\n        self.dilation = dilation\n        self.style = style\n        self.with_cp = with_cp\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.dcn = dcn\n        self.with_dcn = dcn is not None\n        if self.style == 'pytorch':\n            self.conv1_stride = 1\n            self.conv2_stride = stride\n        else:\n            self.conv1_stride = stride\n            self.conv2_stride = 1\n\n        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)\n        self.norm3_name, norm3 = build_norm_layer(\n            norm_cfg, planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            inplanes,\n            planes,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        fallback_on_stride = False\n        self.with_modulated_dcn = False\n        if self.with_dcn:\n            fallback_on_stride = dcn.get('fallback_on_stride', False)\n            self.with_modulated_dcn = dcn.get('modulated', False)\n        if not self.with_dcn or fallback_on_stride:\n            self.conv2 = build_conv_layer(\n                conv_cfg,\n                planes,\n                planes,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=dilation,\n                dilation=dilation,\n                bias=False)\n        else:\n            assert conv_cfg is None, 'conv_cfg must be None for DCN'\n            deformable_groups = dcn.get('deformable_groups', 1)\n            if not self.with_modulated_dcn:\n                conv_op = DeformConv\n                offset_channels = 18\n            else:\n                conv_op = ModulatedDeformConv\n                offset_channels = 27\n            self.conv2_offset = nn.Conv2d(\n                planes,\n                deformable_groups * offset_channels,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=dilation,\n                dilation=dilation)\n            self.conv2 = conv_op(\n                planes,\n                planes,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=dilation,\n                dilation=dilation,\n                deformable_groups=deformable_groups,\n                bias=False)\n        self.add_module(self.norm2_name, norm2)\n        self.conv3 = build_conv_layer(\n            conv_cfg,\n            planes,\n            planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    @property\n    def norm1(self):\n        return getattr(self, self.norm1_name)\n\n    @property\n    def norm2(self):\n        return getattr(self, self.norm2_name)\n\n    @property\n    def norm3(self):\n        return getattr(self, self.norm3_name)\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            identity = x\n\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            # conv2 is a plain conv both when DCN is disabled and when\n            # dcn['fallback_on_stride'] replaced it in __init__; only the\n            # deformable path registers conv2_offset.\n            if not self.with_dcn or not hasattr(self, 'conv2_offset'):\n                out = self.conv2(out)\n            elif self.with_modulated_dcn:\n                offset_mask = self.conv2_offset(out)\n                offset = offset_mask[:, :18, :, :]\n                mask = offset_mask[:, -9:, :, :].sigmoid()\n                out = self.conv2(out, offset, mask)\n            else:\n                offset = self.conv2_offset(out)\n                out = self.conv2(out, offset)\n            out = self.norm2(out)\n            out = self.relu(out)\n\n            out = self.conv3(out)\n            out = self.norm3(out)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        out = self.relu(out)\n\n        return out\n\n\ndef make_res_layer(block,\n                   inplanes,\n                   planes,\n                   blocks,\n                   stride=1,\n                   dilation=1,\n                   style='pytorch',\n                   with_cp=False,\n                   conv_cfg=None,\n                   norm_cfg=dict(type='BN'),\n                   dcn=None):\n    downsample = None\n    if stride != 1 or inplanes != planes * block.expansion:\n        downsample = nn.Sequential(\n            build_conv_layer(\n                conv_cfg,\n                inplanes,\n                planes * block.expansion,\n                kernel_size=1,\n                stride=stride,\n                bias=False),\n            build_norm_layer(norm_cfg, planes * block.expansion)[1],\n        )\n\n    layers = []\n    layers.append(\n        block(\n            inplanes,\n            planes,\n            stride,\n            dilation,\n            downsample,\n            style=style,\n            with_cp=with_cp,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            dcn=dcn))\n    inplanes = planes * block.expansion\n    for i in range(1, blocks):\n        layers.append(\n            block(\n                inplanes,\n                planes,\n                1,\n                dilation,\n                style=style,\n                with_cp=with_cp,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                dcn=dcn))\n\n    return nn.Sequential(*layers)\n\n\n@BACKBONES.register_module\nclass ResNet(nn.Module):\n    \"\"\"ResNet backbone.\n\n    Args:\n        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.\n        num_stages (int): Resnet stages, normally 4.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\n            -1 means not freezing any parameters.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n    \"\"\"\n\n    arch_settings = {\n        18: (BasicBlock, (2, 2, 2, 2)),\n        34: (BasicBlock, (3, 4, 6, 3)),\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self,\n                 depth,\n                 num_stages=4,\n                 strides=(1, 2, 2, 2),\n                 dilations=(1, 1, 1, 1),\n                 out_indices=(0, 1, 2, 3),\n                 style='pytorch',\n                 frozen_stages=-1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 norm_eval=True,\n                 dcn=None,\n                 stage_with_dcn=(False, False, False, False),\n                 with_cp=False,\n                 zero_init_residual=True):\n        super(ResNet, self).__init__()\n        if depth not in self.arch_settings:\n            raise KeyError('invalid depth {} for resnet'.format(depth))\n        self.depth = depth\n        self.num_stages = num_stages\n        assert num_stages >= 1 and num_stages <= 4\n        self.strides = strides\n        self.dilations = dilations\n        assert len(strides) == len(dilations) == num_stages\n        self.out_indices = out_indices\n        assert max(out_indices) < num_stages\n        self.style = style\n        self.frozen_stages = frozen_stages\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.with_cp = with_cp\n        self.norm_eval = norm_eval\n        self.dcn = dcn\n        self.stage_with_dcn = stage_with_dcn\n        if dcn is not None:\n            assert len(stage_with_dcn) == num_stages\n        self.zero_init_residual = zero_init_residual\n        self.block, stage_blocks = self.arch_settings[depth]\n        self.stage_blocks = stage_blocks[:num_stages]\n        self.inplanes = 64\n\n        self._make_stem_layer()\n\n        self.res_layers = []\n        for i, num_blocks in enumerate(self.stage_blocks):\n            stride = strides[i]\n            dilation = dilations[i]\n            dcn = self.dcn if self.stage_with_dcn[i] else None\n            planes = 64 * 2**i\n            res_layer = make_res_layer(\n                self.block,\n                self.inplanes,\n                planes,\n                num_blocks,\n                stride=stride,\n                dilation=dilation,\n                style=self.style,\n                with_cp=with_cp,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                dcn=dcn)\n            self.inplanes = planes * self.block.expansion\n            layer_name = 'layer{}'.format(i + 1)\n            self.add_module(layer_name, res_layer)\n            self.res_layers.append(layer_name)\n\n        self._freeze_stages()\n\n        self.feat_dim = self.block.expansion * 64 * 2**(\n            len(self.stage_blocks) - 1)\n\n    @property\n    def norm1(self):\n        return getattr(self, self.norm1_name)\n\n    def _make_stem_layer(self):\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            3,\n            64,\n            kernel_size=7,\n            stride=2,\n            padding=3,\n            bias=False)\n        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)\n        self.add_module(self.norm1_name, norm1)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            self.norm1.eval()\n            for m in [self.conv1, self.norm1]:\n                for param in m.parameters():\n                    param.requires_grad = False\n\n        for i in range(1, self.frozen_stages + 1):\n            m = getattr(self, 'layer{}'.format(i))\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def init_weights(self, pretrained=None):\n        if isinstance(pretrained, str):\n            logger = logging.getLogger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n        elif pretrained is None:\n            for m in self.modules():\n                if isinstance(m, nn.Conv2d):\n                    kaiming_init(m)\n                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):\n                    constant_init(m, 1)\n\n            if self.dcn is not None:\n                for m in self.modules():\n                    if isinstance(m, Bottleneck) and hasattr(\n                            m, 'conv2_offset'):\n                        constant_init(m.conv2_offset, 0)\n\n            if self.zero_init_residual:\n                for m in self.modules():\n                    if isinstance(m, Bottleneck):\n                        constant_init(m.norm3, 0)\n                    elif isinstance(m, BasicBlock):\n                        constant_init(m.norm2, 0)\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.norm1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            x = res_layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n\n    def train(self, mode=True):\n        super(ResNet, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval have effect on BatchNorm only\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n"
  },
  {
    "path": "mmdetection/mmdet/models/backbones/resnext.py",
    "content": "import math\n\nimport torch.nn as nn\n\nfrom mmdet.ops import DeformConv, ModulatedDeformConv\nfrom .resnet import Bottleneck as _Bottleneck\nfrom .resnet import ResNet\nfrom ..registry import BACKBONES\nfrom ..utils import build_conv_layer, build_norm_layer\n\n\nclass Bottleneck(_Bottleneck):\n\n    def __init__(self, *args, groups=1, base_width=4, **kwargs):\n        \"\"\"Bottleneck block for ResNeXt.\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer,\n        if it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottleneck, self).__init__(*args, **kwargs)\n\n        if groups == 1:\n            width = self.planes\n        else:\n            width = math.floor(self.planes * (base_width / 64)) * groups\n\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, width, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(\n            self.norm_cfg, width, postfix=2)\n        self.norm3_name, norm3 = build_norm_layer(\n            self.norm_cfg, self.planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            self.inplanes,\n            width,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        fallback_on_stride = False\n        self.with_modulated_dcn = False\n        if self.with_dcn:\n            fallback_on_stride = self.dcn.get('fallback_on_stride', False)\n            self.with_modulated_dcn = self.dcn.get('modulated', False)\n        if not self.with_dcn or fallback_on_stride:\n            self.conv2 = build_conv_layer(\n                self.conv_cfg,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                
groups=groups,\n                bias=False)\n        else:\n            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'\n            groups = self.dcn.get('groups', 1)\n            deformable_groups = self.dcn.get('deformable_groups', 1)\n            if not self.with_modulated_dcn:\n                conv_op = DeformConv\n                offset_channels = 18\n            else:\n                conv_op = ModulatedDeformConv\n                offset_channels = 27\n            self.conv2_offset = nn.Conv2d(\n                width,\n                deformable_groups * offset_channels,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation)\n            self.conv2 = conv_op(\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                deformable_groups=deformable_groups,\n                bias=False)\n        self.add_module(self.norm2_name, norm2)\n        self.conv3 = build_conv_layer(\n            self.conv_cfg,\n            width,\n            self.planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n\ndef make_res_layer(block,\n                   inplanes,\n                   planes,\n                   blocks,\n                   stride=1,\n                   dilation=1,\n                   groups=1,\n                   base_width=4,\n                   style='pytorch',\n                   with_cp=False,\n                   conv_cfg=None,\n                   norm_cfg=dict(type='BN'),\n                   dcn=None):\n    downsample = None\n    if stride != 1 or inplanes != planes * block.expansion:\n        downsample = nn.Sequential(\n            build_conv_layer(\n                conv_cfg,\n  
              inplanes,\n                planes * block.expansion,\n                kernel_size=1,\n                stride=stride,\n                bias=False),\n            build_norm_layer(norm_cfg, planes * block.expansion)[1],\n        )\n\n    layers = []\n    layers.append(\n        block(\n            inplanes,\n            planes,\n            stride=stride,\n            dilation=dilation,\n            downsample=downsample,\n            groups=groups,\n            base_width=base_width,\n            style=style,\n            with_cp=with_cp,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            dcn=dcn))\n    inplanes = planes * block.expansion\n    for i in range(1, blocks):\n        layers.append(\n            block(\n                inplanes,\n                planes,\n                stride=1,\n                dilation=dilation,\n                groups=groups,\n                base_width=base_width,\n                style=style,\n                with_cp=with_cp,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                dcn=dcn))\n\n    return nn.Sequential(*layers)\n\n\n@BACKBONES.register_module\nclass ResNeXt(ResNet):\n    \"\"\"ResNeXt backbone.\n\n    Args:\n        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.\n        num_stages (int): Resnet stages, normally 4.\n        groups (int): Group of resnext.\n        base_width (int): Base width of resnext.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (all param fixed). 
-1 means\n            not freezing any parameters.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self, groups=1, base_width=4, **kwargs):\n        super(ResNeXt, self).__init__(**kwargs)\n        self.groups = groups\n        self.base_width = base_width\n\n        self.inplanes = 64\n        self.res_layers = []\n        for i, num_blocks in enumerate(self.stage_blocks):\n            stride = self.strides[i]\n            dilation = self.dilations[i]\n            dcn = self.dcn if self.stage_with_dcn[i] else None\n            planes = 64 * 2**i\n            res_layer = make_res_layer(\n                self.block,\n                self.inplanes,\n                planes,\n                num_blocks,\n                stride=stride,\n                dilation=dilation,\n                groups=self.groups,\n                base_width=self.base_width,\n                style=self.style,\n                with_cp=self.with_cp,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg,\n                dcn=dcn)\n            self.inplanes = planes * self.block.expansion\n            layer_name = 'layer{}'.format(i + 1)\n            self.add_module(layer_name, res_layer)\n            self.res_layers.append(layer_name)\n\n        self._freeze_stages()\n"
  },
  {
    "path": "mmdetection/mmdet/models/backbones/ssd_vgg.py",
    "content": "import logging\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init,\n                      normal_init)\nfrom mmcv.runner import load_checkpoint\nfrom ..registry import BACKBONES\n\n\n@BACKBONES.register_module\nclass SSDVGG(VGG):\n    extra_setting = {\n        300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),\n        512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),\n    }\n\n    def __init__(self,\n                 input_size,\n                 depth,\n                 with_last_pool=False,\n                 ceil_mode=True,\n                 out_indices=(3, 4),\n                 out_feature_indices=(22, 34),\n                 l2_norm_scale=20.):\n        super(SSDVGG, self).__init__(\n            depth,\n            with_last_pool=with_last_pool,\n            ceil_mode=ceil_mode,\n            out_indices=out_indices)\n        assert input_size in (300, 512)\n        self.input_size = input_size\n\n        self.features.add_module(\n            str(len(self.features)),\n            nn.MaxPool2d(kernel_size=3, stride=1, padding=1))\n        self.features.add_module(\n            str(len(self.features)),\n            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))\n        self.features.add_module(\n            str(len(self.features)), nn.ReLU(inplace=True))\n        self.features.add_module(\n            str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))\n        self.features.add_module(\n            str(len(self.features)), nn.ReLU(inplace=True))\n        self.out_feature_indices = out_feature_indices\n\n        self.inplanes = 1024\n        self.extra = self._make_extra_layers(self.extra_setting[input_size])\n        self.l2_norm = L2Norm(\n            self.features[out_feature_indices[0] - 1].out_channels,\n            l2_norm_scale)\n\n    def init_weights(self, pretrained=None):\n        if 
isinstance(pretrained, str):\n            logger = logging.getLogger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n        elif pretrained is None:\n            for m in self.features.modules():\n                if isinstance(m, nn.Conv2d):\n                    kaiming_init(m)\n                elif isinstance(m, nn.BatchNorm2d):\n                    constant_init(m, 1)\n                elif isinstance(m, nn.Linear):\n                    normal_init(m, std=0.01)\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        for m in self.extra.modules():\n            if isinstance(m, nn.Conv2d):\n                xavier_init(m, distribution='uniform')\n\n        constant_init(self.l2_norm, self.l2_norm.scale)\n\n    def forward(self, x):\n        outs = []\n        for i, layer in enumerate(self.features):\n            x = layer(x)\n            if i in self.out_feature_indices:\n                outs.append(x)\n        for i, layer in enumerate(self.extra):\n            x = F.relu(layer(x), inplace=True)\n            if i % 2 == 1:\n                outs.append(x)\n        outs[0] = self.l2_norm(outs[0])\n        if len(outs) == 1:\n            return outs[0]\n        else:\n            return tuple(outs)\n\n    def _make_extra_layers(self, outplanes):\n        layers = []\n        kernel_sizes = (1, 3)\n        num_layers = 0\n        outplane = None\n        for i in range(len(outplanes)):\n            if self.inplanes == 'S':\n                self.inplanes = outplane\n                continue\n            k = kernel_sizes[num_layers % 2]\n            if outplanes[i] == 'S':\n                outplane = outplanes[i + 1]\n                conv = nn.Conv2d(\n                    self.inplanes, outplane, k, stride=2, padding=1)\n            else:\n                outplane = outplanes[i]\n                conv = nn.Conv2d(\n                    self.inplanes, outplane, k, stride=1, padding=0)\n            
layers.append(conv)\n            self.inplanes = outplanes[i]\n            num_layers += 1\n        if self.input_size == 512:\n            layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1))\n\n        return nn.Sequential(*layers)\n\n\nclass L2Norm(nn.Module):\n\n    def __init__(self, n_dims, scale=20., eps=1e-10):\n        super(L2Norm, self).__init__()\n        self.n_dims = n_dims\n        self.weight = nn.Parameter(torch.Tensor(self.n_dims))\n        self.eps = eps\n        self.scale = scale\n\n    def forward(self, x):\n        norm = x.pow(2).sum(1, keepdim=True).sqrt() + self.eps\n        return self.weight[None, :, None, None].expand_as(x) * x / norm\n"
  },
  {
    "path": "mmdetection/mmdet/models/bbox_heads/__init__.py",
    "content": "from .bbox_head import BBoxHead\nfrom .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead\n\n__all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead']\n"
  },
  {
    "path": "mmdetection/mmdet/models/bbox_heads/bbox_head.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.core import (delta2bbox, multiclass_nms, bbox_target,\n                        weighted_cross_entropy, weighted_smoothl1, accuracy)\nfrom ..registry import HEADS\n\n\n@HEADS.register_module\nclass BBoxHead(nn.Module):\n    \"\"\"Simplest RoI head, with only two fc layers for classification and\n    regression respectively\"\"\"\n\n    def __init__(self,\n                 with_avg_pool=False,\n                 with_cls=True,\n                 with_reg=True,\n                 roi_feat_size=7,\n                 in_channels=256,\n                 num_classes=81,\n                 target_means=[0., 0., 0., 0.],\n                 target_stds=[0.1, 0.1, 0.2, 0.2],\n                 reg_class_agnostic=False):\n        super(BBoxHead, self).__init__()\n        assert with_cls or with_reg\n        self.with_avg_pool = with_avg_pool\n        self.with_cls = with_cls\n        self.with_reg = with_reg\n        self.roi_feat_size = roi_feat_size\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.target_means = target_means\n        self.target_stds = target_stds\n        self.reg_class_agnostic = reg_class_agnostic\n\n        in_channels = self.in_channels\n        if self.with_avg_pool:\n            self.avg_pool = nn.AvgPool2d(roi_feat_size)\n        else:\n            in_channels *= (self.roi_feat_size * self.roi_feat_size)\n        if self.with_cls:\n            self.fc_cls = nn.Linear(in_channels, num_classes)\n        if self.with_reg:\n            out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes\n            self.fc_reg = nn.Linear(in_channels, out_dim_reg)\n        self.debug_imgs = None\n\n    def init_weights(self):\n        if self.with_cls:\n            nn.init.normal_(self.fc_cls.weight, 0, 0.01)\n            nn.init.constant_(self.fc_cls.bias, 0)\n        if self.with_reg:\n            
nn.init.normal_(self.fc_reg.weight, 0, 0.001)\n            nn.init.constant_(self.fc_reg.bias, 0)\n\n    def forward(self, x):\n        if self.with_avg_pool:\n            x = self.avg_pool(x)\n        x = x.view(x.size(0), -1)\n        cls_score = self.fc_cls(x) if self.with_cls else None\n        bbox_pred = self.fc_reg(x) if self.with_reg else None\n        return cls_score, bbox_pred\n\n    def get_target(self, sampling_results, gt_bboxes, gt_labels,\n                   rcnn_train_cfg):\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        neg_proposals = [res.neg_bboxes for res in sampling_results]\n        pos_gt_bboxes = [res.pos_gt_bboxes for res in sampling_results]\n        pos_gt_labels = [res.pos_gt_labels for res in sampling_results]\n        reg_classes = 1 if self.reg_class_agnostic else self.num_classes\n        cls_reg_targets = bbox_target(\n            pos_proposals,\n            neg_proposals,\n            pos_gt_bboxes,\n            pos_gt_labels,\n            rcnn_train_cfg,\n            reg_classes,\n            target_means=self.target_means,\n            target_stds=self.target_stds)\n        return cls_reg_targets\n\n    def loss(self,\n             cls_score,\n             bbox_pred,\n             labels,\n             label_weights,\n             bbox_targets,\n             bbox_weights,\n             reduce=True):\n        losses = dict()\n        if cls_score is not None:\n            losses['loss_cls'] = weighted_cross_entropy(\n                cls_score, labels, label_weights, reduce=reduce)\n            losses['acc'] = accuracy(cls_score, labels)\n        if bbox_pred is not None:\n            pos_inds = labels > 0\n            if self.reg_class_agnostic:\n                pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]\n            else:\n                pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), -1,\n                                               4)[pos_inds, labels[pos_inds]]\n        
    losses['loss_reg'] = weighted_smoothl1(\n                pos_bbox_pred,\n                bbox_targets[pos_inds],\n                bbox_weights[pos_inds],\n                avg_factor=bbox_targets.size(0))\n        return losses\n\n    def get_det_bboxes(self,\n                       rois,\n                       cls_score,\n                       bbox_pred,\n                       img_shape,\n                       scale_factor,\n                       rescale=False,\n                       cfg=None):\n        if isinstance(cls_score, list):\n            cls_score = sum(cls_score) / float(len(cls_score))\n        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None\n\n        if bbox_pred is not None:\n            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,\n                                self.target_stds, img_shape)\n        else:\n            bboxes = rois[:, 1:]\n            # TODO: add clip here\n\n        if rescale:\n            bboxes /= scale_factor\n\n        if cfg is None:\n            return bboxes, scores\n        else:\n            det_bboxes, det_labels = multiclass_nms(\n                bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)\n\n            return det_bboxes, det_labels\n\n    def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):\n        \"\"\"Refine bboxes during training.\n\n        Args:\n            rois (Tensor): Shape (n*bs, 5), where n is image number per GPU,\n                and bs is the sampled RoIs per image.\n            labels (Tensor): Shape (n*bs, ).\n            bbox_preds (Tensor): Shape (n*bs, 4) or (n*bs, 4*#class).\n            pos_is_gts (list[Tensor]): Flags indicating if each positive bbox\n                is a gt bbox.\n            img_metas (list[dict]): Meta info of each image.\n\n        Returns:\n            list[Tensor]: Refined bboxes of each image in a mini-batch.\n        \"\"\"\n        img_ids = rois[:, 0].long().unique(sorted=True)\n   
     assert img_ids.numel() == len(img_metas)\n\n        bboxes_list = []\n        for i in range(len(img_metas)):\n            inds = torch.nonzero(rois[:, 0] == i).squeeze()\n            num_rois = inds.numel()\n\n            bboxes_ = rois[inds, 1:]\n            label_ = labels[inds]\n            bbox_pred_ = bbox_preds[inds]\n            img_meta_ = img_metas[i]\n            pos_is_gts_ = pos_is_gts[i]\n\n            bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,\n                                           img_meta_)\n            # filter gt bboxes\n            pos_keep = 1 - pos_is_gts_\n            keep_inds = pos_is_gts_.new_ones(num_rois)\n            keep_inds[:len(pos_is_gts_)] = pos_keep\n\n            bboxes_list.append(bboxes[keep_inds])\n\n        return bboxes_list\n\n    def regress_by_class(self, rois, label, bbox_pred, img_meta):\n        \"\"\"Regress the bbox for the predicted class. Used in Cascade R-CNN.\n\n        Args:\n            rois (Tensor): shape (n, 4) or (n, 5)\n            label (Tensor): shape (n, )\n            bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4)\n            img_meta (dict): Image meta info.\n\n        Returns:\n            Tensor: Regressed bboxes, the same shape as input rois.\n        \"\"\"\n        assert rois.size(1) == 4 or rois.size(1) == 5\n\n        if not self.reg_class_agnostic:\n            label = label * 4\n            inds = torch.stack((label, label + 1, label + 2, label + 3), 1)\n            bbox_pred = torch.gather(bbox_pred, 1, inds)\n        assert bbox_pred.size(1) == 4\n\n        if rois.size(1) == 4:\n            new_rois = delta2bbox(rois, bbox_pred, self.target_means,\n                                  self.target_stds, img_meta['img_shape'])\n        else:\n            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,\n                                self.target_stds, img_meta['img_shape'])\n            new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)\n\n   
     return new_rois\n"
  },
  {
    "path": "mmdetection/mmdet/models/bbox_heads/convfc_bbox_head.py",
    "content": "import torch.nn as nn\n\nfrom .bbox_head import BBoxHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HEADS.register_module\nclass ConvFCBBoxHead(BBoxHead):\n    \"\"\"More general bbox head, with shared conv and fc layers and two optional\n    separated branches.\n\n                                /-> cls convs -> cls fcs -> cls\n    shared convs -> shared fcs\n                                \\-> reg convs -> reg fcs -> reg\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_shared_convs=0,\n                 num_shared_fcs=0,\n                 num_cls_convs=0,\n                 num_cls_fcs=0,\n                 num_reg_convs=0,\n                 num_reg_fcs=0,\n                 conv_out_channels=256,\n                 fc_out_channels=1024,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 *args,\n                 **kwargs):\n        super(ConvFCBBoxHead, self).__init__(*args, **kwargs)\n        assert (num_shared_convs + num_shared_fcs + num_cls_convs + num_cls_fcs\n                + num_reg_convs + num_reg_fcs > 0)\n        if num_cls_convs > 0 or num_reg_convs > 0:\n            assert num_shared_fcs == 0\n        if not self.with_cls:\n            assert num_cls_convs == 0 and num_cls_fcs == 0\n        if not self.with_reg:\n            assert num_reg_convs == 0 and num_reg_fcs == 0\n        self.num_shared_convs = num_shared_convs\n        self.num_shared_fcs = num_shared_fcs\n        self.num_cls_convs = num_cls_convs\n        self.num_cls_fcs = num_cls_fcs\n        self.num_reg_convs = num_reg_convs\n        self.num_reg_fcs = num_reg_fcs\n        self.conv_out_channels = conv_out_channels\n        self.fc_out_channels = fc_out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        # add shared convs and fcs\n        self.shared_convs, self.shared_fcs, last_layer_dim = \\\n            self._add_conv_fc_branch(\n                
self.num_shared_convs, self.num_shared_fcs, self.in_channels,\n                True)\n        self.shared_out_channels = last_layer_dim\n\n        # add cls specific branch\n        self.cls_convs, self.cls_fcs, self.cls_last_dim = \\\n            self._add_conv_fc_branch(\n                self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)\n\n        # add reg specific branch\n        self.reg_convs, self.reg_fcs, self.reg_last_dim = \\\n            self._add_conv_fc_branch(\n                self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)\n\n        if self.num_shared_fcs == 0 and not self.with_avg_pool:\n            if self.num_cls_fcs == 0:\n                self.cls_last_dim *= (self.roi_feat_size * self.roi_feat_size)\n            if self.num_reg_fcs == 0:\n                self.reg_last_dim *= (self.roi_feat_size * self.roi_feat_size)\n\n        self.relu = nn.ReLU(inplace=True)\n        # reconstruct fc_cls and fc_reg since input channels are changed\n        if self.with_cls:\n            self.fc_cls = nn.Linear(self.cls_last_dim, self.num_classes)\n        if self.with_reg:\n            out_dim_reg = (4 if self.reg_class_agnostic else\n                           4 * self.num_classes)\n            self.fc_reg = nn.Linear(self.reg_last_dim, out_dim_reg)\n\n    def _add_conv_fc_branch(self,\n                            num_branch_convs,\n                            num_branch_fcs,\n                            in_channels,\n                            is_shared=False):\n        \"\"\"Add shared or separable branch\n\n        convs -> avg pool (optional) -> fcs\n        \"\"\"\n        last_layer_dim = in_channels\n        # add branch specific conv layers\n        branch_convs = nn.ModuleList()\n        if num_branch_convs > 0:\n            for i in range(num_branch_convs):\n                conv_in_channels = (last_layer_dim\n                                    if i == 0 else self.conv_out_channels)\n                
branch_convs.append(\n                    ConvModule(\n                        conv_in_channels,\n                        self.conv_out_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg))\n            last_layer_dim = self.conv_out_channels\n        # add branch specific fc layers\n        branch_fcs = nn.ModuleList()\n        if num_branch_fcs > 0:\n            # for shared branch, only consider self.with_avg_pool\n            # for separated branches, also consider self.num_shared_fcs\n            if (is_shared\n                    or self.num_shared_fcs == 0) and not self.with_avg_pool:\n                last_layer_dim *= (self.roi_feat_size * self.roi_feat_size)\n            for i in range(num_branch_fcs):\n                fc_in_channels = (last_layer_dim\n                                  if i == 0 else self.fc_out_channels)\n                branch_fcs.append(\n                    nn.Linear(fc_in_channels, self.fc_out_channels))\n            last_layer_dim = self.fc_out_channels\n        return branch_convs, branch_fcs, last_layer_dim\n\n    def init_weights(self):\n        super(ConvFCBBoxHead, self).init_weights()\n        for module_list in [self.shared_fcs, self.cls_fcs, self.reg_fcs]:\n            for m in module_list.modules():\n                if isinstance(m, nn.Linear):\n                    nn.init.xavier_uniform_(m.weight)\n                    nn.init.constant_(m.bias, 0)\n\n    def forward(self, x):\n        # shared part\n        if self.num_shared_convs > 0:\n            for conv in self.shared_convs:\n                x = conv(x)\n\n        if self.num_shared_fcs > 0:\n            if self.with_avg_pool:\n                x = self.avg_pool(x)\n            x = x.view(x.size(0), -1)\n            for fc in self.shared_fcs:\n                x = self.relu(fc(x))\n        # separate branches\n        x_cls = x\n        x_reg = x\n\n      
  for conv in self.cls_convs:\n            x_cls = conv(x_cls)\n        if x_cls.dim() > 2:\n            if self.with_avg_pool:\n                x_cls = self.avg_pool(x_cls)\n            x_cls = x_cls.view(x_cls.size(0), -1)\n        for fc in self.cls_fcs:\n            x_cls = self.relu(fc(x_cls))\n\n        for conv in self.reg_convs:\n            x_reg = conv(x_reg)\n        if x_reg.dim() > 2:\n            if self.with_avg_pool:\n                x_reg = self.avg_pool(x_reg)\n            x_reg = x_reg.view(x_reg.size(0), -1)\n        for fc in self.reg_fcs:\n            x_reg = self.relu(fc(x_reg))\n\n        cls_score = self.fc_cls(x_cls) if self.with_cls else None\n        bbox_pred = self.fc_reg(x_reg) if self.with_reg else None\n        return cls_score, bbox_pred\n\n\n@HEADS.register_module\nclass SharedFCBBoxHead(ConvFCBBoxHead):\n\n    def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):\n        assert num_fcs >= 1\n        super(SharedFCBBoxHead, self).__init__(\n            num_shared_convs=0,\n            num_shared_fcs=num_fcs,\n            num_cls_convs=0,\n            num_cls_fcs=0,\n            num_reg_convs=0,\n            num_reg_fcs=0,\n            fc_out_channels=fc_out_channels,\n            *args,\n            **kwargs)\n"
  },
  {
    "path": "mmdetection/mmdet/models/builder.py",
    "content": "import mmcv\nfrom torch import nn\n\nfrom .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,\n                       DETECTORS)\n\n\ndef _build_module(cfg, registry, default_args):\n    assert isinstance(cfg, dict) and 'type' in cfg\n    assert isinstance(default_args, dict) or default_args is None\n    args = cfg.copy()\n    obj_type = args.pop('type')\n    if mmcv.is_str(obj_type):\n        if obj_type not in registry.module_dict:\n            raise KeyError('{} is not in the {} registry'.format(\n                obj_type, registry.name))\n        obj_type = registry.module_dict[obj_type]\n    elif not isinstance(obj_type, type):\n        raise TypeError('type must be a str or valid type, but got {}'.format(\n            type(obj_type)))\n    if default_args is not None:\n        for name, value in default_args.items():\n            args.setdefault(name, value)\n    return obj_type(**args)\n\n\ndef build(cfg, registry, default_args=None):\n    if isinstance(cfg, list):\n        modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg]\n        return nn.Sequential(*modules)\n    else:\n        return _build_module(cfg, registry, default_args)\n\n\ndef build_backbone(cfg):\n    return build(cfg, BACKBONES)\n\n\ndef build_neck(cfg):\n    return build(cfg, NECKS)\n\n\ndef build_roi_extractor(cfg):\n    return build(cfg, ROI_EXTRACTORS)\n\n\ndef build_shared_head(cfg):\n    return build(cfg, SHARED_HEADS)\n\n\ndef build_head(cfg):\n    return build(cfg, HEADS)\n\n\ndef build_detector(cfg, train_cfg=None, test_cfg=None):\n    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/__init__.py",
    "content": "from .base import BaseDetector\nfrom .single_stage import SingleStageDetector\nfrom .two_stage import TwoStageDetector\nfrom .rpn import RPN\nfrom .fast_rcnn import FastRCNN\nfrom .faster_rcnn import FasterRCNN\nfrom .mask_rcnn import MaskRCNN\nfrom .cascade_rcnn import CascadeRCNN\nfrom .htc import HybridTaskCascade\nfrom .retinanet import RetinaNet\nfrom .fcos import FCOS\n\n__all__ = [\n    'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',\n    'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',\n    'RetinaNet', 'FCOS'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/base.py",
    "content": "import logging\nfrom abc import ABCMeta, abstractmethod\n\nimport mmcv\nimport numpy as np\nimport torch.nn as nn\nimport pycocotools.mask as maskUtils\n\nfrom mmdet.core import tensor2imgs, get_classes\n\n\nclass BaseDetector(nn.Module):\n    \"\"\"Base class for detectors\"\"\"\n\n    __metaclass__ = ABCMeta\n\n    def __init__(self):\n        super(BaseDetector, self).__init__()\n\n    @property\n    def with_neck(self):\n        return hasattr(self, 'neck') and self.neck is not None\n\n    @property\n    def with_shared_head(self):\n        return hasattr(self, 'shared_head') and self.shared_head is not None\n\n    @property\n    def with_bbox(self):\n        return hasattr(self, 'bbox_head') and self.bbox_head is not None\n\n    @property\n    def with_mask(self):\n        return hasattr(self, 'mask_head') and self.mask_head is not None\n\n    @abstractmethod\n    def extract_feat(self, imgs):\n        pass\n\n    def extract_feats(self, imgs):\n        assert isinstance(imgs, list)\n        for img in imgs:\n            yield self.extract_feat(img)\n\n    @abstractmethod\n    def forward_train(self, imgs, img_metas, **kwargs):\n        pass\n\n    @abstractmethod\n    def simple_test(self, img, img_meta, **kwargs):\n        pass\n\n    @abstractmethod\n    def aug_test(self, imgs, img_metas, **kwargs):\n        pass\n\n    def init_weights(self, pretrained=None):\n        if pretrained is not None:\n            logger = logging.getLogger()\n            logger.info('load model from: {}'.format(pretrained))\n\n    def forward_test(self, imgs, img_metas, **kwargs):\n        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError('{} must be a list, but got {}'.format(\n                    name, type(var)))\n\n        num_augs = len(imgs)\n        if num_augs != len(img_metas):\n            raise ValueError(\n                'num of augmentations ({}) != num of image 
meta ({})'.format(\n                    len(imgs), len(img_metas)))\n        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared\n        imgs_per_gpu = imgs[0].size(0)\n        assert imgs_per_gpu == 1\n\n        if num_augs == 1:\n            return self.simple_test(imgs[0], img_metas[0], **kwargs)\n        else:\n            return self.aug_test(imgs, img_metas, **kwargs)\n\n    def forward(self, img, img_meta, return_loss=True, **kwargs):\n        if return_loss:\n            return self.forward_train(img, img_meta, **kwargs)\n        else:\n            return self.forward_test(img, img_meta, **kwargs)\n\n    def show_result(self,\n                    data,\n                    result,\n                    img_norm_cfg,\n                    dataset=None,\n                    score_thr=0.3):\n        if isinstance(result, tuple):\n            bbox_result, segm_result = result\n        else:\n            bbox_result, segm_result = result, None\n\n        img_tensor = data['img'][0]\n        img_metas = data['img_meta'][0].data[0]\n        imgs = tensor2imgs(img_tensor, **img_norm_cfg)\n        assert len(imgs) == len(img_metas)\n\n        if dataset is None:\n            class_names = self.CLASSES\n        elif isinstance(dataset, str):\n            class_names = get_classes(dataset)\n        elif isinstance(dataset, (list, tuple)):\n            class_names = dataset\n        else:\n            raise TypeError(\n                'dataset must be a valid dataset name or a sequence'\n                ' of class names, not {}'.format(type(dataset)))\n\n        for img, img_meta in zip(imgs, img_metas):\n            h, w, _ = img_meta['img_shape']\n            img_show = img[:h, :w, :]\n\n            bboxes = np.vstack(bbox_result)\n            # draw segmentation masks\n            if segm_result is not None:\n                segms = mmcv.concat_list(segm_result)\n                inds = np.where(bboxes[:, -1] > score_thr)[0]\n                for i in 
inds:\n                    color_mask = np.random.randint(\n                        0, 256, (1, 3), dtype=np.uint8)\n                    mask = maskUtils.decode(segms[i]).astype(np.bool)\n                    img_show[mask] = img_show[mask] * 0.5 + color_mask * 0.5\n            # draw bounding boxes\n            labels = [\n                np.full(bbox.shape[0], i, dtype=np.int32)\n                for i, bbox in enumerate(bbox_result)\n            ]\n            labels = np.concatenate(labels)\n            mmcv.imshow_det_bboxes(\n                img_show,\n                bboxes,\n                labels,\n                class_names=class_names,\n                score_thr=score_thr)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/cascade_rcnn.py",
    "content": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet.core import (build_assigner, bbox2roi, bbox2result, build_sampler,\n                        merge_aug_masks)\n\n\n@DETECTORS.register_module\nclass CascadeRCNN(BaseDetector, RPNTestMixin):\n\n    def __init__(self,\n                 num_stages,\n                 backbone,\n                 neck=None,\n                 shared_head=None,\n                 rpn_head=None,\n                 bbox_roi_extractor=None,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        assert bbox_roi_extractor is not None\n        assert bbox_head is not None\n        super(CascadeRCNN, self).__init__()\n\n        self.num_stages = num_stages\n        self.backbone = builder.build_backbone(backbone)\n\n        if neck is not None:\n            self.neck = builder.build_neck(neck)\n\n        if rpn_head is not None:\n            self.rpn_head = builder.build_head(rpn_head)\n\n        if shared_head is not None:\n            self.shared_head = builder.build_shared_head(shared_head)\n\n        if bbox_head is not None:\n            self.bbox_roi_extractor = nn.ModuleList()\n            self.bbox_head = nn.ModuleList()\n            if not isinstance(bbox_roi_extractor, list):\n                bbox_roi_extractor = [\n                    bbox_roi_extractor for _ in range(num_stages)\n                ]\n            if not isinstance(bbox_head, list):\n                bbox_head = [bbox_head for _ in range(num_stages)]\n            assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages\n            for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):\n                
self.bbox_roi_extractor.append(\n                    builder.build_roi_extractor(roi_extractor))\n                self.bbox_head.append(builder.build_head(head))\n\n        if mask_head is not None:\n            self.mask_head = nn.ModuleList()\n            if not isinstance(mask_head, list):\n                mask_head = [mask_head for _ in range(num_stages)]\n            assert len(mask_head) == self.num_stages\n            for head in mask_head:\n                self.mask_head.append(builder.build_head(head))\n            if mask_roi_extractor is not None:\n                self.share_roi_extractor = False\n                self.mask_roi_extractor = nn.ModuleList()\n                if not isinstance(mask_roi_extractor, list):\n                    mask_roi_extractor = [\n                        mask_roi_extractor for _ in range(num_stages)\n                    ]\n                assert len(mask_roi_extractor) == self.num_stages\n                for roi_extractor in mask_roi_extractor:\n                    self.mask_roi_extractor.append(\n                        builder.build_roi_extractor(roi_extractor))\n            else:\n                self.share_roi_extractor = True\n                self.mask_roi_extractor = self.bbox_roi_extractor\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        self.init_weights(pretrained=pretrained)\n\n    @property\n    def with_rpn(self):\n        return hasattr(self, 'rpn_head') and self.rpn_head is not None\n\n    def init_weights(self, pretrained=None):\n        super(CascadeRCNN, self).init_weights(pretrained)\n        self.backbone.init_weights(pretrained=pretrained)\n        if self.with_neck:\n            if isinstance(self.neck, nn.Sequential):\n                for m in self.neck:\n                    m.init_weights()\n            else:\n                self.neck.init_weights()\n        if self.with_rpn:\n            self.rpn_head.init_weights()\n        if self.with_shared_head:\n            
self.shared_head.init_weights(pretrained=pretrained)\n        for i in range(self.num_stages):\n            if self.with_bbox:\n                self.bbox_roi_extractor[i].init_weights()\n                self.bbox_head[i].init_weights()\n            if self.with_mask:\n                if not self.share_roi_extractor:\n                    self.mask_roi_extractor[i].init_weights()\n                self.mask_head[i].init_weights()\n\n    def extract_feat(self, img):\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_train(self,\n                      img,\n                      img_meta,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None):\n        x = self.extract_feat(img)\n\n        losses = dict()\n\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(x)\n            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,\n                                          self.train_cfg.rpn)\n            rpn_losses = self.rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        for i in range(self.num_stages):\n            self.current_stage = i\n            rcnn_train_cfg = self.train_cfg.rcnn[i]\n            lw = self.train_cfg.stage_loss_weights[i]\n\n            # assign gts and sample proposals\n            sampling_results = []\n            if self.with_bbox or self.with_mask:\n                bbox_assigner = build_assigner(rcnn_train_cfg.assigner)\n      
          bbox_sampler = build_sampler(\n                    rcnn_train_cfg.sampler, context=self)\n                num_imgs = img.size(0)\n                if gt_bboxes_ignore is None:\n                    gt_bboxes_ignore = [None for _ in range(num_imgs)]\n\n                for j in range(num_imgs):\n                    assign_result = bbox_assigner.assign(\n                        proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],\n                        gt_labels[j])\n                    sampling_result = bbox_sampler.sample(\n                        assign_result,\n                        proposal_list[j],\n                        gt_bboxes[j],\n                        gt_labels[j],\n                        feats=[lvl_feat[j][None] for lvl_feat in x])\n                    sampling_results.append(sampling_result)\n\n            # bbox head forward and loss\n            bbox_roi_extractor = self.bbox_roi_extractor[i]\n            bbox_head = self.bbox_head[i]\n\n            rois = bbox2roi([res.bboxes for res in sampling_results])\n            bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],\n                                            rois)\n            if self.with_shared_head:\n                bbox_feats = self.shared_head(bbox_feats)\n            cls_score, bbox_pred = bbox_head(bbox_feats)\n\n            bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,\n                                                gt_labels, rcnn_train_cfg)\n            loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)\n            for name, value in loss_bbox.items():\n                losses['s{}.{}'.format(\n                    i, name)] = (value * lw if 'loss' in name else value)\n\n            # mask head forward and loss\n            if self.with_mask:\n                if not self.share_roi_extractor:\n                    mask_roi_extractor = self.mask_roi_extractor[i]\n                    pos_rois = bbox2roi(\n                       
 [res.pos_bboxes for res in sampling_results])\n                    mask_feats = mask_roi_extractor(\n                        x[:mask_roi_extractor.num_inputs], pos_rois)\n                    if self.with_shared_head:\n                        mask_feats = self.shared_head(mask_feats)\n                else:\n                    # reuse positive bbox feats\n                    pos_inds = []\n                    device = bbox_feats.device\n                    for res in sampling_results:\n                        pos_inds.append(\n                            torch.ones(\n                                res.pos_bboxes.shape[0],\n                                device=device,\n                                dtype=torch.uint8))\n                        pos_inds.append(\n                            torch.zeros(\n                                res.neg_bboxes.shape[0],\n                                device=device,\n                                dtype=torch.uint8))\n                    pos_inds = torch.cat(pos_inds)\n                    mask_feats = bbox_feats[pos_inds]\n                mask_head = self.mask_head[i]\n                mask_pred = mask_head(mask_feats)\n                mask_targets = mask_head.get_target(sampling_results, gt_masks,\n                                                    rcnn_train_cfg)\n                pos_labels = torch.cat(\n                    [res.pos_gt_labels for res in sampling_results])\n                loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)\n                for name, value in loss_mask.items():\n                    losses['s{}.{}'.format(\n                        i, name)] = (value * lw if 'loss' in name else value)\n\n            # refine bboxes\n            if i < self.num_stages - 1:\n                pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                roi_labels = bbox_targets[0]  # bbox_targets is a tuple\n                with torch.no_grad():\n                    proposal_list = 
bbox_head.refine_bboxes(\n                        rois, roi_labels, bbox_pred, pos_is_gts, img_meta)\n\n        return losses\n\n    def simple_test(self, img, img_meta, proposals=None, rescale=False):\n        x = self.extract_feat(img)\n        proposal_list = self.simple_test_rpn(\n            x, img_meta, self.test_cfg.rpn) if proposals is None else proposals\n\n        img_shape = img_meta[0]['img_shape']\n        ori_shape = img_meta[0]['ori_shape']\n        scale_factor = img_meta[0]['scale_factor']\n\n        # \"ms\" in variable names means multi-stage\n        ms_bbox_result = {}\n        ms_segm_result = {}\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg.rcnn\n\n        rois = bbox2roi(proposal_list)\n        for i in range(self.num_stages):\n            bbox_roi_extractor = self.bbox_roi_extractor[i]\n            bbox_head = self.bbox_head[i]\n\n            bbox_feats = bbox_roi_extractor(\n                x[:len(bbox_roi_extractor.featmap_strides)], rois)\n            if self.with_shared_head:\n                bbox_feats = self.shared_head(bbox_feats)\n\n            cls_score, bbox_pred = bbox_head(bbox_feats)\n            ms_scores.append(cls_score)\n\n            if self.test_cfg.keep_all_stages:\n                det_bboxes, det_labels = bbox_head.get_det_bboxes(\n                    rois,\n                    cls_score,\n                    bbox_pred,\n                    img_shape,\n                    scale_factor,\n                    rescale=rescale,\n                    cfg=rcnn_test_cfg)\n                bbox_result = bbox2result(det_bboxes, det_labels,\n                                          bbox_head.num_classes)\n                ms_bbox_result['stage{}'.format(i)] = bbox_result\n\n                if self.with_mask:\n                    mask_roi_extractor = self.mask_roi_extractor[i]\n                    mask_head = self.mask_head[i]\n                    if det_bboxes.shape[0] == 0:\n                        segm_result = 
[\n                            [] for _ in range(mask_head.num_classes - 1)\n                        ]\n                    else:\n                        _bboxes = (det_bboxes[:, :4] * scale_factor\n                                   if rescale else det_bboxes)\n                        mask_rois = bbox2roi([_bboxes])\n                        mask_feats = mask_roi_extractor(\n                            x[:len(mask_roi_extractor.featmap_strides)],\n                            mask_rois)\n                        if self.with_shared_head:\n                            mask_feats = self.shared_head(mask_feats, i)\n                        mask_pred = mask_head(mask_feats)\n                        segm_result = mask_head.get_seg_masks(\n                            mask_pred, _bboxes, det_labels, rcnn_test_cfg,\n                            ori_shape, scale_factor, rescale)\n                    ms_segm_result['stage{}'.format(i)] = segm_result\n\n            if i < self.num_stages - 1:\n                bbox_label = cls_score.argmax(dim=1)\n                rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,\n                                                  img_meta[0])\n\n        cls_score = sum(ms_scores) / self.num_stages\n        det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(\n            rois,\n            cls_score,\n            bbox_pred,\n            img_shape,\n            scale_factor,\n            rescale=rescale,\n            cfg=rcnn_test_cfg)\n        bbox_result = bbox2result(det_bboxes, det_labels,\n                                  self.bbox_head[-1].num_classes)\n        ms_bbox_result['ensemble'] = bbox_result\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [\n                    [] for _ in range(self.mask_head[-1].num_classes - 1)\n                ]\n            else:\n                _bboxes = (det_bboxes[:, :4] * scale_factor\n                           if rescale else 
det_bboxes)\n                mask_rois = bbox2roi([_bboxes])\n                aug_masks = []\n                for i in range(self.num_stages):\n                    mask_roi_extractor = self.mask_roi_extractor[i]\n                    mask_feats = mask_roi_extractor(\n                        x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n                    if self.with_shared_head:\n                        mask_feats = self.shared_head(mask_feats)\n                    mask_pred = self.mask_head[i](mask_feats)\n                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                merged_masks = merge_aug_masks(aug_masks,\n                                               [img_meta] * self.num_stages,\n                                               self.test_cfg.rcnn)\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    merged_masks, _bboxes, det_labels, rcnn_test_cfg,\n                    ori_shape, scale_factor, rescale)\n            ms_segm_result['ensemble'] = segm_result\n\n        if not self.test_cfg.keep_all_stages:\n            if self.with_mask:\n                results = (ms_bbox_result['ensemble'],\n                           ms_segm_result['ensemble'])\n            else:\n                results = ms_bbox_result['ensemble']\n        else:\n            if self.with_mask:\n                results = {\n                    stage: (ms_bbox_result[stage], ms_segm_result[stage])\n                    for stage in ms_bbox_result\n                }\n            else:\n                results = ms_bbox_result\n\n        return results\n\n    def aug_test(self, img, img_meta, proposals=None, rescale=False):\n        raise NotImplementedError\n\n    def show_result(self, data, result, img_norm_cfg, **kwargs):\n        if self.with_mask:\n            ms_bbox_result, ms_segm_result = result\n            if isinstance(ms_bbox_result, dict):\n                result = (ms_bbox_result['ensemble'],\n                  
        ms_segm_result['ensemble'])\n        else:\n            if isinstance(result, dict):\n                result = result['ensemble']\n        super(CascadeRCNN, self).show_result(data, result, img_norm_cfg,\n                                             **kwargs)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/ensemble_htc.py",
    "content": "from torch import nn\nfrom mmdet.core import (bbox2result, bbox_mapping)\nfrom mmdet.core import (bbox2roi, merge_aug_masks, merge_aug_bboxes, multiclass_nms, merge_aug_proposals)\nfrom mmdet.models.detectors import BaseDetector\n\n\nclass EnsembleHTC(BaseDetector):\n    def __init__(self, models):\n        super().__init__()\n        self.models = nn.ModuleList(models)\n\n    def simple_test(self, img, img_meta, **kwargs):\n        pass\n\n    def forward_train(self, imgs, img_metas, **kwargs):\n        pass\n\n    def extract_feat(self, imgs):\n        pass\n\n    def aug_test(self, imgs, img_metas, **kwargs):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        rpn_test_cfg = self.models[0].test_cfg.rpn\n        imgs_per_gpu = len(img_metas[0])\n        aug_proposals = [[] for _ in range(imgs_per_gpu)]\n        for model in self.models:\n            # recompute feats to save memory\n            for x, img_meta in zip(model.extract_feats(imgs), img_metas):\n                proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg)\n                for i, proposals in enumerate(proposal_list):\n                    aug_proposals[i].append(proposals)\n        # after merging, proposals will be rescaled to the original image size\n        proposal_list = [\n            merge_aug_proposals(proposals, img_meta, rpn_test_cfg)\n            for proposals, img_meta in zip(aug_proposals, img_metas)\n        ]\n\n        rcnn_test_cfg = self.models[0].test_cfg.rcnn\n        aug_bboxes = []\n        aug_scores = []\n        aug_img_metas = []\n        for model in self.models:\n            for x, img_meta in zip(model.extract_feats(imgs), img_metas):\n                # only one image in the batch\n                img_shape = img_meta[0]['img_shape']\n                scale_factor = img_meta[0]['scale_factor']\n                flip = 
img_meta[0]['flip']\n\n                proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                         scale_factor, flip)\n                # \"ms\" in variable names means multi-stage\n                ms_scores = []\n\n                rois = bbox2roi([proposals])\n                for i in range(model.num_stages):\n                    bbox_head = model.bbox_head[i]\n                    cls_score, bbox_pred = model._bbox_forward_test(i, x, rois)\n                    ms_scores.append(cls_score)\n\n                    if i < model.num_stages - 1:\n                        bbox_label = cls_score.argmax(dim=1)\n                        rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,\n                                                          img_meta[0])\n\n                cls_score = sum(ms_scores) / float(len(ms_scores))\n                bboxes, scores = model.bbox_head[-1].get_det_bboxes(\n                    rois,\n                    cls_score,\n                    bbox_pred,\n                    img_shape,\n                    scale_factor,\n                    rescale=False,\n                    cfg=None)\n                aug_bboxes.append(bboxes)\n                aug_scores.append(scores)\n                aug_img_metas.append(img_meta)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(\n            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,\n            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)\n\n        bbox_result = bbox2result(det_bboxes, det_labels, self.models[0].bbox_head[-1].num_classes)\n\n        if self.models[0].with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [[] for _ in range(self.models[0].mask_head[-1].num_classes - 1)]\n            else:\n       
         aug_masks = []\n                aug_img_metas = []\n                for model in self.models:\n                    for x, img_meta in zip(model.extract_feats(imgs), img_metas):\n                        img_shape = img_meta[0]['img_shape']\n                        scale_factor = img_meta[0]['scale_factor']\n                        flip = img_meta[0]['flip']\n                        _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                               scale_factor, flip)\n                        mask_rois = bbox2roi([_bboxes])\n                        mask_roi_extractor = model.mask_roi_extractor[-1]\n                        mask_feats = mask_roi_extractor(\n                            x[:len(mask_roi_extractor.featmap_strides)],\n                            mask_rois)\n                        last_feat = None\n                        for i in range(model.num_stages):\n                            mask_head = model.mask_head[i]\n                            if model.mask_info_flow:\n                                mask_pred, last_feat = mask_head(mask_feats, last_feat)\n                            else:\n                                mask_pred = mask_head(mask_feats)\n                            aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                            aug_img_metas.append(img_meta)\n                merged_masks = merge_aug_masks(aug_masks, aug_img_metas, rcnn_test_cfg)\n\n                ori_shape = img_metas[0][0]['ori_shape']\n                segm_result = self.models[0].mask_head[-1].get_seg_masks(\n                    merged_masks, det_bboxes, det_labels, rcnn_test_cfg,\n                    ori_shape, scale_factor=1.0, rescale=False)\n            return bbox_result, segm_result\n        else:\n            return bbox_result\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/fast_rcnn.py",
    "content": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FastRCNN(TwoStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 bbox_roi_extractor,\n                 bbox_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 shared_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 pretrained=None):\n        super(FastRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            shared_head=shared_head,\n            bbox_roi_extractor=bbox_roi_extractor,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            mask_roi_extractor=mask_roi_extractor,\n            mask_head=mask_head,\n            pretrained=pretrained)\n\n    def forward_test(self, imgs, img_metas, proposals, **kwargs):\n        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError('{} must be a list, but got {}'.format(\n                    name, type(var)))\n\n        num_augs = len(imgs)\n        if num_augs != len(img_metas):\n            raise ValueError(\n                'num of augmentations ({}) != num of image meta ({})'.format(\n                    len(imgs), len(img_metas)))\n        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared\n        imgs_per_gpu = imgs[0].size(0)\n        assert imgs_per_gpu == 1\n\n        if num_augs == 1:\n            return self.simple_test(imgs[0], img_metas[0], proposals[0],\n                                    **kwargs)\n        else:\n            return self.aug_test(imgs, img_metas, proposals, **kwargs)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/faster_rcnn.py",
    "content": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FasterRCNN(TwoStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 bbox_roi_extractor,\n                 bbox_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 shared_head=None,\n                 pretrained=None):\n        super(FasterRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            shared_head=shared_head,\n            rpn_head=rpn_head,\n            bbox_roi_extractor=bbox_roi_extractor,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/fcos.py",
    "content": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass FCOS(SingleStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/htc.py",
    "content": "import torch\nimport torch.nn.functional as F\n\nfrom mmdet.core import (bbox2result, build_assigner, build_sampler,\n                        bbox_mapping)\nfrom mmdet.core import (bbox2roi, merge_aug_masks, merge_aug_bboxes, multiclass_nms)\nfrom .cascade_rcnn import CascadeRCNN\nfrom .test_mixins import RPNTestMixin\nfrom .. import builder\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass HybridTaskCascade(CascadeRCNN, RPNTestMixin):\n\n    def __init__(self,\n                 num_stages,\n                 backbone,\n                 semantic_roi_extractor=None,\n                 semantic_head=None,\n                 semantic_fusion=('bbox', 'mask'),\n                 interleaved=True,\n                 mask_info_flow=True,\n                 **kwargs):\n        super(HybridTaskCascade, self).__init__(num_stages, backbone, **kwargs)\n        assert self.with_bbox and self.with_mask\n        assert not self.with_shared_head  # shared head not supported\n        if semantic_head is not None:\n            self.semantic_roi_extractor = builder.build_roi_extractor(\n                semantic_roi_extractor)\n            self.semantic_head = builder.build_head(semantic_head)\n\n        self.semantic_fusion = semantic_fusion\n        self.interleaved = interleaved\n        self.mask_info_flow = mask_info_flow\n\n    @property\n    def with_semantic(self):\n        if hasattr(self, 'semantic_head') and self.semantic_head is not None:\n            return True\n        else:\n            return False\n\n    def _bbox_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_bboxes,\n                            gt_labels,\n                            rcnn_train_cfg,\n                            semantic_feat=None):\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_roi_extractor = 
self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],\n                                        rois)\n        # semantic feature fusion\n        # element-wise sum for original features and pooled semantic features\n        if self.with_semantic and 'bbox' in self.semantic_fusion:\n            bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             rois)\n            if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:\n                bbox_semantic_feat = F.adaptive_avg_pool2d(\n                    bbox_semantic_feat, bbox_feats.shape[-2:])\n            bbox_feats += bbox_semantic_feat\n\n        cls_score, bbox_pred = bbox_head(bbox_feats)\n\n        bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,\n                                            gt_labels, rcnn_train_cfg)\n        loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)\n        return loss_bbox, rois, bbox_targets, bbox_pred\n\n    def _mask_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_masks,\n                            rcnn_train_cfg,\n                            semantic_feat=None):\n        mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n        mask_feats = mask_roi_extractor(x[:mask_roi_extractor.num_inputs],\n                                        pos_rois)\n\n        # semantic feature fusion\n        # element-wise sum for original features and pooled semantic features\n        if self.with_semantic and 'mask' in self.semantic_fusion:\n            mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                           
                  pos_rois)\n            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:\n                mask_semantic_feat = F.adaptive_avg_pool2d(\n                    mask_semantic_feat, mask_feats.shape[-2:])\n            mask_feats += mask_semantic_feat\n\n        # mask information flow\n        # forward all previous mask heads to obtain last_feat, and fuse it\n        # with the normal mask feature\n        if self.mask_info_flow:\n            last_feat = None\n            for i in range(stage):\n                last_feat = self.mask_head[i](\n                    mask_feats, last_feat, return_logits=False)\n            mask_pred = mask_head(mask_feats, last_feat, return_feat=False)\n        else:\n            mask_pred = mask_head(mask_feats)\n\n        mask_targets = mask_head.get_target(sampling_results, gt_masks,\n                                            rcnn_train_cfg)\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)\n        return loss_mask\n\n    def _bbox_forward_test(self, stage, x, rois, semantic_feat=None):\n        bbox_roi_extractor = self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(\n            x[:len(bbox_roi_extractor.featmap_strides)], rois)\n        if self.with_semantic and 'bbox' in self.semantic_fusion:\n            bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             rois)\n            if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:\n                bbox_semantic_feat = F.adaptive_avg_pool2d(\n                    bbox_semantic_feat, bbox_feats.shape[-2:])\n            bbox_feats += bbox_semantic_feat\n        cls_score, bbox_pred = bbox_head(bbox_feats)\n        return cls_score, bbox_pred\n\n    def _mask_forward_test(self, stage, x, bboxes, semantic_feat=None):\n    
    mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        mask_rois = bbox2roi([bboxes])\n        mask_feats = mask_roi_extractor(\n            x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n        if self.with_semantic and 'mask' in self.semantic_fusion:\n            mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             mask_rois)\n            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:\n                mask_semantic_feat = F.adaptive_avg_pool2d(\n                    mask_semantic_feat, mask_feats.shape[-2:])\n            mask_feats += mask_semantic_feat\n        if self.mask_info_flow:\n            last_feat = None\n            last_pred = None\n            for i in range(stage):\n                mask_pred, last_feat = self.mask_head[i](mask_feats, last_feat)\n                if last_pred is not None:\n                    mask_pred = mask_pred + last_pred\n                last_pred = mask_pred\n            mask_pred = mask_head(mask_feats, last_feat, return_feat=False)\n            if last_pred is not None:\n                mask_pred = mask_pred + last_pred\n        else:\n            mask_pred = mask_head(mask_feats)\n        return mask_pred\n\n    def forward_train(self,\n                      img,\n                      img_meta,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      gt_semantic_seg=None,\n                      proposals=None):\n        x = self.extract_feat(img)\n\n        losses = dict()\n\n        # RPN part, the same as normal two-stage detectors\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(x)\n            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,\n                                          self.train_cfg.rpn)\n            rpn_losses = 
self.rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        # semantic segmentation part\n        # 2 outputs: segmentation prediction and embedded features\n        if self.with_semantic:\n            semantic_pred, semantic_feat = self.semantic_head(x)\n            loss_seg = self.semantic_head.loss(semantic_pred, gt_semantic_seg)\n            losses['loss_semantic_seg'] = loss_seg\n        else:\n            semantic_feat = None\n\n        for i in range(self.num_stages):\n            self.current_stage = i\n            rcnn_train_cfg = self.train_cfg.rcnn[i]\n            lw = self.train_cfg.stage_loss_weights[i]\n\n            # assign gts and sample proposals\n            sampling_results = []\n            bbox_assigner = build_assigner(rcnn_train_cfg.assigner)\n            bbox_sampler = build_sampler(rcnn_train_cfg.sampler, context=self)\n            num_imgs = img.size(0)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n\n            for j in range(num_imgs):\n                assign_result = bbox_assigner.assign(\n                    proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],\n                    gt_labels[j])\n                sampling_result = bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[j],\n                    gt_bboxes[j],\n                    gt_labels[j],\n                    feats=[lvl_feat[j][None] for lvl_feat in x])\n                sampling_results.append(sampling_result)\n\n            # bbox head forward and loss\n            
loss_bbox, rois, bbox_targets, bbox_pred = \\\n                self._bbox_forward_train(\n                    i, x, sampling_results, gt_bboxes, gt_labels,\n                    rcnn_train_cfg, semantic_feat)\n            roi_labels = bbox_targets[0]\n\n            for name, value in loss_bbox.items():\n                losses['s{}.{}'.format(\n                    i, name)] = (value * lw if 'loss' in name else value)\n\n            # mask head forward and loss\n            if self.with_mask:\n                # interleaved execution: use regressed bboxes by the box branch\n                # to train the mask branch\n                if self.interleaved:\n                    pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                    with torch.no_grad():\n                        proposal_list = self.bbox_head[i].refine_bboxes(\n                            rois, roi_labels, bbox_pred, pos_is_gts, img_meta)\n                        # re-assign and sample 512 RoIs from 512 RoIs\n                        sampling_results = []\n                        for j in range(num_imgs):\n                            assign_result = bbox_assigner.assign(\n                                proposal_list[j], gt_bboxes[j],\n                                gt_bboxes_ignore[j], gt_labels[j])\n                            sampling_result = bbox_sampler.sample(\n                                assign_result,\n                                proposal_list[j],\n                                gt_bboxes[j],\n                                gt_labels[j],\n                                feats=[lvl_feat[j][None] for lvl_feat in x])\n                            sampling_results.append(sampling_result)\n                loss_mask = self._mask_forward_train(i, x, sampling_results,\n                                                     gt_masks, rcnn_train_cfg,\n                                                     semantic_feat)\n                for name, value in loss_mask.items():\n      
              losses['s{}.{}'.format(\n                        i, name)] = (value * lw if 'loss' in name else value)\n\n            # refine bboxes (same as Cascade R-CNN)\n            if i < self.num_stages - 1 and not self.interleaved:\n                pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                with torch.no_grad():\n                    proposal_list = self.bbox_head[i].refine_bboxes(\n                        rois, roi_labels, bbox_pred, pos_is_gts, img_meta)\n\n        return losses\n\n    def simple_test(self, img, img_meta, proposals=None, rescale=False):\n        x = self.extract_feat(img)\n        proposal_list = self.simple_test_rpn(\n            x, img_meta, self.test_cfg.rpn) if proposals is None else proposals\n\n        if self.with_semantic:\n            _, semantic_feat = self.semantic_head(x)\n        else:\n            semantic_feat = None\n\n        img_shape = img_meta[0]['img_shape']\n        ori_shape = img_meta[0]['ori_shape']\n        scale_factor = img_meta[0]['scale_factor']\n\n        # \"ms\" in variable names means multi-stage\n        ms_bbox_result = {}\n        ms_segm_result = {}\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg.rcnn\n\n        rois = bbox2roi(proposal_list)\n        for i in range(self.num_stages):\n            bbox_head = self.bbox_head[i]\n            cls_score, bbox_pred = self._bbox_forward_test(\n                i, x, rois, semantic_feat=semantic_feat)\n            ms_scores.append(cls_score)\n\n            if self.test_cfg.keep_all_stages:\n                det_bboxes, det_labels = bbox_head.get_det_bboxes(\n                    rois,\n                    cls_score,\n                    bbox_pred,\n                    img_shape,\n                    scale_factor,\n                    rescale=rescale,\n                    nms_cfg=rcnn_test_cfg)\n                bbox_result = bbox2result(det_bboxes, det_labels,\n                                          
bbox_head.num_classes)\n                ms_bbox_result['stage{}'.format(i)] = bbox_result\n\n                if self.with_mask:\n                    mask_head = self.mask_head[i]\n                    if det_bboxes.shape[0] == 0:\n                        segm_result = [\n                            [] for _ in range(mask_head.num_classes - 1)\n                        ]\n                    else:\n                        _bboxes = (det_bboxes[:, :4] * scale_factor\n                                   if rescale else det_bboxes)\n                        mask_pred = self._mask_forward_test(\n                            i, x, _bboxes, semantic_feat=semantic_feat)\n                        segm_result = mask_head.get_seg_masks(\n                            mask_pred, _bboxes, det_labels, rcnn_test_cfg,\n                            ori_shape, scale_factor, rescale)\n                    ms_segm_result['stage{}'.format(i)] = segm_result\n\n            if i < self.num_stages - 1:\n                bbox_label = cls_score.argmax(dim=1)\n                rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,\n                                                  img_meta[0])\n\n        cls_score = sum(ms_scores) / float(len(ms_scores))\n        det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(\n            rois,\n            cls_score,\n            bbox_pred,\n            img_shape,\n            scale_factor,\n            rescale=rescale,\n            cfg=rcnn_test_cfg)\n        bbox_result = bbox2result(det_bboxes, det_labels,\n                                  self.bbox_head[-1].num_classes)\n        ms_bbox_result['ensemble'] = bbox_result\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [\n                    [] for _ in range(self.mask_head[-1].num_classes - 1)\n                ]\n            else:\n                _bboxes = (det_bboxes[:, :4] * scale_factor\n                           if rescale else 
det_bboxes)\n\n                mask_rois = bbox2roi([_bboxes])\n                aug_masks = []\n                mask_roi_extractor = self.mask_roi_extractor[-1]\n                mask_feats = mask_roi_extractor(\n                    x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n                if self.with_semantic and 'mask' in self.semantic_fusion:\n                    mask_semantic_feat = self.semantic_roi_extractor(\n                        [semantic_feat], mask_rois)\n                    mask_feats += mask_semantic_feat\n                last_feat = None\n                for i in range(self.num_stages):\n                    mask_head = self.mask_head[i]\n                    if self.mask_info_flow:\n                        mask_pred, last_feat = mask_head(mask_feats, last_feat)\n                    else:\n                        mask_pred = mask_head(mask_feats)\n                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                merged_masks = merge_aug_masks(aug_masks,\n                                               [img_meta] * self.num_stages,\n                                               self.test_cfg.rcnn)\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    merged_masks, _bboxes, det_labels, rcnn_test_cfg,\n                    ori_shape, scale_factor, rescale)\n            ms_segm_result['ensemble'] = segm_result\n\n        if not self.test_cfg.keep_all_stages:\n            if self.with_mask:\n                results = (ms_bbox_result['ensemble'],\n                           ms_segm_result['ensemble'])\n            else:\n                results = ms_bbox_result['ensemble']\n        else:\n            if self.with_mask:\n                results = {\n                    stage: (ms_bbox_result[stage], ms_segm_result[stage])\n                    for stage in ms_bbox_result\n                }\n            else:\n                results = ms_bbox_result\n\n        return results\n\n    def 
aug_test(self, imgs, img_metas, proposals=None, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        # recompute feats to save memory\n        proposal_list = self.aug_test_rpn(\n            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)\n\n        rcnn_test_cfg = self.test_cfg.rcnn\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta in zip(self.extract_feats(imgs), img_metas):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip)\n            # \"ms\" in variable names means multi-stage\n            ms_scores = []\n\n            rois = bbox2roi([proposals])\n            for i in range(self.num_stages):\n                bbox_head = self.bbox_head[i]\n                cls_score, bbox_pred = self._bbox_forward_test(i, x, rois)\n                ms_scores.append(cls_score)\n\n                if i < self.num_stages - 1:\n                    bbox_label = cls_score.argmax(dim=1)\n                    rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,\n                                                      img_meta[0])\n\n            cls_score = sum(ms_scores) / float(len(ms_scores))\n            bboxes, scores = self.bbox_head[-1].get_det_bboxes(\n                rois,\n                cls_score,\n                bbox_pred,\n                img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = 
merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(\n            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,\n            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)\n\n        bbox_result = bbox2result(det_bboxes, det_labels,\n                                  self.bbox_head[-1].num_classes)\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)]\n            else:\n                aug_masks = []\n                aug_img_metas = []\n                for x, img_meta in zip(self.extract_feats(imgs), img_metas):\n                    img_shape = img_meta[0]['img_shape']\n                    scale_factor = img_meta[0]['scale_factor']\n                    flip = img_meta[0]['flip']\n                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                           scale_factor, flip)\n                    mask_rois = bbox2roi([_bboxes])\n                    mask_roi_extractor = self.mask_roi_extractor[-1]\n                    mask_feats = mask_roi_extractor(\n                        x[:len(mask_roi_extractor.featmap_strides)],\n                        mask_rois)\n                    last_feat = None\n                    for i in range(self.num_stages):\n                        mask_head = self.mask_head[i]\n                        if self.mask_info_flow:\n                            mask_pred, last_feat = mask_head(mask_feats, last_feat)\n                        else:\n                            mask_pred = mask_head(mask_feats)\n                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                        aug_img_metas.append(img_meta)\n                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,\n                                               self.test_cfg.rcnn)\n\n                ori_shape = 
img_metas[0][0]['ori_shape']\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    merged_masks, det_bboxes, det_labels, rcnn_test_cfg,\n                    ori_shape, scale_factor=1.0, rescale=False)\n            return bbox_result, segm_result\n        else:\n            return bbox_result\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/mask_rcnn.py",
    "content": "from .two_stage import TwoStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass MaskRCNN(TwoStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 bbox_roi_extractor,\n                 bbox_head,\n                 mask_roi_extractor,\n                 mask_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 shared_head=None,\n                 pretrained=None):\n        super(MaskRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            shared_head=shared_head,\n            rpn_head=rpn_head,\n            bbox_roi_extractor=bbox_roi_extractor,\n            bbox_head=bbox_head,\n            mask_roi_extractor=mask_roi_extractor,\n            mask_head=mask_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/retinanet.py",
    "content": "from .single_stage import SingleStageDetector\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass RetinaNet(SingleStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                        test_cfg, pretrained)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/rpn.py",
    "content": "import mmcv\n\nfrom mmdet.core import tensor2imgs, bbox_mapping\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin\nfrom .. import builder\nfrom ..registry import DETECTORS\n\n\n@DETECTORS.register_module\nclass RPN(BaseDetector, RPNTestMixin):\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 rpn_head,\n                 train_cfg,\n                 test_cfg,\n                 pretrained=None):\n        super(RPN, self).__init__()\n        self.backbone = builder.build_backbone(backbone)\n        self.neck = builder.build_neck(neck) if neck is not None else None\n        self.rpn_head = builder.build_head(rpn_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.init_weights(pretrained=pretrained)\n\n    def init_weights(self, pretrained=None):\n        super(RPN, self).init_weights(pretrained)\n        self.backbone.init_weights(pretrained=pretrained)\n        if self.with_neck:\n            self.neck.init_weights()\n        self.rpn_head.init_weights()\n\n    def extract_feat(self, img):\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_train(self,\n                      img,\n                      img_meta,\n                      gt_bboxes=None,\n                      gt_bboxes_ignore=None):\n        if self.train_cfg.rpn.get('debug', False):\n            self.rpn_head.debug_imgs = tensor2imgs(img)\n\n        x = self.extract_feat(img)\n        rpn_outs = self.rpn_head(x)\n\n        rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)\n        losses = self.rpn_head.loss(\n            *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, img, img_meta, rescale=False):\n        x = self.extract_feat(img)\n        proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn)\n        if rescale:\n  
          for proposals, meta in zip(proposal_list, img_meta):\n                proposals[:, :4] /= meta['scale_factor']\n        # TODO: remove this restriction\n        return proposal_list[0].cpu().numpy()\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        proposal_list = self.aug_test_rpn(\n            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)\n        if not rescale:\n            for proposals, img_meta in zip(proposal_list, img_metas[0]):\n                img_shape = img_meta['img_shape']\n                scale_factor = img_meta['scale_factor']\n                flip = img_meta['flip']\n                proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,\n                                                scale_factor, flip)\n        # TODO: remove this restriction\n        return proposal_list[0].cpu().numpy()\n\n    def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20):\n        \"\"\"Show RPN proposals on the image.\n\n        Although we assume batch size is 1, this method supports arbitrary\n        batch size.\n        \"\"\"\n        img_tensor = data['img'][0]\n        img_metas = data['img_meta'][0].data[0]\n        imgs = tensor2imgs(img_tensor, **img_norm_cfg)\n        assert len(imgs) == len(img_metas)\n        for img, img_meta in zip(imgs, img_metas):\n            h, w, _ = img_meta['img_shape']\n            img_show = img[:h, :w, :]\n            mmcv.imshow_bboxes(img_show, result, top_k=top_k)\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/single_stage.py",
    "content": "import torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet.core import bbox2result\n\n\n@DETECTORS.register_module\nclass SingleStageDetector(BaseDetector):\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(SingleStageDetector, self).__init__()\n        self.backbone = builder.build_backbone(backbone)\n        if neck is not None:\n            self.neck = builder.build_neck(neck)\n        self.bbox_head = builder.build_head(bbox_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.init_weights(pretrained=pretrained)\n\n    def init_weights(self, pretrained=None):\n        super(SingleStageDetector, self).init_weights(pretrained)\n        self.backbone.init_weights(pretrained=pretrained)\n        if self.with_neck:\n            if isinstance(self.neck, nn.Sequential):\n                for m in self.neck:\n                    m.init_weights()\n            else:\n                self.neck.init_weights()\n        self.bbox_head.init_weights()\n\n    def extract_feat(self, img):\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None):\n        x = self.extract_feat(img)\n        outs = self.bbox_head(x)\n        loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)\n        losses = self.bbox_head.loss(\n            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, img, img_meta, rescale=False):\n        x = self.extract_feat(img)\n        outs = 
self.bbox_head(x)\n        bbox_inputs = outs + (img_meta, self.test_cfg, rescale)\n        bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)\n        bbox_results = [\n            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)\n            for det_bboxes, det_labels in bbox_list\n        ]\n        return bbox_results[0]\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        raise NotImplementedError\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/test_mixins.py",
    "content": "from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,\n                        merge_aug_bboxes, merge_aug_masks, multiclass_nms)\n\n\nclass RPNTestMixin(object):\n\n    def simple_test_rpn(self, x, img_meta, rpn_test_cfg):\n        rpn_outs = self.rpn_head(x)\n        proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)\n        proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        return proposal_list\n\n    def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):\n        imgs_per_gpu = len(img_metas[0])\n        aug_proposals = [[] for _ in range(imgs_per_gpu)]\n        for x, img_meta in zip(feats, img_metas):\n            proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg)\n            for i, proposals in enumerate(proposal_list):\n                aug_proposals[i].append(proposals)\n        # after merging, proposals will be rescaled to the original image size\n        merged_proposals = [\n            merge_aug_proposals(proposals, img_meta, rpn_test_cfg)\n            for proposals, img_meta in zip(aug_proposals, img_metas)\n        ]\n        return merged_proposals\n\n\nclass BBoxTestMixin(object):\n\n    def simple_test_bboxes(self,\n                           x,\n                           img_meta,\n                           proposals,\n                           rcnn_test_cfg,\n                           rescale=False):\n        \"\"\"Test only det bboxes without augmentation.\"\"\"\n        rois = bbox2roi(proposals)\n        roi_feats = self.bbox_roi_extractor(\n            x[:len(self.bbox_roi_extractor.featmap_strides)], rois)\n        if self.with_shared_head:\n            roi_feats = self.shared_head(roi_feats)\n        cls_score, bbox_pred = self.bbox_head(roi_feats)\n        img_shape = img_meta[0]['img_shape']\n        scale_factor = img_meta[0]['scale_factor']\n        det_bboxes, det_labels = self.bbox_head.get_det_bboxes(\n            rois,\n            cls_score,\n            
bbox_pred,\n            img_shape,\n            scale_factor,\n            rescale=rescale,\n            cfg=rcnn_test_cfg)\n        return det_bboxes, det_labels\n\n    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta in zip(feats, img_metas):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n            # TODO more flexible\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip)\n            rois = bbox2roi([proposals])\n            # recompute feature maps to save GPU memory\n            roi_feats = self.bbox_roi_extractor(\n                x[:len(self.bbox_roi_extractor.featmap_strides)], rois)\n            if self.with_shared_head:\n                roi_feats = self.shared_head(roi_feats)\n            cls_score, bbox_pred = self.bbox_head(roi_feats)\n            bboxes, scores = self.bbox_head.get_det_bboxes(\n                rois,\n                cls_score,\n                bbox_pred,\n                img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(\n            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,\n            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)\n        return det_bboxes, det_labels\n\n\nclass MaskTestMixin(object):\n\n    def simple_test_mask(self,\n                         x,\n                         img_meta,\n                         
det_bboxes,\n                         det_labels,\n                         rescale=False):\n        # image shape of the first image in the batch (only one)\n        ori_shape = img_meta[0]['ori_shape']\n        scale_factor = img_meta[0]['scale_factor']\n        if det_bboxes.shape[0] == 0:\n            segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]\n        else:\n            # if det_bboxes is rescaled to the original image size, we need to\n            # rescale it back to the testing scale to obtain RoIs.\n            _bboxes = (det_bboxes[:, :4] * scale_factor\n                       if rescale else det_bboxes)\n            mask_rois = bbox2roi([_bboxes])\n            mask_feats = self.mask_roi_extractor(\n                x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)\n            if self.with_shared_head:\n                mask_feats = self.shared_head(mask_feats)\n            mask_pred = self.mask_head(mask_feats)\n            segm_result = self.mask_head.get_seg_masks(\n                mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape,\n                scale_factor, rescale)\n        return segm_result\n\n    def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):\n        if det_bboxes.shape[0] == 0:\n            segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]\n        else:\n            aug_masks = []\n            for x, img_meta in zip(feats, img_metas):\n                img_shape = img_meta[0]['img_shape']\n                scale_factor = img_meta[0]['scale_factor']\n                flip = img_meta[0]['flip']\n                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                       scale_factor, flip)\n                mask_rois = bbox2roi([_bboxes])\n                mask_feats = self.mask_roi_extractor(\n                    x[:len(self.mask_roi_extractor.featmap_strides)],\n                    mask_rois)\n                if 
self.with_shared_head:\n                    mask_feats = self.shared_head(mask_feats)\n                mask_pred = self.mask_head(mask_feats)\n                # convert to numpy array to save memory\n                aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n            merged_masks = merge_aug_masks(aug_masks, img_metas,\n                                           self.test_cfg.rcnn)\n\n            ori_shape = img_metas[0][0]['ori_shape']\n            segm_result = self.mask_head.get_seg_masks(\n                merged_masks,\n                det_bboxes,\n                det_labels,\n                self.test_cfg.rcnn,\n                ori_shape,\n                scale_factor=1.0,\n                rescale=False)\n        return segm_result\n"
  },
  {
    "path": "mmdetection/mmdet/models/detectors/two_stage.py",
    "content": "import torch\nimport torch.nn as nn\n\nfrom .base import BaseDetector\nfrom .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin\nfrom .. import builder\nfrom ..registry import DETECTORS\nfrom mmdet.core import bbox2roi, bbox2result, build_assigner, build_sampler\n\n\n@DETECTORS.register_module\nclass TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,\n                       MaskTestMixin):\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 shared_head=None,\n                 rpn_head=None,\n                 bbox_roi_extractor=None,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(TwoStageDetector, self).__init__()\n        self.backbone = builder.build_backbone(backbone)\n\n        if neck is not None:\n            self.neck = builder.build_neck(neck)\n\n        if shared_head is not None:\n            self.shared_head = builder.build_shared_head(shared_head)\n\n        if rpn_head is not None:\n            self.rpn_head = builder.build_head(rpn_head)\n\n        if bbox_head is not None:\n            self.bbox_roi_extractor = builder.build_roi_extractor(\n                bbox_roi_extractor)\n            self.bbox_head = builder.build_head(bbox_head)\n\n        if mask_head is not None:\n            if mask_roi_extractor is not None:\n                self.mask_roi_extractor = builder.build_roi_extractor(\n                    mask_roi_extractor)\n                self.share_roi_extractor = False\n            else:\n                self.share_roi_extractor = True\n                self.mask_roi_extractor = self.bbox_roi_extractor\n            self.mask_head = builder.build_head(mask_head)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        
self.init_weights(pretrained=pretrained)\n\n    @property\n    def with_rpn(self):\n        return hasattr(self, 'rpn_head') and self.rpn_head is not None\n\n    def init_weights(self, pretrained=None):\n        super(TwoStageDetector, self).init_weights(pretrained)\n        self.backbone.init_weights(pretrained=pretrained)\n        if self.with_neck:\n            if isinstance(self.neck, nn.Sequential):\n                for m in self.neck:\n                    m.init_weights()\n            else:\n                self.neck.init_weights()\n        if self.with_shared_head:\n            self.shared_head.init_weights(pretrained=pretrained)\n        if self.with_rpn:\n            self.rpn_head.init_weights()\n        if self.with_bbox:\n            self.bbox_roi_extractor.init_weights()\n            self.bbox_head.init_weights()\n        if self.with_mask:\n            self.mask_head.init_weights()\n            if not self.share_roi_extractor:\n                self.mask_roi_extractor.init_weights()\n\n    def extract_feat(self, img):\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_train(self,\n                      img,\n                      img_meta,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None):\n        x = self.extract_feat(img)\n\n        losses = dict()\n\n        # RPN forward and loss\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(x)\n            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,\n                                          self.train_cfg.rpn)\n            rpn_losses = self.rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              
self.test_cfg.rpn)\n            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        # assign gts and sample proposals\n        if self.with_bbox or self.with_mask:\n            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)\n            bbox_sampler = build_sampler(\n                self.train_cfg.rcnn.sampler, context=self)\n            num_imgs = img.size(0)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n            sampling_results = []\n            for i in range(num_imgs):\n                assign_result = bbox_assigner.assign(\n                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],\n                    gt_labels[i])\n                sampling_result = bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[i],\n                    gt_bboxes[i],\n                    gt_labels[i],\n                    feats=[lvl_feat[i][None] for lvl_feat in x])\n                sampling_results.append(sampling_result)\n\n        # bbox head forward and loss\n        if self.with_bbox:\n            rois = bbox2roi([res.bboxes for res in sampling_results])\n            # TODO: a more flexible way to decide which feature maps to use\n            bbox_feats = self.bbox_roi_extractor(\n                x[:self.bbox_roi_extractor.num_inputs], rois)\n            if self.with_shared_head:\n                bbox_feats = self.shared_head(bbox_feats)\n            cls_score, bbox_pred = self.bbox_head(bbox_feats)\n\n            bbox_targets = self.bbox_head.get_target(\n                sampling_results, gt_bboxes, gt_labels, self.train_cfg.rcnn)\n            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,\n                                            *bbox_targets)\n            losses.update(loss_bbox)\n\n        # mask 
head forward and loss\n        if self.with_mask:\n            if not self.share_roi_extractor:\n                pos_rois = bbox2roi(\n                    [res.pos_bboxes for res in sampling_results])\n                mask_feats = self.mask_roi_extractor(\n                    x[:self.mask_roi_extractor.num_inputs], pos_rois)\n                if self.with_shared_head:\n                    mask_feats = self.shared_head(mask_feats)\n            else:\n                pos_inds = []\n                device = bbox_feats.device\n                for res in sampling_results:\n                    pos_inds.append(\n                        torch.ones(\n                            res.pos_bboxes.shape[0],\n                            device=device,\n                            dtype=torch.uint8))\n                    pos_inds.append(\n                        torch.zeros(\n                            res.neg_bboxes.shape[0],\n                            device=device,\n                            dtype=torch.uint8))\n                pos_inds = torch.cat(pos_inds)\n                mask_feats = bbox_feats[pos_inds]\n            mask_pred = self.mask_head(mask_feats)\n\n            mask_targets = self.mask_head.get_target(\n                sampling_results, gt_masks, self.train_cfg.rcnn)\n            pos_labels = torch.cat(\n                [res.pos_gt_labels for res in sampling_results])\n            loss_mask = self.mask_head.loss(mask_pred, mask_targets,\n                                            pos_labels)\n            losses.update(loss_mask)\n\n        return losses\n\n    def simple_test(self, img, img_meta, proposals=None, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        assert self.with_bbox, \"Bbox head must be implemented.\"\n\n        x = self.extract_feat(img)\n\n        proposal_list = self.simple_test_rpn(\n            x, img_meta, self.test_cfg.rpn) if proposals is None else proposals\n\n        det_bboxes, det_labels = 
self.simple_test_bboxes(\n            x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)\n        bbox_results = bbox2result(det_bboxes, det_labels,\n                                   self.bbox_head.num_classes)\n\n        if not self.with_mask:\n            return bbox_results\n        else:\n            segm_results = self.simple_test_mask(\n                x, img_meta, det_bboxes, det_labels, rescale=rescale)\n            return bbox_results, segm_results\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        # recompute feats to save memory\n        proposal_list = self.aug_test_rpn(\n            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)\n        det_bboxes, det_labels = self.aug_test_bboxes(\n            self.extract_feats(imgs), img_metas, proposal_list,\n            self.test_cfg.rcnn)\n\n        if rescale:\n            _det_bboxes = det_bboxes\n        else:\n            _det_bboxes = det_bboxes.clone()\n            _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']\n        bbox_results = bbox2result(_det_bboxes, det_labels,\n                                   self.bbox_head.num_classes)\n\n        # det_bboxes always keep the original scale\n        if self.with_mask:\n            segm_results = self.aug_test_mask(\n                self.extract_feats(imgs), img_metas, det_bboxes, det_labels)\n            return bbox_results, segm_results\n        else:\n            return bbox_results\n"
  },
  {
    "path": "mmdetection/mmdet/models/mask_heads/__init__.py",
    "content": "from .fcn_mask_head import FCNMaskHead\nfrom .htc_mask_head import HTCMaskHead\nfrom .fused_semantic_head import FusedSemanticHead\n\n__all__ = ['FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead']\n"
  },
  {
    "path": "mmdetection/mmdet/models/mask_heads/fcn_mask_head.py",
    "content": "import mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nimport torch\nimport torch.nn as nn\n\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\nfrom mmdet.core import mask_cross_entropy, mask_target\n\n\n@HEADS.register_module\nclass FCNMaskHead(nn.Module):\n\n    def __init__(self,\n                 num_convs=4,\n                 roi_feat_size=14,\n                 in_channels=256,\n                 conv_kernel_size=3,\n                 conv_out_channels=256,\n                 upsample_method='deconv',\n                 upsample_ratio=2,\n                 num_classes=81,\n                 class_agnostic=False,\n                 conv_cfg=None,\n                 norm_cfg=None):\n        super(FCNMaskHead, self).__init__()\n        if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:\n            raise ValueError(\n                'Invalid upsample method {}, accepted methods '\n                'are \"deconv\", \"nearest\", \"bilinear\"'.format(upsample_method))\n        self.num_convs = num_convs\n        self.roi_feat_size = roi_feat_size  # WARN: not used and reserved\n        self.in_channels = in_channels\n        self.conv_kernel_size = conv_kernel_size\n        self.conv_out_channels = conv_out_channels\n        self.upsample_method = upsample_method\n        self.upsample_ratio = upsample_ratio\n        self.num_classes = num_classes\n        self.class_agnostic = class_agnostic\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self.convs = nn.ModuleList()\n        for i in range(self.num_convs):\n            in_channels = (self.in_channels\n                           if i == 0 else self.conv_out_channels)\n            padding = (self.conv_kernel_size - 1) // 2\n            self.convs.append(\n                ConvModule(\n                    in_channels,\n                    self.conv_out_channels,\n                    self.conv_kernel_size,\n                    
padding=padding,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg))\n        upsample_in_channels = (self.conv_out_channels\n                                if self.num_convs > 0 else in_channels)\n        if self.upsample_method is None:\n            self.upsample = None\n        elif self.upsample_method == 'deconv':\n            self.upsample = nn.ConvTranspose2d(\n                upsample_in_channels,\n                self.conv_out_channels,\n                self.upsample_ratio,\n                stride=self.upsample_ratio)\n        else:\n            self.upsample = nn.Upsample(\n                scale_factor=self.upsample_ratio, mode=self.upsample_method)\n\n        out_channels = 1 if self.class_agnostic else self.num_classes\n        logits_in_channel = (self.conv_out_channels\n                             if self.upsample_method == 'deconv' else\n                             upsample_in_channels)\n        self.conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1)\n        self.relu = nn.ReLU(inplace=True)\n        self.debug_imgs = None\n\n    def init_weights(self):\n        for m in [self.upsample, self.conv_logits]:\n            if m is None:\n                continue\n            nn.init.kaiming_normal_(\n                m.weight, mode='fan_out', nonlinearity='relu')\n            nn.init.constant_(m.bias, 0)\n\n    def forward(self, x):\n        for conv in self.convs:\n            x = conv(x)\n        if self.upsample is not None:\n            x = self.upsample(x)\n            if self.upsample_method == 'deconv':\n                x = self.relu(x)\n        mask_pred = self.conv_logits(x)\n        return mask_pred\n\n    def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        pos_assigned_gt_inds = [\n            res.pos_assigned_gt_inds for res in sampling_results\n        ]\n        mask_targets = mask_target(pos_proposals, 
pos_assigned_gt_inds,\n                                   gt_masks, rcnn_train_cfg)\n        return mask_targets\n\n    def loss(self, mask_pred, mask_targets, labels):\n        loss = dict()\n        if self.class_agnostic:\n            loss_mask = mask_cross_entropy(mask_pred, mask_targets,\n                                           torch.zeros_like(labels))\n        else:\n            loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)\n        loss['loss_mask'] = loss_mask\n        return loss\n\n    def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,\n                      ori_shape, scale_factor, rescale):\n        \"\"\"Get segmentation masks from mask_pred and bboxes.\n\n        Args:\n            mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).\n                For single-scale testing, mask_pred is the direct output of\n                model, whose type is Tensor, while for multi-scale testing,\n                it will be converted to numpy array outside of this method.\n            det_bboxes (Tensor): shape (n, 4/5)\n            det_labels (Tensor): shape (n, )\n            rcnn_test_cfg (dict): rcnn testing config\n            ori_shape: original image size\n            scale_factor: if rescale is True, det_bboxes are divided by it;\n                otherwise ori_shape is multiplied by it to size the masks.\n            rescale (bool): whether masks are generated at ori_shape.\n\n        Returns:\n            list[list]: encoded masks\n        \"\"\"\n        if isinstance(mask_pred, torch.Tensor):\n            mask_pred = mask_pred.sigmoid().cpu().numpy()\n        assert isinstance(mask_pred, np.ndarray)\n\n        cls_segms = [[] for _ in range(self.num_classes - 1)]\n        bboxes = det_bboxes.cpu().numpy()[:, :4]\n        labels = det_labels.cpu().numpy() + 1\n\n        if rescale:\n            img_h, img_w = ori_shape[:2]\n        else:\n            img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)\n            img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)\n            scale_factor = 1.0\n\n        for i in range(bboxes.shape[0]):\n            bbox 
= (bboxes[i, :] / scale_factor).astype(np.int32)\n            label = labels[i]\n            w = max(bbox[2] - bbox[0] + 1, 1)\n            h = max(bbox[3] - bbox[1] + 1, 1)\n\n            if not self.class_agnostic:\n                mask_pred_ = mask_pred[i, label, :, :]\n            else:\n                mask_pred_ = mask_pred[i, 0, :, :]\n            im_mask = np.zeros((img_h, img_w), dtype=np.uint8)\n\n            bbox_mask = mmcv.imresize(mask_pred_, (w, h))\n            bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(\n                np.uint8)\n            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask\n            rle = mask_util.encode(\n                np.array(im_mask[:, :, np.newaxis], order='F'))[0]\n            cls_segms[label - 1].append(rle)\n\n        return cls_segms\n"
  },
  {
    "path": "mmdetection/mmdet/models/mask_heads/fused_semantic_head.py",
    "content": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import kaiming_init\n\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HEADS.register_module\nclass FusedSemanticHead(nn.Module):\n    \"\"\"Multi-level fused semantic segmentation head.\n\n    in_1 -> 1x1 conv ---\n                        |\n    in_2 -> 1x1 conv -- |\n                       ||\n    in_3 -> 1x1 conv - ||\n                      |||                  /-> 1x1 conv (mask prediction)\n    in_4 -> 1x1 conv -----> 3x3 convs (*4)\n                        |                  \\-> 1x1 conv (feature)\n    in_5 -> 1x1 conv ---\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_ins,\n                 fusion_level,\n                 num_convs=4,\n                 in_channels=256,\n                 conv_out_channels=256,\n                 num_classes=183,\n                 ignore_label=255,\n                 loss_weight=0.2,\n                 conv_cfg=None,\n                 norm_cfg=None):\n        super(FusedSemanticHead, self).__init__()\n        self.num_ins = num_ins\n        self.fusion_level = fusion_level\n        self.num_convs = num_convs\n        self.in_channels = in_channels\n        self.conv_out_channels = conv_out_channels\n        self.num_classes = num_classes\n        self.ignore_label = ignore_label\n        self.loss_weight = loss_weight\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self.lateral_convs = nn.ModuleList()\n        for i in range(self.num_ins):\n            self.lateral_convs.append(\n                ConvModule(\n                    self.in_channels,\n                    self.in_channels,\n                    1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    inplace=False))\n\n        self.convs = nn.ModuleList()\n        for i in range(self.num_convs):\n            in_channels = self.in_channels if i == 0 else 
conv_out_channels\n            self.convs.append(\n                ConvModule(\n                    in_channels,\n                    conv_out_channels,\n                    3,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.conv_embedding = ConvModule(\n            conv_out_channels,\n            conv_out_channels,\n            1,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg)\n        self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1)\n\n        self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label)\n\n    def init_weights(self):\n        kaiming_init(self.conv_logits)\n\n    def forward(self, feats):\n        x = self.lateral_convs[self.fusion_level](feats[self.fusion_level])\n        fused_size = tuple(x.shape[-2:])\n        for i, feat in enumerate(feats):\n            if i != self.fusion_level:\n                feat = F.interpolate(\n                    feat,\n                    size=fused_size,\n                    mode='bilinear',\n                    align_corners=True)\n                x += self.lateral_convs[i](feat)\n\n        for i in range(self.num_convs):\n            x = self.convs[i](x)\n\n        mask_pred = self.conv_logits(x)\n        x = self.conv_embedding(x)\n        return mask_pred, x\n\n    def loss(self, mask_pred, labels):\n        labels = labels.squeeze(1).long()\n        loss_semantic_seg = self.criterion(mask_pred, labels)\n        loss_semantic_seg *= self.loss_weight\n        return loss_semantic_seg\n"
  },
  {
    "path": "mmdetection/mmdet/models/mask_heads/htc_mask_head.py",
    "content": "from .fcn_mask_head import FCNMaskHead\nfrom ..registry import HEADS\nfrom ..utils import ConvModule\n\n\n@HEADS.register_module\nclass HTCMaskHead(FCNMaskHead):\n\n    def __init__(self, *args, **kwargs):\n        super(HTCMaskHead, self).__init__(*args, **kwargs)\n        self.conv_res = ConvModule(\n            self.conv_out_channels,\n            self.conv_out_channels,\n            1,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg)\n\n    def init_weights(self):\n        super(HTCMaskHead, self).init_weights()\n        self.conv_res.init_weights()\n\n    def forward(self, x, res_feat=None, return_logits=True, return_feat=True):\n        if res_feat is not None:\n            res_feat = self.conv_res(res_feat)\n            x = x + res_feat\n        for conv in self.convs:\n            x = conv(x)\n        res_feat = x\n        outs = []\n        if return_logits:\n            x = self.upsample(x)\n            if self.upsample_method == 'deconv':\n                x = self.relu(x)\n            mask_pred = self.conv_logits(x)\n            outs.append(mask_pred)\n        if return_feat:\n            outs.append(res_feat)\n        return outs if len(outs) > 1 else outs[0]\n"
  },
  {
    "path": "mmdetection/mmdet/models/necks/__init__.py",
    "content": "from .fpn import FPN\n\n__all__ = ['FPN']\n"
  },
  {
    "path": "mmdetection/mmdet/models/necks/fpn.py",
    "content": "import torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init\n\nfrom ..registry import NECKS\nfrom ..utils import ConvModule\n\n\n@NECKS.register_module\nclass FPN(nn.Module):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=0,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 extra_convs_on_inputs=True,\n                 relu_before_extra_convs=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 activation=None):\n        super(FPN, self).__init__()\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.activation = activation\n        self.relu_before_extra_convs = relu_before_extra_convs\n\n        if end_level == -1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level < inputs, no extra level is allowed\n            self.backbone_end_level = end_level\n            assert end_level <= len(in_channels)\n            assert num_outs == end_level - start_level\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n        self.extra_convs_on_inputs = extra_convs_on_inputs\n\n        self.lateral_convs = nn.ModuleList()\n        self.fpn_convs = nn.ModuleList()\n\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                activation=self.activation,\n                inplace=False)\n            fpn_conv = 
ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                activation=self.activation,\n                inplace=False)\n\n            self.lateral_convs.append(l_conv)\n            self.fpn_convs.append(fpn_conv)\n\n        # add extra conv layers (e.g., RetinaNet)\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n        if add_extra_convs and extra_levels >= 1:\n            for i in range(extra_levels):\n                if i == 0 and self.extra_convs_on_inputs:\n                    in_channels = self.in_channels[self.backbone_end_level - 1]\n                else:\n                    in_channels = out_channels\n                extra_fpn_conv = ConvModule(\n                    in_channels,\n                    out_channels,\n                    3,\n                    stride=2,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    activation=self.activation,\n                    inplace=False)\n                self.fpn_convs.append(extra_fpn_conv)\n\n    # default init_weights for conv(msra) and norm in ConvModule\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                xavier_init(m, distribution='uniform')\n\n    def forward(self, inputs):\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n\n        # build top-down path\n        used_backbone_levels = len(laterals)\n        for i in range(used_backbone_levels - 1, 0, -1):\n            laterals[i - 1] += F.interpolate(\n                laterals[i], scale_factor=2, mode='nearest')\n\n        # build outputs\n    
    # part 1: from original levels\n        outs = [\n            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)\n        ]\n        # part 2: add extra levels\n        if self.num_outs > len(outs):\n            # use max pool to get more levels on top of outputs\n            # (e.g., Faster R-CNN, Mask R-CNN)\n            if not self.add_extra_convs:\n                for i in range(self.num_outs - used_backbone_levels):\n                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))\n            # add conv layers on top of original feature maps (RetinaNet)\n            else:\n                if self.extra_convs_on_inputs:\n                    orig = inputs[self.backbone_end_level - 1]\n                    outs.append(self.fpn_convs[used_backbone_levels](orig))\n                else:\n                    outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))\n                for i in range(used_backbone_levels + 1, self.num_outs):\n                    if self.relu_before_extra_convs:\n                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))\n                    else:\n                        outs.append(self.fpn_convs[i](outs[-1]))\n        return tuple(outs)\n"
  },
  {
    "path": "mmdetection/mmdet/models/registry.py",
    "content": "import torch.nn as nn\n\n\nclass Registry(object):\n\n    def __init__(self, name):\n        self._name = name\n        self._module_dict = dict()\n\n    @property\n    def name(self):\n        return self._name\n\n    @property\n    def module_dict(self):\n        return self._module_dict\n\n    def _register_module(self, module_class):\n        \"\"\"Register a module.\n\n        Args:\n            module (:obj:`nn.Module`): Module to be registered.\n        \"\"\"\n        if not issubclass(module_class, nn.Module):\n            raise TypeError('module must be a child of nn.Module, but got {}'.\n                            format(module_class))\n        module_name = module_class.__name__\n        if module_name in self._module_dict:\n            raise KeyError('{} is already registered in {}'.format(\n                module_name, self.name))\n        self._module_dict[module_name] = module_class\n\n    def register_module(self, cls):\n        self._register_module(cls)\n        return cls\n\n\nBACKBONES = Registry('backbone')\nNECKS = Registry('neck')\nROI_EXTRACTORS = Registry('roi_extractor')\nSHARED_HEADS = Registry('shared_head')\nHEADS = Registry('head')\nDETECTORS = Registry('detector')\n"
  },
  {
    "path": "mmdetection/mmdet/models/roi_extractors/__init__.py",
    "content": "from .single_level import SingleRoIExtractor\n\n__all__ = ['SingleRoIExtractor']\n"
  },
  {
    "path": "mmdetection/mmdet/models/roi_extractors/single_level.py",
    "content": "from __future__ import division\n\nimport torch\nimport torch.nn as nn\n\nfrom mmdet import ops\nfrom ..registry import ROI_EXTRACTORS\n\n\n@ROI_EXTRACTORS.register_module\nclass SingleRoIExtractor(nn.Module):\n    \"\"\"Extract RoI features from a single level feature map.\n\n    If there are mulitple input feature levels, each RoI is mapped to a level\n    according to its scale.\n\n    Args:\n        roi_layer (dict): Specify RoI layer type and arguments.\n        out_channels (int): Output channels of RoI layers.\n        featmap_strides (int): Strides of input feature maps.\n        finest_scale (int): Scale threshold of mapping to level 0.\n    \"\"\"\n\n    def __init__(self,\n                 roi_layer,\n                 out_channels,\n                 featmap_strides,\n                 finest_scale=56):\n        super(SingleRoIExtractor, self).__init__()\n        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)\n        self.out_channels = out_channels\n        self.featmap_strides = featmap_strides\n        self.finest_scale = finest_scale\n\n    @property\n    def num_inputs(self):\n        \"\"\"int: Input feature map levels.\"\"\"\n        return len(self.featmap_strides)\n\n    def init_weights(self):\n        pass\n\n    def build_roi_layers(self, layer_cfg, featmap_strides):\n        cfg = layer_cfg.copy()\n        layer_type = cfg.pop('type')\n        assert hasattr(ops, layer_type)\n        layer_cls = getattr(ops, layer_type)\n        roi_layers = nn.ModuleList(\n            [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])\n        return roi_layers\n\n    def map_roi_levels(self, rois, num_levels):\n        \"\"\"Map rois to corresponding feature levels by scales.\n\n        - scale < finest_scale: level 0\n        - finest_scale <= scale < finest_scale * 2: level 1\n        - finest_scale * 2 <= scale < finest_scale * 4: level 2\n        - scale >= finest_scale * 4: level 3\n\n        Args:\n  
          rois (Tensor): Input RoIs, shape (k, 5).\n            num_levels (int): Total level number.\n\n        Returns:\n            Tensor: Level index (0-based) of each RoI, shape (k, )\n        \"\"\"\n        scale = torch.sqrt(\n            (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))\n        target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))\n        target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()\n        return target_lvls\n\n    def forward(self, feats, rois):\n        if len(feats) == 1:\n            return self.roi_layers[0](feats[0], rois)\n\n        out_size = self.roi_layers[0].out_size\n        num_levels = len(feats)\n        target_lvls = self.map_roi_levels(rois, num_levels)\n        roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels,\n                                           out_size, out_size).fill_(0)\n        for i in range(num_levels):\n            inds = target_lvls == i\n            if inds.any():\n                rois_ = rois[inds, :]\n                roi_feats_t = self.roi_layers[i](feats[i], rois_)\n                roi_feats[inds] += roi_feats_t\n        return roi_feats\n"
  },
  {
    "path": "mmdetection/mmdet/models/shared_heads/__init__.py",
    "content": "from .res_layer import ResLayer\n\n__all__ = ['ResLayer']\n"
  },
  {
    "path": "mmdetection/mmdet/models/shared_heads/res_layer.py",
    "content": "import logging\n\nimport torch.nn as nn\nfrom mmcv.cnn import constant_init, kaiming_init\nfrom mmcv.runner import load_checkpoint\n\nfrom ..backbones import ResNet, make_res_layer\nfrom ..registry import SHARED_HEADS\n\n\n@SHARED_HEADS.register_module\nclass ResLayer(nn.Module):\n\n    def __init__(self,\n                 depth,\n                 stage=3,\n                 stride=2,\n                 dilation=1,\n                 style='pytorch',\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 norm_eval=True,\n                 with_cp=False,\n                 dcn=None):\n        super(ResLayer, self).__init__()\n        self.norm_eval = norm_eval\n        self.norm_cfg = norm_cfg\n        self.stage = stage\n        block, stage_blocks = ResNet.arch_settings[depth]\n        stage_block = stage_blocks[stage]\n        planes = 64 * 2**stage\n        inplanes = 64 * 2**(stage - 1) * block.expansion\n\n        res_layer = make_res_layer(\n            block,\n            inplanes,\n            planes,\n            stage_block,\n            stride=stride,\n            dilation=dilation,\n            style=style,\n            with_cp=with_cp,\n            norm_cfg=self.norm_cfg,\n            dcn=dcn)\n        self.add_module('layer{}'.format(stage + 1), res_layer)\n\n    def init_weights(self, pretrained=None):\n        if isinstance(pretrained, str):\n            logger = logging.getLogger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n        elif pretrained is None:\n            for m in self.modules():\n                if isinstance(m, nn.Conv2d):\n                    kaiming_init(m)\n                elif isinstance(m, nn.BatchNorm2d):\n                    constant_init(m, 1)\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n    def forward(self, x):\n        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))\n        out = res_layer(x)\n        
return out\n\n    def train(self, mode=True):\n        super(ResLayer, self).train(mode)\n        if self.norm_eval:\n            for m in self.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eval()\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/__init__.py",
    "content": "from .conv_ws import conv_ws_2d, ConvWS2d\nfrom .conv_module import build_conv_layer, ConvModule\nfrom .norm import build_norm_layer\nfrom .scale import Scale\nfrom .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,\n                          bias_init_with_prob)\n\n__all__ = [\n    'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',\n    'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',\n    'kaiming_init', 'bias_init_with_prob', 'Scale'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/conv_module.py",
    "content": "import warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import kaiming_init, constant_init\n\nfrom .conv_ws import ConvWS2d\nfrom .norm import build_norm_layer\n\nconv_cfg = {\n    'Conv': nn.Conv2d,\n    'ConvWS': ConvWS2d,\n    # TODO: octave conv\n}\n\n\ndef build_conv_layer(cfg, *args, **kwargs):\n    \"\"\" Build convolution layer\n\n    Args:\n        cfg (None or dict): cfg should contain:\n            type (str): identify conv layer type.\n            layer args: args needed to instantiate a conv layer.\n\n    Returns:\n        layer (nn.Module): created conv layer\n    \"\"\"\n    if cfg is None:\n        cfg_ = dict(type='Conv')\n    else:\n        assert isinstance(cfg, dict) and 'type' in cfg\n        cfg_ = cfg.copy()\n\n    layer_type = cfg_.pop('type')\n    if layer_type not in conv_cfg:\n        raise KeyError('Unrecognized norm type {}'.format(layer_type))\n    else:\n        conv_layer = conv_cfg[layer_type]\n\n    layer = conv_layer(*args, **kwargs, **cfg_)\n\n    return layer\n\n\nclass ConvModule(nn.Module):\n    \"\"\"Conv-Norm-Activation block.\n\n    Args:\n        in_channels (int): Same as nn.Conv2d.\n        out_channels (int): Same as nn.Conv2d.\n        kernel_size (int or tuple[int]): Same as nn.Conv2d.\n        stride (int or tuple[int]): Same as nn.Conv2d.\n        padding (int or tuple[int]): Same as nn.Conv2d.\n        dilation (int or tuple[int]): Same as nn.Conv2d.\n        groups (int): Same as nn.Conv2d.\n        bias (bool or str): If specified as `auto`, it will be decided by the\n            norm_cfg. 
Bias will be set as True if norm_cfg is None, otherwise\n            False.\n        conv_cfg (dict): Config dict for convolution layer.\n        norm_cfg (dict): Config dict for normalization layer.\n        activation (str or None): Activation type, \"relu\" by default.\n        inplace (bool): Whether to use inplace mode for activation.\n        activate_last (bool): Whether to apply the activation layer in the\n            last. (Do not use this flag since the behavior and api may be\n            changed in the future.)\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias='auto',\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 activation='relu',\n                 inplace=True,\n                 activate_last=True):\n        super(ConvModule, self).__init__()\n        assert conv_cfg is None or isinstance(conv_cfg, dict)\n        assert norm_cfg is None or isinstance(norm_cfg, dict)\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.activation = activation\n        self.inplace = inplace\n        self.activate_last = activate_last\n\n        self.with_norm = norm_cfg is not None\n        self.with_activatation = activation is not None\n        # if the conv layer is before a norm layer, bias is unnecessary.\n        if bias == 'auto':\n            bias = False if self.with_norm else True\n        self.with_bias = bias\n\n        if self.with_norm and self.with_bias:\n            warnings.warn('ConvModule has norm and bias at the same time')\n\n        # build convolution layer\n        self.conv = build_conv_layer(conv_cfg,\n                                     in_channels,\n                                     out_channels,\n                                     kernel_size,\n        
                             stride=stride,\n                                     padding=padding,\n                                     dilation=dilation,\n                                     groups=groups,\n                                     bias=bias)\n        # export the attributes of self.conv to a higher level for convenience\n        self.in_channels = self.conv.in_channels\n        self.out_channels = self.conv.out_channels\n        self.kernel_size = self.conv.kernel_size\n        self.stride = self.conv.stride\n        self.padding = self.conv.padding\n        self.dilation = self.conv.dilation\n        self.transposed = self.conv.transposed\n        self.output_padding = self.conv.output_padding\n        self.groups = self.conv.groups\n\n        # build normalization layers\n        if self.with_norm:\n            norm_channels = out_channels if self.activate_last else in_channels\n            self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)\n            self.add_module(self.norm_name, norm)\n\n        # build activation layer\n        if self.with_activatation:\n            if self.activation not in ['relu']:\n                raise ValueError('{} is currently not supported.'.format(\n                    self.activation))\n            if self.activation == 'relu':\n                self.activate = nn.ReLU(inplace=inplace)\n\n        # Use msra init by default\n        self.init_weights()\n\n    @property\n    def norm(self):\n        return getattr(self, self.norm_name)\n\n    def init_weights(self):\n        nonlinearity = 'relu' if self.activation is None else self.activation\n        kaiming_init(self.conv, nonlinearity=nonlinearity)\n        if self.with_norm:\n            constant_init(self.norm, 1, bias=0)\n\n    def forward(self, x, activate=True, norm=True):\n        if self.activate_last:\n            x = self.conv(x)\n            if norm and self.with_norm:\n                x = self.norm(x)\n            if activate and 
self.with_activatation:\n                x = self.activate(x)\n        else:\n            # WARN: this may be removed or modified\n            if norm and self.with_norm:\n                x = self.norm(x)\n            if activate and self.with_activatation:\n                x = self.activate(x)\n            x = self.conv(x)\n        return x\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/conv_ws.py",
    "content": "import torch.nn as nn\nimport torch.nn.functional as F\n\n\ndef conv_ws_2d(input,\n               weight,\n               bias=None,\n               stride=1,\n               padding=0,\n               dilation=1,\n               groups=1,\n               eps=1e-5):\n    c_in = weight.size(0)\n    weight_flat = weight.view(c_in, -1)\n    mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)\n    std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)\n    weight = (weight - mean) / (std + eps)\n    return F.conv2d(input, weight, bias, stride, padding, dilation, groups)\n\n\nclass ConvWS2d(nn.Conv2d):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 eps=1e-5):\n        super(ConvWS2d, self).__init__(\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            groups=groups,\n            bias=bias)\n        self.eps = eps\n\n    def forward(self, x):\n        return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,\n                          self.dilation, self.groups, self.eps)\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/norm.py",
    "content": "import torch.nn as nn\n\n\nnorm_cfg = {\n    # format: layer_type: (abbreviation, module)\n    'BN': ('bn', nn.BatchNorm2d),\n    'SyncBN': ('bn', nn.SyncBatchNorm),\n    'GN': ('gn', nn.GroupNorm),\n    # and potentially 'SN'\n}\n\n\ndef build_norm_layer(cfg, num_features, postfix=''):\n    \"\"\" Build normalization layer\n\n    Args:\n        cfg (dict): cfg should contain:\n            type (str): identify norm layer type.\n            layer args: args needed to instantiate a norm layer.\n            requires_grad (bool): [optional] whether params require gradients\n        num_features (int): number of channels from input.\n        postfix (int, str): appended into norm abbreviation to\n            create named layer.\n\n    Returns:\n        name (str): abbreviation + postfix\n        layer (nn.Module): created norm layer\n    \"\"\"\n    assert isinstance(cfg, dict) and 'type' in cfg\n    cfg_ = cfg.copy()\n\n    layer_type = cfg_.pop('type')\n    if layer_type not in norm_cfg:\n        raise KeyError('Unrecognized norm type {}'.format(layer_type))\n    else:\n        abbr, norm_layer = norm_cfg[layer_type]\n        if norm_layer is None:\n            raise NotImplementedError\n\n    assert isinstance(postfix, (int, str))\n    name = abbr + str(postfix)\n\n    requires_grad = cfg_.pop('requires_grad', True)\n    cfg_.setdefault('eps', 1e-5)\n    if layer_type != 'GN':\n        layer = norm_layer(num_features, **cfg_)\n        if layer_type == 'SyncBN':\n            layer._specify_ddp_gpu_num(1)\n    else:\n        assert 'num_groups' in cfg_\n        layer = norm_layer(num_channels=num_features, **cfg_)\n\n    for param in layer.parameters():\n        param.requires_grad = requires_grad\n\n    return name, layer\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/scale.py",
    "content": "import torch\nimport torch.nn as nn\n\n\nclass Scale(nn.Module):\n\n    def __init__(self, scale=1.0):\n        super(Scale, self).__init__()\n        self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))\n\n    def forward(self, x):\n        return x * self.scale\n"
  },
  {
    "path": "mmdetection/mmdet/models/utils/weight_init.py",
    "content": "import numpy as np\nimport torch.nn as nn\n\n\ndef xavier_init(module, gain=1, bias=0, distribution='normal'):\n    assert distribution in ['uniform', 'normal']\n    if distribution == 'uniform':\n        nn.init.xavier_uniform_(module.weight, gain=gain)\n    else:\n        nn.init.xavier_normal_(module.weight, gain=gain)\n    if hasattr(module, 'bias'):\n        nn.init.constant_(module.bias, bias)\n\n\ndef normal_init(module, mean=0, std=1, bias=0):\n    nn.init.normal_(module.weight, mean, std)\n    if hasattr(module, 'bias'):\n        nn.init.constant_(module.bias, bias)\n\n\ndef uniform_init(module, a=0, b=1, bias=0):\n    nn.init.uniform_(module.weight, a, b)\n    if hasattr(module, 'bias'):\n        nn.init.constant_(module.bias, bias)\n\n\ndef kaiming_init(module,\n                 mode='fan_out',\n                 nonlinearity='relu',\n                 bias=0,\n                 distribution='normal'):\n    assert distribution in ['uniform', 'normal']\n    if distribution == 'uniform':\n        nn.init.kaiming_uniform_(\n            module.weight, mode=mode, nonlinearity=nonlinearity)\n    else:\n        nn.init.kaiming_normal_(\n            module.weight, mode=mode, nonlinearity=nonlinearity)\n    if hasattr(module, 'bias'):\n        nn.init.constant_(module.bias, bias)\n\n\ndef bias_init_with_prob(prior_prob):\n    \"\"\" initialize conv/fc bias value according to the given probability\"\"\"\n    bias_init = float(-np.log((1 - prior_prob) / prior_prob))\n    return bias_init\n"
  },
  {
    "path": "mmdetection/mmdet/ops/__init__.py",
    "content": "from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv,\n                  ModulatedDeformConvPack, DeformRoIPooling,\n                  DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack,\n                  deform_conv, modulated_deform_conv, deform_roi_pooling)\nfrom .nms import nms, soft_nms\nfrom .roi_align import RoIAlign, roi_align\nfrom .roi_pool import RoIPool, roi_pool\nfrom .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss\n\n__all__ = [\n    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',\n    'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',\n    'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',\n    'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',\n    'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/__init__.py",
    "content": "from .functions.deform_conv import deform_conv, modulated_deform_conv\nfrom .functions.deform_pool import deform_roi_pooling\nfrom .modules.deform_conv import (DeformConv, ModulatedDeformConv,\n                                  DeformConvPack, ModulatedDeformConvPack)\nfrom .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,\n                                  ModulatedDeformRoIPoolingPack)\n\n__all__ = [\n    'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',\n    'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',\n    'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',\n    'deform_roi_pooling'\n]\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/functions/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/functions/deform_conv.py",
    "content": "import torch\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\n\nfrom .. import deform_conv_cuda\n\n\nclass DeformConvFunction(Function):\n\n    @staticmethod\n    def forward(ctx,\n                input,\n                offset,\n                weight,\n                stride=1,\n                padding=0,\n                dilation=1,\n                groups=1,\n                deformable_groups=1,\n                im2col_step=64):\n        if input is not None and input.dim() != 4:\n            raise ValueError(\n                \"Expected 4D tensor as input, got {}D tensor instead.\".format(\n                    input.dim()))\n        ctx.stride = _pair(stride)\n        ctx.padding = _pair(padding)\n        ctx.dilation = _pair(dilation)\n        ctx.groups = groups\n        ctx.deformable_groups = deformable_groups\n        ctx.im2col_step = im2col_step\n\n        ctx.save_for_backward(input, offset, weight)\n\n        output = input.new_empty(\n            DeformConvFunction._output_size(input, weight, ctx.padding,\n                                            ctx.dilation, ctx.stride))\n\n        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones\n\n        if not input.is_cuda:\n            raise NotImplementedError\n        else:\n            cur_im2col_step = min(ctx.im2col_step, input.shape[0])\n            assert (input.shape[0] %\n                    cur_im2col_step) == 0, 'im2col step must divide batchsize'\n            deform_conv_cuda.deform_conv_forward_cuda(\n                input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],\n                weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0],\n                ctx.padding[1], ctx.padding[0], ctx.dilation[1],\n                ctx.dilation[0], ctx.groups, ctx.deformable_groups,\n                cur_im2col_step)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        input, offset, 
weight = ctx.saved_tensors\n\n        grad_input = grad_offset = grad_weight = None\n\n        if not grad_output.is_cuda:\n            raise NotImplementedError\n        else:\n            cur_im2col_step = min(ctx.im2col_step, input.shape[0])\n            assert (input.shape[0] %\n                    cur_im2col_step) == 0, 'im2col step must divide batchsize'\n\n            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:\n                grad_input = torch.zeros_like(input)\n                grad_offset = torch.zeros_like(offset)\n                deform_conv_cuda.deform_conv_backward_input_cuda(\n                    input, offset, grad_output, grad_input,\n                    grad_offset, weight, ctx.bufs_[0], weight.size(3),\n                    weight.size(2), ctx.stride[1], ctx.stride[0],\n                    ctx.padding[1], ctx.padding[0], ctx.dilation[1],\n                    ctx.dilation[0], ctx.groups, ctx.deformable_groups,\n                    cur_im2col_step)\n\n            if ctx.needs_input_grad[2]:\n                grad_weight = torch.zeros_like(weight)\n                deform_conv_cuda.deform_conv_backward_parameters_cuda(\n                    input, offset, grad_output,\n                    grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),\n                    weight.size(2), ctx.stride[1], ctx.stride[0],\n                    ctx.padding[1], ctx.padding[0], ctx.dilation[1],\n                    ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1,\n                    cur_im2col_step)\n\n        return (grad_input, grad_offset, grad_weight, None, None, None, None,\n                None)\n\n    @staticmethod\n    def _output_size(input, weight, padding, dilation, stride):\n        channels = weight.size(0)\n        output_size = (input.size(0), channels)\n        for d in range(input.dim() - 2):\n            in_size = input.size(d + 2)\n            pad = padding[d]\n            kernel = dilation[d] * (weight.size(d + 2) - 1) + 1\n       
     stride_ = stride[d]\n            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )\n        if not all(map(lambda s: s > 0, output_size)):\n            raise ValueError(\n                \"convolution input is too small (output would be {})\".format(\n                    'x'.join(map(str, output_size))))\n        return output_size\n\n\nclass ModulatedDeformConvFunction(Function):\n\n    @staticmethod\n    def forward(ctx,\n                input,\n                offset,\n                mask,\n                weight,\n                bias=None,\n                stride=1,\n                padding=0,\n                dilation=1,\n                groups=1,\n                deformable_groups=1):\n        ctx.stride = stride\n        ctx.padding = padding\n        ctx.dilation = dilation\n        ctx.groups = groups\n        ctx.deformable_groups = deformable_groups\n        ctx.with_bias = bias is not None\n        if not ctx.with_bias:\n            bias = input.new_empty(1)  # fake tensor\n        if not input.is_cuda:\n            raise NotImplementedError\n        if weight.requires_grad or mask.requires_grad or offset.requires_grad \\\n                or input.requires_grad:\n            ctx.save_for_backward(input, offset, mask, weight, bias)\n        output = input.new_empty(\n            ModulatedDeformConvFunction._infer_shape(ctx, input, weight))\n        ctx._bufs = [input.new_empty(0), input.new_empty(0)]\n        deform_conv_cuda.modulated_deform_conv_cuda_forward(\n            input, weight, bias, ctx._bufs[0], offset, mask, output,\n            ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,\n            ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,\n            ctx.groups, ctx.deformable_groups, ctx.with_bias)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        if not grad_output.is_cuda:\n            raise NotImplementedError\n        input, offset, mask, weight, 
bias = ctx.saved_tensors\n        grad_input = torch.zeros_like(input)\n        grad_offset = torch.zeros_like(offset)\n        grad_mask = torch.zeros_like(mask)\n        grad_weight = torch.zeros_like(weight)\n        grad_bias = torch.zeros_like(bias)\n        deform_conv_cuda.modulated_deform_conv_cuda_backward(\n            input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],\n            grad_input, grad_weight, grad_bias, grad_offset, grad_mask,\n            grad_output, weight.shape[2], weight.shape[3], ctx.stride,\n            ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,\n            ctx.groups, ctx.deformable_groups, ctx.with_bias)\n        if not ctx.with_bias:\n            grad_bias = None\n\n        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\n                None, None, None, None, None)\n\n    @staticmethod\n    def _infer_shape(ctx, input, weight):\n        n = input.size(0)\n        channels_out = weight.size(0)\n        height, width = input.shape[2:4]\n        kernel_h, kernel_w = weight.shape[2:4]\n        height_out = (height + 2 * ctx.padding -\n                      (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1\n        width_out = (width + 2 * ctx.padding -\n                     (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1\n        return n, channels_out, height_out, width_out\n\n\ndeform_conv = DeformConvFunction.apply\nmodulated_deform_conv = ModulatedDeformConvFunction.apply\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/functions/deform_pool.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom .. import deform_pool_cuda\n\n\nclass DeformRoIPoolingFunction(Function):\n\n    @staticmethod\n    def forward(ctx,\n                data,\n                rois,\n                offset,\n                spatial_scale,\n                out_size,\n                out_channels,\n                no_trans,\n                group_size=1,\n                part_size=None,\n                sample_per_part=4,\n                trans_std=.0):\n        ctx.spatial_scale = spatial_scale\n        ctx.out_size = out_size\n        ctx.out_channels = out_channels\n        ctx.no_trans = no_trans\n        ctx.group_size = group_size\n        ctx.part_size = out_size if part_size is None else part_size\n        ctx.sample_per_part = sample_per_part\n        ctx.trans_std = trans_std\n\n        assert 0.0 <= ctx.trans_std <= 1.0\n        if not data.is_cuda:\n            raise NotImplementedError\n\n        n = rois.shape[0]\n        output = data.new_empty(n, out_channels, out_size, out_size)\n        output_count = data.new_empty(n, out_channels, out_size, out_size)\n        deform_pool_cuda.deform_psroi_pooling_cuda_forward(\n            data, rois, offset, output, output_count, ctx.no_trans,\n            ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,\n            ctx.part_size, ctx.sample_per_part, ctx.trans_std)\n\n        if data.requires_grad or rois.requires_grad or offset.requires_grad:\n            ctx.save_for_backward(data, rois, offset)\n        ctx.output_count = output_count\n\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        if not grad_output.is_cuda:\n            raise NotImplementedError\n\n        data, rois, offset = ctx.saved_tensors\n        output_count = ctx.output_count\n        grad_input = torch.zeros_like(data)\n        grad_rois = None\n        grad_offset = torch.zeros_like(offset)\n\n        
deform_pool_cuda.deform_psroi_pooling_cuda_backward(\n            grad_output, data, rois, offset, output_count, grad_input,\n            grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,\n            ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,\n            ctx.trans_std)\n        return (grad_input, grad_rois, grad_offset, None, None, None, None,\n                None, None, None, None)\n\n\ndeform_roi_pooling = DeformRoIPoolingFunction.apply\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/modules/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/modules/deform_conv.py",
    "content": "import math\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn.modules.utils import _pair\n\nfrom ..functions.deform_conv import deform_conv, modulated_deform_conv\n\n\nclass DeformConv(nn.Module):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 deformable_groups=1,\n                 bias=False):\n        super(DeformConv, self).__init__()\n\n        assert not bias\n        assert in_channels % groups == 0, \\\n            'in_channels {} cannot be divisible by groups {}'.format(\n                in_channels, groups)\n        assert out_channels % groups == 0, \\\n            'out_channels {} cannot be divisible by groups {}'.format(\n                out_channels, groups)\n\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.kernel_size = _pair(kernel_size)\n        self.stride = _pair(stride)\n        self.padding = _pair(padding)\n        self.dilation = _pair(dilation)\n        self.groups = groups\n        self.deformable_groups = deformable_groups\n\n        self.weight = nn.Parameter(\n            torch.Tensor(out_channels, in_channels // self.groups,\n                         *self.kernel_size))\n\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        n = self.in_channels\n        for k in self.kernel_size:\n            n *= k\n        stdv = 1. 
/ math.sqrt(n)\n        self.weight.data.uniform_(-stdv, stdv)\n\n    def forward(self, x, offset):\n        return deform_conv(x, offset, self.weight, self.stride, self.padding,\n                           self.dilation, self.groups, self.deformable_groups)\n\n\nclass DeformConvPack(DeformConv):\n\n    def __init__(self, *args, **kwargs):\n        super(DeformConvPack, self).__init__(*args, **kwargs)\n\n        self.conv_offset = nn.Conv2d(\n            self.in_channels,\n            self.deformable_groups * 2 * self.kernel_size[0] *\n            self.kernel_size[1],\n            kernel_size=self.kernel_size,\n            stride=_pair(self.stride),\n            padding=_pair(self.padding),\n            bias=True)\n        self.init_offset()\n\n    def init_offset(self):\n        self.conv_offset.weight.data.zero_()\n        self.conv_offset.bias.data.zero_()\n\n    def forward(self, x):\n        offset = self.conv_offset(x)\n        return deform_conv(x, offset, self.weight, self.stride, self.padding,\n                           self.dilation, self.groups, self.deformable_groups)\n\n\nclass ModulatedDeformConv(nn.Module):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 deformable_groups=1,\n                 bias=True):\n        super(ModulatedDeformConv, self).__init__()\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.kernel_size = _pair(kernel_size)\n        self.stride = stride\n        self.padding = padding\n        self.dilation = dilation\n        self.groups = groups\n        self.deformable_groups = deformable_groups\n        self.with_bias = bias\n\n        self.weight = nn.Parameter(\n            torch.Tensor(out_channels, in_channels // groups,\n                         *self.kernel_size))\n        if 
bias:\n            self.bias = nn.Parameter(torch.Tensor(out_channels))\n        else:\n            self.register_parameter('bias', None)\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        n = self.in_channels\n        for k in self.kernel_size:\n            n *= k\n        stdv = 1. / math.sqrt(n)\n        self.weight.data.uniform_(-stdv, stdv)\n        if self.bias is not None:\n            self.bias.data.zero_()\n\n    def forward(self, x, offset, mask):\n        return modulated_deform_conv(x, offset, mask, self.weight, self.bias,\n                                     self.stride, self.padding, self.dilation,\n                                     self.groups, self.deformable_groups)\n\n\nclass ModulatedDeformConvPack(ModulatedDeformConv):\n\n    def __init__(self, *args, **kwargs):\n        super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)\n\n        self.conv_offset_mask = nn.Conv2d(\n            self.in_channels,\n            self.deformable_groups * 3 * self.kernel_size[0] *\n            self.kernel_size[1],\n            kernel_size=self.kernel_size,\n            stride=_pair(self.stride),\n            padding=_pair(self.padding),\n            bias=True)\n        self.init_offset()\n\n    def init_offset(self):\n        self.conv_offset_mask.weight.data.zero_()\n        self.conv_offset_mask.bias.data.zero_()\n\n    def forward(self, x):\n        out = self.conv_offset_mask(x)\n        o1, o2, mask = torch.chunk(out, 3, dim=1)\n        offset = torch.cat((o1, o2), dim=1)\n        mask = torch.sigmoid(mask)\n        return modulated_deform_conv(x, offset, mask, self.weight, self.bias,\n                                     self.stride, self.padding, self.dilation,\n                                     self.groups, self.deformable_groups)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/modules/deform_pool.py",
    "content": "from torch import nn\n\nfrom ..functions.deform_pool import deform_roi_pooling\n\n\nclass DeformRoIPooling(nn.Module):\n\n    def __init__(self,\n                 spatial_scale,\n                 out_size,\n                 out_channels,\n                 no_trans,\n                 group_size=1,\n                 part_size=None,\n                 sample_per_part=4,\n                 trans_std=.0):\n        super(DeformRoIPooling, self).__init__()\n        self.spatial_scale = spatial_scale\n        self.out_size = out_size\n        self.out_channels = out_channels\n        self.no_trans = no_trans\n        self.group_size = group_size\n        self.part_size = out_size if part_size is None else part_size\n        self.sample_per_part = sample_per_part\n        self.trans_std = trans_std\n\n    def forward(self, data, rois, offset):\n        if self.no_trans:\n            offset = data.new_empty(0)\n        return deform_roi_pooling(\n            data, rois, offset, self.spatial_scale, self.out_size,\n            self.out_channels, self.no_trans, self.group_size, self.part_size,\n            self.sample_per_part, self.trans_std)\n\n\nclass DeformRoIPoolingPack(DeformRoIPooling):\n\n    def __init__(self,\n                 spatial_scale,\n                 out_size,\n                 out_channels,\n                 no_trans,\n                 group_size=1,\n                 part_size=None,\n                 sample_per_part=4,\n                 trans_std=.0,\n                 num_offset_fcs=3,\n                 deform_fc_channels=1024):\n        super(DeformRoIPoolingPack,\n              self).__init__(spatial_scale, out_size, out_channels, no_trans,\n                             group_size, part_size, sample_per_part, trans_std)\n\n        self.num_offset_fcs = num_offset_fcs\n        self.deform_fc_channels = deform_fc_channels\n\n        if not no_trans:\n            seq = []\n            ic = self.out_size * self.out_size * self.out_channels\n      
      for i in range(self.num_offset_fcs):\n                if i < self.num_offset_fcs - 1:\n                    oc = self.deform_fc_channels\n                else:\n                    oc = self.out_size * self.out_size * 2\n                seq.append(nn.Linear(ic, oc))\n                ic = oc\n                if i < self.num_offset_fcs - 1:\n                    seq.append(nn.ReLU(inplace=True))\n            self.offset_fc = nn.Sequential(*seq)\n            self.offset_fc[-1].weight.data.zero_()\n            self.offset_fc[-1].bias.data.zero_()\n\n    def forward(self, data, rois):\n        assert data.size(1) == self.out_channels\n        if self.no_trans:\n            offset = data.new_empty(0)\n            return deform_roi_pooling(\n                data, rois, offset, self.spatial_scale, self.out_size,\n                self.out_channels, self.no_trans, self.group_size,\n                self.part_size, self.sample_per_part, self.trans_std)\n        else:\n            n = rois.shape[0]\n            offset = data.new_empty(0)\n            x = deform_roi_pooling(data, rois, offset, self.spatial_scale,\n                                   self.out_size, self.out_channels, True,\n                                   self.group_size, self.part_size,\n                                   self.sample_per_part, self.trans_std)\n            offset = self.offset_fc(x.view(n, -1))\n            offset = offset.view(n, 2, self.out_size, self.out_size)\n            return deform_roi_pooling(\n                data, rois, offset, self.spatial_scale, self.out_size,\n                self.out_channels, self.no_trans, self.group_size,\n                self.part_size, self.sample_per_part, self.trans_std)\n\n\nclass ModulatedDeformRoIPoolingPack(DeformRoIPooling):\n\n    def __init__(self,\n                 spatial_scale,\n                 out_size,\n                 out_channels,\n                 no_trans,\n                 group_size=1,\n                 part_size=None,\n             
    sample_per_part=4,\n                 trans_std=.0,\n                 num_offset_fcs=3,\n                 num_mask_fcs=2,\n                 deform_fc_channels=1024):\n        super(ModulatedDeformRoIPoolingPack, self).__init__(\n            spatial_scale, out_size, out_channels, no_trans, group_size,\n            part_size, sample_per_part, trans_std)\n\n        self.num_offset_fcs = num_offset_fcs\n        self.num_mask_fcs = num_mask_fcs\n        self.deform_fc_channels = deform_fc_channels\n\n        if not no_trans:\n            offset_fc_seq = []\n            ic = self.out_size * self.out_size * self.out_channels\n            for i in range(self.num_offset_fcs):\n                if i < self.num_offset_fcs - 1:\n                    oc = self.deform_fc_channels\n                else:\n                    oc = self.out_size * self.out_size * 2\n                offset_fc_seq.append(nn.Linear(ic, oc))\n                ic = oc\n                if i < self.num_offset_fcs - 1:\n                    offset_fc_seq.append(nn.ReLU(inplace=True))\n            self.offset_fc = nn.Sequential(*offset_fc_seq)\n            self.offset_fc[-1].weight.data.zero_()\n            self.offset_fc[-1].bias.data.zero_()\n\n            mask_fc_seq = []\n            ic = self.out_size * self.out_size * self.out_channels\n            for i in range(self.num_mask_fcs):\n                if i < self.num_mask_fcs - 1:\n                    oc = self.deform_fc_channels\n                else:\n                    oc = self.out_size * self.out_size\n                mask_fc_seq.append(nn.Linear(ic, oc))\n                ic = oc\n                if i < self.num_mask_fcs - 1:\n                    mask_fc_seq.append(nn.ReLU(inplace=True))\n                else:\n                    mask_fc_seq.append(nn.Sigmoid())\n            self.mask_fc = nn.Sequential(*mask_fc_seq)\n            self.mask_fc[-2].weight.data.zero_()\n            self.mask_fc[-2].bias.data.zero_()\n\n    def forward(self, data, 
rois):\n        assert data.size(1) == self.out_channels\n        if self.no_trans:\n            offset = data.new_empty(0)\n            return deform_roi_pooling(\n                data, rois, offset, self.spatial_scale, self.out_size,\n                self.out_channels, self.no_trans, self.group_size,\n                self.part_size, self.sample_per_part, self.trans_std)\n        else:\n            n = rois.shape[0]\n            offset = data.new_empty(0)\n            x = deform_roi_pooling(data, rois, offset, self.spatial_scale,\n                                   self.out_size, self.out_channels, True,\n                                   self.group_size, self.part_size,\n                                   self.sample_per_part, self.trans_std)\n            offset = self.offset_fc(x.view(n, -1))\n            offset = offset.view(n, 2, self.out_size, self.out_size)\n            mask = self.mask_fc(x.view(n, -1))\n            mask = mask.view(n, 1, self.out_size, self.out_size)\n            return deform_roi_pooling(\n                data, rois, offset, self.spatial_scale, self.out_size,\n                self.out_channels, self.no_trans, self.group_size,\n                self.part_size, self.sample_per_part, self.trans_std) * mask\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='deform_conv',\n    ext_modules=[\n        CUDAExtension('deform_conv_cuda', [\n            'src/deform_conv_cuda.cpp',\n            'src/deform_conv_cuda_kernel.cu',\n        ]),\n        CUDAExtension('deform_pool_cuda', [\n            'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu'\n        ]),\n    ],\n    cmdclass={'build_ext': BuildExtension})\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.cpp",
    "content": "// modify from\n// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c\n\n#include <torch/extension.h>\n\n#include <cmath>\n#include <vector>\n\nvoid deformable_im2col(const at::Tensor data_im, const at::Tensor data_offset,\n                       const int channels, const int height, const int width,\n                       const int ksize_h, const int ksize_w, const int pad_h,\n                       const int pad_w, const int stride_h, const int stride_w,\n                       const int dilation_h, const int dilation_w,\n                       const int parallel_imgs, const int deformable_group,\n                       at::Tensor data_col);\n\nvoid deformable_col2im(const at::Tensor data_col, const at::Tensor data_offset,\n                       const int channels, const int height, const int width,\n                       const int ksize_h, const int ksize_w, const int pad_h,\n                       const int pad_w, const int stride_h, const int stride_w,\n                       const int dilation_h, const int dilation_w,\n                       const int parallel_imgs, const int deformable_group,\n                       at::Tensor grad_im);\n\nvoid deformable_col2im_coord(\n    const at::Tensor data_col, const at::Tensor data_im,\n    const at::Tensor data_offset, const int channels, const int height,\n    const int width, const int ksize_h, const int ksize_w, const int pad_h,\n    const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w, const int parallel_imgs,\n    const int deformable_group, at::Tensor grad_offset);\n\nvoid modulated_deformable_im2col_cuda(\n    const at::Tensor data_im, const at::Tensor data_offset,\n    const at::Tensor data_mask, const int batch_size, const int channels,\n    const int height_im, const int width_im, const int height_col,\n    const int width_col, const int kernel_h, const int kenerl_w,\n    
const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w, const int deformable_group,\n    at::Tensor data_col);\n\nvoid modulated_deformable_col2im_cuda(\n    const at::Tensor data_col, const at::Tensor data_offset,\n    const at::Tensor data_mask, const int batch_size, const int channels,\n    const int height_im, const int width_im, const int height_col,\n    const int width_col, const int kernel_h, const int kenerl_w,\n    const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w, const int deformable_group,\n    at::Tensor grad_im);\n\nvoid modulated_deformable_col2im_coord_cuda(\n    const at::Tensor data_col, const at::Tensor data_im,\n    const at::Tensor data_offset, const at::Tensor data_mask,\n    const int batch_size, const int channels, const int height_im,\n    const int width_im, const int height_col, const int width_col,\n    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,\n    const int stride_h, const int stride_w, const int dilation_h,\n    const int dilation_w, const int deformable_group, at::Tensor grad_offset,\n    at::Tensor grad_mask);\n\nvoid shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,\n                 at::Tensor weight, int kH, int kW, int dH, int dW, int padH,\n                 int padW, int dilationH, int dilationW, int group,\n                 int deformable_group) {\n  AT_CHECK(weight.ndimension() == 4,\n           \"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, \"\n           \"but got: %s\",\n           weight.ndimension());\n\n  AT_CHECK(weight.is_contiguous(), \"weight tensor has to be contiguous\");\n\n  AT_CHECK(kW > 0 && kH > 0,\n           \"kernel size should be greater than zero, but got kH: %d kW: %d\", kH,\n           kW);\n\n  AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),\n           \"kernel size should be 
consistent with weight, \",\n           \"but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d\", kH,\n           kW, weight.size(2), weight.size(3));\n\n  AT_CHECK(dW > 0 && dH > 0,\n           \"stride should be greater than zero, but got dH: %d dW: %d\", dH, dW);\n\n  AT_CHECK(\n      dilationW > 0 && dilationH > 0,\n      \"dilation should be greater than 0, but got dilationH: %d dilationW: %d\",\n      dilationH, dilationW);\n\n  int ndim = input.ndimension();\n  int dimf = 0;\n  int dimh = 1;\n  int dimw = 2;\n\n  if (ndim == 4) {\n    dimf++;\n    dimh++;\n    dimw++;\n  }\n\n  AT_CHECK(ndim == 3 || ndim == 4, \"3D or 4D input tensor expected but got: %s\",\n           ndim);\n\n  long nInputPlane = weight.size(1) * group;\n  long inputHeight = input.size(dimh);\n  long inputWidth = input.size(dimw);\n  long nOutputPlane = weight.size(0);\n  long outputHeight =\n      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;\n  long outputWidth =\n      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;\n\n  AT_CHECK(nInputPlane % deformable_group == 0,\n           \"input channels must divide deformable group size\");\n\n  if (outputWidth < 1 || outputHeight < 1)\n    AT_ERROR(\n        \"Given input size: (%ld x %ld x %ld). \"\n        \"Calculated output size: (%ld x %ld x %ld). 
Output size is too small\",\n        nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,\n        outputWidth);\n\n  AT_CHECK(input.size(1) == nInputPlane,\n           \"invalid number of input planes, expected: %d, but got: %d\",\n           nInputPlane, input.size(1));\n\n  AT_CHECK((inputHeight >= kH && inputWidth >= kW),\n           \"input image is smaller than kernel\");\n\n  AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),\n           \"invalid spatial size of offset, expected height: %d width: %d, but \"\n           \"got height: %d width: %d\",\n           outputHeight, outputWidth, offset.size(2), offset.size(3));\n\n  AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),\n           \"invalid number of channels of offset\");\n\n  if (gradOutput != NULL) {\n    AT_CHECK(gradOutput->size(dimf) == nOutputPlane,\n             \"invalid number of gradOutput planes, expected: %d, but got: %d\",\n             nOutputPlane, gradOutput->size(dimf));\n\n    AT_CHECK((gradOutput->size(dimh) == outputHeight &&\n              gradOutput->size(dimw) == outputWidth),\n             \"invalid size of gradOutput, expected height: %d width: %d , but \"\n             \"got height: %d width: %d\",\n             outputHeight, outputWidth, gradOutput->size(dimh),\n             gradOutput->size(dimw));\n  }\n}\n\nint deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,\n                             at::Tensor offset, at::Tensor output,\n                             at::Tensor columns, at::Tensor ones, int kW,\n                             int kH, int dW, int dH, int padW, int padH,\n                             int dilationW, int dilationH, int group,\n                             int deformable_group, int im2col_step) {\n  // todo: resize columns to include im2col: done\n  // todo: add im2col_step as input\n  // todo: add new output buffer and transpose it to output (or directly\n  // transpose output) todo: possibly 
change data indexing because of\n  // parallel_imgs\n\n  shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW,\n              dilationH, dilationW, group, deformable_group);\n\n  input = input.contiguous();\n  offset = offset.contiguous();\n  weight = weight.contiguous();\n\n  int batch = 1;\n  if (input.ndimension() == 3) {\n    // Force batch\n    batch = 0;\n    input.unsqueeze_(0);\n    offset.unsqueeze_(0);\n  }\n\n  // todo: assert batchsize dividable by im2col_step\n\n  long batchSize = input.size(0);\n  long nInputPlane = input.size(1);\n  long inputHeight = input.size(2);\n  long inputWidth = input.size(3);\n\n  long nOutputPlane = weight.size(0);\n\n  long outputWidth =\n      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;\n  long outputHeight =\n      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;\n\n  AT_CHECK((offset.size(0) == batchSize), \"invalid batch size of offset\");\n\n  output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,\n                        outputHeight, outputWidth});\n  columns = at::zeros(\n      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},\n      input.options());\n\n  if (ones.ndimension() != 2 ||\n      ones.size(0) * ones.size(1) < outputHeight * outputWidth) {\n    ones = at::ones({outputHeight, outputWidth}, input.options());\n  }\n\n  input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,\n                      inputHeight, inputWidth});\n  offset =\n      offset.view({batchSize / im2col_step, im2col_step,\n                   deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  at::Tensor output_buffer =\n      at::zeros({batchSize / im2col_step, nOutputPlane,\n                 im2col_step * outputHeight, outputWidth},\n                output.options());\n\n  output_buffer = output_buffer.view(\n      {output_buffer.size(0), group, output_buffer.size(1) / group,\n       output_buffer.size(2), 
output_buffer.size(3)});\n\n  for (int elt = 0; elt < batchSize / im2col_step; elt++) {\n    deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight,\n                      inputWidth, kH, kW, padH, padW, dH, dW, dilationH,\n                      dilationW, im2col_step, deformable_group, columns);\n\n    columns = columns.view({group, columns.size(0) / group, columns.size(1)});\n    weight = weight.view({group, weight.size(0) / group, weight.size(1),\n                          weight.size(2), weight.size(3)});\n\n    for (int g = 0; g < group; g++) {\n      output_buffer[elt][g] = output_buffer[elt][g]\n                                  .flatten(1)\n                                  .addmm_(weight[g].flatten(1), columns[g])\n                                  .view_as(output_buffer[elt][g]);\n    }\n  }\n\n  output_buffer = output_buffer.view(\n      {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2),\n       output_buffer.size(3), output_buffer.size(4)});\n\n  output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane,\n                                      im2col_step, outputHeight, outputWidth});\n  output_buffer.transpose_(1, 2);\n  output.copy_(output_buffer);\n  output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth});\n\n  input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});\n  offset = offset.view(\n      {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  if (batch == 0) {\n    output = output.view({nOutputPlane, outputHeight, outputWidth});\n    input = input.view({nInputPlane, inputHeight, inputWidth});\n    offset = offset.view({offset.size(1), offset.size(2), offset.size(3)});\n  }\n\n  return 1;\n}\n\nint deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,\n                                    at::Tensor gradOutput, at::Tensor gradInput,\n                                    at::Tensor gradOffset, at::Tensor weight,\n               
                     at::Tensor columns, int kW, int kH, int dW,\n                                    int dH, int padW, int padH, int dilationW,\n                                    int dilationH, int group,\n                                    int deformable_group, int im2col_step) {\n  shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW,\n              dilationH, dilationW, group, deformable_group);\n\n  input = input.contiguous();\n  offset = offset.contiguous();\n  gradOutput = gradOutput.contiguous();\n  weight = weight.contiguous();\n\n  int batch = 1;\n\n  if (input.ndimension() == 3) {\n    // Force batch\n    batch = 0;\n    input = input.view({1, input.size(0), input.size(1), input.size(2)});\n    offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)});\n    gradOutput = gradOutput.view(\n        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});\n  }\n\n  long batchSize = input.size(0);\n  long nInputPlane = input.size(1);\n  long inputHeight = input.size(2);\n  long inputWidth = input.size(3);\n\n  long nOutputPlane = weight.size(0);\n\n  long outputWidth =\n      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;\n  long outputHeight =\n      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;\n\n  AT_CHECK((offset.size(0) == batchSize), 3, \"invalid batch size of offset\");\n  gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});\n  columns = at::zeros(\n      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},\n      input.options());\n\n  // change order of grad output\n  gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step,\n                                nOutputPlane, outputHeight, outputWidth});\n  gradOutput.transpose_(1, 2);\n\n  gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane,\n                              inputHeight, inputWidth});\n  input = input.view({batchSize / im2col_step, 
im2col_step, nInputPlane,\n                      inputHeight, inputWidth});\n  gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step,\n                                deformable_group * 2 * kH * kW, outputHeight,\n                                outputWidth});\n  offset =\n      offset.view({batchSize / im2col_step, im2col_step,\n                   deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  for (int elt = 0; elt < batchSize / im2col_step; elt++) {\n    // divide into groups\n    columns = columns.view({group, columns.size(0) / group, columns.size(1)});\n    weight = weight.view({group, weight.size(0) / group, weight.size(1),\n                          weight.size(2), weight.size(3)});\n    gradOutput = gradOutput.view(\n        {gradOutput.size(0), group, gradOutput.size(1) / group,\n         gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)});\n\n    for (int g = 0; g < group; g++) {\n      columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),\n                                     gradOutput[elt][g].flatten(1), 0.0f, 1.0f);\n    }\n\n    columns =\n        columns.view({columns.size(0) * columns.size(1), columns.size(2)});\n    gradOutput = gradOutput.view(\n        {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2),\n         gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)});\n\n    deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane,\n                            inputHeight, inputWidth, kH, kW, padH, padW, dH, dW,\n                            dilationH, dilationW, im2col_step, deformable_group,\n                            gradOffset[elt]);\n\n    deformable_col2im(columns, offset[elt], nInputPlane, inputHeight,\n                      inputWidth, kH, kW, padH, padW, dH, dW, dilationH,\n                      dilationW, im2col_step, deformable_group, gradInput[elt]);\n  }\n\n  gradOutput.transpose_(1, 2);\n  gradOutput =\n      gradOutput.view({batchSize, 
nOutputPlane, outputHeight, outputWidth});\n\n  gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});\n  input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});\n  gradOffset = gradOffset.view(\n      {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n  offset = offset.view(\n      {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  if (batch == 0) {\n    gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth});\n    input = input.view({nInputPlane, inputHeight, inputWidth});\n    gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth});\n    offset = offset.view({offset.size(1), offset.size(2), offset.size(3)});\n    gradOffset =\n        gradOffset.view({offset.size(1), offset.size(2), offset.size(3)});\n  }\n\n  return 1;\n}\n\nint deform_conv_backward_parameters_cuda(\n    at::Tensor input, at::Tensor offset, at::Tensor gradOutput,\n    at::Tensor gradWeight,  // at::Tensor gradBias,\n    at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH,\n    int padW, int padH, int dilationW, int dilationH, int group,\n    int deformable_group, float scale, int im2col_step) {\n  // todo: transpose and reshape outGrad\n  // todo: reshape columns\n  // todo: add im2col_step as input\n\n  shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH,\n              padW, dilationH, dilationW, group, deformable_group);\n\n  input = input.contiguous();\n  offset = offset.contiguous();\n  gradOutput = gradOutput.contiguous();\n\n  int batch = 1;\n\n  if (input.ndimension() == 3) {\n    // Force batch\n    batch = 0;\n    input = input.view(\n        at::IntList({1, input.size(0), input.size(1), input.size(2)}));\n    gradOutput = gradOutput.view(\n        {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)});\n  }\n\n  long batchSize = input.size(0);\n  long nInputPlane = input.size(1);\n  long inputHeight = input.size(2);\n  long 
inputWidth = input.size(3);\n\n  long nOutputPlane = gradWeight.size(0);\n\n  long outputWidth =\n      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;\n  long outputHeight =\n      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;\n\n  AT_CHECK((offset.size(0) == batchSize), \"invalid batch size of offset\");\n\n  columns = at::zeros(\n      {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},\n      input.options());\n\n  gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step,\n                                nOutputPlane, outputHeight, outputWidth});\n  gradOutput.transpose_(1, 2);\n\n  at::Tensor gradOutputBuffer = at::zeros_like(gradOutput);\n  gradOutputBuffer =\n      gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step,\n                             outputHeight, outputWidth});\n  gradOutputBuffer.copy_(gradOutput);\n  gradOutputBuffer =\n      gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane,\n                             im2col_step * outputHeight, outputWidth});\n\n  gradOutput.transpose_(1, 2);\n  gradOutput =\n      gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth});\n\n  input = input.view({batchSize / im2col_step, im2col_step, nInputPlane,\n                      inputHeight, inputWidth});\n  offset =\n      offset.view({batchSize / im2col_step, im2col_step,\n                   deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  for (int elt = 0; elt < batchSize / im2col_step; elt++) {\n    deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight,\n                      inputWidth, kH, kW, padH, padW, dH, dW, dilationH,\n                      dilationW, im2col_step, deformable_group, columns);\n\n    // divide into group\n    gradOutputBuffer = gradOutputBuffer.view(\n        {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group,\n         gradOutputBuffer.size(2), gradOutputBuffer.size(3)});\n    columns = 
columns.view({group, columns.size(0) / group, columns.size(1)});\n    gradWeight =\n        gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1),\n                         gradWeight.size(2), gradWeight.size(3)});\n\n    for (int g = 0; g < group; g++) {\n      gradWeight[g] = gradWeight[g]\n                          .flatten(1)\n                          .addmm_(gradOutputBuffer[elt][g].flatten(1),\n                                  columns[g].transpose(1, 0), 1.0, scale)\n                          .view_as(gradWeight[g]);\n    }\n    gradOutputBuffer = gradOutputBuffer.view(\n        {gradOutputBuffer.size(0),\n         gradOutputBuffer.size(1) * gradOutputBuffer.size(2),\n         gradOutputBuffer.size(3), gradOutputBuffer.size(4)});\n    columns =\n        columns.view({columns.size(0) * columns.size(1), columns.size(2)});\n    gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1),\n                                  gradWeight.size(2), gradWeight.size(3),\n                                  gradWeight.size(4)});\n  }\n\n  input = input.view({batchSize, nInputPlane, inputHeight, inputWidth});\n  offset = offset.view(\n      {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth});\n\n  if (batch == 0) {\n    gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth});\n    input = input.view({nInputPlane, inputHeight, inputWidth});\n  }\n\n  return 1;\n}\n\nvoid modulated_deform_conv_cuda_forward(\n    at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,\n    at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns,\n    int kernel_h, int kernel_w, const int stride_h, const int stride_w,\n    const int pad_h, const int pad_w, const int dilation_h,\n    const int dilation_w, const int group, const int deformable_group,\n    const bool with_bias) {\n  AT_CHECK(input.is_contiguous(), \"input tensor has to be contiguous\");\n  AT_CHECK(weight.is_contiguous(), \"weight 
tensor has to be contiguous\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n\n  const int channels_out = weight.size(0);\n  const int channels_kernel = weight.size(1);\n  const int kernel_h_ = weight.size(2);\n  const int kernel_w_ = weight.size(3);\n\n  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)\n    AT_ERROR(\"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\",\n             kernel_h_, kernel_w, kernel_h_, kernel_w_);\n  if (channels != channels_kernel * group)\n    AT_ERROR(\"Input shape and kernel channels wont match: (%d vs %d).\",\n             channels, channels_kernel * group);\n\n  const int height_out =\n      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n  const int width_out =\n      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n  if (ones.ndimension() != 2 ||\n      ones.size(0) * ones.size(1) < height_out * width_out) {\n    // Resize plane and fill with ones...\n    ones = at::ones({height_out, width_out}, input.options());\n  }\n\n  // resize output\n  output = output.view({batch, channels_out, height_out, width_out}).zero_();\n  // resize temporary columns\n  columns =\n      at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out},\n                input.options());\n\n  output = output.view({output.size(0), group, output.size(1) / group,\n                        output.size(2), output.size(3)});\n\n  for (int b = 0; b < batch; b++) {\n    modulated_deformable_im2col_cuda(\n        input[b], offset[b], mask[b], 1, channels, height, width, height_out,\n        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, deformable_group, columns);\n\n    // divide into group\n    weight = weight.view({group, weight.size(0) / group, weight.size(1),\n                          weight.size(2), weight.size(3)});\n    
columns = columns.view({group, columns.size(0) / group, columns.size(1)});\n\n    for (int g = 0; g < group; g++) {\n      output[b][g] = output[b][g]\n                         .flatten(1)\n                         .addmm_(weight[g].flatten(1), columns[g])\n                         .view_as(output[b][g]);\n    }\n\n    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),\n                          weight.size(3), weight.size(4)});\n    columns =\n        columns.view({columns.size(0) * columns.size(1), columns.size(2)});\n  }\n\n  output = output.view({output.size(0), output.size(1) * output.size(2),\n                        output.size(3), output.size(4)});\n\n  if (with_bias) {\n    output += bias.view({1, bias.size(0), 1, 1});\n  }\n}\n\nvoid modulated_deform_conv_cuda_backward(\n    at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones,\n    at::Tensor offset, at::Tensor mask, at::Tensor columns,\n    at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias,\n    at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output,\n    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,\n    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,\n    const bool with_bias) {\n  AT_CHECK(input.is_contiguous(), \"input tensor has to be contiguous\");\n  AT_CHECK(weight.is_contiguous(), \"weight tensor has to be contiguous\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n\n  const int channels_kernel = weight.size(1);\n  const int kernel_h_ = weight.size(2);\n  const int kernel_w_ = weight.size(3);\n  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)\n    AT_ERROR(\"Input shape and kernel shape wont match: (%d x %d vs %d x %d).\",\n             kernel_h_, kernel_w, kernel_h_, kernel_w_);\n  if (channels != channels_kernel * group)\n    AT_ERROR(\"Input shape and kernel channels 
wont match: (%d vs %d).\",\n             channels, channels_kernel * group);\n\n  const int height_out =\n      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;\n  const int width_out =\n      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;\n\n  if (ones.ndimension() != 2 ||\n      ones.size(0) * ones.size(1) < height_out * width_out) {\n    // Resize plane and fill with ones...\n    ones = at::ones({height_out, width_out}, input.options());\n  }\n\n  grad_input = grad_input.view({batch, channels, height, width});\n  columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out},\n                      input.options());\n\n  grad_output =\n      grad_output.view({grad_output.size(0), group, grad_output.size(1) / group,\n                        grad_output.size(2), grad_output.size(3)});\n\n  for (int b = 0; b < batch; b++) {\n    // divide int group\n    columns = columns.view({group, columns.size(0) / group, columns.size(1)});\n    weight = weight.view({group, weight.size(0) / group, weight.size(1),\n                          weight.size(2), weight.size(3)});\n\n    for (int g = 0; g < group; g++) {\n      columns[g].addmm_(weight[g].flatten(1).transpose(0, 1),\n                        grad_output[b][g].flatten(1), 0.0f, 1.0f);\n    }\n\n    columns =\n        columns.view({columns.size(0) * columns.size(1), columns.size(2)});\n    weight = weight.view({weight.size(0) * weight.size(1), weight.size(2),\n                          weight.size(3), weight.size(4)});\n\n    // gradient w.r.t. input coordinate data\n    modulated_deformable_col2im_coord_cuda(\n        columns, input[b], offset[b], mask[b], 1, channels, height, width,\n        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,\n        stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b],\n        grad_mask[b]);\n    // gradient w.r.t. 
input data\n    modulated_deformable_col2im_cuda(\n        columns, offset[b], mask[b], 1, channels, height, width, height_out,\n        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, deformable_group, grad_input[b]);\n\n    // gradient w.r.t. weight, dWeight should accumulate across the batch and\n    // group\n    modulated_deformable_im2col_cuda(\n        input[b], offset[b], mask[b], 1, channels, height, width, height_out,\n        width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n        dilation_h, dilation_w, deformable_group, columns);\n\n    columns = columns.view({group, columns.size(0) / group, columns.size(1)});\n    grad_weight = grad_weight.view({group, grad_weight.size(0) / group,\n                                    grad_weight.size(1), grad_weight.size(2),\n                                    grad_weight.size(3)});\n    if (with_bias)\n      grad_bias = grad_bias.view({group, grad_bias.size(0) / group});\n\n    for (int g = 0; g < group; g++) {\n      grad_weight[g] =\n          grad_weight[g]\n              .flatten(1)\n              .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1))\n              .view_as(grad_weight[g]);\n      if (with_bias) {\n        grad_bias[g] =\n            grad_bias[g]\n                .view({-1, 1})\n                .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1}))\n                .view(-1);\n      }\n    }\n\n    columns =\n        columns.view({columns.size(0) * columns.size(1), columns.size(2)});\n    grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1),\n                                    grad_weight.size(2), grad_weight.size(3),\n                                    grad_weight.size(4)});\n    if (with_bias)\n      grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)});\n  }\n  grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1),\n                                  
grad_output.size(2), grad_output.size(3),\n                                  grad_output.size(4)});\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"deform_conv_forward_cuda\", &deform_conv_forward_cuda,\n        \"deform forward (CUDA)\");\n  m.def(\"deform_conv_backward_input_cuda\", &deform_conv_backward_input_cuda,\n        \"deform_conv_backward_input (CUDA)\");\n  m.def(\"deform_conv_backward_parameters_cuda\",\n        &deform_conv_backward_parameters_cuda,\n        \"deform_conv_backward_parameters (CUDA)\");\n  m.def(\"modulated_deform_conv_cuda_forward\",\n        &modulated_deform_conv_cuda_forward,\n        \"modulated deform conv forward (CUDA)\");\n  m.def(\"modulated_deform_conv_cuda_backward\",\n        &modulated_deform_conv_cuda_backward,\n        \"modulated deform conv backward (CUDA)\");\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu",
    "content": "/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contributions by the University of California:\n * Copyright (c) 2014-2017 The Regents of the University of California (Regents)\n * All rights reserved.\n *\n * All other contributions:\n * Copyright (c) 2014-2017, the respective contributors\n * All rights reserved.\n *\n * Caffe uses a shared copyright model: each contributor holds copyright over\n * their contributions to Caffe. The project versioning records all such\n * contribution and copyright details. If a contributor wants to further mark\n * their specific copyright on a particular contribution, they should indicate\n * their copyright solely in the commit message of the change when it is\n * committed.\n *\n * LICENSE\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n * 1. Redistributions of source code must retain the above copyright notice, this\n * list of conditions and the following disclaimer.\n * 2. Redistributions in binary form must reproduce the above copyright notice,\n * this list of conditions and the following disclaimer in the documentation\n * and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * CONTRIBUTION AGREEMENT\n *\n * By contributing to the BVLC/caffe repository through pull-request, comment,\n * or otherwise, the contributor releases their content to the\n * license and copyright terms herein.\n *\n ***************** END Caffe Copyright Notice and Disclaimer ********************\n *\n * Copyright (c) 2018 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file modulated_deformable_im2col.cuh\n * \\brief Function definitions of converting an image to\n * column matrix based on kernel, padding, dilation, and offset.\n * These functions are mainly used in deformable convolution operators.\n * \\ref: https://arxiv.org/abs/1703.06211\n * \\author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng\n */\n\n// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu\n\n#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n#include <stdio.h>\n#include <math.h>\n#include <float.h>\n\nusing namespace at;\n\n#define CUDA_KERNEL_LOOP(i, n)                                 \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \\\n       i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\nconst int kMaxGridNum = 65535;\n\ninline int GET_BLOCKS(const int N)\n{\n  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);\n}\n\ntemplate <typename 
scalar_t>\n__device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width,\n                                               const int height, const int width, scalar_t h, scalar_t w)\n{\n\n  int h_low = floor(h);\n  int w_low = floor(w);\n  int h_high = h_low + 1;\n  int w_high = w_low + 1;\n\n  scalar_t lh = h - h_low;\n  scalar_t lw = w - w_low;\n  scalar_t hh = 1 - lh, hw = 1 - lw;\n\n  scalar_t v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n    v1 = bottom_data[h_low * data_width + w_low];\n  scalar_t v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n    v2 = bottom_data[h_low * data_width + w_high];\n  scalar_t v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n    v3 = bottom_data[h_high * data_width + w_low];\n  scalar_t v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n    v4 = bottom_data[h_high * data_width + w_high];\n\n  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  return val;\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,\n                                        const int h, const int w, const int height, const int width)\n{\n\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  scalar_t weight = 0;\n  if (h == argmax_h_low && w == argmax_w_low)\n    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);\n  if (h == argmax_h_low && w == argmax_w_high)\n    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);\n  if (h == argmax_h_high && w == argmax_w_low)\n    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);\n  if (h == argmax_h_high && w == argmax_w_high)\n    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);\n  return 
weight;\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,\n                                          const int height, const int width, const scalar_t *im_data,\n                                          const int data_width, const int bp_dir)\n{\n\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  scalar_t weight = 0;\n\n  if (bp_dir == 0)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)\n      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n  else if (bp_dir == 1)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)\n      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n\n  return 
weight;\n}\n\ntemplate <typename scalar_t>\n__global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset,\n                                             const int height, const int width, const int kernel_h, const int kernel_w,\n                                             const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n                                             const int dilation_h, const int dilation_w, const int channel_per_deformable_group,\n                                             const int batch_size, const int num_channels, const int deformable_group,\n                                             const int height_col, const int width_col,\n                                             scalar_t *data_col)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    // index index of output matrix\n    const int w_col = index % width_col;\n    const int h_col = (index / width_col) % height_col;\n    const int b_col = (index / width_col / height_col) % batch_size;\n    const int c_im = (index / width_col / height_col) / batch_size;\n    const int c_col = c_im * kernel_h * kernel_w;\n\n    // compute deformable group index\n    const int deformable_group_index = c_im / channel_per_deformable_group;\n\n    const int h_in = h_col * stride_h - pad_h;\n    const int w_in = w_col * stride_w - pad_w;\n    scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;\n    //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;\n    const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;\n    const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n\n    for (int i = 0; i < kernel_h; ++i)\n    {\n      for (int j = 0; j < kernel_w; ++j)\n      {\n        const int data_offset_h_ptr = ((2 
* (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;\n        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;\n        const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n        const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n        scalar_t val = static_cast<scalar_t>(0);\n        const scalar_t h_im = h_in + i * dilation_h + offset_h;\n        const scalar_t w_im = w_in + j * dilation_w + offset_w;\n        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)\n        {\n          //const scalar_t map_h = i * dilation_h + offset_h;\n          //const scalar_t map_w = j * dilation_w + offset_w;\n          //const int cur_height = height - h_in;\n          //const int cur_width = width - w_in;\n          //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);\n          val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);\n        }\n        *data_col_ptr = val;\n        data_col_ptr += batch_size * height_col * width_col;\n      }\n    }\n  }\n}\n\nvoid deformable_im2col(\n    const at::Tensor data_im, const at::Tensor data_offset, const int channels,\n    const int height, const int width, const int ksize_h, const int ksize_w,\n    const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w, const int parallel_imgs,\n    const int deformable_group, at::Tensor data_col)\n{\n  // num_axes should be smaller than block size\n  // todo: check parallel_imgs is correctly passed in\n  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;\n  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;\n  int num_kernels = channels * height_col * width_col * parallel_imgs;\n  int channel_per_deformable_group = channels / deformable_group;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      
data_im.type(), \"deformable_im2col_gpu\", ([&] {\n        const scalar_t *data_im_ = data_im.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        scalar_t *data_col_ = data_col.data<scalar_t>();\n\n        deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,\n            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,\n            channel_per_deformable_group, parallel_imgs, channels, deformable_group,\n            height_col, width_col, data_col_);\n      }));\n\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in deformable_im2col: %s\\n\", cudaGetErrorString(err));\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void deformable_col2im_gpu_kernel(\n    const int n, const scalar_t *data_col, const scalar_t *data_offset,\n    const int channels, const int height, const int width,\n    const int kernel_h, const int kernel_w,\n    const int pad_h, const int pad_w,\n    const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w,\n    const int channel_per_deformable_group,\n    const int batch_size, const int deformable_group,\n    const int height_col, const int width_col,\n    scalar_t *grad_im)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    const int j = (index / width_col / height_col / batch_size) % kernel_w;\n    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;\n    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / channel_per_deformable_group;\n\n    int w_out = index % width_col;\n    int h_out = (index / width_col) % height_col;\n    int b = (index / width_col / height_col) % batch_size;\n    int w_in = w_out * stride_w - pad_w;\n    int h_in = h_out * stride_h - pad_h;\n\n 
   const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) *\n                                                        2 * kernel_h * kernel_w * height_col * width_col;\n    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;\n    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;\n    const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n    const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n    const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;\n    const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;\n\n    const scalar_t cur_top_grad = data_col[index];\n    const int cur_h = (int)cur_inv_h_data;\n    const int cur_w = (int)cur_inv_w_data;\n    for (int dy = -2; dy <= 2; dy++)\n    {\n      for (int dx = -2; dx <= 2; dx++)\n      {\n        if (cur_h + dy >= 0 && cur_h + dy < height &&\n            cur_w + dx >= 0 && cur_w + dx < width &&\n            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&\n            abs(cur_inv_w_data - (cur_w + dx)) < 1)\n        {\n          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;\n          scalar_t weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);\n          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);\n        }\n      }\n    }\n  }\n}\n\nvoid deformable_col2im(\n    const at::Tensor data_col, const at::Tensor data_offset, const int channels,\n    const int height, const int width, const int ksize_h,\n    const int ksize_w, const int pad_h, const int pad_w,\n    const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w,\n    const int parallel_imgs, const int deformable_group,\n    at::Tensor grad_im)\n{\n\n  // todo: make sure parallel_imgs is passed in correctly\n  int height_col = (height + 2 * 
pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;\n  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;\n  int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs;\n  int channel_per_deformable_group = channels / deformable_group;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data_col.type(), \"deformable_col2im_gpu\", ([&] {\n        const scalar_t *data_col_ = data_col.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        scalar_t *grad_im_ = grad_im.data<scalar_t>();\n\n        deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,\n            ksize_w, pad_h, pad_w, stride_h, stride_w,\n            dilation_h, dilation_w, channel_per_deformable_group,\n            parallel_imgs, deformable_group, height_col, width_col, grad_im_);\n      }));\n\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in deformable_col2im: %s\\n\", cudaGetErrorString(err));\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col,\n                                                   const scalar_t *data_im, const scalar_t *data_offset,\n                                                   const int channels, const int height, const int width,\n                                                   const int kernel_h, const int kernel_w,\n                                                   const int pad_h, const int pad_w,\n                                                   const int stride_h, const int stride_w,\n                                                   const int dilation_h, const int dilation_w,\n                                                   const int channel_per_deformable_group,\n                                                   const 
int batch_size, const int offset_channels, const int deformable_group,\n                                                   const int height_col, const int width_col, scalar_t *grad_offset)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    scalar_t val = 0;\n    int w = index % width_col;\n    int h = (index / width_col) % height_col;\n    int c = (index / width_col / height_col) % offset_channels;\n    int b = (index / width_col / height_col) / offset_channels;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / (2 * kernel_h * kernel_w);\n    const int col_step = kernel_h * kernel_w;\n    int cnt = 0;\n    const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group *\n                                                  batch_size * width_col * height_col;\n    const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) *\n                                                channel_per_deformable_group / kernel_h / kernel_w * height * width;\n    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 *\n                                                        kernel_h * kernel_w * height_col * width_col;\n\n    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;\n\n    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)\n    {\n      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;\n      const int bp_dir = offset_c % 2;\n\n      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;\n      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;\n      int w_out = col_pos % width_col;\n      int h_out = (col_pos / width_col) % height_col;\n      int w_in = w_out * stride_w - pad_w;\n      int h_in = h_out * stride_h - pad_h;\n      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + 
h_out) * width_col + w_out);\n      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);\n      const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n      const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n      scalar_t inv_h = h_in + i * dilation_h + offset_h;\n      scalar_t inv_w = w_in + j * dilation_w + offset_w;\n      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)\n      {\n        inv_h = inv_w = -2;\n      }\n      const scalar_t weight = get_coordinate_weight(\n          inv_h, inv_w,\n          height, width, data_im_ptr + cnt * height * width, width, bp_dir);\n      val += weight * data_col_ptr[col_pos];\n      cnt += 1;\n    }\n\n    grad_offset[index] = val;\n  }\n}\n\nvoid deformable_col2im_coord(\n    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset,\n    const int channels, const int height, const int width, const int ksize_h,\n    const int ksize_w, const int pad_h, const int pad_w, const int stride_h,\n    const int stride_w, const int dilation_h, const int dilation_w,\n    const int parallel_imgs, const int deformable_group, at::Tensor grad_offset)\n{\n\n  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;\n  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;\n  int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs;\n  int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data_col.type(), \"deformable_col2im_coord_gpu\", ([&] {\n        const scalar_t *data_col_ = data_col.data<scalar_t>();\n        const scalar_t *data_im_ = data_im.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        scalar_t *grad_offset_ = grad_offset.data<scalar_t>();\n\n        
deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_col_, data_im_, data_offset_, channels, height, width,\n            ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w,\n            dilation_h, dilation_w, channel_per_deformable_group,\n            parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group,\n            height_col, width_col, grad_offset_);\n      }));\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width,\n                                         const int height, const int width, scalar_t h, scalar_t w)\n{\n  int h_low = floor(h);\n  int w_low = floor(w);\n  int h_high = h_low + 1;\n  int w_high = w_low + 1;\n\n  scalar_t lh = h - h_low;\n  scalar_t lw = w - w_low;\n  scalar_t hh = 1 - lh, hw = 1 - lw;\n\n  scalar_t v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n    v1 = bottom_data[h_low * data_width + w_low];\n  scalar_t v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n    v2 = bottom_data[h_low * data_width + w_high];\n  scalar_t v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n    v3 = bottom_data[h_high * data_width + w_low];\n  scalar_t v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n    v4 = bottom_data[h_high * data_width + w_high];\n\n  scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n  scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  return val;\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w,\n                                             const int h, const int w, const int height, const int width)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 
1;\n\n  scalar_t weight = 0;\n  if (h == argmax_h_low && w == argmax_w_low)\n    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);\n  if (h == argmax_h_low && w == argmax_w_high)\n    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);\n  if (h == argmax_h_high && w == argmax_w_low)\n    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);\n  if (h == argmax_h_high && w == argmax_w_high)\n    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);\n  return weight;\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w,\n                                               const int height, const int width, const scalar_t *im_data,\n                                               const int data_width, const int bp_dir)\n{\n  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)\n  {\n    //empty\n    return 0;\n  }\n\n  int argmax_h_low = floor(argmax_h);\n  int argmax_w_low = floor(argmax_w);\n  int argmax_h_high = argmax_h_low + 1;\n  int argmax_w_high = argmax_w_low + 1;\n\n  scalar_t weight = 0;\n\n  if (bp_dir == 0)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)\n      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n  else if (bp_dir == 1)\n  {\n    if (argmax_h_low >= 0 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];\n    if (argmax_h_low >= 0 && argmax_w_high <= width - 
1)\n      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];\n    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)\n      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];\n    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)\n      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];\n  }\n\n  return weight;\n}\n\ntemplate <typename scalar_t>\n__global__ void modulated_deformable_im2col_gpu_kernel(const int n,\n                                                       const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask,\n                                                       const int height, const int width, const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int num_channels, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       scalar_t *data_col)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    // index index of output matrix\n    const int w_col = index % width_col;\n    const int h_col = (index / width_col) % height_col;\n    const int b_col = (index / width_col / height_col) % batch_size;\n    const int c_im = (index / width_col / height_col) / batch_size;\n    const int c_col = c_im * kernel_h * kernel_w;\n\n    // compute deformable group index\n    const int deformable_group_index = c_im / 
channel_per_deformable_group;\n\n    const int h_in = h_col * stride_h - pad_h;\n    const int w_in = w_col * stride_w - pad_w;\n\n    scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;\n    //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;\n    const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;\n    const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n\n    const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    for (int i = 0; i < kernel_h; ++i)\n    {\n      for (int j = 0; j < kernel_w; ++j)\n      {\n        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;\n        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;\n        const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;\n        const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n        const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n        const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];\n        scalar_t val = static_cast<scalar_t>(0);\n        const scalar_t h_im = h_in + i * dilation_h + offset_h;\n        const scalar_t w_im = w_in + j * dilation_w + offset_w;\n        //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {\n        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)\n        {\n          //const float map_h = i * dilation_h + offset_h;\n          //const float map_w = j * dilation_w + offset_w;\n          //const int cur_height = height - h_in;\n          //const int cur_width = width - w_in;\n          //val = dmcn_im2col_bilinear(data_im_ptr, width, 
cur_height, cur_width, map_h, map_w);\n          val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);\n        }\n        *data_col_ptr = val * mask;\n        data_col_ptr += batch_size * height_col * width_col;\n        //data_col_ptr += height_col * width_col;\n      }\n    }\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void modulated_deformable_col2im_gpu_kernel(const int n,\n                                                       const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask,\n                                                       const int channels, const int height, const int width,\n                                                       const int kernel_h, const int kernel_w,\n                                                       const int pad_h, const int pad_w,\n                                                       const int stride_h, const int stride_w,\n                                                       const int dilation_h, const int dilation_w,\n                                                       const int channel_per_deformable_group,\n                                                       const int batch_size, const int deformable_group,\n                                                       const int height_col, const int width_col,\n                                                       scalar_t *grad_im)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    const int j = (index / width_col / height_col / batch_size) % kernel_w;\n    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;\n    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / channel_per_deformable_group;\n\n    int w_out = index % width_col;\n    int h_out = (index / width_col) % height_col;\n    int b = (index / width_col / height_col) % batch_size;\n    int w_in = 
w_out * stride_w - pad_w;\n    int h_in = h_out * stride_h - pad_h;\n\n    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;\n    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;\n    const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;\n    const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n    const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n    const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];\n    const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h;\n    const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w;\n\n    const scalar_t cur_top_grad = data_col[index] * mask;\n    const int cur_h = (int)cur_inv_h_data;\n    const int cur_w = (int)cur_inv_w_data;\n    for (int dy = -2; dy <= 2; dy++)\n    {\n      for (int dx = -2; dx <= 2; dx++)\n      {\n        if (cur_h + dy >= 0 && cur_h + dy < height &&\n            cur_w + dx >= 0 && cur_w + dx < width &&\n            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&\n            abs(cur_inv_w_data - (cur_w + dx)) < 1)\n        {\n          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;\n          scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);\n          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n,\n                                                             const 
scalar_t *data_col, const scalar_t *data_im,\n                                                             const scalar_t *data_offset, const scalar_t *data_mask,\n                                                             const int channels, const int height, const int width,\n                                                             const int kernel_h, const int kernel_w,\n                                                             const int pad_h, const int pad_w,\n                                                             const int stride_h, const int stride_w,\n                                                             const int dilation_h, const int dilation_w,\n                                                             const int channel_per_deformable_group,\n                                                             const int batch_size, const int offset_channels, const int deformable_group,\n                                                             const int height_col, const int width_col,\n                                                             scalar_t *grad_offset, scalar_t *grad_mask)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    scalar_t val = 0, mval = 0;\n    int w = index % width_col;\n    int h = (index / width_col) % height_col;\n    int c = (index / width_col / height_col) % offset_channels;\n    int b = (index / width_col / height_col) / offset_channels;\n    // compute the start and end of the output\n\n    const int deformable_group_index = c / (2 * kernel_h * kernel_w);\n    const int col_step = kernel_h * kernel_w;\n    int cnt = 0;\n    const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col;\n    const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width;\n    const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + 
deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;\n    const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;\n\n    const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;\n\n    for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step)\n    {\n      const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w;\n      const int bp_dir = offset_c % 2;\n\n      int j = (col_pos / width_col / height_col / batch_size) % kernel_w;\n      int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;\n      int w_out = col_pos % width_col;\n      int h_out = (col_pos / width_col) % height_col;\n      int w_in = w_out * stride_w - pad_w;\n      int h_in = h_out * stride_h - pad_h;\n      const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);\n      const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out);\n      const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);\n      const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr];\n      const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr];\n      const scalar_t mask = data_mask_ptr[data_mask_hw_ptr];\n      scalar_t inv_h = h_in + i * dilation_h + offset_h;\n      scalar_t inv_w = w_in + j * dilation_w + offset_w;\n      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)\n      {\n        inv_h = inv_w = -2;\n      }\n      else\n      {\n        mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);\n      }\n      const scalar_t weight = dmcn_get_coordinate_weight(\n          inv_h, inv_w,\n          height, width, data_im_ptr + cnt * height * width, width, bp_dir);\n      val += weight * 
data_col_ptr[col_pos] * mask;\n      cnt += 1;\n    }\n    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);\n    grad_offset[index] = val;\n    if (offset_c % 2 == 0)\n      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);\n      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;\n  }\n}\n\nvoid modulated_deformable_im2col_cuda(\n    const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,\n    const int batch_size, const int channels, const int height_im, const int width_im,\n    const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n    const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w,\n    const int deformable_group, at::Tensor data_col)\n{\n  // num_axes should be smaller than block size\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * batch_size * height_col * width_col;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data_im.type(), \"modulated_deformable_im2col_gpu\", ([&] {\n        const scalar_t *data_im_ = data_im.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        const scalar_t *data_mask_ = data_mask.data<scalar_t>();\n        scalar_t *data_col_ = data_col.data<scalar_t>();\n\n        modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kernel_w,\n            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,\n            batch_size, channels, deformable_group, height_col, width_col, data_col_);\n      }));\n\n  cudaError_t err = 
cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_im2col_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n}\n\nvoid modulated_deformable_col2im_cuda(\n    const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask,\n    const int batch_size, const int channels, const int height_im, const int width_im,\n    const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n    const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w,\n    const int deformable_group, at::Tensor grad_im)\n{\n\n  const int channel_per_deformable_group = channels / deformable_group;\n  const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data_col.type(), \"modulated_deformable_col2im_gpu\", ([&] {\n        const scalar_t *data_col_ = data_col.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        const scalar_t *data_mask_ = data_mask.data<scalar_t>();\n        scalar_t *grad_im_ = grad_im.data<scalar_t>();\n\n        modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,\n            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n            dilation_h, dilation_w, channel_per_deformable_group,\n            batch_size, deformable_group, height_col, width_col, grad_im_);\n      }));\n\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n}\n\nvoid modulated_deformable_col2im_coord_cuda(\n    const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask,\n    const int batch_size, const int channels, const int 
height_im, const int width_im,\n    const int height_col, const int width_col, const int kernel_h, const int kernel_w,\n    const int pad_h, const int pad_w, const int stride_h, const int stride_w,\n    const int dilation_h, const int dilation_w,\n    const int deformable_group,\n    at::Tensor grad_offset, at::Tensor grad_mask)\n{\n  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;\n  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data_col.type(), \"modulated_deformable_col2im_coord_gpu\", ([&] {\n        const scalar_t *data_col_ = data_col.data<scalar_t>();\n        const scalar_t *data_im_ = data_im.data<scalar_t>();\n        const scalar_t *data_offset_ = data_offset.data<scalar_t>();\n        const scalar_t *data_mask_ = data_mask.data<scalar_t>();\n        scalar_t *grad_offset_ = grad_offset.data<scalar_t>();\n        scalar_t *grad_mask_ = grad_mask.data<scalar_t>();\n\n        modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(\n            num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,\n            kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,\n            dilation_h, dilation_w, channel_per_deformable_group,\n            batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,\n            grad_offset_, grad_mask_);\n      }));\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in modulated_deformable_col2im_coord_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/src/deform_pool_cuda.cpp",
    "content": "// modify from\n// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c\n\n// based on\n// author: Charles Shang\n// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu\n\n#include <torch/extension.h>\n\n#include <cmath>\n#include <vector>\n\nvoid DeformablePSROIPoolForward(\n    const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,\n    at::Tensor out, at::Tensor top_count, const int batch, const int channels,\n    const int height, const int width, const int num_bbox,\n    const int channels_trans, const int no_trans, const float spatial_scale,\n    const int output_dim, const int group_size, const int pooled_size,\n    const int part_size, const int sample_per_part, const float trans_std);\n\nvoid DeformablePSROIPoolBackwardAcc(\n    const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,\n    const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,\n    at::Tensor trans_grad, const int batch, const int channels,\n    const int height, const int width, const int num_bbox,\n    const int channels_trans, const int no_trans, const float spatial_scale,\n    const int output_dim, const int group_size, const int pooled_size,\n    const int part_size, const int sample_per_part, const float trans_std);\n\nvoid deform_psroi_pooling_cuda_forward(\n    at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,\n    at::Tensor top_count, const int no_trans, const float spatial_scale,\n    const int output_dim, const int group_size, const int pooled_size,\n    const int part_size, const int sample_per_part, const float trans_std) {\n  AT_CHECK(input.is_contiguous(), \"input tensor has to be contiguous\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 
2 : trans.size(1);\n\n  const int num_bbox = bbox.size(0);\n  if (num_bbox != out.size(0))\n    AT_ERROR(\"Output shape and bbox number wont match: (%d vs %d).\",\n             out.size(0), num_bbox);\n\n  DeformablePSROIPoolForward(\n      input, bbox, trans, out, top_count, batch, channels, height, width,\n      num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,\n      pooled_size, part_size, sample_per_part, trans_std);\n}\n\nvoid deform_psroi_pooling_cuda_backward(\n    at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,\n    at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,\n    const int no_trans, const float spatial_scale, const int output_dim,\n    const int group_size, const int pooled_size, const int part_size,\n    const int sample_per_part, const float trans_std) {\n  AT_CHECK(out_grad.is_contiguous(), \"out_grad tensor has to be contiguous\");\n  AT_CHECK(input.is_contiguous(), \"input tensor has to be contiguous\");\n\n  const int batch = input.size(0);\n  const int channels = input.size(1);\n  const int height = input.size(2);\n  const int width = input.size(3);\n  const int channels_trans = no_trans ? 
2 : trans.size(1);\n\n  const int num_bbox = bbox.size(0);\n  if (num_bbox != out_grad.size(0))\n    AT_ERROR(\"Output shape and bbox number wont match: (%d vs %d).\",\n             out_grad.size(0), num_bbox);\n\n  DeformablePSROIPoolBackwardAcc(\n      out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,\n      channels, height, width, num_bbox, channels_trans, no_trans,\n      spatial_scale, output_dim, group_size, pooled_size, part_size,\n      sample_per_part, trans_std);\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"deform_psroi_pooling_cuda_forward\", &deform_psroi_pooling_cuda_forward,\n        \"deform psroi pooling forward(CUDA)\");\n  m.def(\"deform_psroi_pooling_cuda_backward\",\n        &deform_psroi_pooling_cuda_backward,\n        \"deform psroi pooling backward(CUDA)\");\n}"
  },
  {
    "path": "mmdetection/mmdet/ops/dcn/src/deform_pool_cuda_kernel.cu",
    "content": "/*!\n * Copyright (c) 2017 Microsoft\n * Licensed under The MIT License [see LICENSE for details]\n * \\file deformable_psroi_pooling.cu\n * \\brief\n * \\author Yi Li, Guodong Zhang, Jifeng Dai\n*/\n/***************** Adapted by Charles Shang *********************/\n// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu\n\n#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n#include <stdio.h>\n#include <math.h>\n#include <algorithm>\n\nusing namespace at;\n\n#define CUDA_KERNEL_LOOP(i, n)                        \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \\\n       i < (n);                                       \\\n       i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N)\n{\n  return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t bilinear_interp(\n    const scalar_t *data,\n    const scalar_t x,\n    const scalar_t y,\n    const int width,\n    const int height)\n{\n  int x1 = floor(x);\n  int x2 = ceil(x);\n  int y1 = floor(y);\n  int y2 = ceil(y);\n  scalar_t dist_x = (scalar_t)(x - x1);\n  scalar_t dist_y = (scalar_t)(y - y1);\n  scalar_t value11 = data[y1 * width + x1];\n  scalar_t value12 = data[y2 * width + x1];\n  scalar_t value21 = data[y1 * width + x2];\n  scalar_t value22 = data[y2 * width + x2];\n  scalar_t value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22;\n  return value;\n}\n\ntemplate <typename scalar_t>\n__global__ void DeformablePSROIPoolForwardKernel(\n    const int count,\n    const scalar_t *bottom_data,\n    const scalar_t spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const scalar_t *bottom_rois, const scalar_t *bottom_trans,\n    
const int no_trans,\n    const scalar_t trans_std,\n    const int sample_per_part,\n    const int output_dim,\n    const int group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class,\n    scalar_t *top_data,\n    scalar_t *top_count)\n{\n  CUDA_KERNEL_LOOP(index, count)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5;\n\n    // Force too small ROIs to be 1x1\n    scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0\n    scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1);\n\n    // Compute w and h at bottom\n    scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height);\n    scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width);\n\n    scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part);\n    scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part);\n\n    int part_h = floor((scalar_t)(ph) / pooled_height * part_size);\n    int part_w = floor((scalar_t)(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    scalar_t trans_x = no_trans ? 
(scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;\n    scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;\n\n    scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    scalar_t sum = 0;\n    int count = 0;\n    int gw = floor((scalar_t)(pw)*group_size / pooled_width);\n    int gh = floor((scalar_t)(ph)*group_size / pooled_height);\n    gw = min(max(gw, 0), group_size - 1);\n    gh = min(max(gh, 0), group_size - 1);\n\n    const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width;\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        scalar_t w = wstart + iw * sub_bin_size_w;\n        scalar_t h = hstart + ih * sub_bin_size_h;\n        // bilinear interpolation\n        if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = min(max(w, 0.), width - 1.);\n        h = min(max(h, 0.), height - 1.);\n        int c = (ctop * group_size + gh) * group_size + gw;\n        scalar_t val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height);\n        sum += val;\n        count++;\n      }\n    }\n    top_data[index] = count == 0 ? 
(scalar_t)(0) : sum / count;\n    top_count[index] = count;\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void DeformablePSROIPoolBackwardAccKernel(\n    const int count,\n    const scalar_t *top_diff,\n    const scalar_t *top_count,\n    const int num_rois,\n    const scalar_t spatial_scale,\n    const int channels,\n    const int height, const int width,\n    const int pooled_height, const int pooled_width,\n    const int output_dim,\n    scalar_t *bottom_data_diff, scalar_t *bottom_trans_diff,\n    const scalar_t *bottom_data,\n    const scalar_t *bottom_rois,\n    const scalar_t *bottom_trans,\n    const int no_trans,\n    const scalar_t trans_std,\n    const int sample_per_part,\n    const int group_size,\n    const int part_size,\n    const int num_classes,\n    const int channels_each_class)\n{\n  CUDA_KERNEL_LOOP(index, count)\n  {\n    // The output is in order (n, ctop, ph, pw)\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int ctop = (index / pooled_width / pooled_height) % output_dim;\n    int n = index / pooled_width / pooled_height / output_dim;\n\n    // [start, end) interval for spatial sampling\n    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5;\n    scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5;\n    scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5;\n    scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) 
* spatial_scale - 0.5;\n\n    // Force too small ROIs to be 1x1\n    scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0\n    scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1);\n\n    // Compute w and h at bottom\n    scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height);\n    scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width);\n\n    scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part);\n    scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part);\n\n    int part_h = floor((scalar_t)(ph) / pooled_height * part_size);\n    int part_w = floor((scalar_t)(pw) / pooled_width * part_size);\n    int class_id = ctop / channels_each_class;\n    scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;\n    scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std;\n\n    scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w;\n    wstart += trans_x * roi_width;\n    scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h;\n    hstart += trans_y * roi_height;\n\n    if (top_count[index] <= 0)\n    {\n      continue;\n    }\n    scalar_t diff_val = top_diff[index] / top_count[index];\n    const scalar_t *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width;\n    scalar_t *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width;\n    int gw = floor((scalar_t)(pw)*group_size / pooled_width);\n    int gh = floor((scalar_t)(ph)*group_size / pooled_height);\n    gw = min(max(gw, 0), group_size - 1);\n    gh = min(max(gh, 0), group_size - 1);\n\n    for (int ih = 0; ih < sample_per_part; ih++)\n    {\n      for (int iw = 0; iw < sample_per_part; iw++)\n      {\n        scalar_t w = wstart + iw * sub_bin_size_w;\n        scalar_t h = hstart + ih * 
sub_bin_size_h;\n        // bilinear interpolation\n        if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5)\n        {\n          continue;\n        }\n        w = min(max(w, 0.), width - 1.);\n        h = min(max(h, 0.), height - 1.);\n        int c = (ctop * group_size + gh) * group_size + gw;\n        // backward on feature\n        int x0 = floor(w);\n        int x1 = ceil(w);\n        int y0 = floor(h);\n        int y1 = ceil(h);\n        scalar_t dist_x = w - x0, dist_y = h - y0;\n        scalar_t q00 = (1 - dist_x) * (1 - dist_y);\n        scalar_t q01 = (1 - dist_x) * dist_y;\n        scalar_t q10 = dist_x * (1 - dist_y);\n        scalar_t q11 = dist_x * dist_y;\n        int bottom_index_base = c * height * width;\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);\n        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);\n\n        if (no_trans)\n        {\n          continue;\n        }\n        scalar_t U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];\n        scalar_t U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];\n        scalar_t U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];\n        scalar_t U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];\n        scalar_t diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;\n        diff_x *= roi_width;\n        scalar_t diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;\n        diff_y *= roi_height;\n\n        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);\n        
atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);\n      }\n    }\n  }\n}\n\nvoid DeformablePSROIPoolForward(const at::Tensor data,\n                                const at::Tensor bbox,\n                                const at::Tensor trans,\n                                at::Tensor out,\n                                at::Tensor top_count,\n                                const int batch,\n                                const int channels,\n                                const int height,\n                                const int width,\n                                const int num_bbox,\n                                const int channels_trans,\n                                const int no_trans,\n                                const float spatial_scale,\n                                const int output_dim,\n                                const int group_size,\n                                const int pooled_size,\n                                const int part_size,\n                                const int sample_per_part,\n                                const float trans_std)\n{\n  const int pooled_height = pooled_size;\n  const int pooled_width = pooled_size;\n  const int count = num_bbox * output_dim * pooled_height * pooled_width;\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      data.type(), \"deformable_psroi_pool_forward\", ([&] {\n        const scalar_t *bottom_data = data.data<scalar_t>();\n        const scalar_t *bottom_rois = bbox.data<scalar_t>();\n        const scalar_t *bottom_trans = no_trans ? 
NULL : trans.data<scalar_t>();\n        scalar_t *top_data = out.data<scalar_t>();\n        scalar_t *top_count_data = top_count.data<scalar_t>();\n\n        DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(\n            count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,\n            bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part, output_dim,\n            group_size, part_size, num_classes, channels_each_class, top_data, top_count_data);\n      }));\n\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in DeformablePSROIPoolForward: %s\\n\", cudaGetErrorString(err));\n  }\n}\n\nvoid DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,\n                                    const at::Tensor data,\n                                    const at::Tensor bbox,\n                                    const at::Tensor trans,\n                                    const at::Tensor top_count,\n                                    at::Tensor in_grad,\n                                    at::Tensor trans_grad,\n                                    const int batch,\n                                    const int channels,\n                                    const int height,\n                                    const int width,\n                                    const int num_bbox,\n                                    const int channels_trans,\n                                    const int no_trans,\n                                    const float spatial_scale,\n                                    const int output_dim,\n                                    const int group_size,\n                                    const int pooled_size,\n                                    const int part_size,\n                                    const int sample_per_part,\n                                    const float trans_std)\n{\n  // LOG(INFO) << 
\"DeformablePSROIPoolBackward\";\n  const int num_rois = num_bbox;\n  const int pooled_height = pooled_size;\n  const int pooled_width = pooled_size;\n  const int count = num_bbox * output_dim * pooled_height * pooled_width;\n  const int num_classes = no_trans ? 1 : channels_trans / 2;\n  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      out_grad.type(), \"deformable_psroi_pool_backward_acc\", ([&] {\n        const scalar_t *top_diff = out_grad.data<scalar_t>();\n        const scalar_t *bottom_data = data.data<scalar_t>();\n        const scalar_t *bottom_rois = bbox.data<scalar_t>();\n        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();\n        scalar_t *bottom_data_diff = in_grad.data<scalar_t>();\n        scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();\n        const scalar_t *top_count_data = top_count.data<scalar_t>();\n\n        DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(\n            count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,\n            pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff,\n            bottom_data, bottom_rois, bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part,\n            group_size, part_size, num_classes, channels_each_class);\n      }));\n\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in DeformablePSROIPoolBackwardAcc: %s\\n\", cudaGetErrorString(err));\n  }\n}"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/__init__.py",
    "content": "from .nms_wrapper import nms, soft_nms\n\n__all__ = ['nms', 'soft_nms']\n"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/nms_wrapper.py",
    "content": "import numpy as np\nimport torch\n\nfrom . import nms_cuda, nms_cpu\nfrom .soft_nms_cpu import soft_nms_cpu\n\n\ndef nms(dets, iou_thr, device_id=None):\n    \"\"\"Dispatch to either CPU or GPU NMS implementations.\n\n    The input can be either a torch tensor or numpy array. GPU NMS will be used\n    if the input is a gpu tensor or device_id is specified, otherwise CPU NMS\n    will be used. The returned type will always be the same as inputs.\n\n    Arguments:\n        dets (torch.Tensor or np.ndarray): bboxes with scores.\n        iou_thr (float): IoU threshold for NMS.\n        device_id (int, optional): when `dets` is a numpy array, if `device_id`\n            is None, then cpu nms is used, otherwise gpu_nms will be used.\n\n    Returns:\n        tuple: kept bboxes and indice, which is always the same data type as\n            the input.\n    \"\"\"\n    # convert dets (tensor or numpy array) to tensor\n    if isinstance(dets, torch.Tensor):\n        is_numpy = False\n        dets_th = dets\n    elif isinstance(dets, np.ndarray):\n        is_numpy = True\n        device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)\n        dets_th = torch.from_numpy(dets).to(device)\n    else:\n        raise TypeError(\n            'dets must be either a Tensor or numpy array, but got {}'.format(\n                type(dets)))\n\n    # execute cpu or cuda nms\n    if dets_th.shape[0] == 0:\n        inds = dets_th.new_zeros(0, dtype=torch.long)\n    else:\n        if dets_th.is_cuda:\n            inds = nms_cuda.nms(dets_th, iou_thr)\n        else:\n            inds = nms_cpu.nms(dets_th, iou_thr)\n\n    if is_numpy:\n        inds = inds.cpu().numpy()\n    return dets[inds, :], inds\n\n\ndef soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):\n    if isinstance(dets, torch.Tensor):\n        is_tensor = True\n        dets_np = dets.detach().cpu().numpy()\n    elif isinstance(dets, np.ndarray):\n        is_tensor = False\n        
dets_np = dets\n    else:\n        raise TypeError(\n            'dets must be either a Tensor or numpy array, but got {}'.format(\n                type(dets)))\n\n    method_codes = {'linear': 1, 'gaussian': 2}\n    if method not in method_codes:\n        raise ValueError('Invalid method for SoftNMS: {}'.format(method))\n    new_dets, inds = soft_nms_cpu(\n        dets_np,\n        iou_thr,\n        method=method_codes[method],\n        sigma=sigma,\n        min_score=min_score)\n\n    if is_tensor:\n        return dets.new_tensor(new_dets), dets.new_tensor(\n            inds, dtype=torch.long)\n    else:\n        return new_dets.astype(np.float32), inds.astype(np.int64)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/setup.py",
    "content": "import os.path as osp\nfrom setuptools import setup, Extension\n\nimport numpy as np\nfrom Cython.Build import cythonize\nfrom Cython.Distutils import build_ext\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\next_args = dict(\n    include_dirs=[np.get_include()],\n    language='c++',\n    extra_compile_args={\n        'cc': ['-Wno-unused-function', '-Wno-write-strings'],\n        'nvcc': ['-c', '--compiler-options', '-fPIC'],\n    },\n)\n\nextensions = [\n    Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args),\n]\n\n\ndef customize_compiler_for_nvcc(self):\n    \"\"\"inject deep into distutils to customize how the dispatch\n    to cc/nvcc works.\n    If you subclass UnixCCompiler, it's not trivial to get your subclass\n    injected in, and still have the right customizations (i.e.\n    distutils.sysconfig.customize_compiler) run on it. So instead of going\n    the OO route, I have this. Note, it's kind of like a weird functional\n    subclassing going on.\"\"\"\n\n    # tell the compiler it can process .cu\n    self.src_extensions.append('.cu')\n\n    # save references to the default compiler_so and _compile methods\n    default_compiler_so = self.compiler_so\n    super = self._compile\n\n    # now redefine the _compile method. 
This gets executed for each\n    # object but distutils doesn't have the ability to change compilers\n    # based on source extension: we add it.\n    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):\n        if osp.splitext(src)[1] == '.cu':\n            # use the cuda for .cu files\n            self.set_executable('compiler_so', 'nvcc')\n            # use only a subset of the extra_postargs, which are 1-1 translated\n            # from the extra_compile_args in the Extension class\n            postargs = extra_postargs['nvcc']\n        else:\n            postargs = extra_postargs['cc']\n\n        super(obj, src, ext, cc_args, postargs, pp_opts)\n        # reset the default compiler_so, which we might have changed for cuda\n        self.compiler_so = default_compiler_so\n\n    # inject our redefined _compile method into the class\n    self._compile = _compile\n\n\nclass custom_build_ext(build_ext):\n\n    def build_extensions(self):\n        customize_compiler_for_nvcc(self.compiler)\n        build_ext.build_extensions(self)\n\n\nsetup(\n    name='soft_nms',\n    cmdclass={'build_ext': custom_build_ext},\n    ext_modules=cythonize(extensions),\n)\n\nsetup(\n    name='nms_cuda',\n    ext_modules=[\n        CUDAExtension('nms_cuda', [\n            'src/nms_cuda.cpp',\n            'src/nms_kernel.cu',\n        ]),\n        CUDAExtension('nms_cpu', [\n            'src/nms_cpu.cpp',\n        ]),\n    ],\n    cmdclass={'build_ext': BuildExtension})\n"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/src/nms_cpu.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <torch/extension.h>\n\ntemplate <typename scalar_t>\nat::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {\n  AT_ASSERTM(!dets.type().is_cuda(), \"dets must be a CPU tensor\");\n\n  if (dets.numel() == 0) {\n    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));\n  }\n\n  auto x1_t = dets.select(1, 0).contiguous();\n  auto y1_t = dets.select(1, 1).contiguous();\n  auto x2_t = dets.select(1, 2).contiguous();\n  auto y2_t = dets.select(1, 3).contiguous();\n  auto scores = dets.select(1, 4).contiguous();\n\n  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);\n\n  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));\n\n  auto ndets = dets.size(0);\n  at::Tensor suppressed_t =\n      at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));\n\n  auto suppressed = suppressed_t.data<uint8_t>();\n  auto order = order_t.data<int64_t>();\n  auto x1 = x1_t.data<scalar_t>();\n  auto y1 = y1_t.data<scalar_t>();\n  auto x2 = x2_t.data<scalar_t>();\n  auto y2 = y2_t.data<scalar_t>();\n  auto areas = areas_t.data<scalar_t>();\n\n  for (int64_t _i = 0; _i < ndets; _i++) {\n    auto i = order[_i];\n    if (suppressed[i] == 1) continue;\n    auto ix1 = x1[i];\n    auto iy1 = y1[i];\n    auto ix2 = x2[i];\n    auto iy2 = y2[i];\n    auto iarea = areas[i];\n\n    for (int64_t _j = _i + 1; _j < ndets; _j++) {\n      auto j = order[_j];\n      if (suppressed[j] == 1) continue;\n      auto xx1 = std::max(ix1, x1[j]);\n      auto yy1 = std::max(iy1, y1[j]);\n      auto xx2 = std::min(ix2, x2[j]);\n      auto yy2 = std::min(iy2, y2[j]);\n\n      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);\n      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);\n      auto inter = w * h;\n      auto ovr = inter / (iarea + areas[j] - inter);\n      if (ovr >= threshold) suppressed[j] = 1;\n    }\n  }\n  
return at::nonzero(suppressed_t == 0).squeeze(1);\n}\n\nat::Tensor nms(const at::Tensor& dets, const float threshold) {\n  at::Tensor result;\n  AT_DISPATCH_FLOATING_TYPES(dets.type(), \"nms\", [&] {\n    result = nms_cpu_kernel<scalar_t>(dets, threshold);\n  });\n  return result;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"nms\", &nms, \"non-maximum suppression\");\n}"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/src/nms_cuda.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <torch/extension.h>\n\n#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, \" must be a CUDAtensor \")\n\nat::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);\n\nat::Tensor nms(const at::Tensor& dets, const float threshold) {\n  CHECK_CUDA(dets);\n  if (dets.numel() == 0)\n    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));\n  return nms_cuda(dets, threshold);\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"nms\", &nms, \"non-maximum suppression\");\n}"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/src/nms_kernel.cu",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THC.h>\n#include <THC/THCDeviceUtils.cuh>\n\n#include <vector>\n#include <iostream>\n\nint const threadsPerBlock = sizeof(unsigned long long) * 8;\n\n__device__ inline float devIoU(float const * const a, float const * const b) {\n  float left = max(a[0], b[0]), right = min(a[2], b[2]);\n  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);\n  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);\n  float interS = width * height;\n  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);\n  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);\n  return interS / (Sa + Sb - interS);\n}\n\n__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,\n                           const float *dev_boxes, unsigned long long *dev_mask) {\n  const int row_start = blockIdx.y;\n  const int col_start = blockIdx.x;\n\n  // if (row_start > col_start) return;\n\n  const int row_size =\n        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);\n  const int col_size =\n        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);\n\n  __shared__ float block_boxes[threadsPerBlock * 5];\n  if (threadIdx.x < col_size) {\n    block_boxes[threadIdx.x * 5 + 0] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];\n    block_boxes[threadIdx.x * 5 + 1] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];\n    block_boxes[threadIdx.x * 5 + 2] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];\n    block_boxes[threadIdx.x * 5 + 3] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];\n    block_boxes[threadIdx.x * 5 + 4] =\n        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];\n  }\n  __syncthreads();\n\n  if (threadIdx.x < row_size) {\n    const int cur_box_idx = 
threadsPerBlock * row_start + threadIdx.x;\n    const float *cur_box = dev_boxes + cur_box_idx * 5;\n    int i = 0;\n    unsigned long long t = 0;\n    int start = 0;\n    if (row_start == col_start) {\n      start = threadIdx.x + 1;\n    }\n    for (i = start; i < col_size; i++) {\n      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {\n        t |= 1ULL << i;\n      }\n    }\n    const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);\n    dev_mask[cur_box_idx * col_blocks + col_start] = t;\n  }\n}\n\n// boxes is a N x 5 tensor\nat::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {\n  using scalar_t = float;\n  AT_ASSERTM(boxes.type().is_cuda(), \"boxes must be a CUDA tensor\");\n  auto scores = boxes.select(1, 4);\n  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));\n  auto boxes_sorted = boxes.index_select(0, order_t);\n\n  int boxes_num = boxes.size(0);\n\n  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);\n\n  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();\n\n  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState\n\n  unsigned long long* mask_dev = NULL;\n  //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,\n  //                      boxes_num * col_blocks * sizeof(unsigned long long)));\n\n  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));\n\n  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),\n              THCCeilDiv(boxes_num, threadsPerBlock));\n  dim3 threads(threadsPerBlock);\n  nms_kernel<<<blocks, threads>>>(boxes_num,\n                                  nms_overlap_thresh,\n                                  boxes_dev,\n                                  mask_dev);\n\n  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);\n  THCudaCheck(cudaMemcpy(&mask_host[0],\n                        mask_dev,\n                        sizeof(unsigned long long) * boxes_num * 
col_blocks,\n                        cudaMemcpyDeviceToHost));\n\n  std::vector<unsigned long long> remv(col_blocks);\n  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);\n\n  at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));\n  int64_t* keep_out = keep.data<int64_t>();\n\n  int num_to_keep = 0;\n  for (int i = 0; i < boxes_num; i++) {\n    int nblock = i / threadsPerBlock;\n    int inblock = i % threadsPerBlock;\n\n    if (!(remv[nblock] & (1ULL << inblock))) {\n      keep_out[num_to_keep++] = i;\n      unsigned long long *p = &mask_host[0] + i * col_blocks;\n      for (int j = nblock; j < col_blocks; j++) {\n        remv[j] |= p[j];\n      }\n    }\n  }\n\n  THCudaFree(state, mask_dev);\n  // TODO improve this part\n  return std::get<0>(order_t.index({\n                       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(\n                         order_t.device(), keep.scalar_type())\n                     }).sort(0, false));\n}"
  },
  {
    "path": "mmdetection/mmdet/ops/nms/src/soft_nms_cpu.pyx",
    "content": "# ----------------------------------------------------------\n# Soft-NMS: Improving Object Detection With One Line of Code\n# Copyright (c) University of Maryland, College Park\n# Licensed under The MIT License [see LICENSE for details]\n# Written by Navaneeth Bodla and Bharat Singh\n# Modified by Kai Chen\n# ----------------------------------------------------------\n\n# cython: language_level=3, boundscheck=False\n\nimport numpy as np\ncimport numpy as np\n\n\ncdef inline np.float32_t max(np.float32_t a, np.float32_t b):\n    return a if a >= b else b\n\ncdef inline np.float32_t min(np.float32_t a, np.float32_t b):\n    return a if a <= b else b\n\n\ndef soft_nms_cpu(\n    np.ndarray[float, ndim=2] boxes_in,\n    float iou_thr,\n    unsigned int method=1,\n    float sigma=0.5,\n    float min_score=0.001,\n):\n    boxes = boxes_in.copy()\n    cdef unsigned int N = boxes.shape[0]\n    cdef float iw, ih, box_area\n    cdef float ua\n    cdef int pos = 0\n    cdef float maxscore = 0\n    cdef int maxpos = 0\n    cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov\n    inds = np.arange(N)\n\n    for i in range(N):\n        maxscore = boxes[i, 4]\n        maxpos = i\n\n        tx1 = boxes[i, 0]\n        ty1 = boxes[i, 1]\n        tx2 = boxes[i, 2]\n        ty2 = boxes[i, 3]\n        ts = boxes[i, 4]\n        ti = inds[i]\n\n        pos = i + 1\n        # get max box\n        while pos < N:\n            if maxscore < boxes[pos, 4]:\n                maxscore = boxes[pos, 4]\n                maxpos = pos\n            pos = pos + 1\n\n        # add max box as a detection\n        boxes[i, 0] = boxes[maxpos, 0]\n        boxes[i, 1] = boxes[maxpos, 1]\n        boxes[i, 2] = boxes[maxpos, 2]\n        boxes[i, 3] = boxes[maxpos, 3]\n        boxes[i, 4] = boxes[maxpos, 4]\n        inds[i] = inds[maxpos]\n\n        # swap ith box with position of max box\n        boxes[maxpos, 0] = tx1\n        boxes[maxpos, 1] = ty1\n        boxes[maxpos, 2] = 
tx2\n        boxes[maxpos, 3] = ty2\n        boxes[maxpos, 4] = ts\n        inds[maxpos] = ti\n\n        tx1 = boxes[i, 0]\n        ty1 = boxes[i, 1]\n        tx2 = boxes[i, 2]\n        ty2 = boxes[i, 3]\n        ts = boxes[i, 4]\n\n        pos = i + 1\n        # NMS iterations, note that N changes if detection boxes fall below\n        # threshold\n        while pos < N:\n            x1 = boxes[pos, 0]\n            y1 = boxes[pos, 1]\n            x2 = boxes[pos, 2]\n            y2 = boxes[pos, 3]\n            s = boxes[pos, 4]\n\n            area = (x2 - x1 + 1) * (y2 - y1 + 1)\n            iw = (min(tx2, x2) - max(tx1, x1) + 1)\n            if iw > 0:\n                ih = (min(ty2, y2) - max(ty1, y1) + 1)\n                if ih > 0:\n                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)\n                    ov = iw * ih / ua  # iou between max box and detection box\n\n                    if method == 1:  # linear\n                        if ov > iou_thr:\n                            weight = 1 - ov\n                        else:\n                            weight = 1\n                    elif method == 2:  # gaussian\n                        weight = np.exp(-(ov * ov) / sigma)\n                    else:  # original NMS\n                        if ov > iou_thr:\n                            weight = 0\n                        else:\n                            weight = 1\n\n                    boxes[pos, 4] = weight * boxes[pos, 4]\n\n                    # if box score falls below threshold, discard the box by\n                    # swapping with last box update N\n                    if boxes[pos, 4] < min_score:\n                        boxes[pos, 0] = boxes[N-1, 0]\n                        boxes[pos, 1] = boxes[N-1, 1]\n                        boxes[pos, 2] = boxes[N-1, 2]\n                        boxes[pos, 3] = boxes[N-1, 3]\n                        boxes[pos, 4] = boxes[N-1, 4]\n                        inds[pos] = inds[N - 1]\n  
                      N = N - 1\n                        pos = pos - 1\n\n            pos = pos + 1\n\n    return boxes[:N], inds[:N]\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/__init__.py",
    "content": "from .functions.roi_align import roi_align\nfrom .modules.roi_align import RoIAlign\n\n__all__ = ['roi_align', 'RoIAlign']\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/functions/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/functions/roi_align.py",
    "content": "from torch.autograd import Function\n\nfrom .. import roi_align_cuda\n\n\nclass RoIAlignFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):\n        if isinstance(out_size, int):\n            out_h = out_size\n            out_w = out_size\n        elif isinstance(out_size, tuple):\n            assert len(out_size) == 2\n            assert isinstance(out_size[0], int)\n            assert isinstance(out_size[1], int)\n            out_h, out_w = out_size\n        else:\n            raise TypeError(\n                '\"out_size\" must be an integer or tuple of integers')\n        ctx.spatial_scale = spatial_scale\n        ctx.sample_num = sample_num\n        ctx.save_for_backward(rois)\n        ctx.feature_size = features.size()\n\n        batch_size, num_channels, data_height, data_width = features.size()\n        num_rois = rois.size(0)\n\n        output = features.new_zeros(num_rois, num_channels, out_h, out_w)\n        if features.is_cuda:\n            roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,\n                                   sample_num, output)\n        else:\n            raise NotImplementedError\n\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        feature_size = ctx.feature_size\n        spatial_scale = ctx.spatial_scale\n        sample_num = ctx.sample_num\n        rois = ctx.saved_tensors[0]\n        assert (feature_size is not None and grad_output.is_cuda)\n\n        batch_size, num_channels, data_height, data_width = feature_size\n        out_w = grad_output.size(3)\n        out_h = grad_output.size(2)\n\n        grad_input = grad_rois = None\n        if ctx.needs_input_grad[0]:\n            grad_input = rois.new_zeros(batch_size, num_channels, data_height,\n                                        data_width)\n            roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,\n                          
          out_w, spatial_scale, sample_num,\n                                    grad_input)\n\n        return grad_input, grad_rois, None, None, None\n\n\nroi_align = RoIAlignFunction.apply\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/gradcheck.py",
    "content": "import numpy as np\nimport torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(osp.abspath(osp.join(__file__, '../../')))\nfrom roi_align import RoIAlign  # noqa: E402\n\nfeat_size = 15\nspatial_scale = 1.0 / 8\nimg_size = feat_size / spatial_scale\nnum_imgs = 2\nnum_rois = 20\n\nbatch_ind = np.random.randint(num_imgs, size=(num_rois, 1))\nrois = np.random.rand(num_rois, 4) * img_size * 0.5\nrois[:, 2:] += img_size * 0.5\nrois = np.hstack((batch_ind, rois))\n\nfeat = torch.randn(\n    num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')\nrois = torch.from_numpy(rois).float().cuda()\ninputs = (feat, rois)\nprint('Gradcheck for roi align...')\ntest = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)\nprint(test)\ntest = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)\nprint(test)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/modules/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/modules/roi_align.py",
    "content": "from torch.nn.modules.module import Module\nfrom ..functions.roi_align import RoIAlignFunction\n\n\nclass RoIAlign(Module):\n\n    def __init__(self, out_size, spatial_scale, sample_num=0):\n        super(RoIAlign, self).__init__()\n\n        self.out_size = out_size\n        self.spatial_scale = float(spatial_scale)\n        self.sample_num = int(sample_num)\n\n    def forward(self, features, rois):\n        return RoIAlignFunction.apply(features, rois, self.out_size,\n                                      self.spatial_scale, self.sample_num)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='roi_align_cuda',\n    ext_modules=[\n        CUDAExtension('roi_align_cuda', [\n            'src/roi_align_cuda.cpp',\n            'src/roi_align_kernel.cu',\n        ]),\n    ],\n    cmdclass={'build_ext': BuildExtension})\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/src/roi_align_cuda.cpp",
    "content": "#include <torch/extension.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,\n                           const float spatial_scale, const int sample_num,\n                           const int channels, const int height,\n                           const int width, const int num_rois,\n                           const int pooled_height, const int pooled_width,\n                           at::Tensor output);\n\nint ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,\n                            const float spatial_scale, const int sample_num,\n                            const int channels, const int height,\n                            const int width, const int num_rois,\n                            const int pooled_height, const int pooled_width,\n                            at::Tensor bottom_grad);\n\n#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  AT_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nint roi_align_forward_cuda(at::Tensor features, at::Tensor rois,\n                           int pooled_height, int pooled_width,\n                           float spatial_scale, int sample_num,\n                           at::Tensor output) {\n  CHECK_INPUT(features);\n  CHECK_INPUT(rois);\n  CHECK_INPUT(output);\n\n  // Number of ROIs\n  int num_rois = rois.size(0);\n  int size_rois = rois.size(1);\n\n  if (size_rois != 5) {\n    printf(\"wrong roi size\\n\");\n    return 0;\n  }\n\n  int num_channels = features.size(1);\n  int data_height = features.size(2);\n  int data_width = features.size(3);\n\n  ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num,\n                         num_channels, data_height, data_width, num_rois,\n                         pooled_height, pooled_width, output);\n\n 
 return 1;\n}\n\nint roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,\n                            int pooled_height, int pooled_width,\n                            float spatial_scale, int sample_num,\n                            at::Tensor bottom_grad) {\n  CHECK_INPUT(top_grad);\n  CHECK_INPUT(rois);\n  CHECK_INPUT(bottom_grad);\n\n  // Number of ROIs\n  int num_rois = rois.size(0);\n  int size_rois = rois.size(1);\n  if (size_rois != 5) {\n    printf(\"wrong roi size\\n\");\n    return 0;\n  }\n\n  int num_channels = bottom_grad.size(1);\n  int data_height = bottom_grad.size(2);\n  int data_width = bottom_grad.size(3);\n\n  ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num,\n                          num_channels, data_height, data_width, num_rois,\n                          pooled_height, pooled_width, bottom_grad);\n\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"forward\", &roi_align_forward_cuda, \"Roi_Align forward (CUDA)\");\n  m.def(\"backward\", &roi_align_backward_cuda, \"Roi_Align backward (CUDA)\");\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_align/src/roi_align_kernel.cu",
    "content": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\n#define CUDA_1D_KERNEL_LOOP(i, n)                            \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \\\n       i += blockDim.x * gridDim.x)\n\n#define THREADS_PER_BLOCK 1024\n\ninline int GET_BLOCKS(const int N) {\n  int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;\n  int max_block_num = 65000;\n  return min(optimal_block_num, max_block_num);\n}\n\ntemplate <typename scalar_t>\n__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,\n                                         const int height, const int width,\n                                         scalar_t y, scalar_t x) {\n  // deal with cases that inverse elements are out of feature map boundary\n  if (y < -1.0 || y > height || x < -1.0 || x > width) {\n    return 0;\n  }\n\n  if (y <= 0) y = 0;\n  if (x <= 0) x = 0;\n\n  int y_low = (int)y;\n  int x_low = (int)x;\n  int y_high;\n  int x_high;\n\n  if (y_low >= height - 1) {\n    y_high = y_low = height - 1;\n    y = (scalar_t)y_low;\n  } else {\n    y_high = y_low + 1;\n  }\n\n  if (x_low >= width - 1) {\n    x_high = x_low = width - 1;\n    x = (scalar_t)x_low;\n  } else {\n    x_high = x_low + 1;\n  }\n\n  scalar_t ly = y - y_low;\n  scalar_t lx = x - x_low;\n  scalar_t hy = 1. - ly;\n  scalar_t hx = 1. 
- lx;\n  // do bilinear interpolation\n  scalar_t lt = bottom_data[y_low * width + x_low];\n  scalar_t rt = bottom_data[y_low * width + x_high];\n  scalar_t lb = bottom_data[y_high * width + x_low];\n  scalar_t rb = bottom_data[y_high * width + x_high];\n  scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;\n\n  scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb);\n\n  return val;\n}\n\ntemplate <typename scalar_t>\n__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data,\n                                const scalar_t *bottom_rois,\n                                const scalar_t spatial_scale,\n                                const int sample_num, const int channels,\n                                const int height, const int width,\n                                const int pooled_height, const int pooled_width,\n                                scalar_t *top_data) {\n  CUDA_1D_KERNEL_LOOP(index, nthreads) {\n    // (n, c, ph, pw) is an element in the aligned output\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int c = (index / pooled_width / pooled_height) % channels;\n    int n = index / pooled_width / pooled_height / channels;\n\n    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;\n    scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;\n    scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;\n    scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;\n\n    // Force malformed ROIs to be 1x1\n    scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);\n    scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);\n\n    scalar_t bin_size_h = roi_height / pooled_height;\n    scalar_t bin_size_w = roi_width / pooled_width;\n\n    const scalar_t *offset_bottom_data =\n        bottom_data + 
(roi_batch_ind * channels + c) * height * width;\n\n    int sample_num_h = (sample_num > 0)\n                           ? sample_num\n                           : ceil(roi_height / pooled_height);  // e.g., = 2\n    int sample_num_w =\n        (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);\n\n    scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h;\n    scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w;\n\n    int hstart = fminf(floor(h), height - 2);\n    int wstart = fminf(floor(w), width - 2);\n\n    scalar_t output_val = 0;\n    for (int iy = 0; iy < sample_num_h; iy++) {\n      const scalar_t y = roi_start_h + ph * bin_size_h +\n                         (scalar_t)(iy + scalar_t(.5f)) * bin_size_h /\n                             (scalar_t)(sample_num_h);\n      for (int ix = 0; ix < sample_num_w; ix++) {\n        const scalar_t x = roi_start_w + pw * bin_size_w +\n                           (scalar_t)(ix + scalar_t(.5f)) * bin_size_w /\n                               (scalar_t)(sample_num_w);\n        scalar_t val = bilinear_interpolate<scalar_t>(offset_bottom_data,\n                                                      height, width, y, x);\n        output_val += val;\n      }\n    }\n    output_val /= (sample_num_h * sample_num_w);\n    top_data[index] = output_val;\n  }\n}\n\nint ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,\n                           const float spatial_scale, const int sample_num,\n                           const int channels, const int height,\n                           const int width, const int num_rois,\n                           const int pooled_height, const int pooled_width,\n                           at::Tensor output) {\n  const int output_size = num_rois * pooled_height * pooled_width * channels;\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      features.type(), \"ROIAlignLaucherForward\", ([&] {\n        const scalar_t *bottom_data = features.data<scalar_t>();\n   
     const scalar_t *rois_data = rois.data<scalar_t>();\n        scalar_t *top_data = output.data<scalar_t>();\n\n        ROIAlignForward<scalar_t>\n            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(\n                output_size, bottom_data, rois_data, scalar_t(spatial_scale),\n                sample_num, channels, height, width, pooled_height,\n                pooled_width, top_data);\n      }));\n  THCudaCheck(cudaGetLastError());\n  return 1;\n}\n\ntemplate <typename scalar_t>\n__device__ void bilinear_interpolate_gradient(const int height, const int width,\n                                              scalar_t y, scalar_t x,\n                                              scalar_t &w1, scalar_t &w2,\n                                              scalar_t &w3, scalar_t &w4,\n                                              int &x_low, int &x_high,\n                                              int &y_low, int &y_high) {\n  // deal with cases that inverse elements are out of feature map boundary\n  if (y < -1.0 || y > height || x < -1.0 || x > width) {\n    w1 = w2 = w3 = w4 = 0.;\n    x_low = x_high = y_low = y_high = -1;\n    return;\n  }\n\n  if (y <= 0) y = 0;\n  if (x <= 0) x = 0;\n\n  y_low = (int)y;\n  x_low = (int)x;\n\n  if (y_low >= height - 1) {\n    y_high = y_low = height - 1;\n    y = (scalar_t)y_low;\n  } else {\n    y_high = y_low + 1;\n  }\n\n  if (x_low >= width - 1) {\n    x_high = x_low = width - 1;\n    x = (scalar_t)x_low;\n  } else {\n    x_high = x_low + 1;\n  }\n\n  scalar_t ly = y - y_low;\n  scalar_t lx = x - x_low;\n  scalar_t hy = 1. - ly;\n  scalar_t hx = 1. 
- lx;\n\n  w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;\n\n  return;\n}\n\ntemplate <typename scalar_t>\n__global__ void ROIAlignBackward(\n    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,\n    const scalar_t spatial_scale, const int sample_num, const int channels,\n    const int height, const int width, const int pooled_height,\n    const int pooled_width, scalar_t *bottom_diff) {\n  CUDA_1D_KERNEL_LOOP(index, nthreads) {\n    // (n, c, ph, pw) is an element in the aligned output\n    int pw = index % pooled_width;\n    int ph = (index / pooled_width) % pooled_height;\n    int c = (index / pooled_width / pooled_height) % channels;\n    int n = index / pooled_width / pooled_height / channels;\n\n    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;\n    int roi_batch_ind = offset_bottom_rois[0];\n    scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;\n    scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;\n    scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;\n    scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;\n\n    // Force malformed ROIs to be 1x1\n    scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);\n    scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);\n\n    scalar_t bin_size_h = roi_height / pooled_height;\n    scalar_t bin_size_w = roi_width / pooled_width;\n\n    scalar_t *offset_bottom_diff =\n        bottom_diff + (roi_batch_ind * channels + c) * height * width;\n    int offset_top = (n * channels + c) * pooled_height * pooled_width +\n                     ph * pooled_width + pw;\n    scalar_t offset_top_diff = top_diff[offset_top];\n\n    int sample_num_h = (sample_num > 0)\n                           ? sample_num\n                           : ceil(roi_height / pooled_height);  // e.g., = 2\n    int sample_num_w =\n        (sample_num > 0) ? 
sample_num : ceil(roi_width / pooled_width);\n\n    const scalar_t count = (scalar_t)(sample_num_h * sample_num_w);\n\n    scalar_t h = (scalar_t)(ph + 0.5) * bin_size_h + roi_start_h;\n    scalar_t w = (scalar_t)(pw + 0.5) * bin_size_w + roi_start_w;\n\n    int hstart = fminf(floor(h), height - 2);\n    int wstart = fminf(floor(w), width - 2);\n\n    for (int iy = 0; iy < sample_num_h; iy++) {\n      const scalar_t y =\n          roi_start_h + ph * bin_size_h +\n          (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h);\n      for (int ix = 0; ix < sample_num_w; ix++) {\n        const scalar_t x =\n            roi_start_w + pw * bin_size_w +\n            (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w);\n        scalar_t w1, w2, w3, w4;\n        int x_low, x_high, y_low, y_high;\n\n        bilinear_interpolate_gradient<scalar_t>(\n            height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high);\n        scalar_t g1 = offset_top_diff * w1 / count;\n        scalar_t g2 = offset_top_diff * w2 / count;\n        scalar_t g3 = offset_top_diff * w3 / count;\n        scalar_t g4 = offset_top_diff * w4 / count;\n        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {\n          atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);\n          atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);\n          atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);\n          atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);\n        }\n      }\n    }\n  }\n}\n\nint ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,\n                            const float spatial_scale, const int sample_num,\n                            const int channels, const int height,\n                            const int width, const int num_rois,\n                            const int pooled_height, const int pooled_width,\n                            at::Tensor bottom_grad) {\n  const int 
output_size = num_rois * pooled_height * pooled_width * channels;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      top_grad.type(), \"ROIAlignLaucherBackward\", ([&] {\n        const scalar_t *top_diff = top_grad.data<scalar_t>();\n        const scalar_t *rois_data = rois.data<scalar_t>();\n        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();\n        if (sizeof(scalar_t) == sizeof(double)) {\n          fprintf(stderr, \"double is not supported\\n\");\n          exit(-1);\n        }\n\n        ROIAlignBackward<scalar_t>\n            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(\n                output_size, top_diff, rois_data, spatial_scale, sample_num,\n                channels, height, width, pooled_height, pooled_width,\n                bottom_diff);\n      }));\n  THCudaCheck(cudaGetLastError());\n  return 1;\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/__init__.py",
    "content": "from .functions.roi_pool import roi_pool\nfrom .modules.roi_pool import RoIPool\n\n__all__ = ['roi_pool', 'RoIPool']\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/functions/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/functions/roi_pool.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom .. import roi_pool_cuda\n\n\nclass RoIPoolFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, rois, out_size, spatial_scale):\n        if isinstance(out_size, int):\n            out_h = out_size\n            out_w = out_size\n        elif isinstance(out_size, tuple):\n            assert len(out_size) == 2\n            assert isinstance(out_size[0], int)\n            assert isinstance(out_size[1], int)\n            out_h, out_w = out_size\n        else:\n            raise TypeError(\n                '\"out_size\" must be an integer or tuple of integers')\n        assert features.is_cuda\n        ctx.save_for_backward(rois)\n        num_channels = features.size(1)\n        num_rois = rois.size(0)\n        out_size = (num_rois, num_channels, out_h, out_w)\n        output = features.new_zeros(out_size)\n        argmax = features.new_zeros(out_size, dtype=torch.int)\n        roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,\n                              output, argmax)\n        ctx.spatial_scale = spatial_scale\n        ctx.feature_size = features.size()\n        ctx.argmax = argmax\n\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        assert grad_output.is_cuda\n        spatial_scale = ctx.spatial_scale\n        feature_size = ctx.feature_size\n        argmax = ctx.argmax\n        rois = ctx.saved_tensors[0]\n        assert feature_size is not None\n\n        grad_input = grad_rois = None\n        if ctx.needs_input_grad[0]:\n            grad_input = grad_output.new_zeros(feature_size)\n            roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax,\n                                   spatial_scale, grad_input)\n\n        return grad_input, grad_rois, None, None\n\n\nroi_pool = RoIPoolFunction.apply\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/gradcheck.py",
    "content": "import torch\nfrom torch.autograd import gradcheck\n\nimport os.path as osp\nimport sys\nsys.path.append(osp.abspath(osp.join(__file__, '../../')))\nfrom roi_pool import RoIPool  # noqa: E402\n\nfeat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()\nrois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],\n                     [1, 67, 40, 110, 120]]).cuda()\ninputs = (feat, rois)\nprint('Gradcheck for roi pooling...')\ntest = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)\nprint(test)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/modules/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/modules/roi_pool.py",
    "content": "from torch.nn.modules.module import Module\nfrom ..functions.roi_pool import roi_pool\n\n\nclass RoIPool(Module):\n\n    def __init__(self, out_size, spatial_scale):\n        super(RoIPool, self).__init__()\n\n        self.out_size = out_size\n        self.spatial_scale = float(spatial_scale)\n\n    def forward(self, features, rois):\n        return roi_pool(features, rois, self.out_size, self.spatial_scale)\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='roi_pool',\n    ext_modules=[\n        CUDAExtension('roi_pool_cuda', [\n            'src/roi_pool_cuda.cpp',\n            'src/roi_pool_kernel.cu',\n        ])\n    ],\n    cmdclass={'build_ext': BuildExtension})\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp",
    "content": "#include <torch/extension.h>\n\n#include <cmath>\n#include <vector>\n\nint ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,\n                          const float spatial_scale, const int channels,\n                          const int height, const int width, const int num_rois,\n                          const int pooled_h, const int pooled_w,\n                          at::Tensor output, at::Tensor argmax);\n\nint ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,\n                           const at::Tensor argmax, const float spatial_scale,\n                           const int batch_size, const int channels,\n                           const int height, const int width,\n                           const int num_rois, const int pooled_h,\n                           const int pooled_w, at::Tensor bottom_grad);\n\n#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  AT_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nint roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,\n                             int pooled_height, int pooled_width,\n                             float spatial_scale, at::Tensor output,\n                             at::Tensor argmax) {\n  CHECK_INPUT(features);\n  CHECK_INPUT(rois);\n  CHECK_INPUT(output);\n  CHECK_INPUT(argmax);\n\n  // Number of ROIs\n  int num_rois = rois.size(0);\n  int size_rois = rois.size(1);\n\n  if (size_rois != 5) {\n    printf(\"wrong roi size\\n\");\n    return 0;\n  }\n\n  int channels = features.size(1);\n  int height = features.size(2);\n  int width = features.size(3);\n\n  ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,\n                        num_rois, pooled_height, pooled_width, output, argmax);\n\n  return 1;\n}\n\nint roi_pooling_backward_cuda(at::Tensor 
top_grad, at::Tensor rois,\n                              at::Tensor argmax, float spatial_scale,\n                              at::Tensor bottom_grad) {\n  CHECK_INPUT(top_grad);\n  CHECK_INPUT(rois);\n  CHECK_INPUT(argmax);\n  CHECK_INPUT(bottom_grad);\n\n  int pooled_height = top_grad.size(2);\n  int pooled_width = top_grad.size(3);\n  int num_rois = rois.size(0);\n  int size_rois = rois.size(1);\n\n  if (size_rois != 5) {\n    printf(\"wrong roi size\\n\");\n    return 0;\n  }\n  int batch_size = bottom_grad.size(0);\n  int channels = bottom_grad.size(1);\n  int height = bottom_grad.size(2);\n  int width = bottom_grad.size(3);\n\n  ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,\n                         channels, height, width, num_rois, pooled_height,\n                         pooled_width, bottom_grad);\n\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"forward\", &roi_pooling_forward_cuda, \"Roi_Pooling forward (CUDA)\");\n  m.def(\"backward\", &roi_pooling_backward_cuda, \"Roi_Pooling backward (CUDA)\");\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/roi_pool/src/roi_pool_kernel.cu",
    "content": "#include <ATen/ATen.h>\n#include <THC/THCAtomics.cuh>\n\n#define CUDA_1D_KERNEL_LOOP(i, n)                            \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \\\n       i += blockDim.x * gridDim.x)\n\n#define THREADS_PER_BLOCK 1024\n\ninline int GET_BLOCKS(const int N) {\n  int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;\n  int max_block_num = 65000;\n  return min(optimal_block_num, max_block_num);\n}\n\n// Max-pools each RoI bin. Every loop iteration produces one pooled output\n// element and records the offset of the winning input element in argmax_data.\ntemplate <typename scalar_t>\n__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,\n                               const scalar_t *rois,\n                               const scalar_t spatial_scale, const int channels,\n                               const int height, const int width,\n                               const int pooled_h, const int pooled_w,\n                               scalar_t *top_data, int *argmax_data) {\n  CUDA_1D_KERNEL_LOOP(index, nthreads) {\n    // (n, c, ph, pw) is an element in the pooled output\n    int pw = index % pooled_w;\n    int ph = (index / pooled_w) % pooled_h;\n    int c = (index / pooled_w / pooled_h) % channels;\n    int n = index / pooled_w / pooled_h / channels;\n\n    const scalar_t *offset_rois = rois + n * 5;\n    int roi_batch_ind = offset_rois[0];\n    // calculate the roi region on feature maps\n    scalar_t roi_x1 = offset_rois[1] * spatial_scale;\n    scalar_t roi_y1 = offset_rois[2] * spatial_scale;\n    scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;\n    scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;\n\n    // malformed rois pool nothing: the output is zero and argmax = -1, so\n    // nothing is backprop'd for them\n    scalar_t roi_w = roi_x2 - roi_x1;\n    scalar_t roi_h = roi_y2 - roi_y1;\n    if (roi_w <= 0 || roi_h <= 0) {\n      top_data[index] = static_cast<scalar_t>(0);\n      if (argmax_data != NULL) argmax_data[index] = -1;\n      continue;\n    }\n\n    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);\n    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);\n\n    // the corresponding bin region\n    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);\n    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);\n    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);\n    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);\n\n    // add roi offsets and clip to input boundaries\n    bin_x1 = min(max(bin_x1, 0), width);\n    bin_y1 = min(max(bin_y1, 0), height);\n    bin_x2 = min(max(bin_x2, 0), width);\n    bin_y2 = min(max(bin_y2, 0), height);\n    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);\n\n    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd\n    int max_idx = -1;\n    // Use a per-iteration pointer. Advancing bottom_data itself would\n    // accumulate offsets whenever a thread runs this loop more than once.\n    const scalar_t *offset_bottom_data =\n        bottom_data + (roi_batch_ind * channels + c) * height * width;\n\n    // Define an empty pooling region to be zero\n    scalar_t max_val =\n        is_empty ? static_cast<scalar_t>(0)\n                 : offset_bottom_data[bin_y1 * width + bin_x1] - 1;\n\n    for (int h = bin_y1; h < bin_y2; ++h) {\n      for (int w = bin_x1; w < bin_x2; ++w) {\n        int offset = h * width + w;\n        if (offset_bottom_data[offset] > max_val) {\n          max_val = offset_bottom_data[offset];\n          max_idx = offset;\n        }\n      }\n    }\n    top_data[index] = max_val;\n    if (argmax_data != NULL) argmax_data[index] = max_idx;\n  }\n}\n\nint ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,\n                          const float spatial_scale, const int channels,\n                          const int height, const int width, const int num_rois,\n                          const int pooled_h, const int pooled_w,\n                          at::Tensor output, at::Tensor argmax) {\n  const int output_size = num_rois * channels * pooled_h * pooled_w;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      features.type(), \"ROIPoolLaucherForward\", ([&] {\n        const scalar_t *bottom_data = features.data<scalar_t>();\n        const scalar_t *rois_data = rois.data<scalar_t>();\n        scalar_t *top_data = output.data<scalar_t>();\n        int *argmax_data = argmax.data<int>();\n\n        ROIPoolForward<scalar_t>\n            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(\n                output_size, bottom_data, rois_data, scalar_t(spatial_scale),\n                channels, height, width, pooled_h, pooled_w, top_data,\n                argmax_data);\n      }));\n  THCudaCheck(cudaGetLastError());\n  return 1;\n}\n\n// Scatters each pooled-output gradient back to the input element recorded in\n// argmax_data by the forward pass.\ntemplate <typename scalar_t>\n__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,\n                                const scalar_t *rois, const int *argmax_data,\n                                const scalar_t spatial_scale,\n                                const int channels, const int height,\n                                const int width, const int pooled_h,\n                                const int pooled_w, scalar_t *bottom_diff) {\n  CUDA_1D_KERNEL_LOOP(index, nthreads) {\n    int pw = index % pooled_w;\n    int ph = (index / pooled_w) % pooled_h;\n    int c = (index / pooled_w / pooled_h) % channels;\n    int n = index / pooled_w / pooled_h / channels;\n\n    int roi_batch_ind = rois[n * 5];\n    int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +\n                                   ph * pooled_w + pw];\n\n    // argmax = -1 marks a bin that pooled nothing, so no input element\n    // receives its gradient\n    if (bottom_index < 0) continue;\n\n    atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width +\n                  bottom_index,\n              top_diff[index]);\n  }\n}\n\nint ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,\n                           const at::Tensor argmax, const float spatial_scale,\n                           const int batch_size, const int channels,\n                           const int height, const int width,\n                           const int num_rois, const int pooled_h,\n                           const int pooled_w, at::Tensor bottom_grad) {\n  const int output_size = num_rois * pooled_h * pooled_w * channels;\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      top_grad.type(), \"ROIPoolLaucherBackward\", ([&] {\n        const scalar_t *top_diff = top_grad.data<scalar_t>();\n        const scalar_t *rois_data = rois.data<scalar_t>();\n        const int *argmax_data = argmax.data<int>();\n        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();\n\n        if (sizeof(scalar_t) == sizeof(double)) {\n          fprintf(stderr, \"double is not supported\\n\");\n          exit(-1);\n        }\n\n        ROIPoolBackward<scalar_t>\n            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(\n                output_size, top_diff, rois_data, argmax_data,\n                scalar_t(spatial_scale), channels, height, width, pooled_h,\n                pooled_w, bottom_diff);\n      }));\n  THCudaCheck(cudaGetLastError());\n  return 1;\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/__init__.py",
    "content": "from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss\r\n\r\n__all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']\r\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/functions/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py",
    "content": "import torch.nn.functional as F\nfrom torch.autograd import Function\nfrom torch.autograd.function import once_differentiable\n\nfrom .. import sigmoid_focal_loss_cuda\n\n\nclass SigmoidFocalLossFunction(Function):\n    \"\"\"Autograd function backed by the CUDA sigmoid focal loss kernels.\"\"\"\n\n    @staticmethod\n    def forward(ctx, input, target, gamma=2.0, alpha=0.25, reduction='mean'):\n        \"\"\"Compute the per-logit focal loss and apply ``reduction``.\n\n        ``reduction`` is 'none', 'mean' or 'sum'; the unreduced loss has the\n        same shape as ``input``.\n        \"\"\"\n        ctx.save_for_backward(input, target)\n        num_classes = input.shape[1]\n        ctx.num_classes = num_classes\n        ctx.gamma = gamma\n        ctx.alpha = alpha\n\n        loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,\n                                               gamma, alpha)\n        reduction_enum = F._Reduction.get_enum(reduction)\n        # Remembered so that backward can undo the reduction.\n        ctx.reduction_enum = reduction_enum\n        # none: 0, mean:1, sum: 2\n        if reduction_enum == 0:\n            return loss\n        elif reduction_enum == 1:\n            return loss.mean()\n        elif reduction_enum == 2:\n            return loss.sum()\n\n    @staticmethod\n    @once_differentiable\n    def backward(ctx, d_loss):\n        \"\"\"Return the gradient for ``input``; other arguments get None.\"\"\"\n        input, target = ctx.saved_tensors\n        num_classes = ctx.num_classes\n        gamma = ctx.gamma\n        alpha = ctx.alpha\n        # The CUDA kernel reads one upstream gradient per logit. After a\n        # 'mean' or 'sum' reduction d_loss is a scalar, so spread it back\n        # over the unreduced loss shape before handing it to the kernel.\n        if ctx.reduction_enum == 1:\n            d_loss = d_loss / input.numel()\n        d_loss = d_loss.expand_as(input).contiguous()\n        d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,\n                                                   num_classes, gamma, alpha)\n        return d_input, None, None, None, None\n\n\nsigmoid_focal_loss = SigmoidFocalLossFunction.apply\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/modules/__init__.py",
    "content": ""
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py",
    "content": "from torch import nn\n\nfrom ..functions.sigmoid_focal_loss import sigmoid_focal_loss\n\n\nclass SigmoidFocalLoss(nn.Module):\n    \"\"\"Module wrapper that returns the summed sigmoid focal loss.\"\"\"\n\n    def __init__(self, gamma, alpha):\n        super(SigmoidFocalLoss, self).__init__()\n        self.gamma = gamma\n        self.alpha = alpha\n\n    def forward(self, logits, targets):\n        assert logits.is_cuda\n        # Ask for the unreduced loss explicitly: the function's default\n        # reduction is 'mean', which would turn the .sum() below into a\n        # no-op on a scalar. Function.apply takes positional arguments only.\n        loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha,\n                                  'none')\n        return loss.sum()\n\n    def __repr__(self):\n        tmpstr = self.__class__.__name__ + \"(\"\n        tmpstr += \"gamma=\" + str(self.gamma)\n        tmpstr += \", alpha=\" + str(self.alpha)\n        tmpstr += \")\"\n        return tmpstr\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/setup.py",
    "content": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n    name='SigmoidFocalLoss',\n    ext_modules=[\n        CUDAExtension('sigmoid_focal_loss_cuda', [\n            'src/sigmoid_focal_loss.cpp',\n            'src/sigmoid_focal_loss_cuda.cu',\n        ]),\n    ],\n    cmdclass={'build_ext': BuildExtension})\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp",
    "content": "// modify from\n// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h\n#include <torch/extension.h>\n\nat::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,\n                                         const at::Tensor &targets,\n                                         const int num_classes,\n                                         const float gamma, const float alpha);\n\nat::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,\n                                          const at::Tensor &targets,\n                                          const at::Tensor &d_losses,\n                                          const int num_classes,\n                                          const float gamma, const float alpha);\n\n// Interface for Python\nat::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,\n                                    const at::Tensor &targets,\n                                    const int num_classes, const float gamma,\n                                    const float alpha) {\n  if (logits.type().is_cuda()) {\n    return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,\n                                         alpha);\n  }\n  // Only a CUDA implementation exists. Raise instead of falling off the end\n  // of a non-void function, which is undefined behaviour.\n  AT_ERROR(\"SigmoidFocalLoss is not implemented on the CPU\");\n}\n\nat::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,\n                                     const at::Tensor &targets,\n                                     const at::Tensor &d_losses,\n                                     const int num_classes, const float gamma,\n                                     const float alpha) {\n  if (logits.type().is_cuda()) {\n    return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,\n                                          num_classes, gamma, alpha);\n  }\n  // Only a CUDA implementation exists. Raise instead of falling off the end\n  // of a non-void function, which is undefined behaviour.\n  AT_ERROR(\"SigmoidFocalLoss is not implemented on the CPU\");\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"forward\", &SigmoidFocalLoss_forward,\n        \"SigmoidFocalLoss forward (CUDA)\");\n  m.def(\"backward\", &SigmoidFocalLoss_backward,\n        \"SigmoidFocalLoss backward (CUDA)\");\n}\n"
  },
  {
    "path": "mmdetection/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu",
    "content": "// modify from\n// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu\n\n// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n// This file is modified from\n// https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu\n// Cheng-Yang Fu\n// cyfu@cs.unc.edu\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THC.h>\n#include <THC/THCAtomics.cuh>\n#include <THC/THCDeviceUtils.cuh>\n\n#include <cfloat>\n\n// TODO make it in a common file\n#define CUDA_1D_KERNEL_LOOP(i, n)                            \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \\\n       i += blockDim.x * gridDim.x)\n\ntemplate <typename scalar_t>\n__global__ void SigmoidFocalLossForward(const int nthreads,\n                                        const scalar_t *logits,\n                                        const long *targets,\n                                        const int num_classes,\n                                        const float gamma, const float alpha,\n                                        const int num, scalar_t *losses) {\n  CUDA_1D_KERNEL_LOOP(i, nthreads) {\n    int n = i / num_classes;\n    int d = i % num_classes;  // current class[0~79];\n    int t = targets[n];       // target class [1~80];\n\n    // Decide it is positive or negative case.\n    scalar_t c1 = (t == (d + 1));\n    scalar_t c2 = (t >= 0 & t != (d + 1));\n\n    scalar_t zn = (1.0 - alpha);\n    scalar_t zp = (alpha);\n\n    // p = 1. / 1. + expf(-x); p = sigmoid(x)\n    scalar_t p = 1. / (1. + expf(-logits[i]));\n\n    // (1-p)**gamma * log(p) where\n    scalar_t term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));\n\n    // p**gamma * log(1-p)\n    scalar_t term2 =\n        powf(p, gamma) *\n        (-1. * logits[i] * (logits[i] >= 0) -\n         logf(1. + expf(logits[i] - 2. 
* logits[i] * (logits[i] >= 0))));\n\n    losses[i] = 0.0;\n    losses[i] += -c1 * term1 * zp;\n    losses[i] += -c2 * term2 * zn;\n\n  }  // CUDA_1D_KERNEL_LOOP\n}  // SigmoidFocalLossForward\n\ntemplate <typename scalar_t>\n__global__ void SigmoidFocalLossBackward(\n    const int nthreads, const scalar_t *logits, const long *targets,\n    const scalar_t *d_losses, const int num_classes, const float gamma,\n    const float alpha, const int num, scalar_t *d_logits) {\n  CUDA_1D_KERNEL_LOOP(i, nthreads) {\n    int n = i / num_classes;\n    int d = i % num_classes;  // current class[0~79];\n    int t = targets[n];       // target class [1~80], 0 is background;\n\n    // Decide it is positive or negative case.\n    scalar_t c1 = (t == (d + 1));\n    scalar_t c2 = (t >= 0 & t != (d + 1));\n\n    scalar_t zn = (1.0 - alpha);\n    scalar_t zp = (alpha);\n    // p = 1. / 1. + expf(-x); p = sigmoid(x)\n    scalar_t p = 1. / (1. + expf(-logits[i]));\n\n    // (1-p)**g * (1 - p - g*p*log(p)\n    scalar_t term1 =\n        powf((1. - p), gamma) * (1. - p - (p * gamma * logf(max(p, FLT_MIN))));\n\n    // (p**g) * (g*(1-p)*log(1-p) - p)\n    scalar_t term2 =\n        powf(p, gamma) *\n        ((-1. * logits[i] * (logits[i] >= 0) -\n          logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) *\n             (1. 
- p) * gamma -\n         p);\n    d_logits[i] = 0.0;\n    d_logits[i] += -c1 * term1 * zp;\n    d_logits[i] += -c2 * term2 * zn;\n    d_logits[i] = d_logits[i] * d_losses[i];\n\n  }  // CUDA_1D_KERNEL_LOOP\n}  // SigmoidFocalLossBackward\n\nat::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,\n                                         const at::Tensor &targets,\n                                         const int num_classes,\n                                         const float gamma, const float alpha) {\n  AT_ASSERTM(logits.type().is_cuda(), \"logits must be a CUDA tensor\");\n  AT_ASSERTM(targets.type().is_cuda(), \"targets must be a CUDA tensor\");\n  AT_ASSERTM(logits.dim() == 2, \"logits should be NxClass\");\n\n  const int num_samples = logits.size(0);\n\n  auto losses = at::empty({num_samples, logits.size(1)}, logits.options());\n  auto losses_size = num_samples * logits.size(1);\n\n  dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L));\n  dim3 block(512);\n\n  if (losses.numel() == 0) {\n    THCudaCheck(cudaGetLastError());\n    return losses;\n  }\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      logits.type(), \"SigmoidFocalLoss_forward\", [&] {\n        SigmoidFocalLossForward<scalar_t><<<grid, block>>>(\n            losses_size, logits.contiguous().data<scalar_t>(),\n            targets.contiguous().data<long>(), num_classes, gamma, alpha,\n            num_samples, losses.data<scalar_t>());\n      });\n  THCudaCheck(cudaGetLastError());\n  return losses;\n}\n\nat::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,\n                                          const at::Tensor &targets,\n                                          const at::Tensor &d_losses,\n                                          const int num_classes,\n                                          const float gamma,\n                                          const float alpha) {\n  AT_ASSERTM(logits.type().is_cuda(), \"logits must be a CUDA tensor\");\n  
AT_ASSERTM(targets.type().is_cuda(), \"targets must be a CUDA tensor\");\n  AT_ASSERTM(d_losses.type().is_cuda(), \"d_losses must be a CUDA tensor\");\n\n  AT_ASSERTM(logits.dim() == 2, \"logits should be NxClass\");\n\n  const int num_samples = logits.size(0);\n  AT_ASSERTM(logits.size(1) == num_classes,\n             \"logits.size(1) should be num_classes\");\n\n  auto d_logits = at::zeros({num_samples, num_classes}, logits.options());\n  auto d_logits_size = num_samples * logits.size(1);\n\n  dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L));\n  dim3 block(512);\n\n  if (d_logits.numel() == 0) {\n    THCudaCheck(cudaGetLastError());\n    return d_logits;\n  }\n\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      logits.type(), \"SigmoidFocalLoss_backward\", [&] {\n        SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(\n            d_logits_size, logits.contiguous().data<scalar_t>(),\n            targets.contiguous().data<long>(),\n            d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,\n            num_samples, d_logits.data<scalar_t>());\n      });\n\n  THCudaCheck(cudaGetLastError());\n  return d_logits;\n}\n"
  },
  {
    "path": "mmdetection/setup.py",
    "content": "import os\nimport subprocess\nimport time\nfrom setuptools import find_packages, setup\n\n\ndef readme():\n    with open('README.md', encoding='utf-8') as f:\n        content = f.read()\n    return content\n\n\nMAJOR = 0\nMINOR = 6\nPATCH = 0\nSUFFIX = ''\nSHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)\n\nversion_file = 'mmdet/version.py'\n\n\ndef get_git_hash():\n\n    def _minimal_ext_cmd(cmd):\n        # construct minimal environment\n        env = {}\n        for k in ['SYSTEMROOT', 'PATH', 'HOME']:\n            v = os.environ.get(k)\n            if v is not None:\n                env[k] = v\n        # LANGUAGE is used on win32\n        env['LANGUAGE'] = 'C'\n        env['LANG'] = 'C'\n        env['LC_ALL'] = 'C'\n        out = subprocess.Popen(\n            cmd, stdout=subprocess.PIPE, env=env).communicate()[0]\n        return out\n\n    try:\n        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])\n        sha = out.strip().decode('ascii')\n    except OSError:\n        sha = 'unknown'\n\n    return sha\n\n\ndef get_hash():\n    if os.path.exists('.git'):\n        sha = get_git_hash()[:7]\n    elif os.path.exists(version_file):\n        try:\n            from mmdet.version import __version__\n            sha = __version__.split('+')[-1]\n        except ImportError:\n            raise ImportError('Unable to get git version')\n    else:\n        sha = 'unknown'\n\n    return sha\n\n\ndef write_version_py():\n    content = \"\"\"# GENERATED VERSION FILE\n# TIME: {}\n\n__version__ = '{}'\nshort_version = '{}'\n\"\"\"\n    sha = get_hash()\n    VERSION = SHORT_VERSION + '+' + sha\n\n    with open(version_file, 'w') as f:\n        f.write(content.format(time.asctime(), VERSION, SHORT_VERSION))\n\n\ndef get_version():\n    # Run the generated file in an explicit namespace. A bare exec() inside a\n    # function is not guaranteed to make its assignments visible through a\n    # later locals() call (it does not on Python 3.13+).\n    namespace = {}\n    with open(version_file, 'r') as f:\n        exec(compile(f.read(), version_file, 'exec'), namespace)\n    return namespace['__version__']\n\n\nif __name__ == '__main__':\n    write_version_py()\n    setup(\n        name='mmdet',\n        version=get_version(),\n        description='Open MMLab Detection Toolbox',\n        long_description=readme(),\n        keywords='computer vision, object detection',\n        url='https://github.com/open-mmlab/mmdetection',\n        packages=find_packages(exclude=('configs', 'tools', 'demo')),\n        package_data={'mmdet.ops': ['*/*.so']},\n        classifiers=[\n            'Development Status :: 4 - Beta',\n            'License :: OSI Approved :: Apache Software License',\n            'Operating System :: OS Independent',\n            'Programming Language :: Python :: 2',\n            'Programming Language :: Python :: 2.7',\n            'Programming Language :: Python :: 3',\n            'Programming Language :: Python :: 3.4',\n            'Programming Language :: Python :: 3.5',\n            'Programming Language :: Python :: 3.6',\n        ],\n        # Kept in agreement with the license classifier above.\n        license='Apache License 2.0',\n        setup_requires=['pytest-runner'],\n        tests_require=['pytest'],\n        install_requires=[\n            'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables',\n            'pycocotools'\n        ],\n        zip_safe=False)\n"
  },
  {
    "path": "mmdetection/tools/analyze_logs.py",
    "content": "import argparse\nimport json\nfrom collections import defaultdict\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport seaborn as sns\n\n\ndef cal_train_time(log_dicts, args):\n    for i, log_dict in enumerate(log_dicts):\n        print('{}Analyze train time of {}{}'.format('-' * 5, args.json_logs[i],\n                                                    '-' * 5))\n        all_times = []\n        for epoch in log_dict.keys():\n            if args.include_outliers:\n                all_times.append(log_dict[epoch]['time'])\n            else:\n                all_times.append(log_dict[epoch]['time'][1:])\n        all_times = np.array(all_times)\n        epoch_ave_time = all_times.mean(-1)\n        slowest_epoch = epoch_ave_time.argmax()\n        fastest_epoch = epoch_ave_time.argmin()\n        std_over_epoch = epoch_ave_time.std()\n        print('slowest epoch {}, average time is {:.4f}'.format(\n            slowest_epoch + 1, epoch_ave_time[slowest_epoch]))\n        print('fastest epoch {}, average time is {:.4f}'.format(\n            fastest_epoch + 1, epoch_ave_time[fastest_epoch]))\n        print('time std over epochs is {:.4f}'.format(std_over_epoch))\n        print('average iter time: {:.4f} s/iter'.format(np.mean(all_times)))\n        print()\n\n\ndef plot_curve(log_dicts, args):\n    if args.backend is not None:\n        plt.switch_backend(args.backend)\n    sns.set_style(args.style)\n    # if legend is None, use {filename}_{key} as legend\n    legend = args.legend\n    if legend is None:\n        legend = []\n        for json_log in args.json_logs:\n            for metric in args.keys:\n                legend.append('{}_{}'.format(json_log, metric))\n    assert len(legend) == (len(args.json_logs) * len(args.keys))\n    metrics = args.keys\n\n    num_metrics = len(metrics)\n    for i, log_dict in enumerate(log_dicts):\n        epochs = list(log_dict.keys())\n        for j, metric in enumerate(metrics):\n            print('plot curve 
of {}, metric is {}'.format(\n                args.json_logs[i], metric))\n            assert metric in log_dict[\n                epochs[0]], '{} does not contain metric {}'.format(\n                    args.json_logs[i], metric)\n\n            if 'mAP' in metric:\n                xs = np.arange(1, max(epochs) + 1)\n                ys = []\n                for epoch in epochs:\n                    ys += log_dict[epoch][metric]\n                ax = plt.gca()\n                ax.set_xticks(xs)\n                plt.xlabel('epoch')\n                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')\n            else:\n                xs = []\n                ys = []\n                num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1]\n                for epoch in epochs:\n                    iters = log_dict[epoch]['iter']\n                    if log_dict[epoch]['mode'][-1] == 'val':\n                        iters = iters[:-1]\n                    xs.append(\n                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)\n                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))\n                xs = np.concatenate(xs)\n                ys = np.concatenate(ys)\n                plt.xlabel('iter')\n                plt.plot(\n                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)\n            plt.legend()\n        if args.title is not None:\n            plt.title(args.title)\n    if args.out is None:\n        plt.show()\n    else:\n        print('save curve to: {}'.format(args.out))\n        plt.savefig(args.out)\n        plt.cla()\n\n\ndef add_plot_parser(subparsers):\n    parser_plt = subparsers.add_parser(\n        'plot_curve', help='parser for plotting curves')\n    parser_plt.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_plt.add_argument(\n        '--keys',\n        type=str,\n        nargs='+',\n        
default=['bbox_mAP'],\n        help='the metric that you want to plot')\n    parser_plt.add_argument('--title', type=str, help='title of figure')\n    parser_plt.add_argument(\n        '--legend',\n        type=str,\n        nargs='+',\n        default=None,\n        help='legend of each plot')\n    parser_plt.add_argument(\n        '--backend', type=str, default=None, help='backend of plt')\n    parser_plt.add_argument(\n        '--style', type=str, default='dark', help='style of plt')\n    parser_plt.add_argument('--out', type=str, default=None)\n\n\ndef add_time_parser(subparsers):\n    parser_time = subparsers.add_parser(\n        'cal_train_time',\n        help='parser for computing the average time per training iteration')\n    parser_time.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_time.add_argument(\n        '--include-outliers',\n        action='store_true',\n        help='include the first value of every epoch when computing '\n        'the average time')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Analyze Json Log')\n    # currently only support plot curve and calculate average train time\n    subparsers = parser.add_subparsers(dest='task', help='task parser')\n    add_plot_parser(subparsers)\n    add_time_parser(subparsers)\n    args = parser.parse_args()\n    return args\n\n\ndef load_json_logs(json_logs):\n    # load and convert json_logs to log_dict, key is epoch, value is a sub dict\n    # keys of sub dict is different metrics, e.g. 
memory, bbox_mAP\n    # value of sub dict is a list of corresponding values of all iterations\n    log_dicts = [dict() for _ in json_logs]\n    for json_log, log_dict in zip(json_logs, log_dicts):\n        with open(json_log, 'r') as log_file:\n            for l in log_file:\n                log = json.loads(l.strip())\n                epoch = log.pop('epoch')\n                if epoch not in log_dict:\n                    log_dict[epoch] = defaultdict(list)\n                for k, v in log.items():\n                    log_dict[epoch][k].append(v)\n    return log_dicts\n\n\ndef main():\n    args = parse_args()\n\n    json_logs = args.json_logs\n    for json_log in json_logs:\n        assert json_log.endswith('.json')\n\n    log_dicts = load_json_logs(json_logs)\n\n    eval(args.task)(log_dicts, args)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/coco_eval.py",
    "content": "from argparse import ArgumentParser\n\nfrom mmdet.core import coco_eval\n\n\ndef main():\n    parser = ArgumentParser(description='COCO Evaluation')\n    parser.add_argument('result', help='result file path')\n    parser.add_argument('--ann', help='annotation file path')\n    parser.add_argument(\n        '--types',\n        type=str,\n        nargs='+',\n        choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],\n        default=['bbox'],\n        help='result types')\n    parser.add_argument(\n        '--max-dets',\n        type=int,\n        nargs='+',\n        default=[100, 300, 1000],\n        help='proposal numbers, only used for recall evaluation')\n    args = parser.parse_args()\n    coco_eval(args.result, args.types, args.ann, args.max_dets)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/convert_datasets/pascal_voc.py",
    "content": "import argparse\nimport os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.core import voc_classes\n\nlabel_ids = {name: i + 1 for i, name in enumerate(voc_classes())}\n\n\ndef parse_xml(args):\n    xml_path, img_path = args\n    tree = ET.parse(xml_path)\n    root = tree.getroot()\n    size = root.find('size')\n    w = int(size.find('width').text)\n    h = int(size.find('height').text)\n    bboxes = []\n    labels = []\n    bboxes_ignore = []\n    labels_ignore = []\n    for obj in root.findall('object'):\n        name = obj.find('name').text\n        label = label_ids[name]\n        difficult = int(obj.find('difficult').text)\n        bnd_box = obj.find('bndbox')\n        bbox = [\n            int(bnd_box.find('xmin').text),\n            int(bnd_box.find('ymin').text),\n            int(bnd_box.find('xmax').text),\n            int(bnd_box.find('ymax').text)\n        ]\n        if difficult:\n            bboxes_ignore.append(bbox)\n            labels_ignore.append(label)\n        else:\n            bboxes.append(bbox)\n            labels.append(label)\n    if not bboxes:\n        bboxes = np.zeros((0, 4))\n        labels = np.zeros((0, ))\n    else:\n        bboxes = np.array(bboxes, ndmin=2) - 1\n        labels = np.array(labels)\n    if not bboxes_ignore:\n        bboxes_ignore = np.zeros((0, 4))\n        labels_ignore = np.zeros((0, ))\n    else:\n        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1\n        labels_ignore = np.array(labels_ignore)\n    annotation = {\n        'filename': img_path,\n        'width': w,\n        'height': h,\n        'ann': {\n            'bboxes': bboxes.astype(np.float32),\n            'labels': labels.astype(np.int64),\n            'bboxes_ignore': bboxes_ignore.astype(np.float32),\n            'labels_ignore': labels_ignore.astype(np.int64)\n        }\n    }\n    return annotation\n\n\ndef cvt_annotations(devkit_path, years, split, out_file):\n    if not 
isinstance(years, list):\n        years = [years]\n    annotations = []\n    for year in years:\n        filelist = osp.join(devkit_path, 'VOC{}/ImageSets/Main/{}.txt'.format(\n            year, split))\n        if not osp.isfile(filelist):\n            print('filelist does not exist: {}, skip voc{} {}'.format(\n                filelist, year, split))\n            return\n        img_names = mmcv.list_from_file(filelist)\n        xml_paths = [\n            osp.join(devkit_path, 'VOC{}/Annotations/{}.xml'.format(\n                year, img_name)) for img_name in img_names\n        ]\n        img_paths = [\n            'VOC{}/JPEGImages/{}.jpg'.format(year, img_name)\n            for img_name in img_names\n        ]\n        part_annotations = mmcv.track_progress(parse_xml,\n                                               list(zip(xml_paths, img_paths)))\n        annotations.extend(part_annotations)\n    mmcv.dump(annotations, out_file)\n    return annotations\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert PASCAL VOC annotations to mmdetection format')\n    parser.add_argument('devkit_path', help='pascal voc devkit path')\n    parser.add_argument('-o', '--out-dir', help='output path')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    devkit_path = args.devkit_path\n    out_dir = args.out_dir if args.out_dir else devkit_path\n    mmcv.mkdir_or_exist(out_dir)\n\n    years = []\n    if osp.isdir(osp.join(devkit_path, 'VOC2007')):\n        years.append('2007')\n    if osp.isdir(osp.join(devkit_path, 'VOC2012')):\n        years.append('2012')\n    if '2007' in years and '2012' in years:\n        years.append(['2007', '2012'])\n    if not years:\n        raise IOError('The devkit path {} contains neither \"VOC2007\" nor '\n                      '\"VOC2012\" subfolder'.format(devkit_path))\n    for year in years:\n        if year == '2007':\n            prefix = 'voc07'\n        elif 
year == '2012':\n            prefix = 'voc12'\n        elif year == ['2007', '2012']:\n            prefix = 'voc0712'\n        for split in ['train', 'val', 'trainval']:\n            dataset_name = prefix + '_' + split\n            print('processing {} ...'.format(dataset_name))\n            cvt_annotations(devkit_path, year, split,\n                            osp.join(out_dir, dataset_name + '.pkl'))\n        if not isinstance(year, list):\n            dataset_name = prefix + '_test'\n            print('processing {} ...'.format(dataset_name))\n            cvt_annotations(devkit_path, year, 'test',\n                            osp.join(out_dir, dataset_name + '.pkl'))\n    print('Done!')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \\\n    $(dirname \"$0\")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}\n"
  },
  {
    "path": "mmdetection/tools/dist_train.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\nCONFIG=$1\nGPUS=$2\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \\\n    $(dirname \"$0\")/train.py $CONFIG --launcher pytorch ${@:3}\n"
  },
  {
    "path": "mmdetection/tools/publish_model.py",
    "content": "import argparse\nimport subprocess\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Process a checkpoint to be published')\n    parser.add_argument('in_file', help='input checkpoint filename')\n    parser.add_argument('out_file', help='output checkpoint filename')\n    args = parser.parse_args()\n    return args\n\n\ndef process_checkpoint(in_file, out_file):\n    checkpoint = torch.load(in_file, map_location='cpu')\n    # remove optimizer for smaller file size\n    if 'optimizer' in checkpoint:\n        del checkpoint['optimizer']\n    # if it is necessary to remove some sensitive data in checkpoint['meta'],\n    # add the code here.\n    torch.save(checkpoint, out_file)\n    sha = subprocess.check_output(['sha256sum', out_file]).decode()\n    final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])\n    subprocess.Popen(['mv', out_file, final_file])\n\n\ndef main():\n    args = parse_args()\n    process_checkpoint(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/slurm_test.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nPY_ARGS=${@:5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\n\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "mmdetection/tools/slurm_train.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${5:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nPY_ARGS=${PY_ARGS:-\"--validate\"}\n\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "mmdetection/tools/test.py",
    "content": "import argparse\nimport os.path as osp\nimport shutil\nimport tempfile\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import load_checkpoint, get_dist_info\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\n\nfrom mmdet.apis import init_dist\nfrom mmdet.core import results2json, coco_eval\nfrom mmdet.datasets import build_dataloader, get_dataset\nfrom mmdet.models import build_detector\n\n\ndef single_gpu_test(model, data_loader, show=False):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=not show, **data)\n        results.append(result)\n\n        if show:\n            model.module.show_result(data, result, dataset.img_norm_cfg)\n\n        batch_size = data['img'][0].size(0)\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n\n\ndef multi_gpu_test(model, data_loader, tmpdir=None):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    rank, world_size = get_dist_info()\n    if rank == 0:\n        prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n        results.append(result)\n\n        if rank == 0:\n            batch_size = data['img'][0].size(0)\n            for _ in range(batch_size * world_size):\n                prog_bar.update()\n\n    # collect results from all ranks\n    results = collect_results(results, len(dataset), tmpdir)\n\n    return results\n\n\ndef collect_results(result_part, size, tmpdir=None):\n    rank, world_size = get_dist_info()\n    # create a tmp dir if it is not specified\n    if tmpdir is None:\n        MAX_LEN = 512\n        # 32 is whitespace\n        dir_tensor = 
torch.full((MAX_LEN, ),\n                                32,\n                                dtype=torch.uint8,\n                                device='cuda')\n        if rank == 0:\n            tmpdir = tempfile.mkdtemp()\n            tmpdir = torch.tensor(bytearray(tmpdir.encode()),\n                                  dtype=torch.uint8,\n                                  device='cuda')\n            dir_tensor[:len(tmpdir)] = tmpdir\n        dist.broadcast(dir_tensor, 0)\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\n    else:\n        mmcv.mkdir_or_exist(tmpdir)\n    # dump the part result to the dir\n    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))\n    dist.barrier()\n    # collect all parts\n    if rank != 0:\n        return None\n    else:\n        # load results of all parts from tmp dir\n        part_list = []\n        for i in range(world_size):\n            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))\n            part_list.append(mmcv.load(part_file))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        # remove tmp dir\n        shutil.rmtree(tmpdir)\n        return ordered_results\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet test detector')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='output result file')\n    parser.add_argument('--ann_file', default=None, type=str)\n    parser.add_argument('--img_prefix', default=None, type=str)\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],\n        help='eval types')\n    
parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument('--tmpdir', help='tmp dir for writing some results')\n    parser.add_argument('--flip', action='store_true')\n    parser.add_argument('--launcher',\n                        choices=['none', 'pytorch', 'slurm', 'mpi'],\n                        default='none',\n                        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = mmcv.Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n    if args.ann_file is not None:\n        cfg.data.test.ann_file = args.ann_file\n    if args.img_prefix is not None:\n        cfg.data.test.img_prefix = args.img_prefix\n    if args.flip:\n        cfg.data.test.flip_ratio = 1\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = get_dataset(cfg.data.test)\n    data_loader = build_dataloader(dataset,\n                                   imgs_per_gpu=1,\n                                   workers_per_gpu=cfg.data.workers_per_gpu,\n                                   dist=distributed,\n                                   shuffle=False)\n\n    # build the model and load checkpoint\n    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)\n    load_checkpoint(model, args.checkpoint, 
map_location='cpu')\n\n    if not distributed:\n        model = MMDataParallel(model, device_ids=[0])\n        outputs = single_gpu_test(model, data_loader, args.show)\n    else:\n        model = MMDistributedDataParallel(model.cuda())\n        outputs = multi_gpu_test(model, data_loader, args.tmpdir)\n\n    rank, _ = get_dist_info()\n    if args.out and rank == 0:\n        print('\\nwriting results to {}'.format(args.out))\n        mmcv.dump(outputs, args.out)\n        eval_types = args.eval\n        if eval_types:\n            print('Starting evaluate {}'.format(' and '.join(eval_types)))\n            if eval_types == ['proposal_fast']:\n                result_file = args.out\n                coco_eval(result_file, eval_types, dataset.coco)\n            else:\n                if not isinstance(outputs[0], dict):\n                    result_file = args.out + '.json'\n                    results2json(dataset, outputs, result_file)\n                    coco_eval(result_file, eval_types, dataset.coco)\n                else:\n                    for name in outputs[0]:\n                        print('\\nEvaluating {}'.format(name))\n                        outputs_ = [out[name] for out in outputs]\n                        result_file = args.out + '.{}.json'.format(name)\n                        results2json(dataset, outputs_, result_file)\n                        coco_eval(result_file, eval_types, dataset.coco)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/test_ensemble.py",
    "content": "import argparse\nimport os.path as osp\nimport shutil\nimport tempfile\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import load_checkpoint, get_dist_info\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\n\nfrom mmdet.apis import init_dist\nfrom mmdet.core import results2json, coco_eval\nfrom mmdet.datasets import build_dataloader, get_dataset\nfrom mmdet.models import build_detector\nfrom mmdet.models.detectors.ensemble_htc import EnsembleHTC\n\n\ndef single_gpu_test(model, data_loader, show=False):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=not show, **data)\n        results.append(result)\n\n        if show:\n            model.module.show_result(data, result, dataset.img_norm_cfg)\n\n        batch_size = data['img'][0].size(0)\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n\n\ndef multi_gpu_test(model, data_loader, tmpdir=None):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    rank, world_size = get_dist_info()\n    if rank == 0:\n        prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n        results.append(result)\n\n        if rank == 0:\n            batch_size = data['img'][0].size(0)\n            for _ in range(batch_size * world_size):\n                prog_bar.update()\n\n    # collect results from all ranks\n    results = collect_results(results, len(dataset), tmpdir)\n\n    return results\n\n\ndef collect_results(result_part, size, tmpdir=None):\n    rank, world_size = get_dist_info()\n    # create a tmp dir if it is not specified\n    if tmpdir is None:\n        MAX_LEN = 512\n 
       # 32 is whitespace\n        dir_tensor = torch.full((MAX_LEN, ),\n                                32,\n                                dtype=torch.uint8,\n                                device='cuda')\n        if rank == 0:\n            tmpdir = tempfile.mkdtemp()\n            tmpdir = torch.tensor(bytearray(tmpdir.encode()),\n                                  dtype=torch.uint8,\n                                  device='cuda')\n            dir_tensor[:len(tmpdir)] = tmpdir\n        dist.broadcast(dir_tensor, 0)\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\n    else:\n        mmcv.mkdir_or_exist(tmpdir)\n    # dump the part result to the dir\n    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))\n    dist.barrier()\n    # collect all parts\n    if rank != 0:\n        return None\n    else:\n        # load results of all parts from tmp dir\n        part_list = []\n        for i in range(world_size):\n            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))\n            part_list.append(mmcv.load(part_file))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        # remove tmp dir\n        shutil.rmtree(tmpdir)\n        return ordered_results\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet test detector')\n    parser.add_argument('config', type=str)\n    parser.add_argument('--checkpoint', type=str, nargs='+')\n    parser.add_argument('--out', help='output result file')\n    parser.add_argument('--ann_file', default=None, type=str)\n    parser.add_argument('--img_prefix', default=None, type=str)\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],\n        help='eval types')\n    
parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument('--tmpdir', help='tmp dir for writing some results')\n    parser.add_argument('--launcher',\n                        choices=['none', 'pytorch', 'slurm', 'mpi'],\n                        default='none',\n                        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = mmcv.Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n    if args.ann_file is not None:\n        cfg.data.test.ann_file = args.ann_file\n    if args.img_prefix is not None:\n        cfg.data.test.img_prefix = args.img_prefix\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = get_dataset(cfg.data.test)\n    data_loader = build_dataloader(dataset,\n                                   imgs_per_gpu=1,\n                                   workers_per_gpu=cfg.data.workers_per_gpu,\n                                   dist=distributed,\n                                   shuffle=False)\n\n    # build the model and load checkpoint\n    models = []\n    print(f\"checkpoints: {args.checkpoint}\")\n    for checkpoint in args.checkpoint:\n        model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)\n        load_checkpoint(model, checkpoint, 
map_location='cpu')\n        models.append(model)\n    model = EnsembleHTC(models)\n\n    if not distributed:\n        model = MMDataParallel(model, device_ids=[0])\n        outputs = single_gpu_test(model, data_loader, args.show)\n    else:\n        model = MMDistributedDataParallel(model.cuda())\n        outputs = multi_gpu_test(model, data_loader, args.tmpdir)\n\n    rank, _ = get_dist_info()\n    if args.out and rank == 0:\n        print('\\nwriting results to {}'.format(args.out))\n        mmcv.dump(outputs, args.out)\n        eval_types = args.eval\n        if eval_types:\n            print('Starting evaluate {}'.format(' and '.join(eval_types)))\n            if eval_types == ['proposal_fast']:\n                result_file = args.out\n                coco_eval(result_file, eval_types, dataset.coco)\n            else:\n                if not isinstance(outputs[0], dict):\n                    result_file = args.out + '.json'\n                    results2json(dataset, outputs, result_file)\n                    coco_eval(result_file, eval_types, dataset.coco)\n                else:\n                    for name in outputs[0]:\n                        print('\\nEvaluating {}'.format(name))\n                        outputs_ = [out[name] for out in outputs]\n                        result_file = args.out + '.{}.json'.format(name)\n                        results2json(dataset, outputs_, result_file)\n                        coco_eval(result_file, eval_types, dataset.coco)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/train.py",
    "content": "from __future__ import division\n\nimport argparse\nfrom mmcv import Config\n\nfrom mmdet import __version__\nfrom mmdet.datasets import get_dataset\nfrom mmdet.apis import (train_detector, init_dist, get_root_logger,\n                        set_random_seed)\nfrom mmdet.models import build_detector\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument('--work_dir', help='the dir to save logs and models')\n    parser.add_argument(\n        '--resume_from', help='the checkpoint file to resume from')\n    parser.add_argument(\n        '--validate',\n        action='store_true',\n        help='whether to evaluate the checkpoint during training')\n    parser.add_argument(\n        '--gpus',\n        type=int,\n        default=4,\n        help='number of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument('--seed', type=int, default=None, help='random seed')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    # update configs according to CLI args\n    if args.work_dir is not None:\n        cfg.work_dir = args.work_dir\n    if args.resume_from is not None:\n        cfg.resume_from = args.resume_from\n    cfg.gpus = args.gpus\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, 
**cfg.dist_params)\n\n    # init logger before other steps\n    logger = get_root_logger(cfg.log_level)\n    logger.info('Distributed training: {}'.format(distributed))\n\n    # set random seeds\n    if args.seed is not None:\n        logger.info('Set random seed to {}'.format(args.seed))\n        set_random_seed(args.seed)\n\n    model = build_detector(\n        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)\n\n    train_dataset = get_dataset(cfg.data.train)\n    if cfg.checkpoint_config is not None:\n        # save mmdet version, config file content and class names in\n        # checkpoints as meta data\n        cfg.checkpoint_config.meta = dict(\n            mmdet_version=__version__, config=cfg.text,\n            classes=train_dataset.CLASSES)\n    # add an attribute for visualization convenience\n    model.CLASSES = train_dataset.CLASSES\n    train_detector(\n        model,\n        train_dataset,\n        cfg,\n        distributed=distributed,\n        validate=args.validate,\n        logger=logger)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/upgrade_model_version.py",
    "content": "import argparse\nimport re\nfrom collections import OrderedDict\n\nimport torch\n\n\ndef convert(in_file, out_file):\n    \"\"\"Convert keys in checkpoints.\n\n    There can be some breaking changes during the development of mmdetection,\n    and this tool is used for upgrading checkpoints trained with old versions\n    to the latest one.\n    \"\"\"\n    checkpoint = torch.load(in_file)\n    in_state_dict = checkpoint.pop('state_dict')\n    out_state_dict = OrderedDict()\n    for key, val in in_state_dict.items():\n        # Use ConvModule instead of nn.Conv2d in RetinaNet\n        # cls_convs.0.weight -> cls_convs.0.conv.weight\n        m = re.search(r'(cls_convs|reg_convs).\\d.(weight|bias)', key)\n        if m is not None:\n            param = m.groups()[1]\n            new_key = key.replace(param, 'conv.{}'.format(param))\n            out_state_dict[new_key] = val\n            continue\n\n        out_state_dict[key] = val\n    checkpoint['state_dict'] = out_state_dict\n    torch.save(checkpoint, out_file)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Upgrade model version')\n    parser.add_argument('in_file', help='input checkpoint file')\n    parser.add_argument('out_file', help='output checkpoint file')\n    args = parser.parse_args()\n    convert(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "mmdetection/tools/voc_eval.py",
    "content": "from argparse import ArgumentParser\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet import datasets\nfrom mmdet.core import eval_map\n\n\ndef voc_eval(result_file, dataset, iou_thr=0.5):\n    det_results = mmcv.load(result_file)\n    gt_bboxes = []\n    gt_labels = []\n    gt_ignore = []\n    for i in range(len(dataset)):\n        ann = dataset.get_ann_info(i)\n        bboxes = ann['bboxes']\n        labels = ann['labels']\n        if 'bboxes_ignore' in ann:\n            ignore = np.concatenate([\n                np.zeros(bboxes.shape[0], dtype=np.bool),\n                np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)\n            ])\n            gt_ignore.append(ignore)\n            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])\n            labels = np.concatenate([labels, ann['labels_ignore']])\n        gt_bboxes.append(bboxes)\n        gt_labels.append(labels)\n    if not gt_ignore:\n        gt_ignore = gt_ignore\n    if hasattr(dataset, 'year') and dataset.year == 2007:\n        dataset_name = 'voc07'\n    else:\n        dataset_name = dataset.CLASSES\n    eval_map(\n        det_results,\n        gt_bboxes,\n        gt_labels,\n        gt_ignore=gt_ignore,\n        scale_ranges=None,\n        iou_thr=iou_thr,\n        dataset=dataset_name,\n        print_summary=True)\n\n\ndef main():\n    parser = ArgumentParser(description='VOC Evaluation')\n    parser.add_argument('result', help='result file path')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--iou-thr',\n        type=float,\n        default=0.5,\n        help='IoU threshold for evaluation')\n    args = parser.parse_args()\n    cfg = mmcv.Config.fromfile(args.config)\n    test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)\n    voc_eval(args.result, test_dataset, args.iou_thr)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "scrips/create_mmdetection_test.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHONPATH=/kaggle-imaterialist python /kaggle-imaterialist/src/create_mmdetection_test.py \\\n    --annotation=/data/sample_submission.csv \\\n    --root=/data/test \\\n    --output=/data/test_mmdetection.pkl\n"
  },
  {
    "path": "scrips/create_mmdetection_train.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHONPATH=/kaggle-imaterialist python /kaggle-imaterialist/src/create_mmdetection_train.py \\\n    --annotation=/data/train.csv.zip \\\n    --root=/data/train \\\n    --output=/data/train_mmdetection.pkl"
  },
  {
    "path": "scrips/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \\\n    /kaggle-imaterialist/mmdetection/tools/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}\n"
  },
  {
    "path": "scrips/dist_test_ensemble.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nGPUS=$2\nMMDETECTION_PREDICTIONS=/data/test_ensemble_predictions.pkl\nSUBMISSION=/data/test_ensemble_submission.csv\nSUBMISSION_WA=/data/test_ensemble_submission_wa.csv\n\npython -m torch.distributed.launch --nproc_per_node=$GPUS /kaggle-imaterialist/mmdetection/tools/test_ensemble.py $CONFIG \\\n    --checkpoint /dumps/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e/epoch_14.pth \\\n                 /dumps/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e/epoch_15.pth \\\n                 /dumps/htc_dconv_c3-c5_mstrain_x101_64x4d_fpn_20e/epoch_17.pth \\\n    --launcher pytorch ${@:4} \\\n    --out=${MMDETECTION_PREDICTIONS}\n\nPYTHONPATH=/kaggle-imaterialist python /kaggle-imaterialist/src/submit.py \\\n    --annotation=/data/test_mmdetection.pkl \\\n    --predictions=${MMDETECTION_PREDICTIONS} \\\n    --output=${SUBMISSION}\n\nPYTHONPATH=/kaggle-imaterialist python /kaggle-imaterialist/src/rm_attribute_classes.py \\\n    --submission=${SUBMISSION} \\\n    --output=${SUBMISSION_WA}"
  },
  {
    "path": "scrips/dist_train.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHON=${PYTHON:-\"python\"}\n\nCONFIG=$1\nGPUS=$2\n\n$PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \\\n    /kaggle-imaterialist/mmdetection/tools/train.py $CONFIG --launcher pytorch ${@:3}\n"
  },
  {
    "path": "scrips/prepare_weights.sh",
    "content": "#!/usr/bin/env bash\n\nmkdir /dumps\nwget https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth -O /dumps/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth\npython /kaggle-imaterialist/src/prune.py \\\n    --weights=/dumps/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth \\\n    --output=/dumps/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c_prune.pth\n"
  },
  {
    "path": "scrips/split.sh",
    "content": "#!/usr/bin/env bash\n\nPYTHONPATH=/kaggle-imaterialist python /kaggle-imaterialist/src/split.py \\\n    --annotation=/data/train_mmdetection.pkl \\\n    --train_output=/data/train_99_mmdetection.pkl \\\n    --val_output=/data/val_01_mmdetection.pkl\n"
  },
  {
    "path": "src/__init__.py",
    "content": ""
  },
  {
    "path": "src/create_mmdetection_test.py",
    "content": "import argparse\nimport os\nimport pickle\nfrom functools import partial\nfrom multiprocessing import Pool\n\nimport jpeg4py as jpeg\nimport pandas as pd\nfrom tqdm import tqdm\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--annotation', type=str)\n    parser.add_argument('--root', type=str)\n    parser.add_argument('--output', type=str)\n    parser.add_argument('--n_jobs', type=int, default=40)\n    return parser.parse_args()\n\n\ndef convert(group: dict, root) -> dict:\n    image_id, group = group\n    image = jpeg.JPEG(os.path.join(root, image_id)).decode()\n    height, width = image.shape[:2]\n    return {\n        'filename': image_id,\n        'width': width,\n        'height': height,\n        'ann': {\n            'bboxes': None,\n            'labels': None,\n            'masks': None\n        }\n    }\n\n\ndef main():\n    args = parse_args()\n    annotation = pd.read_csv(args.annotation)\n    print(len(annotation), len(set(annotation['ImageId'])))\n\n    groups = list(annotation.groupby('ImageId'))\n\n    with Pool(args.n_jobs) as p:\n        samples = list(tqdm(iterable=p.imap_unordered(partial(convert, root=args.root), groups), total=len(groups)))\n\n    with open(args.output, 'wb') as f:\n        pickle.dump(samples, f)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/create_mmdetection_train.py",
    "content": "import argparse\nimport os\nimport pickle\nfrom multiprocessing import Pool\n\nimport pandas as pd\nfrom tqdm import tqdm\nfrom src.utils import group2mmdetection\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--annotation', type=str)\n    parser.add_argument('--root', type=str)\n    parser.add_argument('--output', type=str)\n    parser.add_argument('--n_jobs', type=int, default=80)\n    parser.add_argument('--n_samples', type=int, default=-1)\n    return parser.parse_args()\n\n\ndef main():\n    args = parse_args()\n    annotation = pd.read_csv(args.annotation)\n    files = sorted(os.listdir(args.root))\n    if args.n_samples != -1:\n        files = files[:args.n_samples]\n    annotation = annotation.loc[annotation['ImageId'].isin(set(files))]\n    print(len(annotation), len(set(annotation['ImageId'])))\n\n    groups = list(annotation.groupby('ImageId'))\n\n    with Pool(args.n_jobs) as p:\n        samples = list(tqdm(iterable=p.imap_unordered(group2mmdetection, groups), total=len(groups)))\n\n    with open(args.output, 'wb') as f:\n        pickle.dump(samples, f)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/draw.py",
    "content": "import argparse\nimport numpy as np\nfrom multiprocessing import Pool\nimport cv2\nimport os\nimport json\nfrom tqdm import tqdm\nimport mmcv\nimport os.path as osp\nfrom functools import partial\nimport pycocotools.mask as mutils\nimport pandas as pd\n\nN_CLASSES = 46\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--annotation', type=str)\n    parser.add_argument('--predictions', type=str)\n    parser.add_argument('--submission', type=str, default=None)\n    parser.add_argument('--classes', type=str)\n    parser.add_argument('--root', type=str)\n    parser.add_argument('--output', type=str)\n    parser.add_argument('--n_jobs', type=int, default=80)\n    parser.add_argument('--metric_threshold', type=float, default=0.1)\n    return parser.parse_args()\n\n\ndef get_spaced_colors(n, start_color=(75, 0, 130)):\n    r, g, b = start_color\n    step = 256 / n\n    colors = []\n    for i in range(n):\n        r += step\n        g += step\n        b += step\n        colors.append((int(r) % 256, int(g) % 256, int(b) % 256))\n    return np.random.permutation(colors).reshape(-1, 1, 3).astype(np.uint8)\n\n\ndef put_text(img, color, text, i, x_shift=10, y_shift=10):\n    font = cv2.FONT_HERSHEY_SIMPLEX\n    text_size = cv2.getTextSize(text, font, 1, 2)[0]\n    text_x, text_y = x_shift, y_shift + (i + 1) * text_size[1] * 2\n    img = cv2.putText(img, text, (text_x, text_y), font, 1.5, tuple(map(int, color)), 3)\n    return img\n\n\ndef draw_masks(img, masks, colors, classes):\n    if masks is not None:\n        assert len(colors) == len(masks)\n        mask_colors = [colors[i] for i, mask in enumerate(masks) if mask]\n        mask_classes = [classes[i] for i, mask in enumerate(masks) if mask]\n        masks = mmcv.concat_list(masks)\n        for i, (mask, color, cls) in enumerate(zip(masks, mask_colors, mask_classes)):\n            mask = mutils.decode(mask).astype(np.bool)\n            img[mask] = img[mask] * 0.5 + color 
* 0.5\n            img = put_text(img, color[0], cls, i)\n    return img\n\n\ndef get_gt_masks(annotation):\n    gt_masks = [[] for _ in range(N_CLASSES)]\n    for mask, label in zip(annotation['ann']['masks'], annotation['ann']['labels']):\n        gt_masks[label - 1].append(mask)\n    return gt_masks\n\n\ndef draw(args, root, output, metric_threshold, colors, classes):\n    prediction, annotation, metric = args\n    if metric is None:\n        output_filename = annotation['filename']\n    elif metric < metric_threshold:\n        output_filename = f\"{100 * metric:0.3f}_{annotation['filename']}\"\n    else:\n        return None\n\n    img = cv2.imread(osp.join(root, annotation['filename']))[..., ::-1]\n    _, prediction_masks = prediction\n    prediction_img = draw_masks(img.copy(), prediction_masks, colors, classes)\n    gt_img = draw_masks(img.copy(), get_gt_masks(annotation), colors, classes)\n\n    output_image = np.hstack([img, gt_img, prediction_img])\n    return cv2.imwrite(osp.join(output, output_filename), output_image[..., ::-1])\n\n\ndef main():\n    args = parse_args()\n    predictions = mmcv.load(args.predictions)\n    annotation = mmcv.load(args.annotation)\n    classes = json.load(open(args.classes))\n    classes = [x['name'] for x in classes['categories']]\n\n    if args.submission is not None:\n        submission = pd.read_csv(args.submission)\n        submission = submission.drop_duplicates('ImageId')\n        id2metric = dict(zip(submission['ImageId'], submission['mAP']))\n        metrics = [id2metric[x['filename']] for x in annotation]\n    else:\n        metrics = [None for _ in annotation]\n\n    os.makedirs(args.output, exist_ok=True)\n    colors = get_spaced_colors(N_CLASSES)\n    partial_draw = partial(\n        draw,\n        root=args.root,\n        output=args.output,\n        metric_threshold=args.metric_threshold,\n        colors=colors,\n        classes=classes\n    )\n    with Pool(args.n_jobs) as p:\n        list(\n            
tqdm(\n                iterable=p.imap_unordered(partial_draw, zip(predictions, annotation, metrics)), total=len(predictions)\n            )\n        )\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/eda.py",
    "content": "import argparse\nfrom mmdet.datasets import get_dataset\nimport numpy as np\nimport mmcv\nfrom mmcv import Config\nimport os\nimport cv2\nfrom tqdm import tqdm\nimport os.path as osp\n\nfrom src.visualization import draw_bounding_boxes_on_image_array\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('config', type=str)\n    parser.add_argument('--output', type=str)\n    return parser.parse_args()\n\n\ndef draw_masks(img, masks):\n    if masks is not None:\n        for i in range(masks.shape[-1]):\n            mask = masks[..., i]\n            color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)\n            img[mask] = img[mask] * 0.5 + color_mask * 0.5\n    return img\n\n\ndef main():\n    args = parse_args()\n    os.makedirs(args.output, exist_ok=True)\n    cfg = Config.fromfile(args.config)\n    dataset = get_dataset(cfg.data.train)\n    for i in tqdm(np.random.randint(0, len(dataset), 500)):\n        data = dataset[i]\n        img = data['img'].data.numpy().transpose(1, 2, 0)\n        masks = data['gt_masks'].data.transpose(1, 2, 0).astype(bool)\n        bboxes = data['gt_bboxes'].data.numpy()\n        img = mmcv.imdenormalize(img, mean=cfg.img_norm_cfg.mean, std=cfg.img_norm_cfg.std, to_bgr=False)\n        img = draw_masks(img, masks).astype(np.uint8)\n        draw_bounding_boxes_on_image_array(img, bboxes, use_normalized_coordinates=False, thickness=5)\n        cv2.imwrite(osp.join(args.output, f'{i}_{np.random.randint(0, 10000)}.jpg'), img[..., ::-1])\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/metric.py",
    "content": "import numpy as np\nfrom functools import partial\n\n\ndef precision_at(threshold, iou):\n    matches = iou > threshold\n    true_positives = np.sum(matches, axis=1) == 1  # Correct objects\n    false_positives = np.sum(matches, axis=0) == 0  # Extra objects\n    false_negatives = np.sum(matches, axis=1) == 0  # Missed objects\n    tp, fp, fn = np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)\n    return tp, fp, fn\n\n\ndef calc_iou(y_true, y_prediction):\n    y_true, y_prediction = map(partial(np.expand_dims, axis=0), (y_true, y_prediction))\n\n    true_objects = len(np.unique(y_true))\n    pred_objects = len(np.unique(y_prediction))\n\n    # Compute intersection between all objects\n    intersection = np.histogram2d(y_true.flatten(), y_prediction.flatten(), bins=(true_objects, pred_objects))[0]\n\n    # Compute areas (needed for finding the union between all objects)\n    area_true = np.histogram(y_true, bins=true_objects)[0]\n    area_pred = np.histogram(y_prediction, bins=pred_objects)[0]\n    area_true = np.expand_dims(area_true, -1)\n    area_pred = np.expand_dims(area_pred, 0)\n\n    # Compute union\n    union = area_true + area_pred - intersection\n\n    # Exclude background from the analysis\n    intersection = intersection[1:, 1:]\n    union = union[1:, 1:]\n    union[union == 0] = 1e-9\n\n    # Compute the intersection over union\n    iou = intersection / union\n    return iou\n\n\ndef calc_score_per_class(y_true, y_prediction):\n    iou = calc_iou(y_true, y_prediction)\n\n    # Loop over IoU thresholds\n    precisions = []\n    for t in np.arange(0.5, 1.0, 0.05):\n        tp, fp, fn = precision_at(t, iou)\n        p = tp / (tp + fp + fn)\n        precisions.append(p)\n    return np.mean(precisions)\n"
  },
  {
    "path": "src/prune.py",
    "content": "import argparse\n\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--weights', type=str)\n    parser.add_argument('--output', type=str)\n    return parser.parse_args()\n\n\ndef main():\n    args = parse_args()\n    weights = torch.load(args.weights)\n    weights['state_dict'] = {\n        k: v\n        for k, v in weights['state_dict'].items()\n        if not k.startswith('bbox_head') and not k.startswith('mask_head')\n    }\n    torch.save(weights, args.output)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/rle.py",
    "content": "from itertools import groupby\nfrom pycocotools import mask as mutils\nimport numpy as np\nfrom tqdm import tqdm\n\n\ndef kaggle_rle_encode(mask):\n    pixels = mask.T.flatten()\n    pixels = np.concatenate([[0], pixels, [0]])\n    rle = np.where(pixels[1:] != pixels[:-1])[0] + 1\n    rle[1::2] -= rle[::2]\n    return rle.tolist()\n\n\ndef kaggle_rle_decode(rle, h, w):\n    starts, lengths = map(np.asarray, (rle[::2], rle[1::2]))\n    starts -= 1\n    ends = starts + lengths\n    img = np.zeros(h * w, dtype=np.uint8)\n    for lo, hi in zip(starts, ends):\n        img[lo:hi] = 1\n    return img.reshape((w, h)).T\n\n\ndef coco_rle_encode(mask):\n    rle = {'counts': [], 'size': list(mask.shape)}\n    counts = rle.get('counts')\n    for i, (value, elements) in enumerate(groupby(mask.ravel(order='F'))):\n        if i == 0 and value == 1:\n            counts.append(0)\n        counts.append(len(list(elements)))\n    return rle\n\n\ndef coco_rle_decode(rle, h, w):\n    return mutils.decode(mutils.frPyObjects(rle, h, w))\n\n\ndef kaggle2coco(kaggle_rle, h, w):\n    if not len(kaggle_rle):\n        return {'counts': [h * w], 'size': [h, w]}\n    roll2 = np.roll(kaggle_rle, 2)\n    roll2[:2] = 1\n\n    roll1 = np.roll(kaggle_rle, 1)\n    roll1[:1] = 0\n\n    if h * w != kaggle_rle[-1] + kaggle_rle[-2] - 1:\n        shift = 1\n        end_value = h * w - kaggle_rle[-1] - kaggle_rle[-2] + 1\n    else:\n        shift = 0\n        end_value = 0\n    coco_rle = np.full(len(kaggle_rle) + shift, end_value)\n    coco_rle[:len(coco_rle) - shift] = kaggle_rle.copy()\n    coco_rle[:len(coco_rle) - shift:2] = (kaggle_rle - roll1 - roll2)[::2].copy()\n    return {'counts': coco_rle.tolist(), 'size': [h, w]}\n\n\ndef main():\n    for _ in tqdm(range(100)):\n        h = np.random.randint(1, 1000)\n        w = np.random.randint(1, 1000)\n        mask = np.random.randint(0, 2, h * w).reshape(h, w)\n\n        kaggle_rle = kaggle_rle_encode(mask)\n        coco_rle = 
coco_rle_encode(mask)\n        assert coco_rle == kaggle2coco(kaggle_rle, h, w)\n        assert np.all(mask == kaggle_rle_decode(kaggle_rle, h, w))\n        assert np.all(mask == coco_rle_decode(coco_rle, h, w))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/rm_attribute_classes.py",
    "content": "import argparse\nimport pandas as pd\n\nATTRIBUTE_CLASSES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--submission', type=str)\n    parser.add_argument('--output', type=str)\n    return parser.parse_args()\n\n\ndef main():\n    args = parse_args()\n    submission = pd.read_csv(args.submission)\n    submission_without_attributes = submission[~submission['ClassId'].isin(ATTRIBUTE_CLASSES)].copy()\n    empty_ids = list(set(submission['ImageId']) - set(submission_without_attributes['ImageId']))\n    submission_empty = pd.DataFrame([empty_ids, [''] * len(empty_ids), ['23'] * len(empty_ids)]\n                                   ).T.rename(columns={\n                                       0: 'ImageId',\n                                       1: 'EncodedPixels',\n                                       2: 'ClassId'\n                                   })\n    pd.concat([submission_without_attributes, submission_empty], sort=True).to_csv(args.output, index=False)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/split.py",
    "content": "import argparse\nimport mmcv\nimport numpy as np\nimport pickle\n\nfrom iterstrat.ml_stratifiers import MultilabelStratifiedKFold\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--annotation', type=str)\n    parser.add_argument('--train_output', type=str)\n    parser.add_argument('--val_output', type=str)\n    parser.add_argument('--n_splits', type=int, default=100)\n    parser.add_argument('--n_jobs', type=int, default=40)\n    parser.add_argument('--n_samples', type=int, default=-1)\n    return parser.parse_args()\n\n\ndef main():\n    args = parse_args()\n    annotation = mmcv.load(args.annotation)\n    all_labels = [x['ann']['labels'] for x in annotation]\n    y = np.zeros((len(all_labels), max([max(x) for x in all_labels])))\n    for labels in all_labels:\n        y[:, labels - 1] = 1\n\n    mskf = MultilabelStratifiedKFold(n_splits=args.n_splits, random_state=777)\n\n    for train_index, val_index in mskf.split(y, y):\n        train_annotation = [x for i, x in enumerate(annotation) if i in train_index]\n        val_annotation = [x for i, x in enumerate(annotation) if i in val_index]\n        with open(args.train_output, 'wb') as f:\n            pickle.dump(train_annotation, f)\n        with open(args.val_output, 'wb') as f:\n            pickle.dump(val_annotation, f)\n        print(f'train size: {len(train_annotation)}, val size: {len(val_annotation)}')\n        break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/submit.py",
    "content": "import argparse\nimport numpy as np\nimport pandas as pd\nfrom multiprocessing import Pool\nimport cv2\nfrom tqdm import tqdm\nimport mmcv\nimport pycocotools.mask as mutils\nfrom src.rle import kaggle_rle_encode\nfrom src.metric import calc_score_per_class\nfrom src.utils import create_labeled_mask, check_overlaps, hard_overlaps_suppression\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--annotation', type=str)\n    parser.add_argument('--predictions', type=str)\n    parser.add_argument('--output', type=str)\n    parser.add_argument('--n_jobs', type=int, default=80)\n    parser.add_argument('--add_metric', action='store_true')\n    return parser.parse_args()\n\n\ndef decode_and_resize(\n    masks, x_min=None, x_max=None, y_min=None, y_max=None, original_height=None, original_width=None\n):\n    binary_mask = mutils.decode(masks)\n    if x_min is not None:\n        crop_height, crop_width, channels = binary_mask.shape\n        assert crop_height == y_max - y_min\n        assert crop_width == x_max - x_min\n        original_mask = np.zeros((original_height, original_width, channels))\n        original_mask[y_min:y_max, x_min:x_max] = binary_mask\n        binary_mask = original_mask\n    binary_mask = cv2.resize(binary_mask, (512, 512), cv2.INTER_NEAREST)\n    if len(binary_mask.shape) == 2:\n        binary_mask = binary_mask[..., np.newaxis]\n    return binary_mask\n\n\ndef create_mask(args):\n    prediction, annotation = args\n    bbox_prediction, mask_prediction = prediction\n\n    samples = []\n    metrics = []\n    for cls, (masks, bboxes) in enumerate(zip(mask_prediction, bbox_prediction)):\n        if masks:\n            prediction_mask = decode_and_resize(\n                masks=masks,\n                x_min=annotation.get('x_min', None),\n                x_max=annotation.get('x_max', None),\n                y_min=annotation.get('y_min', None),\n                y_max=annotation.get('y_max', None),\n    
            original_height=annotation.get('original_height', None),\n                original_width=annotation.get('original_width', None)\n            )\n            if not check_overlaps(prediction_mask):\n                prediction_mask = hard_overlaps_suppression(prediction_mask.astype(bool), bboxes[..., -1])\n\n            if annotation['ann']['masks'] is not None:\n                indices = np.where(annotation['ann']['labels'] - 1 == cls)[0]\n                if len(indices):\n                    true_mask = decode_and_resize([annotation['ann']['masks'][i] for i in indices])\n                    true_labeled_mask = create_labeled_mask(true_mask)\n                    prediction_labeled_mask = create_labeled_mask(prediction_mask)\n                    metrics.append(calc_score_per_class(true_labeled_mask, prediction_labeled_mask))\n                else:\n                    metrics.append(0)\n\n            for mask_id in range(prediction_mask.shape[-1]):\n                rle = kaggle_rle_encode(prediction_mask[..., mask_id])\n                samples.append(\n                    {\n                        'ImageId': annotation['filename'],\n                        'EncodedPixels': ' '.join(map(str, rle)),\n                        'ClassId': str(cls)\n                    }\n                )\n        elif annotation['ann']['masks'] is not None and np.any(annotation['ann']['labels'] - 1 == cls):\n            metrics.append(0)\n\n    if not len(samples):\n        samples.append({'ImageId': annotation['filename'], 'EncodedPixels': '', 'ClassId': '23'})\n    if not len(metrics):\n        metrics.append(1)\n    return {'samples': samples, 'metric': np.mean(metrics)}\n\n\ndef main():\n    args = parse_args()\n    predictions = mmcv.load(args.predictions)\n    annotation = mmcv.load(args.annotation)\n    print(f'predictions: {args.predictions}')\n    print(f'output: {args.output}')\n\n    with Pool(args.n_jobs) as p:\n        results = list(\n            
tqdm(iterable=p.imap_unordered(create_mask, zip(predictions, annotation)), total=len(predictions))\n        )\n    samples = sum([x['samples'] for x in results], [])\n    metrics = [x['metric'] for x in results]\n\n    submission = pd.DataFrame(samples)\n    if args.add_metric:\n        submission['mAP'] = sum([[x['metric']] * len(x['samples']) for x in results], [])\n\n    submission.to_csv(args.output, index=False)\n    print(f'Mask mAP {np.mean(metrics)}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "src/utils.py",
    "content": "import numpy as np\nfrom pycocotools import mask as mutils\n\nfrom src.rle import kaggle2coco\n\n\ndef group2mmdetection(group: dict) -> dict:\n    image_id, group = group\n    assert group['Width'].max() == group['Width'].min()\n    assert group['Height'].max() == group['Height'].min()\n    height, width = group['Height'].max(), group['Width'].max()\n    rles = group['EncodedPixels'].apply(lambda x: kaggle2coco(list(map(int, x.split())), height, width)).tolist()\n    rles = mutils.frPyObjects(rles, height, width)\n    masks = mutils.decode(rles)\n    bboxes = mutils.toBbox(mutils.encode(np.asfortranarray(masks.astype(np.uint8))))\n    bboxes[:, 2] += bboxes[:, 0]\n    bboxes[:, 3] += bboxes[:, 1]\n    return {\n        'filename': image_id,\n        'width': width,\n        'height': height,\n        'ann':\n            {\n                'bboxes': np.array(bboxes, dtype=np.float32),\n                'original_labels': group['ClassId'].values,\n                'labels': group['ClassId'].apply(lambda x: x.split('_')[0]).values.astype(np.int) + 1,\n                'masks': rles\n            }\n    }\n\n\ndef create_labeled_mask(mask):\n    return (np.arange(1, mask.shape[-1] + 1)[None, None, :] * mask).sum(-1)\n\n\ndef check_overlaps(mask):\n    overlap_mask = mask.sum(axis=-1)\n    return np.array_equal(overlap_mask, overlap_mask.astype(bool))\n\n\ndef hard_overlaps_suppression(binary_mask, scores):\n    not_overlap_mask = []\n    for i in np.argsort(scores)[::-1]:\n        current_mask = binary_mask[..., i].copy()\n        for mask in not_overlap_mask:\n            current_mask = np.bitwise_and(current_mask, np.invert(mask))\n        not_overlap_mask.append(current_mask)\n    return np.stack(not_overlap_mask, -1)"
  },
  {
    "path": "src/visualization.py",
    "content": "import numpy as np\nimport PIL.Image as Image\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\n\nINDIGO = (75, 0, 130)\n\n\ndef draw_bounding_box_on_image(\n    image,\n    x_min,\n    y_min,\n    x_max,\n    y_max,\n    color,\n    thickness=4,\n    display_str_list=(),\n    use_normalized_coordinates=True,\n    fontsize=20\n):\n    draw = ImageDraw.Draw(image)\n    im_width, im_height = image.size\n    if use_normalized_coordinates:\n        (left, right, top, bottom) = (x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height)\n    else:\n        (left, right, top, bottom) = (x_min, x_max, y_min, y_max)\n    draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=thickness, fill=color)\n    try:\n        font = ImageFont.truetype('DejaVuSansMono.ttf', fontsize)\n    except IOError:\n        font = ImageFont.load_default()\n\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]\n    # Each display_str has a top and bottom margin of 0.05x.\n    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n\n    if top > total_display_str_height:\n        text_bottom = top\n    else:\n        text_bottom = bottom + total_display_str_height\n    # Reverse list and print from bottom to top.\n    for display_str in display_str_list[::-1]:\n        text_width, text_height = font.getsize(display_str)\n        margin = np.ceil(0.05 * text_height)\n        draw.rectangle([(left, text_bottom - text_height - 2 * margin), (left + text_width, text_bottom)], fill=color)\n        draw.text((left + margin, text_bottom - text_height - margin), display_str, fill=color, font=font)\n        text_bottom -= text_height - 2 * margin\n\n\ndef draw_bounding_boxes_on_image_array(\n    
image, bboxes, color=INDIGO, thickness=4, use_normalized_coordinates=True, fontsize=20\n):\n    image_pil = Image.fromarray(image)\n    draw_bounding_boxes_on_image(image_pil, bboxes, color, thickness, use_normalized_coordinates, fontsize)\n    np.copyto(image, np.array(image_pil))\n\n\ndef draw_bounding_boxes_on_image(\n    image, bboxes, color=INDIGO, thickness=4, use_normalized_coordinates=True, fontsize=20\n):\n    for bbox in bboxes:\n        draw_bounding_box_on_image(\n            image, bbox[0], bbox[1], bbox[2], bbox[3], color, thickness, (), use_normalized_coordinates, fontsize\n        )\n"
  }
]