Showing preview only (695K chars total). Download the full file or copy to clipboard to get everything.
Repository: yqyao/FCOS_PLUS
Branch: master
Commit: 0d20ba34ccc3
Files: 240
Total size: 632.0 KB
Directory structure:
gitextract_1mo5xw5_/
├── .flake8
├── .gitignore
├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MASKRCNN_README.md
├── MODEL_ZOO.md
├── README.md
├── TROUBLESHOOTING.md
├── configs/
│ ├── caffe2/
│ │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│ │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
│ │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│ ├── cityscapes/
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
│ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│ ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_faster_rcnn_R_50_C4_1x.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_faster_rcnn_fbnet.yaml
│ ├── e2e_faster_rcnn_fbnet_600.yaml
│ ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
│ ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_50_C4_1x.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_mask_rcnn_fbnet.yaml
│ ├── e2e_mask_rcnn_fbnet_600.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
│ ├── fcos/
│ │ ├── fcos_R_101_FPN_2x.yaml
│ │ ├── fcos_R_50_FPN_1x.yaml
│ │ ├── fcos_R_50_FPN_1x_center.yaml
│ │ ├── fcos_R_50_FPN_1x_center_giou.yaml
│ │ ├── fcos_X_101_32x8d_FPN_2x.yaml
│ │ ├── fcos_X_101_64x4d_FPN_2x.yaml
│ │ ├── fcos_bn_bs16_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml
│ │ └── fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml
│ ├── gn_baselines/
│ │ ├── README.md
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
│ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
│ │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
│ │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
│ ├── pascal_voc/
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
│ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│ ├── quick_schedules/
│ │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
│ │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
│ │ ├── rpn_R_50_C4_quick.yaml
│ │ └── rpn_R_50_FPN_quick.yaml
│ ├── retinanet/
│ │ ├── retinanet_R-101-FPN_1x.yaml
│ │ ├── retinanet_R-101-FPN_P5_1x.yaml
│ │ ├── retinanet_R-50-FPN_1x.yaml
│ │ ├── retinanet_R-50-FPN_1x_quick.yaml
│ │ ├── retinanet_R-50-FPN_P5_1x.yaml
│ │ └── retinanet_X_101_32x8d_FPN_1x.yaml
│ ├── rpn_R_101_FPN_1x.yaml
│ ├── rpn_R_50_C4_1x.yaml
│ ├── rpn_R_50_FPN_1x.yaml
│ └── rpn_X_101_32x8d_FPN_1x.yaml
├── demo/
│ ├── README.md
│ ├── fcos_demo.py
│ ├── predictor.py
│ └── webcam.py
├── docker/
│ ├── Dockerfile
│ └── docker-jupyter/
│ ├── Dockerfile
│ └── jupyter_notebook_config.py
├── maskrcnn_benchmark/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── defaults.py
│ │ └── paths_catalog.py
│ ├── csrc/
│ │ ├── ROIAlign.h
│ │ ├── ROIPool.h
│ │ ├── SigmoidFocalLoss.h
│ │ ├── cpu/
│ │ │ ├── ROIAlign_cpu.cpp
│ │ │ ├── nms_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda/
│ │ │ ├── ROIAlign_cuda.cu
│ │ │ ├── ROIPool_cuda.cu
│ │ │ ├── SigmoidFocalLoss_cuda.cu
│ │ │ ├── nms.cu
│ │ │ └── vision.h
│ │ ├── nms.h
│ │ └── vision.cpp
│ ├── data/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── build.py
│ │ ├── collate_batch.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── coco.py
│ │ │ ├── concat_dataset.py
│ │ │ ├── evaluation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── coco/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── coco_eval.py
│ │ │ │ └── voc/
│ │ │ │ ├── __init__.py
│ │ │ │ └── voc_eval.py
│ │ │ ├── list_dataset.py
│ │ │ └── voc.py
│ │ ├── samplers/
│ │ │ ├── __init__.py
│ │ │ ├── distributed.py
│ │ │ ├── grouped_batch_sampler.py
│ │ │ └── iteration_based_batch_sampler.py
│ │ └── transforms/
│ │ ├── __init__.py
│ │ ├── build.py
│ │ └── transforms.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── inference.py
│ │ └── trainer.py
│ ├── layers/
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── batch_norm.py
│ │ ├── iou_loss.py
│ │ ├── misc.py
│ │ ├── nms.py
│ │ ├── roi_align.py
│ │ ├── roi_pool.py
│ │ ├── scale.py
│ │ ├── sigmoid_focal_loss.py
│ │ └── smooth_l1_loss.py
│ ├── modeling/
│ │ ├── __init__.py
│ │ ├── backbone/
│ │ │ ├── __init__.py
│ │ │ ├── backbone.py
│ │ │ ├── fbnet.py
│ │ │ ├── fbnet_builder.py
│ │ │ ├── fbnet_modeldef.py
│ │ │ ├── fpn.py
│ │ │ ├── mobilenet.py
│ │ │ └── resnet.py
│ │ ├── balanced_positive_negative_sampler.py
│ │ ├── box_coder.py
│ │ ├── detector/
│ │ │ ├── __init__.py
│ │ │ ├── detectors.py
│ │ │ └── generalized_rcnn.py
│ │ ├── make_layers.py
│ │ ├── matcher.py
│ │ ├── poolers.py
│ │ ├── registry.py
│ │ ├── roi_heads/
│ │ │ ├── __init__.py
│ │ │ ├── box_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── box_head.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── roi_box_feature_extractors.py
│ │ │ │ └── roi_box_predictors.py
│ │ │ ├── keypoint_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── keypoint_head.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── roi_keypoint_feature_extractors.py
│ │ │ │ └── roi_keypoint_predictors.py
│ │ │ ├── mask_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── mask_head.py
│ │ │ │ ├── roi_mask_feature_extractors.py
│ │ │ │ └── roi_mask_predictors.py
│ │ │ └── roi_heads.py
│ │ ├── rpn/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_generator.py
│ │ │ ├── fcos/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fcos.py
│ │ │ │ ├── inference.py
│ │ │ │ └── loss.py
│ │ │ ├── inference.py
│ │ │ ├── loss.py
│ │ │ ├── retinanet/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ └── retinanet.py
│ │ │ ├── rpn.py
│ │ │ └── utils.py
│ │ └── utils.py
│ ├── solver/
│ │ ├── __init__.py
│ │ ├── build.py
│ │ └── lr_scheduler.py
│ ├── structures/
│ │ ├── __init__.py
│ │ ├── bounding_box.py
│ │ ├── boxlist_ops.py
│ │ ├── image_list.py
│ │ ├── keypoint.py
│ │ └── segmentation_mask.py
│ └── utils/
│ ├── README.md
│ ├── __init__.py
│ ├── c2_model_loading.py
│ ├── checkpoint.py
│ ├── collect_env.py
│ ├── comm.py
│ ├── cv2_util.py
│ ├── env.py
│ ├── imports.py
│ ├── logger.py
│ ├── metric_logger.py
│ ├── miscellaneous.py
│ ├── model_serialization.py
│ ├── model_zoo.py
│ ├── registry.py
│ └── timer.py
├── requirements.txt
├── setup.py
├── tests/
│ ├── checkpoint.py
│ ├── env_tests/
│ │ └── env.py
│ ├── test_backbones.py
│ ├── test_box_coder.py
│ ├── test_configs.py
│ ├── test_data_samplers.py
│ ├── test_detectors.py
│ ├── test_fbnet.py
│ ├── test_feature_extractors.py
│ ├── test_metric_logger.py
│ ├── test_nms.py
│ ├── test_predictors.py
│ ├── test_rpn_heads.py
│ ├── test_segmentation_mask.py
│ └── utils.py
└── tools/
├── cityscapes/
│ ├── convert_cityscapes_to_coco.py
│ └── instances2dict_with_polygons.py
├── remove_solver_states.py
├── test_net.py
└── train_net.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .flake8
================================================
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.
[flake8]
ignore = E203, E266, E501, W503
max-line-length = 80
max-complexity = 18
select = B,C,E,F,W,T4,B9
================================================
FILE: .gitignore
================================================
# compilation and distribution
__pycache__
_ext
*.pyc
*.so
maskrcnn_benchmark.egg-info/
build/
dist/
# pytorch/python/numpy formats
*.pth
*.pkl
*.npy
# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/
# Editor temporaries
*.swn
*.swo
*.swp
*~
# Pycharm editor settings
.idea
# project dirs
/datasets
/models
================================================
FILE: ABSTRACTIONS.md
================================================
## Abstractions
The main abstractions introduced by `maskrcnn_benchmark` that are useful to
have in mind are the following:
### ImageList
In PyTorch, the first dimension of the input to the network generally represents
the batch dimension, and thus all elements of the same batch have the same
height / width.
In order to support images with different sizes and aspect ratios in the same
batch, we created the `ImageList` class, which holds internally a batch of
images (of possibly different sizes). The images are padded with zeros such that
they have the same final size and batched over the first dimension. The original
sizes of the images before padding are stored in the `image_sizes` attribute,
and the batched tensor in `tensors`.
We provide a convenience function `to_image_list` that accepts a few different
input types, including a list of tensors, and returns an `ImageList` object.
```python
from maskrcnn_benchmark.structures.image_list import to_image_list
images = [torch.rand(3, 100, 200), torch.rand(3, 150, 170)]
batched_images = to_image_list(images)
# it is also possible to make the final batched image be a multiple of a number
batched_images_32 = to_image_list(images, size_divisible=32)
```
### BoxList
The `BoxList` class holds a set of bounding boxes (represented as a `Nx4` tensor) for
a specific image, as well as the size of the image as a `(width, height)` tuple.
It also contains a set of methods that allow to perform geometric
transformations to the bounding boxes (such as cropping, scaling and flipping).
The class accepts bounding boxes from two different input formats:
- `xyxy`, where each box is encoded as a `x1`, `y1`, `x2` and `y2` coordinates, and
- `xywh`, where each box is encoded as `x1`, `y1`, `w` and `h`.
Additionally, each `BoxList` instance can also hold arbitrary additional information
for each bounding box, such as labels, visibility, probability scores etc.
Here is an example on how to create a `BoxList` from a list of coordinates:
```python
from maskrcnn_benchmark.structures.bounding_box import BoxList, FLIP_LEFT_RIGHT
width = 100
height = 200
boxes = [
[0, 10, 50, 50],
[50, 20, 90, 60],
[10, 10, 50, 50]
]
# create a BoxList with 3 boxes
bbox = BoxList(boxes, image_size=(width, height), mode='xyxy')
# perform some box transformations, has similar API as PIL.Image
bbox_scaled = bbox.resize((width * 2, height * 3))
bbox_flipped = bbox.transpose(FLIP_LEFT_RIGHT)
# add labels for each bbox
labels = torch.tensor([0, 10, 1])
bbox.add_field('labels', labels)
# bbox also support a few operations, like indexing
# here, selects boxes 0 and 2
bbox_subset = bbox[[0, 2]]
```
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Mask-RCNN Benchmark
We want to make contributing to this project as easy and transparent as
possible.
## Our Development Process
Minor changes and improvements will be released on an ongoing basis. Larger changes (e.g., changesets implementing a new paper) will be released on a more periodic basis.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
* 4 spaces for indentation rather than tabs
* 80 character line length
* PEP8 formatting following [Black](https://black.readthedocs.io/en/stable/)
## License
By contributing to Mask-RCNN Benchmark, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
================================================
FILE: INSTALL.md
================================================
## Installation
### Requirements:
- PyTorch >= 1.0. Installation instructions can be found in https://pytorch.org/get-started/locally/.
- torchvision==0.2.1
- cocoapi
- yacs
- matplotlib
- GCC >= 4.9
- (optional) OpenCV for the webcam demo
### Option 1: Step-by-step installation
```bash
# first, make sure that your conda is setup properly with the right environment
# for that, check that `which conda`, `which pip` and `which python` points to the
# right path. From a clean conda env, this is what you need to do
conda create --name FCOS
conda activate FCOS
# this installs the right pip and dependencies for the fresh python
conda install ipython
# FCOS and coco api dependencies
pip install ninja yacs cython matplotlib tqdm
# follow PyTorch installation in https://pytorch.org/get-started/locally/
# we give the instructions for CUDA 9.0
conda install -c pytorch torchvision=0.2.1 cudatoolkit=9.0
export INSTALL_DIR=$PWD
# install pycocotools. Please make sure you have installed cython.
cd $INSTALL_DIR
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install
# install PyTorch Detection
cd $INSTALL_DIR
git clone https://github.com/yqyao/FCOS_PLUS.git
cd FCOS_PLUS
# the following will install the lib with
# symbolic links, so that you can modify
# the files if you want and won't need to
# re-build it
python setup.py build develop
unset INSTALL_DIR
# or if you are on macOS
# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py build develop
```
### Option 2: Docker Image (Requires CUDA, Linux only)
*The following steps are for original maskrcnn-benchmark. Please change the repository name if needed.*
Build image with defaults (`CUDA=9.0`, `CUDNN=7`, `FORCE_CUDA=1`):
nvidia-docker build -t maskrcnn-benchmark docker/
Build image with other CUDA and CUDNN versions:
nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/
Build image with FORCE_CUDA disabled:
nvidia-docker build -t maskrcnn-benchmark --build-arg FORCE_CUDA=0 docker/
Build and run image with built-in jupyter notebook(note that the password is used to log in jupyter notebook):
nvidia-docker build -t maskrcnn-benchmark-jupyter docker/docker-jupyter/
nvidia-docker run -td -p 8888:8888 -e PASSWORD=<password> -v <host-dir>:<container-dir> maskrcnn-benchmark-jupyter
================================================
FILE: LICENSE
================================================
FCOS for non-commercial purposes
Copyright (c) 2019 the authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: MASKRCNN_README.md
================================================
# Faster R-CNN and Mask R-CNN in PyTorch 1.0
This project aims at providing the necessary building blocks for easily
creating detection and segmentation models using PyTorch 1.0.

## Highlights
- **PyTorch 1.0:** RPN, Faster R-CNN and Mask R-CNN implementations that matches or exceeds Detectron accuracies
- **Very fast**: up to **2x** faster than [Detectron](https://github.com/facebookresearch/Detectron) and **30%** faster than [mmdetection](https://github.com/open-mmlab/mmdetection) during training. See [MODEL_ZOO.md](MODEL_ZOO.md) for more details.
- **Memory efficient:** uses roughly 500MB less GPU memory than mmdetection during training
- **Multi-GPU training and inference**
- **Batched inference:** can perform inference using multiple images per batch per GPU
- **CPU support for inference:** runs on CPU at inference time. See our [webcam demo](demo) for an example
- Provides pre-trained models for almost all reference Mask R-CNN and Faster R-CNN configurations with 1x schedule.
## Webcam and Jupyter notebook demo
We provide a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference:
```bash
cd demo
# by default, it runs on the GPU
# for best results, use min-image-size 800
python webcam.py --min-image-size 800
# can also run it on the CPU
python webcam.py --min-image-size 300 MODEL.DEVICE cpu
# or change the model that you want to use
python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
# in order to see the probability heatmaps, pass --show-mask-heatmaps
python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
# for the keypoint demo
python webcam.py --config-file ../configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
```
A notebook with the demo can be found in [demo/Mask_R-CNN_demo.ipynb](demo/Mask_R-CNN_demo.ipynb).
## Installation
Check [INSTALL.md](INSTALL.md) for installation instructions.
## Model Zoo and Baselines
Pre-trained models, baselines and comparison with Detectron and mmdetection
can be found in [MODEL_ZOO.md](MODEL_ZOO.md)
## Inference in a few lines
We provide a helper class to simplify writing inference pipelines using pre-trained models.
Here is how we would do it. Run this from the `demo` folder:
```python
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
config_file = "../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"
# update the config options with the config file
cfg.merge_from_file(config_file)
# manual override some options
cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
coco_demo = COCODemo(
cfg,
min_image_size=800,
confidence_threshold=0.7,
)
# load image and then run prediction
image = ...
predictions = coco_demo.run_on_opencv_image(image)
```
## Perform training on COCO dataset
For the following examples to work, you need to first install `maskrcnn_benchmark`.
You will also need to download the COCO dataset.
We recommend to symlink the path to the coco dataset to `datasets/` as follows
We use `minival` and `valminusminival` sets from [Detectron](https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/data/README.md#coco-minival-annotations)
```bash
# symlink the coco dataset
cd ~/github/maskrcnn-benchmark
mkdir -p datasets/coco
ln -s /path_to_coco_dataset/annotations datasets/coco/annotations
ln -s /path_to_coco_dataset/train2014 datasets/coco/train2014
ln -s /path_to_coco_dataset/test2014 datasets/coco/test2014
ln -s /path_to_coco_dataset/val2014 datasets/coco/val2014
# or use COCO 2017 version
ln -s /path_to_coco_dataset/annotations datasets/coco/annotations
ln -s /path_to_coco_dataset/train2017 datasets/coco/train2017
ln -s /path_to_coco_dataset/test2017 datasets/coco/test2017
ln -s /path_to_coco_dataset/val2017 datasets/coco/val2017
# for pascal voc dataset:
ln -s /path_to_VOCdevkit_dir datasets/voc
```
P.S. `COCO_2017_train` = `COCO_2014_train` + `valminusminival` , `COCO_2017_val` = `minival`
You can also configure your own paths to the datasets.
For that, all you need to do is to modify `maskrcnn_benchmark/config/paths_catalog.py` to
point to the location where your dataset is stored.
You can also create a new `paths_catalog.py` file which implements the same two classes,
and pass it as a config argument `PATHS_CATALOG` during training.
### Single GPU training
Most of the configuration files that we provide assume that we are running on 8 GPUs.
In order to be able to run it on fewer GPUs, there are a few possibilities:
**1. Run the following without modifications**
```bash
python /path_to_maskrcnn_benchmark/tools/train_net.py --config-file "/path/to/config/file.yaml"
```
This should work out of the box and is very similar to what we should do for multi-GPU training.
But the drawback is that it will use much more GPU memory. The reason is that we set in the
configuration files a global batch size that is divided over the number of GPUs. So if we only
have a single GPU, this means that the batch size for that GPU will be 8x larger, which might lead
to out-of-memory errors.
If you have a lot of memory available, this is the easiest solution.
**2. Modify the cfg parameters**
If you experience out-of-memory errors, you can reduce the global batch size. But this means that
you'll also need to change the learning rate, the number of iterations and the learning rate schedule.
Here is an example for Mask R-CNN R-50 FPN with the 1x schedule:
```bash
python tools/train_net.py --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 SOLVER.MAX_ITER 720000 SOLVER.STEPS "(480000, 640000)" TEST.IMS_PER_BATCH 1
```
This follows the [scheduling rules from Detectron.](https://github.com/facebookresearch/Detectron/blob/master/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml#L14-L30)
Note that we have multiplied the number of iterations by 8x (as well as the learning rate schedules),
and we have divided the learning rate by 8x.
We also changed the batch size during testing, but that is generally not necessary because testing
requires much less memory than training.
### Multi-GPU training
We use internally `torch.distributed.launch` in order to launch
multi-gpu training. This utility function from PyTorch spawns as many
Python processes as the number of GPUs we want to use, and each Python
process will only use a single GPU.
```bash
export NGPUS=8
python -m torch.distributed.launch --nproc_per_node=$NGPUS /path_to_maskrcnn_benchmark/tools/train_net.py --config-file "path/to/config/file.yaml"
```
## Abstractions
For more information on some of the main abstractions in our implementation, see [ABSTRACTIONS.md](ABSTRACTIONS.md).
## Adding your own dataset
This implementation adds support for COCO-style datasets.
But adding support for training on a new dataset can be done as follows:
```python
from maskrcnn_benchmark.structures.bounding_box import BoxList
class MyDataset(object):
def __init__(self, ...):
# as you would do normally
def __getitem__(self, idx):
# load the image as a PIL Image
image = ...
# load the bounding boxes as a list of list of boxes
# in this case, for illustrative purposes, we use
# x1, y1, x2, y2 order.
boxes = [[0, 0, 10, 10], [10, 20, 50, 50]]
# and labels
labels = torch.tensor([10, 20])
# create a BoxList from the boxes
boxlist = BoxList(boxes, image.size, mode="xyxy")
# add the labels to the boxlist
boxlist.add_field("labels", labels)
if self.transforms:
image, boxlist = self.transforms(image, boxlist)
# return the image, the boxlist and the idx in your dataset
return image, boxlist, idx
def get_img_info(self, idx):
# get img_height and img_width. This is used if
# we want to split the batches according to the aspect ratio
# of the image, as it can be more efficient than loading the
# image from disk
return {"height": img_height, "width": img_width}
```
That's it. You can also add extra fields to the boxlist, such as segmentation masks
(using `structures.segmentation_mask.SegmentationMask`), or even your own instance type.
For a full example of how the `COCODataset` is implemented, check [`maskrcnn_benchmark/data/datasets/coco.py`](maskrcnn_benchmark/data/datasets/coco.py).
Once you have created your dataset, it needs to be added in a couple of places:
- [`maskrcnn_benchmark/data/datasets/__init__.py`](maskrcnn_benchmark/data/datasets/__init__.py): add it to `__all__`
- [`maskrcnn_benchmark/config/paths_catalog.py`](maskrcnn_benchmark/config/paths_catalog.py): `DatasetCatalog.DATASETS` and corresponding `if` clause in `DatasetCatalog.get()`
### Testing
While the aforementioned example should work for training, we leverage the
cocoApi for computing the accuracies during testing. Thus, test datasets
should follow the cocoApi format for now.
To enable your dataset for testing, add a corresponding if statement in [`maskrcnn_benchmark/data/datasets/evaluation/__init__.py`](maskrcnn_benchmark/data/datasets/evaluation/__init__.py):
```python
if isinstance(dataset, datasets.MyDataset):
return coco_evaluation(**args)
```
## Finetuning from Detectron weights on custom datasets
Create a script `tools/trim_detectron_model.py` like [here](https://gist.github.com/wangg12/aea194aa6ab6a4de088f14ee193fd968).
You can decide which keys to be removed and which keys to be kept by modifying the script.
Then you can simply point the converted model path in the config file by changing `MODEL.WEIGHT`.
For further information, please refer to [#15](https://github.com/facebookresearch/maskrcnn-benchmark/issues/15).
## Troubleshooting
If you have issues running or compiling this code, we have compiled a list of common issues in
[TROUBLESHOOTING.md](TROUBLESHOOTING.md). If your issue is not present there, please feel
free to open a new issue.
## Citations
Please consider citing this project in your publications if it helps your research. The following is a BibTeX reference. The BibTeX entry requires the `url` LaTeX package.
```
@misc{massa2018mrcnn,
author = {Massa, Francisco and Girshick, Ross},
title = {{maskrcnn-benchmark: Fast, modular reference implementation of Instance Segmentation and Object Detection algorithms in PyTorch}},
year = {2018},
howpublished = {\url{https://github.com/facebookresearch/maskrcnn-benchmark}},
note = {Accessed: [Insert date here]}
}
```
## Projects using maskrcnn-benchmark
- [RetinaMask: Learning to predict masks improves state-of-the-art single-shot detection for free](https://arxiv.org/abs/1901.03353).
Cheng-Yang Fu, Mykhailo Shvets, and Alexander C. Berg.
Tech report, arXiv,1901.03353.
## License
maskrcnn-benchmark is released under the MIT license. See [LICENSE](LICENSE) for additional details.
================================================
FILE: MODEL_ZOO.md
================================================
## Model Zoo and Baselines
### Hardware
- 8 NVIDIA V100 GPUs
### Software
- PyTorch version: 1.0.0a0+dd2c487
- CUDA 9.2
- CUDNN 7.1
- NCCL 2.2.13-1
### End-to-end Faster and Mask R-CNN baselines
All the baselines were trained using the exact same experimental setup as in Detectron.
We initialize the detection models with ImageNet weights from Caffe2, the same as used by Detectron.
The pre-trained models are available in the link in the model id.
backbone | type | lr sched | im / gpu | train mem(GB) | train time (s/iter) | total train time(hr) | inference time(s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-C4 | Fast | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.17130 | 34.8 | - | [6358800](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_C4_1x.pth)
R-50-FPN | Fast | 1x | 2 | 4.4 | 0.3530 | 8.8 | 0.12580 | 36.8 | - | [6358793](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Fast | 1x | 2 | 7.1 | 0.4591 | 11.5 | 0.143149 | 39.1 | - | [6358804](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Fast | 1x | 1 | 7.6 | 0.7007 | 35.0 | 0.209965 | 41.2 | - | [6358717](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth)
R-50-C4 | Mask | 1x | 1 | 5.8 | 0.4520 | 22.6 | 0.17796 + 0.028 | 35.6 | 31.5 | [6358801](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_C4_1x.pth)
R-50-FPN | Mask | 1x | 2 | 5.2 | 0.4536 | 11.3 | 0.12966 + 0.034 | 37.8 | 34.2 | [6358792](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_50_FPN_1x.pth)
R-101-FPN | Mask | 1x | 2 | 7.9 | 0.5665 | 14.2 | 0.15384 + 0.034 | 40.1 | 36.1 | [6358805](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_R_101_FPN_1x.pth)
X-101-32x8d-FPN | Mask | 1x | 1 | 7.8 | 0.7562 | 37.8 | 0.21739 + 0.034 | 42.2 | 37.8 | [6358718](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_X_101_32x8d_FPN_1x.pth)
For person keypoint detection:
backbone | type | lr sched | im / gpu | train mem(GB) | train time (s/iter) | total train time(hr) | inference time(s/im) | box AP | keypoint AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
R-50-FPN | Keypoint | 1x | 2 | 5.7 | 0.3771 | 9.4 | 0.10941 | 53.7 | 64.3 | 9981060
### Light-weight Model baselines
We provided pre-trained models for selected FBNet models.
* All the models are trained from scratch with BN using the training schedule specified below.
* Evaluation is performed on a single NVIDIA V100 GPU with `MODEL.RPN.POST_NMS_TOP_N_TEST` set to `200`.
The following inference time is reported:
* inference total batch=8: Total inference time including data loading, model inference and pre/post preprocessing using 8 images per batch.
* inference model batch=8: Model inference time only and using 8 images per batch.
* inference model batch=1: Model inference time only and using 1 image per batch.
* inference caffe2 batch=1: Model inference time for the model in Caffe2 format using 1 image per batch. The Caffe2 models fused the BN to Conv and purely run on C++/CUDA by using Caffe2 ops for rpn/detection post processing.
The pre-trained models are available in the link in the model id.
backbone | type | resolution | lr sched | im / gpu | train mem(GB) | train time (s/iter) | total train time (hr) | inference total batch=8 (s/im) | inference model batch=8 (s/im) | inference model batch=1 (s/im) | inference caffe2 batch=1 (s/im) | box AP | mask AP | model id
-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
[R-50-C4](configs/e2e_faster_rcnn_R_50_C4_1x.yaml) (reference) | Fast | 800 | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.0875 | **0.0793** | 0.0831 | **0.0625** | 34.4 | - | f35857197
[fbnet_chamv1a](configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml) | Fast | 600 | 0.75x | 12 | 13.6 | 0.5444 | 20.5 | 0.0315 | **0.0260** | 0.0376 | **0.0188** | 33.5 | - | [f100940543](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_fbnet_chamv1a_600.pth)
[fbnet_default](configs/e2e_faster_rcnn_fbnet_600.yaml) | Fast | 600 | 0.5x | 16 | 11.1 | 0.4872 | 12.5 | 0.0316 | **0.0250** | 0.0297 | **0.0130** | 28.2 | - | [f101086388](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_fbnet_600.pth)
[R-50-C4](configs/e2e_mask_rcnn_R_50_C4_1x.yaml) (reference) | Mask | 800 | 1x | 1 | 5.8 | 0.452 | 22.6 | 0.0918 | **0.0848** | 0.0844 | - | 35.2 | 31.0 | f35858791
[fbnet_xirb16d](configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml) | Mask | 600 | 0.5x | 16 | 13.4 | 1.1732 | 29 | 0.0386 | **0.0319** | 0.0356 | - | 30.7 | 26.9 | [f101086394](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_fbnet_xirb16d_dsmask.pth)
[fbnet_default](configs/e2e_mask_rcnn_fbnet_600.yaml) | Mask | 600 | 0.5x | 16 | 13.0 | 0.9036 | 23.0 | 0.0327 | **0.0269** | 0.0385 | - | 29.0 | 26.1 | [f101086385](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_fbnet_600.pth)
## Comparison with Detectron and mmdetection
In the following section, we compare our implementation with [Detectron](https://github.com/facebookresearch/Detectron)
and [mmdetection](https://github.com/open-mmlab/mmdetection).
The same remarks from [mmdetection](https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#training-speed)
about different hardware applies here.
### Training speed
The numbers here are in seconds / iteration. The lower, the better.
type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 0.566 | - | 0.4036
Faster R-CNN R-50 FPN | 0.544 | 0.554 | 0.3530
Faster R-CNN R-101 FPN | 0.647 | - | 0.4591
Faster R-CNN X-101-32x8d FPN | 0.799 | - | 0.7007
Mask R-CNN R-50 C4 | 0.620 | - | 0.4520
Mask R-CNN R-50 FPN | 0.889 | 0.690 | 0.4536
Mask R-CNN R-101 FPN | 1.008 | - | 0.5665
Mask R-CNN X-101-32x8d FPN | 0.961 | - | 0.7562
### Training memory
The lower, the better
type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 6.3 | - | 5.8
Faster R-CNN R-50 FPN | 7.2 | 4.9 | 4.4
Faster R-CNN R-101 FPN | 8.9 | - | 7.1
Faster R-CNN X-101-32x8d FPN | 7.0 | - | 7.6
Mask R-CNN R-50 C4 | 6.6 | - | 5.8
Mask R-CNN R-50 FPN | 8.6 | 5.9 | 5.2
Mask R-CNN R-101 FPN | 10.2 | - | 7.9
Mask R-CNN X-101-32x8d FPN | 7.7 | - | 7.8
### Accuracy
The higher, the better
type | Detectron (P100) | mmdetection (V100) | maskrcnn_benchmark (V100)
-- | -- | -- | --
Faster R-CNN R-50 C4 | 34.8 | - | 34.8
Faster R-CNN R-50 FPN | 36.7 | 36.7 | 36.8
Faster R-CNN R-101 FPN | 39.4 | - | 39.1
Faster R-CNN X-101-32x8d FPN | 41.3 | - | 41.2
Mask R-CNN R-50 C4 | 35.8 & 31.4 | - | 35.6 & 31.5
Mask R-CNN R-50 FPN | 37.7 & 33.9 | 37.5 & 34.4 | 37.8 & 34.2
Mask R-CNN R-101 FPN | 40.0 & 35.9 | - | 40.1 & 36.1
Mask R-CNN X-101-32x8d FPN | 42.1 & 37.3 | - | 42.2 & 37.8
================================================
FILE: README.md
================================================
# FCOS_PLUS
This project contains some improvements to FCOS (Fully Convolutional One-Stage Object Detection).
## Installation
Please check [INSTALL.md](INSTALL.md) (same as original FCOS) for installation instructions.
**Results**
Model | Total training mem (GB) | Multi-scale training | Testing time / im | AP (minival) | link
--- |:---:|:---:|:---:|:---:|:---:|
FCOS_R_50_FPN_1x | 29.3 | No | 71ms | 37.0 | [model](https://pan.baidu.com/s/1Xcbx7EfOGvwnexXAuovM0A) |
FCOS_R_50_FPN_1x_center | 30.61 | No | 71ms | 37.8 | [model](https://pan.baidu.com/s/1Gs7AzmJRmeYhXUPDQZuSLA) |
FCOS_R_50_FPN_1x_center_liou | 30.61 | No | 71ms | 38.1 | [model](https://pan.baidu.com/s/1HpYrkAsVXNvXRFTd06SGgA) |
FCOS_R_50_FPN_1x_center_giou | 30.61 | No | 71ms | 38.2 | [model](https://pan.baidu.com/s/13_o6343Ikg4td01kVXxGSw) |
FCOS_R_101_FPN_2x | 44.1 | Yes | 74ms | 41.4 | [model](https://pan.baidu.com/s/1u_5OD5NURYe1EYFWnohgEA) |
FCOS_R_101_FPN_2x_center_giou | 44.1 | Yes | 74ms | 42.5 | [model](https://pan.baidu.com/s/1qhHM067ywwlEXfamaFq23g) |
[1] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \
[2] center means [center sample](fcos.pdf) is used in our training. \
[3] liou means the model uses the linear IoU loss function (1 - IoU). \
[4] giou means the model uses the GIoU loss function (1 - GIoU).
## Training
The following command line will train FCOS_R_50_FPN_1x on 8 GPUs with Synchronous Stochastic Gradient Descent (SGD):
python -m torch.distributed.launch \
--nproc_per_node=8 \
--master_port=$((RANDOM + 10000)) \
tools/train_net.py \
--skip-test \
--config-file configs/fcos/fcos_R_50_FPN_1x_center_giou.yaml \
DATALOADER.NUM_WORKERS 2 \
OUTPUT_DIR training_dir/fcos_R_50_FPN_1x_center_giou
Note that:
1) If you want to use fewer GPUs, please change `--nproc_per_node` to the number of GPUs. No other settings need to be changed. The total batch size does not depend on `nproc_per_node`. If you want to change the total batch size, please change `SOLVER.IMS_PER_BATCH` in [configs/fcos/fcos_R_50_FPN_1x_center_giou.yaml](configs/fcos/fcos_R_50_FPN_1x_center_giou.yaml).
2) The models will be saved into `OUTPUT_DIR`.
3) If you want to train FCOS with other backbones, please change `--config-file`.
## Citations
Please consider citing original paper in your publications if the project helps your research.
```
@article{tian2019fcos,
title = {{FCOS}: Fully Convolutional One-Stage Object Detection},
author = {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
journal = {arXiv preprint arXiv:1904.01355},
year = {2019}
}
```
## License
For academic use, this project is licensed under the 2-clause BSD License - see the LICENSE file for details. For commercial use, please contact the authors.
================================================
FILE: TROUBLESHOOTING.md
================================================
# Troubleshooting
Here is a compilation of common issues that you might face
while compiling / running this code:
## Compilation errors when compiling the library
If you encounter build errors like the following:
```
/usr/include/c++/6/type_traits:1558:8: note: provided for ‘template<class _From, class _To> struct std::is_convertible’
struct is_convertible
^~~~~~~~~~~~~~
/usr/include/c++/6/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>&&; bool <anonymous> = true; _Elements = {at::Tensor, at::Tensor, at::Tensor, at::Tensor}]’ not a return-statement
}
^
error: command '/usr/local/cuda/bin/nvcc' failed with exit status 1
```
check your CUDA version and your `gcc` version.
```
nvcc --version
gcc --version
```
If you are using CUDA 9.0 and gcc 6.4.0, then refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/25,
which has a summary of the solution. Basically, CUDA 9.0 is not compatible with gcc 6.4.0.
## ImportError: No module named maskrcnn_benchmark.config when running webcam.py
This means that `maskrcnn-benchmark` has not been properly installed.
Refer to https://github.com/facebookresearch/maskrcnn-benchmark/issues/22 for a few possible issues.
Note that we now support Python 2 as well.
## ImportError: Undefined symbol: __cudaPopCallConfiguration error when import _C
This probably means that the version of the NVCC compiler is inconsistent with your conda CUDAToolKit package. This was first mentioned in https://github.com/facebookresearch/maskrcnn-benchmark/issues/45 . All you need to do is:
```
# Check the NVCC compile version(e.g.)
/usr/cuda-9.2/bin/nvcc --version
# Check the CUDAToolKit version(e.g.)
~/anaconda3/bin/conda list | grep cuda
# If you need to update your CUDAToolKit
~/anaconda3/bin/conda install -c anaconda cudatoolkit==9.2
```
Both of them should have the **same** version. For example, if NVCC==9.2 and CUDAToolKit==9.2, everything will be fine, whereas if NVCC==9.2 but CUDAToolKit==9.0, it will fail.
## Segmentation fault (core dumped) when running the library
This probably means that you have compiled the library using GCC < 4.9, which is ABI incompatible with PyTorch.
Indeed, during installation, you probably saw a message like
```
Your compiler (g++ 4.8) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.
See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
```
Follow the instructions on https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
to install GCC 4.9 or higher, and try recompiling `maskrcnn-benchmark` again, after cleaning the
`build` folder with
```
rm -rf build
```
================================================
FILE: configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
DATASETS:
TEST: ("coco_2014_minival",)
================================================
FILE: configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 2
ROI_KEYPOINT_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
PREDICTOR: "KeypointRCNNPredictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 56
SHARE_BOX_FEATURE_EXTRACTOR: False
KEYPOINT_ON: True
DATASETS:
TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
TEST: ("keypoints_coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
ROI_MASK_HEAD:
PREDICTOR: "MaskRCNNC4Predictor"
SHARE_BOX_FEATURE_EXTRACTOR: True
MASK_ON: True
DATASETS:
TEST: ("coco_2014_minival",)
================================================
FILE: configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x"
BACKBONE:
CONV_BODY: "R-152-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
================================================
FILE: configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 9
DATASETS:
TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (18000,)
MAX_ITER: 24000
================================================
FILE: configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 9
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (18000,)
MAX_ITER: 24000
================================================
FILE: configs/e2e_faster_rcnn_R_101_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/e2e_faster_rcnn_R_50_C4_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 1000
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/e2e_faster_rcnn_R_50_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
BACKBONE:
CONV_BODY: "R-101-FPN"
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/e2e_faster_rcnn_fbnet.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "default"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
RPN:
ANCHOR_SIZES: (16, 32, 64, 128, 256)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 100
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 512
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (320, )
MAX_SIZE_TRAIN: 640
MIN_SIZE_TEST: 320
MAX_SIZE_TEST: 640
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_faster_rcnn_fbnet_600.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "default"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
RPN:
ANCHOR_SIZES: (32, 64, 128, 256, 512)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 200
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (600, )
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 600
MAX_SIZE_TEST: 1000
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "cham_v1a"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
RPN:
ANCHOR_SIZES: (32, 64, 128, 256, 512)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 200
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 128
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.045
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (90000, 120000)
MAX_ITER: 135000
IMS_PER_BATCH: 96 # for 8GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (600, )
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 600
MAX_SIZE_TEST: 1000
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 2
ROI_KEYPOINT_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
PREDICTOR: "KeypointRCNNPredictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 56
SHARE_BOX_FEATURE_EXTRACTOR: False
KEYPOINT_ON: True
DATASETS:
TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
TEST: ("keypoints_coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/e2e_mask_rcnn_R_101_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/e2e_mask_rcnn_R_50_C4_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 1000
ROI_MASK_HEAD:
PREDICTOR: "MaskRCNNC4Predictor"
SHARE_BOX_FEATURE_EXTRACTOR: True
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/e2e_mask_rcnn_fbnet.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "default"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
DET_HEAD_LAST_SCALE: 0.0
RPN:
ANCHOR_SIZES: (16, 32, 64, 128, 256)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 100
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
ROI_MASK_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head_mask
PREDICTOR: "MaskRCNNConv1x1Predictor"
RESOLUTION: 12
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (320, )
MAX_SIZE_TRAIN: 640
MIN_SIZE_TEST: 320
MAX_SIZE_TEST: 640
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_mask_rcnn_fbnet_600.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "default"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
DET_HEAD_LAST_SCALE: 0.0
RPN:
ANCHOR_SIZES: (32, 64, 128, 256, 512)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 200
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
ROI_MASK_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head_mask
PREDICTOR: "MaskRCNNConv1x1Predictor"
RESOLUTION: 12
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8 GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (600, )
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 600
MAX_SIZE_TEST: 1000
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "xirb16d_dsmask"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
DET_HEAD_LAST_SCALE: -1.0
RPN:
ANCHOR_SIZES: (16, 32, 64, 128, 256)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 100
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 512
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
ROI_MASK_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head_mask
PREDICTOR: "MaskRCNNConv1x1Predictor"
RESOLUTION: 12
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8 GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (320, )
MAX_SIZE_TRAIN: 640
MIN_SIZE_TEST: 320
MAX_SIZE_TEST: 640
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
CONV_BODY: FBNet
FBNET:
ARCH: "xirb16d_dsmask"
BN_TYPE: "bn"
WIDTH_DIVISOR: 8
DW_CONV_SKIP_BN: True
DW_CONV_SKIP_RELU: True
DET_HEAD_LAST_SCALE: 0.0
RPN:
ANCHOR_SIZES: (32, 64, 128, 256, 512)
ANCHOR_STRIDE: (16, )
BATCH_SIZE_PER_IMAGE: 256
PRE_NMS_TOP_N_TRAIN: 6000
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TRAIN: 2000
POST_NMS_TOP_N_TEST: 200
RPN_HEAD: FBNet.rpn_head
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head
NUM_CLASSES: 81
ROI_MASK_HEAD:
POOLER_RESOLUTION: 6
FEATURE_EXTRACTOR: FBNet.roi_head_mask
PREDICTOR: "MaskRCNNConv1x1Predictor"
RESOLUTION: 12
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.06
WARMUP_FACTOR: 0.1
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 128 # for 8 GPUs
# TEST:
# IMS_PER_BATCH: 8
INPUT:
MIN_SIZE_TRAIN: (600, )
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 600
MAX_SIZE_TEST: 1000
PIXEL_MEAN: [103.53, 116.28, 123.675]
PIXEL_STD: [57.375, 57.12, 58.395]
================================================
FILE: configs/fcos/fcos_R_101_FPN_2x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_RANGE_TRAIN: (640, 800)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_R_50_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "pretrain_models/R-50.pkl"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
FCOS:
CENTER_SAMPLE: False
DATASETS:
TRAIN: ("coco_2017_train", )
TEST: ("coco_2017_val", )
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_R_50_FPN_1x_center.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "pretrain_models/R-50.pkl"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
FCOS:
CENTER_SAMPLE: True
POS_RADIUS: 1.5
DATASETS:
TRAIN: ("coco_2017_train", )
TEST: ("coco_2017_val", )
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_R_50_FPN_1x_center_giou.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "pretrain_models/R-50.pkl"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
FCOS:
CENTER_SAMPLE: True
POS_RADIUS: 1.5
LOC_LOSS_TYPE: "giou"
DATASETS:
TRAIN: ("coco_2017_train", )
TEST: ("coco_2017_val", )
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_X_101_32x8d_FPN_2x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
STRIDE_IN_1X1: False
BACKBONE_OUT_CHANNELS: 256
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_RANGE_TRAIN: (640, 800)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_X_101_64x4d_FPN_2x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-64x4d"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
STRIDE_IN_1X1: False
BACKBONE_OUT_CHANNELS: 256
NUM_GROUPS: 64
WIDTH_PER_GROUP: 4
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_RANGE_TRAIN: (640, 800)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_bn_bs16_MNV2_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "MNV2-FPN-RETINANET"
FREEZE_CONV_BODY_AT: 0
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
USE_SYNCBN: False
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_syncbn_bs32_MNV2_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "MNV2-FPN-RETINANET"
FREEZE_CONV_BODY_AT: 0
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
USE_SYNCBN: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 32
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "MNV2-FPN-RETINANET"
FREEZE_CONV_BODY_AT: 0
RESNETS:
BACKBONE_OUT_CHANNELS: 128
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
USE_SYNCBN: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 32
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "MNV2-FPN-RETINANET"
FREEZE_CONV_BODY_AT: 0
RESNETS:
BACKBONE_OUT_CHANNELS: 128
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
USE_SYNCBN: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_RANGE_TRAIN: (640, 800)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 32
WARMUP_METHOD: "constant"
================================================
FILE: configs/fcos/fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth"
RPN_ONLY: True
FCOS_ON: True
BACKBONE:
CONV_BODY: "MNV2-FPN-RETINANET"
FREEZE_CONV_BODY_AT: 0
RESNETS:
BACKBONE_OUT_CHANNELS: 128
RETINANET:
USE_C5: False # FCOS uses P5 instead of C5
USE_SYNCBN: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_RANGE_TRAIN: (640, 800)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 64
WARMUP_METHOD: "constant"
================================================
FILE: configs/gn_baselines/README.md
================================================
### Group Normalization
1 [Group Normalization](https://arxiv.org/abs/1803.08494)
2 [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883)
3 [official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md)
### Performance
| case | Type | lr schd | im/gpu | bbox AP | mask AP |
|----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:|
| R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 |
| R-50-FPN, GN (implement) | finetune | 2x | 2 | 40.2 | 36.0 |
| R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 |
| R-50-FPN, GN (implement) | from scratch | 3x | 2 | 38.9 | 35.1 |
================================================
FILE: configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
CONV_HEAD_DIM: 256
NUM_STACKED_CONVS: 4
FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
USE_GN: True # use GN for mask head
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
CONV_LAYERS: (256, 256, 256, 256)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
CONV_HEAD_DIM: 256
NUM_STACKED_CONVS: 4
FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
USE_GN: True # use GN for mask head
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
CONV_LAYERS: (256, 256, 256, 256)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "" # no pretrained model
BACKBONE:
CONV_BODY: "R-50-FPN"
FREEZE_CONV_BODY_AT: 0 # train all layers (no frozen stages; WEIGHT is empty, so training is from scratch)
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (210000, 250000)
MAX_ITER: 270000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "" # no pretrained model
BACKBONE:
CONV_BODY: "R-50-FPN"
FREEZE_CONV_BODY_AT: 0 # train all layers (no frozen stages; WEIGHT is empty, so training is from scratch)
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
CONV_HEAD_DIM: 256
NUM_STACKED_CONVS: 4
FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (210000, 250000)
MAX_ITER: 270000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "" # no pretrained model
BACKBONE:
CONV_BODY: "R-50-FPN"
FREEZE_CONV_BODY_AT: 0 # train all layers (no frozen stages; WEIGHT is empty, so training is from scratch)
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
USE_GN: True # use GN for mask head
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
CONV_LAYERS: (256, 256, 256, 256)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (210000, 250000)
MAX_ITER: 270000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
================================================
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "" # no pretrained model
BACKBONE:
CONV_BODY: "R-50-FPN"
FREEZE_CONV_BODY_AT: 0 # train all layers (no frozen stages; WEIGHT is empty, so training is from scratch)
RESNETS: # use GN for backbone
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
TRANS_FUNC: "BottleneckWithGN"
STEM_FUNC: "StemWithGN"
FPN:
USE_GN: True # use GN for FPN
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 512
POSITIVE_FRACTION: 0.25
ROI_BOX_HEAD:
USE_GN: True # use GN for bbox head
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
CONV_HEAD_DIM: 256
NUM_STACKED_CONVS: 4
FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
USE_GN: True # use GN for mask head
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
CONV_LAYERS: (256, 256, 256, 256)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 8 gpus
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (210000, 250000)
MAX_ITER: 270000
IMS_PER_BATCH: 16
TEST:
IMS_PER_BATCH: 8
================================================
FILE: configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 300
ANCHOR_SIZES: (128, 256, 512)
ROI_BOX_HEAD:
NUM_CLASSES: 21
DATASETS:
TRAIN: ("voc_2007_train", "voc_2007_val")
TEST: ("voc_2007_test",)
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0001
STEPS: (50000, )
MAX_ITER: 70000
IMS_PER_BATCH: 1
TEST:
IMS_PER_BATCH: 1
================================================
FILE: configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 300
ANCHOR_SIZES: (128, 256, 512)
ROI_BOX_HEAD:
NUM_CLASSES: 21
DATASETS:
TRAIN: ("voc_2007_train", "voc_2007_val")
TEST: ("voc_2007_test",)
SOLVER:
BASE_LR: 0.004
WEIGHT_DECAY: 0.0001
STEPS: (12500, )
MAX_ITER: 17500
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 4
================================================
FILE: configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 21
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("voc_2012_train_cocostyle",)
TEST: ("voc_2012_val_cocostyle",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
STEPS: (18000,)
MAX_ITER: 24000
================================================
FILE: configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 2
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 2
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
NUM_CLASSES: 2
ROI_KEYPOINT_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
PREDICTOR: "KeypointRCNNPredictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 56
SHARE_BOX_FEATURE_EXTRACTOR: False
KEYPOINT_ON: True
DATASETS:
TRAIN: ("keypoints_coco_2014_minival",)
TEST: ("keypoints_coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN:
PRE_NMS_TOP_N_TEST: 6000
POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
ROI_MASK_HEAD:
PREDICTOR: "MaskRCNNC4Predictor"
SHARE_BOX_FEATURE_EXTRACTOR: True
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
ROI_MASK_HEAD:
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
PREDICTOR: "MaskRCNNC4Predictor"
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 2
RESOLUTION: 28
SHARE_BOX_FEATURE_EXTRACTOR: False
MASK_ON: True
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 2
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/rpn_R_50_C4_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
RPN:
PRE_NMS_TOP_N_TEST: 12000
POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/quick_schedules/rpn_R_50_FPN_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 2000
FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (1500,)
MAX_ITER: 2000
IMS_PER_BATCH: 4
TEST:
IMS_PER_BATCH: 2
================================================
FILE: configs/retinanet/retinanet_R-101-FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800, )
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
  # Assumes 4 GPUs
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
USE_C5: False
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800, )
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 4 gpus
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/retinanet/retinanet_R-50-FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 4 gpus
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_minival",)
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (3500,)
MAX_ITER: 4000
IMS_PER_BATCH: 4
================================================
FILE: configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-50-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
USE_C5: False
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800,)
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 4 gpus
BASE_LR: 0.005
WEIGHT_DECAY: 0.0001
STEPS: (120000, 160000)
MAX_ITER: 180000
IMS_PER_BATCH: 8
================================================
FILE: configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
RPN_ONLY: True
RETINANET_ON: True
BACKBONE:
CONV_BODY: "R-101-FPN-RETINANET"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TRAIN: 2000
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 1000
FPN_POST_NMS_TOP_N_TEST: 1000
ROI_HEADS:
USE_FPN: True
BATCH_SIZE_PER_IMAGE: 256
ROI_BOX_HEAD:
POOLER_RESOLUTION: 7
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
POOLER_SAMPLING_RATIO: 2
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
PREDICTOR: "FPNPredictor"
RETINANET:
SCALES_PER_OCTAVE: 3
STRADDLE_THRESH: -1
FG_IOU_THRESHOLD: 0.5
BG_IOU_THRESHOLD: 0.4
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
INPUT:
MIN_SIZE_TRAIN: (800, )
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
# Assume 4 gpus
BASE_LR: 0.0025
WEIGHT_DECAY: 0.0001
STEPS: (240000, 320000)
MAX_ITER: 360000
IMS_PER_BATCH: 4
================================================
FILE: configs/rpn_R_101_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
RPN_ONLY: True
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 2000
FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/rpn_R_50_C4_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
RPN:
PRE_NMS_TOP_N_TEST: 12000
POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/rpn_R_50_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
RPN_ONLY: True
BACKBONE:
CONV_BODY: "R-50-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 2000
FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: configs/rpn_X_101_32x8d_FPN_1x.yaml
================================================
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
RPN_ONLY: True
BACKBONE:
CONV_BODY: "R-101-FPN"
RESNETS:
BACKBONE_OUT_CHANNELS: 256
STRIDE_IN_1X1: False
NUM_GROUPS: 32
WIDTH_PER_GROUP: 8
RPN:
USE_FPN: True
ANCHOR_STRIDE: (4, 8, 16, 32, 64)
PRE_NMS_TOP_N_TEST: 1000
POST_NMS_TOP_N_TEST: 2000
FPN_POST_NMS_TOP_N_TEST: 2000
DATASETS:
TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
TEST: ("coco_2014_minival",)
DATALOADER:
SIZE_DIVISIBILITY: 32
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
STEPS: (60000, 80000)
MAX_ITER: 90000
================================================
FILE: demo/README.md
================================================
## Webcam and Jupyter notebook demo
This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference.
### With your preferred environment
You can start it by running it from this folder, using one of the following commands:
```bash
# by default, it runs on the GPU
# for best results, use min-image-size 800
python webcam.py --min-image-size 800
# can also run it on the CPU
python webcam.py --min-image-size 300 MODEL.DEVICE cpu
# or change the model that you want to use
python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
# in order to see the probability heatmaps, pass --show-mask-heatmaps
python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
```
### With Docker
Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions)
Adjust permissions of the X server host (be careful with this step, refer to
[here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives)
```bash
xhost +
```
Then run a container with the demo:
```
docker run --rm -it \
-e DISPLAY=${DISPLAY} \
--privileged \
-v /tmp/.X11-unix:/tmp/.X11-unix \
--device=/dev/video0:/dev/video0 \
--ipc=host maskrcnn-benchmark \
python demo/webcam.py --min-image-size 300 \
--config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
```
**DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine,
the volume mapping may vary depending on your platform*
================================================
FILE: demo/fcos_demo.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import cv2, os
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
import time
def main():
    """Run FCOS inference on every image in a directory and display the results."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/fcos/fcos_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="FCOS_R_50_FPN_1x.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    cli = parser.parse_args()

    # Assemble the frozen config: yaml file, then command-line overrides,
    # then the checkpoint path supplied via --weights.
    cfg.merge_from_file(cli.config_file)
    cfg.merge_from_list(cli.opts)
    cfg.MODEL.WEIGHT = cli.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    per_class_thresholds = [
        0.23860901594161987, 0.24108672142028809, 0.2470853328704834,
        0.2316885143518448, 0.2708061933517456, 0.23173952102661133,
        0.31990334391593933, 0.21302376687526703, 0.20151866972446442,
        0.20928964018821716, 0.3793887197971344, 0.2715213894844055,
        0.2836397588253021, 0.26449233293533325, 0.1728038638830185,
        0.314998596906662, 0.28575003147125244, 0.28987520933151245,
        0.2727000117301941, 0.23306897282600403, 0.265937477350235,
        0.32663893699645996, 0.27102580666542053, 0.29177549481391907,
        0.2043062448501587, 0.24331751465797424, 0.20752687752246857,
        0.22951272130012512, 0.22753854095935822, 0.2159966081380844,
        0.1993938684463501, 0.23676514625549316, 0.20982342958450317,
        0.18315598368644714, 0.2489681988954544, 0.24793922901153564,
        0.287187397480011, 0.23045086860656738, 0.2462811917066574,
        0.21191294491291046, 0.22845126688480377, 0.24365000426769257,
        0.22687821090221405, 0.18365581333637238, 0.2035856395959854,
        0.23478077352046967, 0.18431290984153748, 0.18184082210063934,
        0.2708037495613098, 0.2268175482749939, 0.19970566034317017,
        0.21832780539989471, 0.21120598912239075, 0.270445853471756,
        0.189377561211586, 0.2101106345653534, 0.2112293541431427,
        0.23484709858894348, 0.22701986134052277, 0.20732736587524414,
        0.1953316181898117, 0.3237660229206085, 0.3078872859477997,
        0.2881140112876892, 0.38746657967567444, 0.20038367807865143,
        0.28123822808265686, 0.2588447630405426, 0.2796839773654938,
        0.266757994890213, 0.3266656696796417, 0.25759157538414,
        0.2578003704547882, 0.17009201645851135, 0.29051828384399414,
        0.24002137780189514, 0.22378061711788177, 0.26134759187698364,
        0.1730124056339264, 0.1857597529888153
    ]

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_thresholds_for_classes=per_class_thresholds,
        min_image_size=cli.min_image_size
    )

    for im_name in os.listdir(cli.images_dir):
        img = cv2.imread(os.path.join(cli.images_dir, im_name))
        if img is None:
            # skip non-image files in the directory
            continue
        tic = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        print("{}\tinference time: {:.2f}s".format(im_name, time.time() - tic))
        cv2.imshow(im_name, composite)

    print("Press any keys to exit ...")
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
================================================
FILE: demo/predictor.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import torch
from torchvision import transforms as T
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util
class COCODemo(object):
    """Inference-and-visualization helper around a maskrcnn-benchmark model.

    Wraps model construction, checkpoint loading, preprocessing and result
    rendering so that a raw OpenCV (BGR) image can be turned into an
    annotated image with a single call to :meth:`run_on_opencv_image`.
    """

    # COCO categories for pretty print. Index 0 is the background class;
    # predicted integer labels index directly into this list.
    CATEGORIES = [
        "__background",
        "person",
        "bicycle",
        "car",
        "motorcycle",
        "airplane",
        "bus",
        "train",
        "truck",
        "boat",
        "traffic light",
        "fire hydrant",
        "stop sign",
        "parking meter",
        "bench",
        "bird",
        "cat",
        "dog",
        "horse",
        "sheep",
        "cow",
        "elephant",
        "bear",
        "zebra",
        "giraffe",
        "backpack",
        "umbrella",
        "handbag",
        "tie",
        "suitcase",
        "frisbee",
        "skis",
        "snowboard",
        "sports ball",
        "kite",
        "baseball bat",
        "baseball glove",
        "skateboard",
        "surfboard",
        "tennis racket",
        "bottle",
        "wine glass",
        "cup",
        "fork",
        "knife",
        "spoon",
        "bowl",
        "banana",
        "apple",
        "sandwich",
        "orange",
        "broccoli",
        "carrot",
        "hot dog",
        "pizza",
        "donut",
        "cake",
        "chair",
        "couch",
        "potted plant",
        "bed",
        "dining table",
        "toilet",
        "tv",
        "laptop",
        "mouse",
        "remote",
        "keyboard",
        "cell phone",
        "microwave",
        "oven",
        "toaster",
        "sink",
        "refrigerator",
        "book",
        "clock",
        "vase",
        "scissors",
        "teddy bear",
        "hair drier",
        "toothbrush",
    ]

    def __init__(
        self,
        cfg,
        confidence_thresholds_for_classes,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        """
        Arguments:
            cfg: maskrcnn_benchmark config node; cloned so that later
                mutation by the caller cannot affect this object.
            confidence_thresholds_for_classes: per-class score thresholds,
                one entry per foreground class, indexed by (label - 1) in
                `select_top_predictions`.
            show_mask_heatmaps (bool): if True, render probability heatmap
                montages instead of hard mask contours.
            masks_per_dim (int): grid size (per side) of the heatmap montage.
            min_image_size (int): shortest image side fed to the model.
        """
        self.cfg = cfg.clone()
        self.model = build_detection_model(cfg)
        self.model.eval()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.model.to(self.device)
        self.min_image_size = min_image_size

        # Load weights from cfg.MODEL.WEIGHT (file, URL or catalog entry).
        save_dir = cfg.OUTPUT_DIR
        checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()

        # A negative threshold keeps the soft mask probabilities, which the
        # heatmap montage needs; 0.5 binarizes masks for contour drawing.
        mask_threshold = -1 if show_mask_heatmaps else 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)

        # used to make colors for each class
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
        self.cpu_device = torch.device("cpu")
        self.confidence_thresholds_for_classes = torch.tensor(confidence_thresholds_for_classes)
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim

    def build_transform(self):
        """
        Creates a basic transformation that was used to train the models
        """
        cfg = self.cfg

        # we are loading images with OpenCV, so we don't need to convert them
        # to BGR, they are already! So all we need to do is to normalize
        # by 255 if we want to convert to BGR255 format, or flip the channels
        # if we want it to be in RGB in [0-1] range.
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
        )

        transform = T.Compose(
            [
                T.ToPILImage(),
                T.Resize(self.min_image_size),
                T.ToTensor(),
                to_bgr_transform,
                normalize_transform,
            ]
        )
        return transform

    def run_on_opencv_image(self, image):
        """
        Run the model on one image and draw the surviving detections on it.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV

        Returns:
            np.ndarray: a copy of the input image with boxes (and, when the
            model provides them, masks/keypoints and class names) drawn on;
            or a heatmap montage when `show_mask_heatmaps` is set.
        """
        predictions = self.compute_prediction(image)
        top_predictions = self.select_top_predictions(predictions)

        result = image.copy()
        if self.show_mask_heatmaps:
            return self.create_mask_montage(result, top_predictions)
        result = self.overlay_boxes(result, top_predictions)
        if self.cfg.MODEL.MASK_ON:
            result = self.overlay_mask(result, top_predictions)
        if self.cfg.MODEL.KEYPOINT_ON:
            result = self.overlay_keypoints(result, top_predictions)
        result = self.overlay_class_names(result, top_predictions)

        return result

    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)
        # compute predictions
        with torch.no_grad():
            predictions = self.model(image_list)
        predictions = [o.to(self.cpu_device) for o in predictions]

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        # (the model worked on the resized/padded copy)
        height, width = original_image.shape[:-1]
        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

    def select_top_predictions(self, predictions):
        """
        Select only predictions whose score exceeds the per-class threshold
        in `self.confidence_thresholds_for_classes`, and return them in
        descending order of score.

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        scores = predictions.get_field("scores")
        labels = predictions.get_field("labels")
        # labels are 1-based (0 is background), thresholds are 0-indexed
        thresholds = self.confidence_thresholds_for_classes[(labels - 1).long()]
        keep = torch.nonzero(scores > thresholds).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        _, idx = scores.sort(0, descending=True)
        return predictions[idx]

    def compute_colors_for_labels(self, labels):
        """
        Simple function that adds fixed colors depending on the class
        """
        colors = labels[:, None] * self.palette
        colors = (colors % 255).numpy().astype("uint8")
        return colors

    def overlay_boxes(self, image, predictions):
        """
        Adds the predicted boxes on top of the image

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `labels`.
        """
        labels = predictions.get_field("labels")
        boxes = predictions.bbox

        colors = self.compute_colors_for_labels(labels).tolist()

        for box, color in zip(boxes, colors):
            box = box.to(torch.int64)
            top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
            image = cv2.rectangle(
                image, tuple(top_left), tuple(bottom_right), tuple(color), 2
            )

        return image

    def overlay_mask(self, image, predictions):
        """
        Adds the instances contours for each predicted object.
        Each label has a different color.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`.
        """
        masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels")

        colors = self.compute_colors_for_labels(labels).tolist()

        for mask, color in zip(masks, colors):
            thresh = mask[0, :, :, None]
            # cv2_util wraps the OpenCV 3/4 findContours signature difference
            contours, hierarchy = cv2_util.findContours(
                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            image = cv2.drawContours(image, contours, -1, color, 3)

        composite = image

        return composite

    def overlay_keypoints(self, image, predictions):
        """
        Draws the predicted person keypoints on the image.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `keypoints`.
        """
        keypoints = predictions.get_field("keypoints")
        kps = keypoints.keypoints
        scores = keypoints.get_field("logits")
        # pack (x, y, score) per keypoint; vis_keypoints expects rows
        # (x, y, score), hence the transpose below
        kps = torch.cat((kps[:, :, 0:2], scores[:, :, None]), dim=2).numpy()
        for region in kps:
            image = vis_keypoints(image, region.transpose((1, 0)))
        return image

    def create_mask_montage(self, image, predictions):
        """
        Create a montage showing the probability heatmaps for each one of the
        detected objects

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask`.
        """
        masks = predictions.get_field("mask")
        masks_per_dim = self.masks_per_dim
        # downscale each mask so masks_per_dim x masks_per_dim of them tile
        # an area the size of the original image
        masks = L.interpolate(
            masks.float(), scale_factor=1 / masks_per_dim
        ).byte()
        height, width = masks.shape[-2:]
        max_masks = masks_per_dim ** 2
        masks = masks[:max_masks]
        # handle case where we have less detections than max_masks
        if len(masks) < max_masks:
            masks_padded = torch.zeros(max_masks, 1, height, width, dtype=torch.uint8)
            masks_padded[: len(masks)] = masks
            masks = masks_padded
        masks = masks.reshape(masks_per_dim, masks_per_dim, height, width)
        result = torch.zeros(
            (masks_per_dim * height, masks_per_dim * width), dtype=torch.uint8
        )
        # paste each mask into its grid cell
        for y in range(masks_per_dim):
            start_y = y * height
            end_y = (y + 1) * height
            for x in range(masks_per_dim):
                start_x = x * width
                end_x = (x + 1) * width
                result[start_y:end_y, start_x:end_x] = masks[y, x]
        return cv2.applyColorMap(result.numpy(), cv2.COLORMAP_JET)

    def overlay_class_names(self, image, predictions):
        """
        Adds detected class names and scores in the positions defined by the
        top-left corner of the predicted bounding box

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores` and `labels`.
        """
        scores = predictions.get_field("scores").tolist()
        labels = predictions.get_field("labels").tolist()
        labels = [self.CATEGORIES[i] for i in labels]
        boxes = predictions.bbox

        template = "{}: {:.2f}"
        for box, score, label in zip(boxes, scores, labels):
            x, y = box[:2]
            s = template.format(label, score)
            cv2.putText(
                image, s, (x, y), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
            )

        return image
import numpy as np
import matplotlib.pyplot as plt
from maskrcnn_benchmark.structures.keypoint import PersonKeypoints
def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).

    Arguments:
        img (np.ndarray): BGR image to draw on (left unmodified; drawing is
            done on a copy and alpha-blended back).
        kps (np.ndarray): shape (4, #keypoints) where the 4 rows are
            (x, y, logit, prob). Only rows 0-2 are read here.
        kp_thresh (float): minimum score (row 2) for a keypoint to be drawn.
        alpha (float): blend weight of the drawn overlay.

    Returns:
        np.ndarray: the blended visualization image.
    """
    dataset_keypoints = PersonKeypoints.NAMES
    kp_lines = PersonKeypoints.CONNECTIONS

    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]

    # OpenCV drawing functions require integer pixel coordinates, but the
    # keypoint predictions are floats; round via this helper before drawing
    # (passing float tuples raises an error on OpenCV >= 4).
    def _pt(xy):
        return tuple(int(round(float(v))) for v in xy)

    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)

    # Draw mid shoulder / mid hip first for better visualization.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, _pt(mid_shoulder), _pt(kps[:2, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, _pt(mid_shoulder), _pt(mid_hip),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)

    # Draw the keypoints.
    for l in range(len(kp_lines)):
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = _pt((kps[0, i1], kps[1, i1]))
        p2 = _pt((kps[0, i2], kps[1, i2]))
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)

    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
================================================
FILE: demo/webcam.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import cv2
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
import time
def main():
    """Run COCO object detection on webcam frames and display the results.

    Command-line flags select the config file, confidence threshold, input
    size, and mask-heatmap visualisation. Press ESC in the display window
    to quit.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(0)
    try:
        while True:
            start_time = time.time()
            ret_val, img = cam.read()
            if not ret_val:
                # Camera missing/disconnected: cam.read() returns (False, None).
                # Bail out instead of passing None to the model, which crashes.
                print("Failed to grab frame from camera; exiting.")
                break
            composite = coco_demo.run_on_opencv_image(img)
            print("Time: {:.2f} s / img".format(time.time() - start_time))
            cv2.imshow("COCO detections", composite)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    finally:
        # Release the capture device and close windows even if inference fails.
        cam.release()
        cv2.destroyAllWindows()
if __name__ == "__main__":
    main()
================================================
FILE: docker/Dockerfile
================================================
# Build image for maskrcnn-benchmark / FCOS on top of NVIDIA's CUDA base image.
# NOTE: ARGs declared before FROM are only in scope for the FROM line itself;
# CUDA is re-declared further down so it can be used by later RUN steps.
ARG CUDA="9.0"
ARG CUDNN="7"
FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
# Silence interactive debconf prompts during apt installs.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
# install basics
RUN apt-get update -y \
 && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++ \
 && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev
# Install Miniconda
RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
 && chmod +x /miniconda.sh \
 && /miniconda.sh -b -p /miniconda \
 && rm /miniconda.sh
ENV PATH=/miniconda/bin:$PATH
# Create a Python 3.6 environment
RUN /miniconda/bin/conda install -y conda-build \
 && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
 && /miniconda/bin/conda clean -ya
ENV CONDA_DEFAULT_ENV=py36
ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
ENV PATH=$CONDA_PREFIX/bin:$PATH
ENV CONDA_AUTO_UPDATE_CONDA=false
RUN conda install -y ipython
RUN pip install ninja yacs cython matplotlib opencv-python tqdm
# Install PyTorch 1.0 Nightly
# Re-declare CUDA after FROM so the build arg is visible to this RUN step.
ARG CUDA
RUN conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \
 && conda clean -ya
# Install TorchVision master
RUN git clone https://github.com/pytorch/vision.git \
 && cd vision \
 && python setup.py install
# install pycocotools
RUN git clone https://github.com/cocodataset/cocoapi.git \
 && cd cocoapi/PythonAPI \
 && python setup.py build_ext install
# install PyTorch Detection
# FORCE_CUDA=1 makes the extension build CUDA kernels even though no GPU is
# visible at docker-build time.
ARG FORCE_CUDA="1"
ENV FORCE_CUDA=${FORCE_CUDA}
RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
 && cd maskrcnn-benchmark \
 && python setup.py build develop
WORKDIR /maskrcnn-benchmark
================================================
FILE: docker/docker-jupyter/Dockerfile
================================================
# Jupyter-notebook variant of the maskrcnn-benchmark image: same toolchain as
# the main Dockerfile, plus a notebook server as the container entrypoint.
ARG CUDA="9.0"
ARG CUDNN="7"
FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04
# Silence interactive debconf prompts during apt installs.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
# install basics
RUN apt-get update -y \
 && apt-get install -y apt-utils git curl ca-certificates bzip2 cmake tree htop bmon iotop g++
# Install Miniconda
RUN curl -so /miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
 && chmod +x /miniconda.sh \
 && /miniconda.sh -b -p /miniconda \
 && rm /miniconda.sh
ENV PATH=/miniconda/bin:$PATH
# Create a Python 3.6 environment
RUN /miniconda/bin/conda install -y conda-build \
 && /miniconda/bin/conda create -y --name py36 python=3.6.7 \
 && /miniconda/bin/conda clean -ya
ENV CONDA_DEFAULT_ENV=py36
ENV CONDA_PREFIX=/miniconda/envs/$CONDA_DEFAULT_ENV
ENV PATH=$CONDA_PREFIX/bin:$PATH
ENV CONDA_AUTO_UPDATE_CONDA=false
RUN conda install -y ipython
RUN pip install ninja yacs cython matplotlib jupyter
# Install PyTorch 1.0 Nightly and OpenCV
RUN conda install -y pytorch-nightly -c pytorch \
 && conda install -y opencv -c menpo \
 && conda clean -ya
WORKDIR /root
USER root
# Notebooks and cloned sources live under /notebooks.
RUN mkdir /notebooks
WORKDIR /notebooks
# Install TorchVision master
RUN git clone https://github.com/pytorch/vision.git \
 && cd vision \
 && python setup.py install
# install pycocotools
RUN git clone https://github.com/cocodataset/cocoapi.git \
 && cd cocoapi/PythonAPI \
 && python setup.py build_ext install
# install PyTorch Detection
RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \
 && cd maskrcnn-benchmark \
 && python setup.py build develop
# Generate a default notebook config, then overlay the repo-provided one.
RUN jupyter notebook --generate-config
ENV CONFIG_PATH="/root/.jupyter/jupyter_notebook_config.py"
COPY "jupyter_notebook_config.py" ${CONFIG_PATH}
ENTRYPOINT ["sh", "-c", "jupyter notebook --allow-root -y --no-browser --ip=0.0.0.0 --config=${CONFIG_PATH}"]
================================================
FILE: docker/docker-jupyter/jupyter_notebook_config.py
================================================
import os
from IPython.lib import passwd

# Jupyter notebook server configuration: listen on all interfaces, honour the
# PORT and PASSWORD environment variables, and never open a browser.
c = get_config()  # noqa: F821 -- `get_config` is injected by Jupyter at load time

c.NotebookApp.ip = '0.0.0.0'
c.NotebookApp.port = int(os.getenv('PORT', 8888))
c.NotebookApp.open_browser = False

# Configure authentication from the PASSWORD environment variable, then scrub
# it from the environment so child processes never see the secret.
if 'PASSWORD' in os.environ:
    secret = os.environ['PASSWORD']
    if secret:
        c.NotebookApp.password = passwd(secret)
    else:
        # An explicitly empty PASSWORD disables both password and token auth.
        c.NotebookApp.password = ''
        c.NotebookApp.token = ''
    del os.environ['PASSWORD']
================================================
FILE: maskrcnn_benchmark/__init__.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
================================================
FILE: maskrcnn_benchmark/config/__init__.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .defaults import _C as cfg
================================================
FILE: maskrcnn_benchmark/config/defaults.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import os
from yacs.config import CfgNode as CN
# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be either used for training or for testing, the
# corresponding name will be post-fixed by a _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
# Root config node; all defaults below can be overridden by YAML config files
# via cfg.merge_from_file / merge_from_list.
_C = CN()
_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.FCOS_ON = True
_C.MODEL.RETINANET_ON = False
_C.MODEL.KEYPOINT_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.CLS_AGNOSTIC_BBOX_REG = False
# If the WEIGHT starts with a catalog://, like :R-50, the code will look for
# the path in paths_catalog. Else, it will use it as the specified absolute
# path
_C.MODEL.WEIGHT = ""
_C.MODEL.USE_SYNCBN = False
# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)  # (800,)
# The range of the smallest side for multi-scale training
_C.INPUT.MIN_SIZE_RANGE_TRAIN = (-1, -1)  # -1 means disabled and it will use MIN_SIZE_TRAIN
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Values to be used for image normalization
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
# Values to be used for image normalization
_C.INPUT.PIXEL_STD = [1., 1., 1.]
# Convert image to BGR format (for Caffe2 models), in range 0-255
_C.INPUT.TO_BGR255 = True
# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training, as present in paths_catalog.py
_C.DATASETS.TRAIN = ()
# List of the dataset names for testing, as present in paths_catalog.py
_C.DATASETS.TEST = ()
# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If > 0, this enforces that each collated batch should have a size divisible
# by SIZE_DIVISIBILITY
_C.DATALOADER.SIZE_DIVISIBILITY = 0
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()
# The backbone conv body to use
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN
# backbone)
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
# Add StopGrad at a specified stage so the bottom layers are frozen
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
# GN for backbone
_C.MODEL.BACKBONE.USE_GN = False
# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
_C.MODEL.FPN.USE_GN = False
_C.MODEL.FPN.USE_RELU = False
# ---------------------------------------------------------------------------- #
# Group Norm options
# ---------------------------------------------------------------------------- #
_C.MODEL.GROUP_NORM = CN()
# Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS)
_C.MODEL.GROUP_NORM.DIM_PER_GP = -1
# Number of groups in GroupNorm (-1 if using DIM_PER_GP)
_C.MODEL.GROUP_NORM.NUM_GROUPS = 32
# GroupNorm's small constant in the denominator
_C.MODEL.GROUP_NORM.EPSILON = 1e-5
# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
# Base RPN anchor sizes given in absolute pixels w.r.t. the scaled network input
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
# Stride of the feature map that RPN is attached.
# For FPN, number of strides should match number of scales
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
# RPN anchor aspect ratios
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.STRADDLE_THRESH = 0
# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example)
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example)
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
# Total number of RPN examples per image
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# Proposal height and width both need to be greater than RPN_MIN_SIZE
# (at the scale used during training or inference)
_C.MODEL.RPN.MIN_SIZE = 0
# Number of top scoring RPN proposals to keep after combining proposals from
# all FPN levels
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
# Custom rpn head, empty to use default conv or separable conv
_C.MODEL.RPN.RPN_HEAD = "SingleConvRPNHead"
# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
# Overlap threshold for an RoI to be considered background
# (class = 0 if overlap in [0, BG_IOU_THRESHOLD))
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
# TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 2 * 8 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
# Only used on test mode
# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS = 0.5
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 81
# Hidden layer dimension when using an MLP for the RoI box head
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
# GN
_C.MODEL.ROI_BOX_HEAD.USE_GN = False
# Dilation
_C.MODEL.ROI_BOX_HEAD.DILATION = 1
_C.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM = 256
_C.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS = 4
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
# Whether or not resize and translate masks to the input image.
_C.MODEL.ROI_MASK_HEAD.POSTPROCESS_MASKS = False
_C.MODEL.ROI_MASK_HEAD.POSTPROCESS_MASKS_THRESHOLD = 0.5
# Dilation
_C.MODEL.ROI_MASK_HEAD.DILATION = 1
# GN
_C.MODEL.ROI_MASK_HEAD.USE_GN = False
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
_C.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR = "KeypointRCNNFeatureExtractor"
_C.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR = "KeypointRCNNPredictor"
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_KEYPOINT_HEAD.MLP_HEAD_DIM = 1024
# Eight stacked 512-channel convs for the keypoint head.
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS = tuple(512 for _ in range(8))
_C.MODEL.ROI_KEYPOINT_HEAD.RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES = 17
_C.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt})
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1
# Baseline width of each group
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
# Residual transformation function
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
# ResNet's stem function (conv1 and pool1)
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.BACKBONE_OUT_CHANNELS = 256 * 4
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
# ---------------------------------------------------------------------------- #
# FCOS Options
# ---------------------------------------------------------------------------- #
_C.MODEL.FCOS = CN()
_C.MODEL.FCOS.NUM_CLASSES = 81  # the number of classes including background
_C.MODEL.FCOS.FPN_STRIDES = [8, 16, 32, 64, 128]
_C.MODEL.FCOS.PRIOR_PROB = 0.01
_C.MODEL.FCOS.INFERENCE_TH = 0.05
_C.MODEL.FCOS.NMS_TH = 0.6
_C.MODEL.FCOS.PRE_NMS_TOP_N = 1000
# Focal loss parameter: alpha
_C.MODEL.FCOS.LOSS_ALPHA = 0.25
# Focal loss parameter: gamma
_C.MODEL.FCOS.LOSS_GAMMA = 2.0
_C.MODEL.FCOS.CENTER_SAMPLE = False
_C.MODEL.FCOS.POS_RADIUS = 1.5
_C.MODEL.FCOS.LOC_LOSS_TYPE = 'iou'
_C.MODEL.FCOS.DENSE_POINTS = 1
# the number of convolutions used in the cls and bbox tower
_C.MODEL.FCOS.NUM_CONVS = 4
# ---------------------------------------------------------------------------- #
# RetinaNet Options (Follow the Detectron version)
# ---------------------------------------------------------------------------- #
_C.MODEL.RETINANET = CN()
# This is the number of foreground classes and background.
_C.MODEL.RETINANET.NUM_CLASSES = 81
# Anchor aspect ratios to use
_C.MODEL.RETINANET.ANCHOR_SIZES = (32, 64, 128, 256, 512)
_C.MODEL.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
_C.MODEL.RETINANET.ANCHOR_STRIDES = (8, 16, 32, 64, 128)
_C.MODEL.RETINANET.STRADDLE_THRESH = 0
# Anchor scales per octave
_C.MODEL.RETINANET.OCTAVE = 2.0
_C.MODEL.RETINANET.SCALES_PER_OCTAVE = 3
# Use C5 or P5 to generate P6
_C.MODEL.RETINANET.USE_C5 = True
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
_C.MODEL.RETINANET.NUM_CONVS = 4
# Weight for bbox_regression loss
_C.MODEL.RETINANET.BBOX_REG_WEIGHT = 4.0
# Smooth L1 loss beta for bbox regression
_C.MODEL.RETINANET.BBOX_REG_BETA = 0.11
# During inference, #locs to select based on cls score before NMS is performed
# per FPN level
_C.MODEL.RETINANET.PRE_NMS_TOP_N = 1000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
_C.MODEL.RETINANET.FG_IOU_THRESHOLD = 0.5
# IoU overlap ratio for labeling an anchor as negative
# Anchors with < iou overlap are labeled negative
_C.MODEL.RETINANET.BG_IOU_THRESHOLD = 0.4
# Focal loss parameter: alpha
_C.MODEL.RETINANET.LOSS_ALPHA = 0.25
# Focal loss parameter: gamma
_C.MODEL.RETINANET.LOSS_GAMMA = 2.0
# Prior prob for the positives at the beginning of training. This is used to set
# the bias init for the logits layer
_C.MODEL.RETINANET.PRIOR_PROB = 0.01
# Inference cls score threshold, anchors with score > INFERENCE_TH are
# considered for inference
_C.MODEL.RETINANET.INFERENCE_TH = 0.05
# NMS threshold used in RetinaNet
_C.MODEL.RETINANET.NMS_TH = 0.4
# ---------------------------------------------------------------------------- #
# FBNet options
# ---------------------------------------------------------------------------- #
_C.MODEL.FBNET = CN()
_C.MODEL.FBNET.ARCH = "default"
# custom arch
_C.MODEL.FBNET.ARCH_DEF = ""
_C.MODEL.FBNET.BN_TYPE = "bn"
_C.MODEL.FBNET.SCALE_FACTOR = 1.0
# the output channels will be divisible by WIDTH_DIVISOR
_C.MODEL.FBNET.WIDTH_DIVISOR = 1
_C.MODEL.FBNET.DW_CONV_SKIP_BN = True
_C.MODEL.FBNET.DW_CONV_SKIP_RELU = True
# > 0 scale, == 0 skip, < 0 same dimension
_C.MODEL.FBNET.DET_HEAD_LAST_SCALE = 1.0
_C.MODEL.FBNET.DET_HEAD_BLOCKS = []
# overwrite the stride for the head, 0 to use original value
_C.MODEL.FBNET.DET_HEAD_STRIDE = 0
# > 0 scale, == 0 skip, < 0 same dimension
_C.MODEL.FBNET.KPTS_HEAD_LAST_SCALE = 0.0
_C.MODEL.FBNET.KPTS_HEAD_BLOCKS = []
# overwrite the stride for the head, 0 to use original value
_C.MODEL.FBNET.KPTS_HEAD_STRIDE = 0
# > 0 scale, == 0 skip, < 0 same dimension
_C.MODEL.FBNET.MASK_HEAD_LAST_SCALE = 0.0
_C.MODEL.FBNET.MASK_HEAD_BLOCKS = []
# overwrite the stride for the head, 0 to use original value
_C.MODEL.FBNET.MASK_HEAD_STRIDE = 0
# 0 to use all blocks defined in arch_def
_C.MODEL.FBNET.RPN_HEAD_BLOCKS = 0
_C.MODEL.FBNET.RPN_BN_TYPE = ""
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000
_C.SOLVER.BASE_LR = 0.001
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500
_C.SOLVER.WARMUP_METHOD = "linear"
_C.SOLVER.CHECKPOINT_PERIOD = 2500
# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.SOLVER.IMS_PER_BATCH = 16
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.TEST.IMS_PER_BATCH = 8
# Number of detections per image
_C.TEST.DETECTIONS_PER_IMG = 100
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
_C.OUTPUT_DIR = "."
# Path to the dataset/model catalog module, resolved next to this file.
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")
================================================
FILE: maskrcnn_benchmark/config/paths_catalog.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Centralized catalog of paths."""
import os
class DatasetCatalog(object):
    """Static registry mapping dataset names to on-disk locations.

    Each entry holds the arguments needed to construct the corresponding
    dataset object; `get` resolves them relative to DATA_DIR and reports
    which dataset factory class should consume them.
    """

    DATA_DIR = "datasets"
    DATASETS = {
        "coco_2017_train": {
            "img_dir": "coco/train2017",
            "ann_file": "coco/annotations/instances_train2017.json"
        },
        "coco_2017_val": {
            "img_dir": "coco/val2017",
            "ann_file": "coco/annotations/instances_val2017.json"
        },
        "coco_2014_train": {
            "img_dir": "coco/train2014",
            "ann_file": "coco/annotations/instances_train2014.json"
        },
        "coco_2014_val": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/instances_val2014.json"
        },
        "coco_2014_minival": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/instances_minival2014.json"
        },
        "coco_2014_valminusminival": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/instances_valminusminival2014.json"
        },
        "keypoints_coco_2014_train": {
            "img_dir": "coco/train2014",
            "ann_file": "coco/annotations/person_keypoints_train2014.json",
        },
        "keypoints_coco_2014_val": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/person_keypoints_val2014.json"
        },
        "keypoints_coco_2014_minival": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/person_keypoints_minival2014.json",
        },
        "keypoints_coco_2014_valminusminival": {
            "img_dir": "coco/val2014",
            "ann_file": "coco/annotations/person_keypoints_valminusminival2014.json",
        },
        "voc_2007_train": {
            "data_dir": "voc/VOC2007",
            "split": "train"
        },
        "voc_2007_train_cocostyle": {
            "img_dir": "voc/VOC2007/JPEGImages",
            "ann_file": "voc/VOC2007/Annotations/pascal_train2007.json"
        },
        "voc_2007_val": {
            "data_dir": "voc/VOC2007",
            "split": "val"
        },
        "voc_2007_val_cocostyle": {
            "img_dir": "voc/VOC2007/JPEGImages",
            "ann_file": "voc/VOC2007/Annotations/pascal_val2007.json"
        },
        "voc_2007_test": {
            "data_dir": "voc/VOC2007",
            "split": "test"
        },
        "voc_2007_test_cocostyle": {
            "img_dir": "voc/VOC2007/JPEGImages",
            "ann_file": "voc/VOC2007/Annotations/pascal_test2007.json"
        },
        "voc_2012_train": {
            "data_dir": "voc/VOC2012",
            "split": "train"
        },
        "voc_2012_train_cocostyle": {
            "img_dir": "voc/VOC2012/JPEGImages",
            "ann_file": "voc/VOC2012/Annotations/pascal_train2012.json"
        },
        "voc_2012_val": {
            "data_dir": "voc/VOC2012",
            "split": "val"
        },
        "voc_2012_val_cocostyle": {
            "img_dir": "voc/VOC2012/JPEGImages",
            "ann_file": "voc/VOC2012/Annotations/pascal_val2012.json"
        },
        "voc_2012_test": {
            "data_dir": "voc/VOC2012",
            "split": "test"
            # PASCAL VOC2012 never released the test annotations, so there is no json annotation
        },
        "cityscapes_fine_instanceonly_seg_train_cocostyle": {
            "img_dir": "cityscapes/images",
            "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_train.json"
        },
        "cityscapes_fine_instanceonly_seg_val_cocostyle": {
            "img_dir": "cityscapes/images",
            "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_val.json"
        },
        "cityscapes_fine_instanceonly_seg_test_cocostyle": {
            "img_dir": "cityscapes/images",
            "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_test.json"
        }
    }

    @staticmethod
    def get(name):
        """Return {"factory": ..., "args": ...} for the dataset *name*.

        Note that any name containing "coco" (including the VOC/Cityscapes
        "*_cocostyle" entries) is served by the COCO branch; plain VOC names
        go through the PascalVOC branch.
        """
        base_dir = DatasetCatalog.DATA_DIR
        if "coco" in name:
            attrs = DatasetCatalog.DATASETS[name]
            return {
                "factory": "COCODataset",
                "args": {
                    "root": os.path.join(base_dir, attrs["img_dir"]),
                    "ann_file": os.path.join(base_dir, attrs["ann_file"]),
                },
            }
        elif "voc" in name:
            attrs = DatasetCatalog.DATASETS[name]
            return {
                "factory": "PascalVOCDataset",
                "args": {
                    "data_dir": os.path.join(base_dir, attrs["data_dir"]),
                    "split": attrs["split"],
                },
            }
        raise RuntimeError("Dataset not available: {}".format(name))
class ModelCatalog(object):
    """Static registry translating symbolic model names into download URLs."""

    S3_C2_DETECTRON_URL = "https://dl.fbaipublicfiles.com/detectron"
    C2_IMAGENET_MODELS = {
        "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl",
        "MSRA/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl",
        "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl",
        "MSRA/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl",
        "FAIR/20171220/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl",
        "FAIR/20171220/X-101-64x4d": "ImageNetPretrained/20171220/X-101-64x4d.pkl",
    }
    C2_DETECTRON_SUFFIX = "output/train/{}coco_2014_train%3A{}coco_2014_valminusminival/generalized_rcnn/model_final.pkl"
    C2_DETECTRON_MODELS = {
        "35857197/e2e_faster_rcnn_R-50-C4_1x": "01_33_49.iAX0mXvW",
        "35857345/e2e_faster_rcnn_R-50-FPN_1x": "01_36_30.cUF7QR7I",
        "35857890/e2e_faster_rcnn_R-101-FPN_1x": "01_38_50.sNxI7sX7",
        "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "06_31_39.5MIHi1fZ",
        "35858791/e2e_mask_rcnn_R-50-C4_1x": "01_45_57.ZgkA7hPB",
        "35858933/e2e_mask_rcnn_R-50-FPN_1x": "01_48_14.DzEQe4wC",
        "35861795/e2e_mask_rcnn_R-101-FPN_1x": "02_31_37.KqyEK4tT",
        "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "06_35_59.RZotkLKI",
        "37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x": "09_35_36.8pzTQKYK",
        # keypoints
        "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "08_42_54.kdzV35ao"
    }

    @staticmethod
    def get(name):
        """Resolve a catalog identifier to its URL, dispatching on the prefix."""
        dispatch = (
            ("Caffe2Detectron/COCO", ModelCatalog.get_c2_detectron_12_2017_baselines),
            ("ImageNetPretrained", ModelCatalog.get_c2_imagenet_pretrained),
        )
        for prefix, resolver in dispatch:
            if name.startswith(prefix):
                return resolver(name)
        raise RuntimeError("model not present in the catalog {}".format(name))

    @staticmethod
    def get_c2_imagenet_pretrained(name):
        """Build the URL for an ImageNet-pretrained backbone checkpoint."""
        key = name[len("ImageNetPretrained/"):]
        relative_path = ModelCatalog.C2_IMAGENET_MODELS[key]
        return "/".join([ModelCatalog.S3_C2_DETECTRON_URL, relative_path])

    @staticmethod
    def get_c2_detectron_12_2017_baselines(name):
        """Build the URL for a Detectron 12_2017 baseline checkpoint.

        Detectron C2 models are stored following the structure
        prefix/<model_id>/12_2017_baselines/<model_name>.yaml.<signature>/suffix
        and identified in the catalog as Caffe2Detectron/COCO/<model_id>/<model_name>.
        """
        prefix = ModelCatalog.S3_C2_DETECTRON_URL
        # Keypoint models were trained on the person-keypoints splits.
        dataset_tag = "keypoints_" if "keypoint" in name else ""
        suffix = ModelCatalog.C2_DETECTRON_SUFFIX.format(dataset_tag, dataset_tag)
        key = name[len("Caffe2Detectron/COCO/"):]
        model_id, model_name = key.split("/")
        # Match the on-server naming: <model_name>.yaml.<signature>
        unique_name = ".".join(["{}.yaml".format(model_name), ModelCatalog.C2_DETECTRON_MODELS[key]])
        return "/".join([prefix, model_id, "12_2017_baselines", unique_name, suffix])
================================================
FILE: maskrcnn_benchmark/csrc/ROIAlign.h
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include "cpu/vision.h"
#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif
// Interface for Python
// Dispatch the ROIAlign forward pass: route to the CUDA kernel when the input
// tensor is on the GPU (and the extension was built WITH_CUDA), otherwise fall
// back to the CPU implementation.
at::Tensor ROIAlign_forward(const at::Tensor& input,
                            const at::Tensor& rois,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int sampling_ratio) {
  if (input.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
#else
    // Tensor lives on GPU but the extension was compiled CPU-only.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
}
// Dispatch the ROIAlign backward pass. Only a CUDA implementation exists;
// calling this with a CPU gradient tensor raises an error.
at::Tensor ROIAlign_backward(const at::Tensor& grad,
                             const at::Tensor& rois,
                             const float spatial_scale,
                             const int pooled_height,
                             const int pooled_width,
                             const int batch_size,
                             const int channels,
                             const int height,
                             const int width,
                             const int sampling_ratio) {
  if (grad.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU backward kernel is provided in this project.
  AT_ERROR("Not implemented on the CPU");
}
================================================
FILE: maskrcnn_benchmark/csrc/ROIPool.h
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include "cpu/vision.h"
#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif
// Dispatch the ROIPool forward pass; returns the pooled output together with
// the argmax indices needed by the backward pass. CUDA-only: a CPU input
// raises an error.
std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
                                                   const at::Tensor& rois,
                                                   const float spatial_scale,
                                                   const int pooled_height,
                                                   const int pooled_width) {
  if (input.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU forward kernel is provided for ROIPool in this project.
  AT_ERROR("Not implemented on the CPU");
}
// ROIPool backward dispatcher. Routes gradients through the recorded argmax
// indices; only a CUDA implementation exists.
at::Tensor ROIPool_backward(const at::Tensor& grad,
                            const at::Tensor& input,
                            const at::Tensor& rois,
                            const at::Tensor& argmax,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int batch_size,
                            const int channels,
                            const int height,
                            const int width) {
  if (!grad.type().is_cuda()) {
    AT_ERROR("Not implemented on the CPU");
  }
#ifdef WITH_CUDA
  return ROIPool_backward_cuda(
      grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width,
      batch_size, channels, height, width);
#else
  AT_ERROR("Not compiled with GPU support");
#endif
}
================================================
FILE: maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
================================================
#pragma once
#include "cpu/vision.h"
#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif
// Interface for Python: sigmoid focal loss forward pass. Only a CUDA
// implementation exists.
at::Tensor SigmoidFocalLoss_forward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (!logits.type().is_cuda()) {
    AT_ERROR("Not implemented on the CPU");
  }
#ifdef WITH_CUDA
  return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
#else
  AT_ERROR("Not compiled with GPU support");
#endif
}
// Sigmoid focal loss backward pass. `d_losses` is the incoming gradient of
// the loss; only a CUDA implementation exists.
at::Tensor SigmoidFocalLoss_backward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const at::Tensor& d_losses,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (!logits.type().is_cuda()) {
    AT_ERROR("Not implemented on the CPU");
  }
#ifdef WITH_CUDA
  return SigmoidFocalLoss_backward_cuda(
      logits, targets, d_losses, num_classes, gamma, alpha);
#else
  AT_ERROR("Not compiled with GPU support");
#endif
}
================================================
FILE: maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "cpu/vision.h"
// implementation taken from Caffe2
// Precomputed bilinear-interpolation data for one sampling point:
// the four neighbouring pixel offsets and their interpolation weights.
template <typename T>
struct PreCalc {
// flattened offsets of the four neighbours (y * width + x)
int pos1;
int pos2;
int pos3;
int pos4;
// bilinear weights for the four neighbours; all zero when the
// sampling point falls outside the feature map
T w1;
T w2;
T w3;
T w4;
};
// Precompute, for every (ph, pw, iy, ix) sampling point of one RoI, the four
// neighbour offsets and bilinear weights. The results are shared across all
// channels of the RoI, which is the key optimization of the CPU kernel.
// `pre_calc` must already be sized to
// pooled_height * pooled_width * iy_upper * ix_upper.
template <typename T>
void pre_calc_for_bilinear_interpolate(
const int height,
const int width,
const int pooled_height,
const int pooled_width,
const int iy_upper,
const int ix_upper,
T roi_start_h,
T roi_start_w,
T bin_size_h,
T bin_size_w,
int roi_bin_grid_h,
int roi_bin_grid_w,
std::vector<PreCalc<T>>& pre_calc) {
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
for (int iy = 0; iy < iy_upper; iy++) {
// y coordinate of the iy-th sample inside bin (ph, pw)
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < ix_upper; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
T x = xx;
T y = yy;
// deal with: inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty: record zero weights so the sample contributes nothing
PreCalc<T> pc;
pc.pos1 = 0;
pc.pos2 = 0;
pc.pos3 = 0;
pc.pos4 = 0;
pc.w1 = 0;
pc.w2 = 0;
pc.w3 = 0;
pc.w4 = 0;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
continue;
}
// clamp slightly-negative coordinates to the map edge
if (y <= 0) {
y = 0;
}
if (x <= 0) {
x = 0;
}
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
// clamp to the last valid row/column; degenerate cell when on the edge
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
// fractional offsets and the resulting bilinear weights
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
// save weights and indeces
PreCalc<T> pc;
pc.pos1 = y_low * width + x_low;
pc.pos2 = y_low * width + x_high;
pc.pos3 = y_high * width + x_low;
pc.pos4 = y_high * width + x_high;
pc.w1 = w1;
pc.w2 = w2;
pc.w3 = w3;
pc.w4 = w4;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
}
}
}
}
}
// CPU RoIAlign forward kernel. For each RoI, precomputes the bilinear
// sampling weights once (shared by all channels) and then averages the
// sampled values into each output bin.
// `bottom_rois` rows are (batch_index, x1, y1, x2, y2) in input-image
// coordinates; `nthreads` is the total number of output elements
// (n_rois * channels * pooled_height * pooled_width).
template <typename T>
void ROIAlignForward_cpu_kernel(
const int nthreads,
const T* bottom_data,
const T& spatial_scale,
const int channels,
const int height,
const int width,
const int pooled_height,
const int pooled_width,
const int sampling_ratio,
const T* bottom_rois,
//int roi_cols,
T* top_data) {
//AT_ASSERT(roi_cols == 4 || roi_cols == 5);
// rows are fixed at 5 columns here: (batch_index, x1, y1, x2, y2)
int roi_cols = 5;
int n_rois = nthreads / channels / pooled_width / pooled_height;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
// #pragma omp parallel for num_threads(32)
for (int n = 0; n < n_rois; n++) {
int index_n = n * channels * pooled_width * pooled_height;
// roi could have 4 or 5 columns
const T* offset_bottom_rois = bottom_rois + n * roi_cols;
int roi_batch_ind = 0;
if (roi_cols == 5) {
// first column is the batch index; advance to the box coordinates
roi_batch_ind = offset_bottom_rois[0];
offset_bottom_rois++;
}
// Do not using rounding; this implementation detail is critical
T roi_start_w = offset_bottom_rois[0] * spatial_scale;
T roi_start_h = offset_bottom_rois[1] * spatial_scale;
T roi_end_w = offset_bottom_rois[2] * spatial_scale;
T roi_end_h = offset_bottom_rois[3] * spatial_scale;
// T roi_start_w = round(offset_bottom_rois[0] * spatial_scale);
// T roi_start_h = round(offset_bottom_rois[1] * spatial_scale);
// T roi_end_w = round(offset_bottom_rois[2] * spatial_scale);
// T roi_end_h = round(offset_bottom_rois[3] * spatial_scale);
// Force malformed ROIs to be 1x1
T roi_width = std::max(roi_end_w - roi_start_w, (T)1.);
T roi_height = std::max(roi_end_h - roi_start_h, (T)1.);
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceil(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
// We do average (integral) pooling inside a bin
const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
// we want to precalculate indeces and weights shared by all chanels,
// this is the key point of optimiation
std::vector<PreCalc<T>> pre_calc(
roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
pre_calc_for_bilinear_interpolate(
height,
width,
pooled_height,
pooled_width,
roi_bin_grid_h,
roi_bin_grid_w,
roi_start_h,
roi_start_w,
bin_size_h,
bin_size_w,
roi_bin_grid_h,
roi_bin_grid_w,
pre_calc);
for (int c = 0; c < channels; c++) {
int index_n_c = index_n + c * pooled_width * pooled_height;
// channel plane of the batch entry this RoI belongs to
const T* offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
int index = index_n_c + ph * pooled_width + pw;
T output_val = 0.;
// average the bilinear samples of this bin using the cached weights
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
PreCalc<T> pc = pre_calc[pre_calc_index];
output_val += pc.w1 * offset_bottom_data[pc.pos1] +
pc.w2 * offset_bottom_data[pc.pos2] +
pc.w3 * offset_bottom_data[pc.pos3] +
pc.w4 * offset_bottom_data[pc.pos4];
pre_calc_index += 1;
}
}
output_val /= count;
top_data[index] = output_val;
} // for pw
} // for ph
} // for c
} // for n
}
// ATen entry point for CPU RoIAlign forward.
// input: (batch, channels, height, width); rois: (num_rois, 5) with rows
// (batch_index, x1, y1, x2, y2). Returns a
// (num_rois, channels, pooled_height, pooled_width) tensor.
at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width,
const int sampling_ratio) {
AT_ASSERTM(!input.type().is_cuda(), "input must be a CPU tensor");
AT_ASSERTM(!rois.type().is_cuda(), "rois must be a CPU tensor");
auto num_rois = rois.size(0);
auto channels = input.size(1);
auto height = input.size(2);
auto width = input.size(3);
auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options());
auto output_size = num_rois * pooled_height * pooled_width * channels;
// nothing to compute for zero RoIs; return the empty output directly
if (output.numel() == 0) {
return output;
}
// dispatch on the floating scalar type (float/double) of `input`
AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] {
ROIAlignForward_cpu_kernel<scalar_t>(
output_size,
input.data<scalar_t>(),
spatial_scale,
channels,
height,
width,
pooled_height,
pooled_width,
sampling_ratio,
rois.data<scalar_t>(),
output.data<scalar_t>());
});
return output;
}
================================================
FILE: maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "cpu/vision.h"
// Greedy non-maximum suppression on CPU.
// dets: (N, 4) boxes as (x1, y1, x2, y2); scores: (N,).
// Returns the indices of the kept boxes (those not suppressed by a
// higher-scoring box with IoU >= threshold). Areas use the legacy
// "+1" pixel convention.
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold) {
AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
// empty input -> empty index tensor
if (dets.numel() == 0) {
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
}
// contiguous per-coordinate views so raw pointers can be indexed directly
auto x1_t = dets.select(1, 0).contiguous();
auto y1_t = dets.select(1, 1).contiguous();
auto x2_t = dets.select(1, 2).contiguous();
auto y2_t = dets.select(1, 3).contiguous();
at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
// process boxes in descending score order
auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
auto ndets = dets.size(0);
at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
auto suppressed = suppressed_t.data<uint8_t>();
auto order = order_t.data<int64_t>();
auto x1 = x1_t.data<scalar_t>();
auto y1 = y1_t.data<scalar_t>();
auto x2 = x2_t.data<scalar_t>();
auto y2 = y2_t.data<scalar_t>();
auto areas = areas_t.data<scalar_t>();
for (int64_t _i = 0; _i < ndets; _i++) {
auto i = order[_i];
// skip boxes already suppressed by an earlier (higher-scoring) box
if (suppressed[i] == 1)
continue;
auto ix1 = x1[i];
auto iy1 = y1[i];
auto ix2 = x2[i];
auto iy2 = y2[i];
auto iarea = areas[i];
// suppress every lower-scoring box overlapping box i too much
for (int64_t _j = _i + 1; _j < ndets; _j++) {
auto j = order[_j];
if (suppressed[j] == 1)
continue;
// intersection rectangle of boxes i and j
auto xx1 = std::max(ix1, x1[j]);
auto yy1 = std::max(iy1, y1[j]);
auto xx2 = std::min(ix2, x2[j]);
auto yy2 = std::min(iy2, y2[j]);
auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
auto inter = w * h;
// intersection over union
auto ovr = inter / (iarea + areas[j] - inter);
if (ovr >= threshold)
suppressed[j] = 1;
}
}
// indices of boxes that survived suppression
return at::nonzero(suppressed_t == 0).squeeze(1);
}
// Type-dispatching entry point for CPU NMS: runs nms_cpu_kernel with the
// floating scalar type of `dets` and returns the kept box indices.
at::Tensor nms_cpu(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold) {
at::Tensor result;
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
});
return result;
}
================================================
FILE: maskrcnn_benchmark/csrc/cpu/vision.h
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include <torch/extension.h>
// CPU RoIAlign forward; defined in cpu/ROIAlign_cpu.cpp.
at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width,
const int sampling_ratio);
// CPU non-maximum suppression; defined in cpu/nms_cpu.cpp.
at::Tensor nms_cpu(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold);
================================================
FILE: maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
// TODO make it in a common file
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
// Bilinearly interpolate bottom_data (a single height x width plane) at the
// continuous coordinate (y, x). Points outside [-1, height/width] contribute 0.
template <typename T>
__device__ T bilinear_interpolate(const T* bottom_data,
const int height, const int width,
T y, T x,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
//empty
return 0;
}
// clamp slightly-negative coordinates onto the map
if (y <= 0) y = 0;
if (x <= 0) x = 0;
int y_low = (int) y;
int x_low = (int) x;
int y_high;
int x_high;
// clamp to the last valid row/column (degenerate cell on the edge)
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T) y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T) x_low;
} else {
x_high = x_low + 1;
}
// fractional offsets within the cell
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// do bilinear interpolation
T v1 = bottom_data[y_low * width + x_low];
T v2 = bottom_data[y_low * width + x_high];
T v3 = bottom_data[y_high * width + x_low];
T v4 = bottom_data[y_high * width + x_high];
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
// CUDA RoIAlign forward kernel: one thread per output element (n, c, ph, pw).
// Each thread averages a grid of bilinear samples inside its output bin.
// bottom_rois rows are (batch_index, x1, y1, x2, y2).
template <typename T>
__global__ void RoIAlignForward(const int nthreads, const T* bottom_data,
const T spatial_scale, const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const int sampling_ratio,
const T* bottom_rois, T* top_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T* offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
// Do not using rounding; this implementation detail is critical
T roi_start_w = offset_bottom_rois[1] * spatial_scale;
T roi_start_h = offset_bottom_rois[2] * spatial_scale;
T roi_end_w = offset_bottom_rois[3] * spatial_scale;
T roi_end_h = offset_bottom_rois[4] * spatial_scale;
// T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
// T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
// T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
// T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
// Force malformed ROIs to be 1x1
T roi_width = max(roi_end_w - roi_start_w, (T)1.);
T roi_height = max(roi_end_h - roi_start_h, (T)1.);
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// channel plane of the batch entry this RoI belongs to
const T* offset_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
// We do average (integral) pooling inside a bin
const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
T output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
{
const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix ++)
{
const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, index);
output_val += val;
}
}
// average pooling over the sample grid
output_val /= count;
top_data[index] = output_val;
}
}
// Compute the four bilinear weights (w1..w4) and neighbour coordinates for a
// continuous point (y, x); used to scatter gradients in the backward pass.
// Out-of-bounds points return zero weights and coordinates of -1 so the
// caller can skip the atomicAdd.
template <typename T>
__device__ void bilinear_interpolate_gradient(
const int height, const int width,
T y, T x,
T & w1, T & w2, T & w3, T & w4,
int & x_low, int & x_high, int & y_low, int & y_high,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
//empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}
// clamp slightly-negative coordinates onto the map
if (y <= 0) y = 0;
if (x <= 0) x = 0;
y_low = (int) y;
x_low = (int) x;
// clamp to the last valid row/column (degenerate cell on the edge)
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T) y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T) x_low;
} else {
x_high = x_low + 1;
}
// fractional offsets within the cell
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// reference in forward
// T v1 = bottom_data[y_low * width + x_low];
// T v2 = bottom_data[y_low * width + x_high];
// T v3 = bottom_data[y_high * width + x_low];
// T v4 = bottom_data[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
return;
}
// CUDA RoIAlign backward kernel: one thread per top (output) element.
// Each thread recomputes its bin's sample grid and scatters the incoming
// gradient to the four bilinear neighbours of every sample via atomicAdd
// (several RoIs/bins may touch the same input pixel).
template <typename T>
__global__ void RoIAlignBackwardFeature(const int nthreads, const T* top_diff,
const int num_rois, const T spatial_scale,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width,
const int sampling_ratio,
T* bottom_diff,
const T* bottom_rois) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T* offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
// Do not using rounding; this implementation detail is critical
T roi_start_w = offset_bottom_rois[1] * spatial_scale;
T roi_start_h = offset_bottom_rois[2] * spatial_scale;
T roi_end_w = offset_bottom_rois[3] * spatial_scale;
T roi_end_h = offset_bottom_rois[4] * spatial_scale;
// T roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
// T roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
// T roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
// T roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
// Force malformed ROIs to be 1x1
T roi_width = max(roi_end_w - roi_start_w, (T)1.);
T roi_height = max(roi_end_h - roi_start_h, (T)1.);
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// gradient plane of the input this RoI/channel maps to
T* offset_bottom_diff = bottom_diff + (roi_batch_ind * channels + c) * height * width;
int top_offset = (n * channels + c) * pooled_height * pooled_width;
const T* offset_top_diff = top_diff + top_offset;
// incoming gradient for this output bin
const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w = (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
// We do average (integral) pooling inside a bin
const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
for (int iy = 0; iy < roi_bin_grid_h; iy ++) // e.g., iy = 0, 1
{
const T y = roi_start_h + ph * bin_size_h + static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix ++)
{
const T x = roi_start_w + pw * bin_size_w + static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
T w1, w2, w3, w4;
int x_low, x_high, y_low, y_high;
bilinear_interpolate_gradient(height, width, y, x,
w1, w2, w3, w4,
x_low, x_high, y_low, y_high,
index);
// gradient split across the four neighbours, averaged over the grid
T g1 = top_diff_this_bin * w1 / count;
T g2 = top_diff_this_bin * w2 / count;
T g3 = top_diff_this_bin * w3 / count;
T g4 = top_diff_this_bin * w4 / count;
// coordinates are -1 when the sample was out of bounds: skip
if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0)
{
atomicAdd(offset_bottom_diff + y_low * width + x_low, static_cast<T>(g1));
atomicAdd(offset_bottom_diff + y_low * width + x_high, static_cast<T>(g2));
atomicAdd(offset_bottom_diff + y_high * width + x_low, static_cast<T>(g3));
atomicAdd(offset_bottom_diff + y_high * width + x_high, static_cast<T>(g4));
} // if
} // ix
} // iy
} // CUDA_1D_KERNEL_LOOP
} // RoIAlignBackward
// ATen entry point for CUDA RoIAlign forward. Launches RoIAlignForward with
// one thread per output element and returns a
// (num_rois, channels, pooled_height, pooled_width) tensor.
at::Tensor ROIAlign_forward_cuda(const at::Tensor& input,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width,
const int sampling_ratio) {
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor");
auto num_rois = rois.size(0);
auto channels = input.size(1);
auto height = input.size(2);
auto width = input.size(3);
auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options());
auto output_size = num_rois * pooled_height * pooled_width * channels;
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// 512 threads per block; grid capped at 4096 blocks (kernel grid-loops)
dim3 grid(std::min(THCCeilDiv((long)output_size, 512L), 4096L));
dim3 block(512);
// nothing to launch for zero RoIs
if (output.numel() == 0) {
THCudaCheck(cudaGetLastError());
return output;
}
AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] {
RoIAlignForward<scalar_t><<<grid, block, 0, stream>>>(
output_size,
input.contiguous().data<scalar_t>(),
spatial_scale,
channels,
height,
width,
pooled_height,
pooled_width,
sampling_ratio,
rois.contiguous().data<scalar_t>(),
output.data<scalar_t>());
});
THCudaCheck(cudaGetLastError());
return output;
}
// TODO remove the dependency on input and use instead its sizes -> save memory
// ATen entry point for CUDA RoIAlign backward. Allocates a zeroed
// (batch_size, channels, height, width) gradient tensor and launches
// RoIAlignBackwardFeature to scatter `grad` into it.
at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width,
const int batch_size,
const int channels,
const int height,
const int width,
const int sampling_ratio) {
AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor");
AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor");
auto num_rois = rois.size(0);
// zeros, because the kernel accumulates with atomicAdd
auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// 512 threads per block; grid capped at 4096 blocks (kernel grid-loops)
dim3 grid(std::min(THCCeilDiv((long)grad.numel(), 512L), 4096L));
dim3 block(512);
// handle possibly empty gradients
if (grad.numel() == 0) {
THCudaCheck(cudaGetLastError());
return grad_input;
}
AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] {
RoIAlignBackwardFeature<scalar_t><<<grid, block, 0, stream>>>(
grad.numel(),
grad.contiguous().data<scalar_t>(),
num_rois,
spatial_scale,
channels,
height,
width,
pooled_height,
pooled_width,
sampling_ratio,
grad_input.data<scalar_t>(),
rois.contiguous().data<scalar_t>());
});
THCudaCheck(cudaGetLastError());
return grad_input;
}
================================================
FILE: maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu
================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
// TODO make it in a common file
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
// CUDA RoIPool forward kernel: one thread per output element (n, c, ph, pw).
// Each thread max-pools its quantized bin and records the argmax position
// (flattened h * width + w within the channel plane) for the backward pass.
// bottom_rois rows are (batch_index, x1, y1, x2, y2).
template <typename T>
__global__ void RoIPoolFForward(const int nthreads, const T* bottom_data,
const T spatial_scale, const int channels, const int height,
const int width, const int pooled_height, const int pooled_width,
const T* bottom_rois, T* top_data, int* argmax_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T* offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
// unlike RoIAlign, RoIPool quantizes the box to integer coordinates
int roi_start_w = round(offset_bottom_rois[1] * spatial_scale);
int roi_start_h = round(offset_bottom_rois[2] * spatial_scale);
int roi_end_w = round(offset_bottom_rois[3] * spatial_scale);
int roi_end_h = round(offset_bottom_rois[4] * spatial_scale);
// Force malformed ROIs to be 1x1
int roi_width = max(roi_end_w - roi_start_w + 1, 1);
int roi_height = max(roi_end_h - roi_start_h + 1, 1);
T bin_size_h = static_cast<T>(roi_height)
/ static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width)
/ static_cast<T>(pooled_width);
// integer extents of bin (ph, pw) within the RoI
int hstart = static_cast<int>(floor(static_cast<T>(ph)
* bin_size_h));
int wstart = static_cast<int>(floor(static_cast<T>(pw)
* bin_size_w));
int hend = static_cast<int>(ceil(static_cast<T>(ph + 1)
* bin_size_h));
int wend = static_cast<int>(ceil(static_cast<T>(pw + 1)
* bin_size_w));
// Add roi offsets and clip to input boundaries
hstart = min(max(hstart + roi_start_h, 0), height);
hend = min(max(hend + roi_start_h, 0), height);
wstart = min(max(wstart + roi_start_w, 0), width);
wend = min(max(wend + roi_start_w, 0), width);
bool is_empty = (hend <= hstart) || (wend <= wstart);
// Define an empty pooling region to be zero
T maxval = is_empty ? 0 : -FLT_MAX;
// If nothing is pooled, argmax = -1 causes nothing to be backprop'd
int maxidx = -1;
const T* offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
// max over all pixels inside the clipped bin
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
int bottom_index = h * width + w;
if (offset_bottom_data[bottom_index] > maxval) {
maxval = offset_bottom_data[bottom_index];
maxidx = bottom_index;
}
}
}
top_data[index] = maxval;
argmax_data[index] = maxidx;
}
}
// CUDA RoIPool backward kernel: one thread per top (output) element.
// Routes the incoming gradient to the single input pixel that won the max in
// the forward pass (stored in argmax_data); atomicAdd because overlapping
// RoIs may hit the same pixel. argmax == -1 (empty bin) propagates nothing.
template <typename T>
__global__ void RoIPoolFBackward(const int nthreads, const T* top_diff,
const int* argmax_data, const int num_rois, const T spatial_scale,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, T* bottom_diff,
const T* bottom_rois) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T* offset_bottom_rois = bottom_rois + n * 5;
int roi_batch_ind = offset_bottom_rois[0];
int bottom_offset = (roi_batch_ind * channels + c) * height * width;
int top_offset = (n * channels + c) * pooled_height * pooled_width;
const T* offset_top_diff = top_diff + top_offset;
T* offset_bottom_diff = bottom_diff + bottom_offset;
const int* offset_argmax_data = argmax_data + top_offset;
int argmax = offset_argmax_data[ph * pooled_width + pw];
if (argmax != -1) {
atomicAdd(
offset_bottom_diff + argmax,
static_cast<T>(offset_top_diff[ph * pooled_width + pw]));
}
}
}
std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width) {
AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor");
auto num_rois = rois.size(0);
auto channels = input.size(1);
auto height = input.size(2);
auto width = input.size(3);
auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options());
auto output_size = num_rois * pooled_height * pooled_width * channels;
auto argmax = at::zeros({num_rois, channels, pooled_height, pooled_width}, input.options().dtype(at::kInt));
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 grid(std::min(THCCeilDiv((long)output_size, 512L), 4096L));
dim3 block(512);
if (output.numel() == 0) {
THCudaCheck(cudaGetLastError());
return std::make_tuple(output, argmax);
}
AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] {
RoIPoolFForward<scalar_t><<<grid, block, 0, stream>>>(
output_size,
input.contiguous().data<scalar_t>(),
gitextract_1mo5xw5_/
├── .flake8
├── .gitignore
├── ABSTRACTIONS.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── INSTALL.md
├── LICENSE
├── MASKRCNN_README.md
├── MODEL_ZOO.md
├── README.md
├── TROUBLESHOOTING.md
├── configs/
│ ├── caffe2/
│ │ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│ │ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
│ │ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
│ │ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│ ├── cityscapes/
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
│ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│ ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_faster_rcnn_R_50_C4_1x.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_faster_rcnn_fbnet.yaml
│ ├── e2e_faster_rcnn_fbnet_600.yaml
│ ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
│ ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_50_C4_1x.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_mask_rcnn_fbnet.yaml
│ ├── e2e_mask_rcnn_fbnet_600.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
│ ├── fcos/
│ │ ├── fcos_R_101_FPN_2x.yaml
│ │ ├── fcos_R_50_FPN_1x.yaml
│ │ ├── fcos_R_50_FPN_1x_center.yaml
│ │ ├── fcos_R_50_FPN_1x_center_giou.yaml
│ │ ├── fcos_X_101_32x8d_FPN_2x.yaml
│ │ ├── fcos_X_101_64x4d_FPN_2x.yaml
│ │ ├── fcos_bn_bs16_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml
│ │ ├── fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml
│ │ └── fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml
│ ├── gn_baselines/
│ │ ├── README.md
│ │ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
│ │ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
│ │ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
│ │ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
│ ├── pascal_voc/
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
│ │ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
│ │ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
│ ├── quick_schedules/
│ │ ├── e2e_faster_rcnn_R_50_C4_quick.yaml
│ │ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
│ │ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_mask_rcnn_R_50_C4_quick.yaml
│ │ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
│ │ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
│ │ ├── rpn_R_50_C4_quick.yaml
│ │ └── rpn_R_50_FPN_quick.yaml
│ ├── retinanet/
│ │ ├── retinanet_R-101-FPN_1x.yaml
│ │ ├── retinanet_R-101-FPN_P5_1x.yaml
│ │ ├── retinanet_R-50-FPN_1x.yaml
│ │ ├── retinanet_R-50-FPN_1x_quick.yaml
│ │ ├── retinanet_R-50-FPN_P5_1x.yaml
│ │ └── retinanet_X_101_32x8d_FPN_1x.yaml
│ ├── rpn_R_101_FPN_1x.yaml
│ ├── rpn_R_50_C4_1x.yaml
│ ├── rpn_R_50_FPN_1x.yaml
│ └── rpn_X_101_32x8d_FPN_1x.yaml
├── demo/
│ ├── README.md
│ ├── fcos_demo.py
│ ├── predictor.py
│ └── webcam.py
├── docker/
│ ├── Dockerfile
│ └── docker-jupyter/
│ ├── Dockerfile
│ └── jupyter_notebook_config.py
├── maskrcnn_benchmark/
│ ├── __init__.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── defaults.py
│ │ └── paths_catalog.py
│ ├── csrc/
│ │ ├── ROIAlign.h
│ │ ├── ROIPool.h
│ │ ├── SigmoidFocalLoss.h
│ │ ├── cpu/
│ │ │ ├── ROIAlign_cpu.cpp
│ │ │ ├── nms_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda/
│ │ │ ├── ROIAlign_cuda.cu
│ │ │ ├── ROIPool_cuda.cu
│ │ │ ├── SigmoidFocalLoss_cuda.cu
│ │ │ ├── nms.cu
│ │ │ └── vision.h
│ │ ├── nms.h
│ │ └── vision.cpp
│ ├── data/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── build.py
│ │ ├── collate_batch.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── coco.py
│ │ │ ├── concat_dataset.py
│ │ │ ├── evaluation/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── coco/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── coco_eval.py
│ │ │ │ └── voc/
│ │ │ │ ├── __init__.py
│ │ │ │ └── voc_eval.py
│ │ │ ├── list_dataset.py
│ │ │ └── voc.py
│ │ ├── samplers/
│ │ │ ├── __init__.py
│ │ │ ├── distributed.py
│ │ │ ├── grouped_batch_sampler.py
│ │ │ └── iteration_based_batch_sampler.py
│ │ └── transforms/
│ │ ├── __init__.py
│ │ ├── build.py
│ │ └── transforms.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── inference.py
│ │ └── trainer.py
│ ├── layers/
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── batch_norm.py
│ │ ├── iou_loss.py
│ │ ├── misc.py
│ │ ├── nms.py
│ │ ├── roi_align.py
│ │ ├── roi_pool.py
│ │ ├── scale.py
│ │ ├── sigmoid_focal_loss.py
│ │ └── smooth_l1_loss.py
│ ├── modeling/
│ │ ├── __init__.py
│ │ ├── backbone/
│ │ │ ├── __init__.py
│ │ │ ├── backbone.py
│ │ │ ├── fbnet.py
│ │ │ ├── fbnet_builder.py
│ │ │ ├── fbnet_modeldef.py
│ │ │ ├── fpn.py
│ │ │ ├── mobilenet.py
│ │ │ └── resnet.py
│ │ ├── balanced_positive_negative_sampler.py
│ │ ├── box_coder.py
│ │ ├── detector/
│ │ │ ├── __init__.py
│ │ │ ├── detectors.py
│ │ │ └── generalized_rcnn.py
│ │ ├── make_layers.py
│ │ ├── matcher.py
│ │ ├── poolers.py
│ │ ├── registry.py
│ │ ├── roi_heads/
│ │ │ ├── __init__.py
│ │ │ ├── box_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── box_head.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── roi_box_feature_extractors.py
│ │ │ │ └── roi_box_predictors.py
│ │ │ ├── keypoint_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── keypoint_head.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── roi_keypoint_feature_extractors.py
│ │ │ │ └── roi_keypoint_predictors.py
│ │ │ ├── mask_head/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── mask_head.py
│ │ │ │ ├── roi_mask_feature_extractors.py
│ │ │ │ └── roi_mask_predictors.py
│ │ │ └── roi_heads.py
│ │ ├── rpn/
│ │ │ ├── __init__.py
│ │ │ ├── anchor_generator.py
│ │ │ ├── fcos/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fcos.py
│ │ │ │ ├── inference.py
│ │ │ │ └── loss.py
│ │ │ ├── inference.py
│ │ │ ├── loss.py
│ │ │ ├── retinanet/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── inference.py
│ │ │ │ ├── loss.py
│ │ │ │ └── retinanet.py
│ │ │ ├── rpn.py
│ │ │ └── utils.py
│ │ └── utils.py
│ ├── solver/
│ │ ├── __init__.py
│ │ ├── build.py
│ │ └── lr_scheduler.py
│ ├── structures/
│ │ ├── __init__.py
│ │ ├── bounding_box.py
│ │ ├── boxlist_ops.py
│ │ ├── image_list.py
│ │ ├── keypoint.py
│ │ └── segmentation_mask.py
│ └── utils/
│ ├── README.md
│ ├── __init__.py
│ ├── c2_model_loading.py
│ ├── checkpoint.py
│ ├── collect_env.py
│ ├── comm.py
│ ├── cv2_util.py
│ ├── env.py
│ ├── imports.py
│ ├── logger.py
│ ├── metric_logger.py
│ ├── miscellaneous.py
│ ├── model_serialization.py
│ ├── model_zoo.py
│ ├── registry.py
│ └── timer.py
├── requirements.txt
├── setup.py
├── tests/
│ ├── checkpoint.py
│ ├── env_tests/
│ │ └── env.py
│ ├── test_backbones.py
│ ├── test_box_coder.py
│ ├── test_configs.py
│ ├── test_data_samplers.py
│ ├── test_detectors.py
│ ├── test_fbnet.py
│ ├── test_feature_extractors.py
│ ├── test_metric_logger.py
│ ├── test_nms.py
│ ├── test_predictors.py
│ ├── test_rpn_heads.py
│ ├── test_segmentation_mask.py
│ └── utils.py
└── tools/
├── cityscapes/
│ ├── convert_cityscapes_to_coco.py
│ └── instances2dict_with_polygons.py
├── remove_solver_states.py
├── test_net.py
└── train_net.py
SYMBOL INDEX (736 symbols across 119 files)
FILE: demo/fcos_demo.py
function main (line 11) | def main():
FILE: demo/predictor.py
class COCODemo (line 14) | class COCODemo(object):
method __init__ (line 100) | def __init__(
method build_transform (line 132) | def build_transform(self):
method run_on_opencv_image (line 162) | def run_on_opencv_image(self, image):
method compute_prediction (line 187) | def compute_prediction(self, original_image):
method select_top_predictions (line 224) | def select_top_predictions(self, predictions):
method compute_colors_for_labels (line 247) | def compute_colors_for_labels(self, labels):
method overlay_boxes (line 255) | def overlay_boxes(self, image, predictions):
method overlay_mask (line 278) | def overlay_mask(self, image, predictions):
method overlay_keypoints (line 304) | def overlay_keypoints(self, image, predictions):
method create_mask_montage (line 313) | def create_mask_montage(self, image, predictions):
method overlay_class_names (line 349) | def overlay_class_names(self, image, predictions):
function vis_keypoints (line 378) | def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
FILE: demo/webcam.py
function main (line 11) | def main():
FILE: maskrcnn_benchmark/config/paths_catalog.py
class DatasetCatalog (line 7) | class DatasetCatalog(object):
method get (line 110) | def get(name):
class ModelCatalog (line 136) | class ModelCatalog(object):
method get (line 163) | def get(name):
method get_c2_imagenet_pretrained (line 171) | def get_c2_imagenet_pretrained(name):
method get_c2_detectron_12_2017_baselines (line 179) | def get_c2_detectron_12_2017_baselines(name):
FILE: maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
type PreCalc (line 6) | struct PreCalc {
function pre_calc_for_bilinear_interpolate (line 18) | void pre_calc_for_bilinear_interpolate(
function ROIAlignForward_cpu_kernel (line 114) | void ROIAlignForward_cpu_kernel(
function ROIAlign_forward_cpu (line 221) | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
FILE: maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp
function nms_cpu_kernel (line 6) | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
function nms_cpu (line 67) | at::Tensor nms_cpu(const at::Tensor& dets,
FILE: maskrcnn_benchmark/csrc/nms.h
function threshold (line 12) | float threshold) {
FILE: maskrcnn_benchmark/csrc/vision.cpp
function PYBIND11_MODULE (line 7) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: maskrcnn_benchmark/data/build.py
function build_dataset (line 17) | def build_dataset(dataset_list, transforms, dataset_catalog, is_train=Tr...
function make_data_sampler (line 59) | def make_data_sampler(dataset, shuffle, distributed):
function _quantize (line 69) | def _quantize(x, bins):
function _compute_aspect_ratios (line 76) | def _compute_aspect_ratios(dataset):
function make_batch_data_sampler (line 85) | def make_batch_data_sampler(
function make_data_loader (line 107) | def make_data_loader(cfg, is_train=True, is_distributed=False, start_ite...
FILE: maskrcnn_benchmark/data/collate_batch.py
class BatchCollator (line 5) | class BatchCollator(object):
method __init__ (line 12) | def __init__(self, size_divisible=0):
method __call__ (line 15) | def __call__(self, batch):
FILE: maskrcnn_benchmark/data/datasets/coco.py
function _count_visible_keypoints (line 13) | def _count_visible_keypoints(anno):
function _has_only_empty_bbox (line 17) | def _has_only_empty_bbox(anno):
function has_valid_annotation (line 21) | def has_valid_annotation(anno):
class COCODataset (line 39) | class COCODataset(torchvision.datasets.coco.CocoDetection):
method __init__ (line 40) | def __init__(
method __getitem__ (line 66) | def __getitem__(self, idx):
method get_img_info (line 98) | def get_img_info(self, index):
FILE: maskrcnn_benchmark/data/datasets/concat_dataset.py
class ConcatDataset (line 7) | class ConcatDataset(_ConcatDataset):
method get_idxs (line 13) | def get_idxs(self, idx):
method get_img_info (line 21) | def get_img_info(self, idx):
FILE: maskrcnn_benchmark/data/datasets/evaluation/__init__.py
function evaluate (line 7) | def evaluate(dataset, predictions, output_folder, **kwargs):
FILE: maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py
function coco_evaluation (line 4) | def coco_evaluation(
FILE: maskrcnn_benchmark/data/datasets/evaluation/coco/coco_eval.py
function do_coco_evaluation (line 13) | def do_coco_evaluation(
function prepare_for_coco_detection (line 70) | def prepare_for_coco_detection(predictions, dataset):
function prepare_for_coco_segmentation (line 104) | def prepare_for_coco_segmentation(predictions, dataset):
function prepare_for_coco_keypoint (line 158) | def prepare_for_coco_keypoint(predictions, dataset):
function evaluate_box_proposals (line 189) | def evaluate_box_proposals(
function evaluate_predictions_on_coco (line 305) | def evaluate_predictions_on_coco(
function compute_thresholds_for_classes (line 329) | def compute_thresholds_for_classes(coco_eval):
class COCOResults (line 358) | class COCOResults(object):
method __init__ (line 375) | def __init__(self, *iou_types):
method update (line 385) | def update(self, coco_eval):
method __repr__ (line 398) | def __repr__(self):
function check_expected_results (line 403) | def check_expected_results(results, expected_results, sigma_tol):
FILE: maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py
function voc_evaluation (line 6) | def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
FILE: maskrcnn_benchmark/data/datasets/evaluation/voc/voc_eval.py
function do_voc_evaluation (line 12) | def do_voc_evaluation(dataset, predictions, output_folder, logger):
function eval_detection_voc (line 48) | def eval_detection_voc(pred_boxlists, gt_boxlists, iou_thresh=0.5, use_0...
function calc_detection_voc_prec_rec (line 68) | def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0...
function calc_detection_voc_ap (line 160) | def calc_detection_voc_ap(prec, rec, use_07_metric=False):
FILE: maskrcnn_benchmark/data/datasets/list_dataset.py
class ListDataset (line 11) | class ListDataset(object):
method __init__ (line 12) | def __init__(self, image_lists, transforms=None):
method __getitem__ (line 16) | def __getitem__(self, item):
method __len__ (line 28) | def __len__(self):
method get_img_info (line 31) | def get_img_info(self, item):
FILE: maskrcnn_benchmark/data/datasets/voc.py
class PascalVOCDataset (line 17) | class PascalVOCDataset(torch.utils.data.Dataset):
method __init__ (line 43) | def __init__(self, data_dir, split, use_difficult=False, transforms=No...
method __getitem__ (line 61) | def __getitem__(self, index):
method __len__ (line 73) | def __len__(self):
method get_groundtruth (line 76) | def get_groundtruth(self, index):
method _preprocess_annotation (line 87) | def _preprocess_annotation(self, target):
method get_img_info (line 126) | def get_img_info(self, index):
method map_class_id_to_class_name (line 133) | def map_class_id_to_class_name(self, class_id):
FILE: maskrcnn_benchmark/data/samplers/distributed.py
class DistributedSampler (line 10) | class DistributedSampler(Sampler):
method __init__ (line 25) | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
method __iter__ (line 42) | def __iter__(self):
method __len__ (line 62) | def __len__(self):
method set_epoch (line 65) | def set_epoch(self, epoch):
FILE: maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py
class GroupedBatchSampler (line 9) | class GroupedBatchSampler(BatchSampler):
method __init__ (line 24) | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False):
method _prepare_batches (line 40) | def _prepare_batches(self):
method __iter__ (line 102) | def __iter__(self):
method __len__ (line 111) | def __len__(self):
FILE: maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py
class IterationBasedBatchSampler (line 5) | class IterationBasedBatchSampler(BatchSampler):
method __init__ (line 11) | def __init__(self, batch_sampler, num_iterations, start_iter=0):
method __iter__ (line 16) | def __iter__(self):
method __len__ (line 30) | def __len__(self):
FILE: maskrcnn_benchmark/data/transforms/build.py
function build_transforms (line 5) | def build_transforms(cfg, is_train=True):
FILE: maskrcnn_benchmark/data/transforms/transforms.py
class Compose (line 9) | class Compose(object):
method __init__ (line 10) | def __init__(self, transforms):
method __call__ (line 13) | def __call__(self, image, target):
method __repr__ (line 18) | def __repr__(self):
class Resize (line 27) | class Resize(object):
method __init__ (line 28) | def __init__(self, min_size, max_size):
method get_size (line 35) | def get_size(self, image_size):
method __call__ (line 57) | def __call__(self, image, target):
class RandomHorizontalFlip (line 64) | class RandomHorizontalFlip(object):
method __init__ (line 65) | def __init__(self, prob=0.5):
method __call__ (line 68) | def __call__(self, image, target):
class ToTensor (line 75) | class ToTensor(object):
method __call__ (line 76) | def __call__(self, image, target):
class Normalize (line 80) | class Normalize(object):
method __init__ (line 81) | def __init__(self, mean, std, to_bgr255=True):
method __call__ (line 86) | def __call__(self, image, target):
FILE: maskrcnn_benchmark/engine/inference.py
function compute_on_dataset (line 16) | def compute_on_dataset(model, data_loader, device, timer=None):
function _accumulate_predictions_from_multiple_gpus (line 37) | def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
function inference (line 59) | def inference(
FILE: maskrcnn_benchmark/engine/trainer.py
function reduce_loss_dict (line 13) | def reduce_loss_dict(loss_dict):
function do_train (line 38) | def do_train(
FILE: maskrcnn_benchmark/layers/_utils.py
function _load_C_extensions (line 14) | def _load_C_extensions():
FILE: maskrcnn_benchmark/layers/batch_norm.py
class FrozenBatchNorm2d (line 6) | class FrozenBatchNorm2d(nn.Module):
method __init__ (line 12) | def __init__(self, n):
method forward (line 19) | def forward(self, x):
FILE: maskrcnn_benchmark/layers/iou_loss.py
class IOULoss (line 5) | class IOULoss(nn.Module):
method __init__ (line 6) | def __init__(self, loc_loss_type):
method forward (line 10) | def forward(self, pred, target, weight=None):
FILE: maskrcnn_benchmark/layers/misc.py
class _NewEmptyTensorOp (line 17) | class _NewEmptyTensorOp(torch.autograd.Function):
method forward (line 19) | def forward(ctx, x, new_shape):
method backward (line 24) | def backward(ctx, grad):
class Conv2d (line 29) | class Conv2d(torch.nn.Conv2d):
method forward (line 30) | def forward(self, x):
class ConvTranspose2d (line 45) | class ConvTranspose2d(torch.nn.ConvTranspose2d):
method forward (line 46) | def forward(self, x):
class BatchNorm2d (line 66) | class BatchNorm2d(torch.nn.BatchNorm2d):
method forward (line 67) | def forward(self, x):
function interpolate (line 75) | def interpolate(
FILE: maskrcnn_benchmark/layers/roi_align.py
class _ROIAlign (line 11) | class _ROIAlign(Function):
method forward (line 13) | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
method backward (line 26) | def backward(ctx, grad_output):
class ROIAlign (line 50) | class ROIAlign(nn.Module):
method __init__ (line 51) | def __init__(self, output_size, spatial_scale, sampling_ratio):
method forward (line 57) | def forward(self, input, rois):
method __repr__ (line 62) | def __repr__(self):
FILE: maskrcnn_benchmark/layers/roi_pool.py
class _ROIPool (line 11) | class _ROIPool(Function):
method forward (line 13) | def forward(ctx, input, roi, output_size, spatial_scale):
method backward (line 25) | def backward(ctx, grad_output):
class ROIPool (line 49) | class ROIPool(nn.Module):
method __init__ (line 50) | def __init__(self, output_size, spatial_scale):
method forward (line 55) | def forward(self, input, rois):
method __repr__ (line 58) | def __repr__(self):
FILE: maskrcnn_benchmark/layers/scale.py
class Scale (line 5) | class Scale(nn.Module):
method __init__ (line 6) | def __init__(self, init_value=1.0):
method forward (line 10) | def forward(self, input):
FILE: maskrcnn_benchmark/layers/sigmoid_focal_loss.py
class _SigmoidFocalLoss (line 9) | class _SigmoidFocalLoss(Function):
method forward (line 11) | def forward(ctx, logits, targets, gamma, alpha):
method backward (line 25) | def backward(ctx, d_loss):
function sigmoid_focal_loss_cpu (line 40) | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
class SigmoidFocalLoss (line 55) | class SigmoidFocalLoss(nn.Module):
method __init__ (line 56) | def __init__(self, gamma, alpha):
method forward (line 61) | def forward(self, logits, targets):
method __repr__ (line 71) | def __repr__(self):
FILE: maskrcnn_benchmark/layers/smooth_l1_loss.py
function smooth_l1_loss (line 6) | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
FILE: maskrcnn_benchmark/modeling/backbone/backbone.py
function build_resnet_backbone (line 17) | def build_resnet_backbone(cfg):
function build_resnet_fpn_backbone (line 27) | def build_resnet_fpn_backbone(cfg):
function build_resnet_fpn_p3p7_backbone (line 51) | def build_resnet_fpn_p3p7_backbone(cfg):
function build_mnv2_fpn_backbone (line 76) | def build_mnv2_fpn_backbone(cfg):
function build_backbone (line 98) | def build_backbone(cfg):
FILE: maskrcnn_benchmark/modeling/backbone/fbnet.py
function create_builder (line 21) | def create_builder(cfg):
function _get_trunk_cfg (line 71) | def _get_trunk_cfg(arch_def):
class FBNetTrunk (line 79) | class FBNetTrunk(nn.Module):
method __init__ (line 80) | def __init__(
method forward (line 89) | def forward(self, x):
function add_conv_body (line 97) | def add_conv_body(cfg, dim_in=3):
function _get_rpn_stage (line 107) | def _get_rpn_stage(arch_def, num_blocks):
class FBNetRPNHead (line 121) | class FBNetRPNHead(nn.Module):
method __init__ (line 122) | def __init__(
method forward (line 138) | def forward(self, x):
function add_rpn_head (line 144) | def add_rpn_head(cfg, in_channels, num_anchors):
function _get_head_stage (line 157) | def _get_head_stage(arch, head_name, blocks):
class FBNetROIHead (line 174) | class FBNetROIHead(nn.Module):
method __init__ (line 175) | def __init__(
method forward (line 204) | def forward(self, x, proposals):
function add_roi_head (line 211) | def add_roi_head(cfg, in_channels):
function add_roi_head_keypoints (line 226) | def add_roi_head_keypoints(cfg, in_channels):
function add_roi_head_mask (line 241) | def add_roi_head_mask(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/backbone/fbnet_builder.py
function _py2_round (line 26) | def _py2_round(x):
function _get_divisible_by (line 30) | def _get_divisible_by(num, divisible_by, min_val):
class Identity (line 195) | class Identity(nn.Module):
method __init__ (line 196) | def __init__(self, C_in, C_out, stride):
method forward (line 213) | def forward(self, x):
class CascadeConv3x3 (line 221) | class CascadeConv3x3(nn.Sequential):
method __init__ (line 222) | def __init__(self, C_in, C_out, stride):
method forward (line 234) | def forward(self, x):
class Shift (line 241) | class Shift(nn.Module):
method __init__ (line 242) | def __init__(self, C, kernel_size, stride, padding):
method forward (line 269) | def forward(self, x):
class ShiftBlock5x5 (line 295) | class ShiftBlock5x5(nn.Sequential):
method __init__ (line 296) | def __init__(self, C_in, C_out, expansion, stride):
method forward (line 315) | def forward(self, x):
class ChannelShuffle (line 322) | class ChannelShuffle(nn.Module):
method __init__ (line 323) | def __init__(self, groups):
method forward (line 327) | def forward(self, x):
class ConvBNRelu (line 342) | class ConvBNRelu(nn.Sequential):
method __init__ (line 343) | def __init__(
class SEModule (line 397) | class SEModule(nn.Module):
method __init__ (line 400) | def __init__(self, C):
method forward (line 410) | def forward(self, x):
class Upsample (line 414) | class Upsample(nn.Module):
method __init__ (line 415) | def __init__(self, scale_factor, mode, align_corners=None):
method forward (line 421) | def forward(self, x):
function _get_upsample_op (line 428) | def _get_upsample_op(stride):
class IRFBlock (line 445) | class IRFBlock(nn.Module):
method __init__ (line 446) | def __init__(
method forward (line 549) | def forward(self, x):
function _expand_block_cfg (line 563) | def _expand_block_cfg(block_cfg):
function expand_stage_cfg (line 574) | def expand_stage_cfg(stage_cfg):
function expand_stages_cfg (line 583) | def expand_stages_cfg(stage_cfgs):
function _block_cfgs_to_list (line 592) | def _block_cfgs_to_list(block_cfgs):
function _add_to_arch (line 603) | def _add_to_arch(arch, info, name):
function unify_arch_def (line 641) | def unify_arch_def(arch_def):
function get_num_stages (line 671) | def get_num_stages(arch_def):
function get_blocks (line 679) | def get_blocks(arch_def, stage_indices=None, block_indices=None):
class FBNetBuilder (line 693) | class FBNetBuilder(object):
method __init__ (line 694) | def __init__(
method add_first (line 709) | def add_first(self, stage_info, dim_in=3, pad=True):
method add_blocks (line 732) | def add_blocks(self, blocks):
method add_last (line 754) | def add_last(self, stage_info):
method _add_ir_block (line 795) | def _add_ir_block(
method add_ir_block (line 811) | def add_ir_block(self, tcns, block_op_types, **kwargs):
method _get_divisible_width (line 827) | def _get_divisible_width(self, width):
FILE: maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py
function add_archs (line 4) | def add_archs(archs):
FILE: maskrcnn_benchmark/modeling/backbone/fpn.py
class FPN (line 7) | class FPN(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 43) | def forward(self, x):
class LastLevelMaxPool (line 77) | class LastLevelMaxPool(nn.Module):
method forward (line 78) | def forward(self, x):
class LastLevelP6P7 (line 82) | class LastLevelP6P7(nn.Module):
method __init__ (line 86) | def __init__(self, in_channels, out_channels):
method forward (line 95) | def forward(self, c5, p5):
FILE: maskrcnn_benchmark/modeling/backbone/mobilenet.py
function conv_bn (line 11) | def conv_bn(inp, oup, stride):
function conv_1x1_bn (line 19) | def conv_1x1_bn(inp, oup):
class InvertedResidual (line 27) | class InvertedResidual(nn.Module):
method __init__ (line 28) | def __init__(self, inp, oup, stride, expand_ratio):
method forward (line 61) | def forward(self, x):
class MobileNetV2 (line 68) | class MobileNetV2(nn.Module):
method __init__ (line 72) | def __init__(self, cfg, n_class=1000, input_size=224, width_mult=1.):
method _freeze_backbone (line 108) | def _freeze_backbone(self, freeze_at):
method forward (line 113) | def forward(self, x):
method _initialize_weights (line 121) | def _initialize_weights(self):
FILE: maskrcnn_benchmark/modeling/backbone/resnet.py
class ResNet (line 80) | class ResNet(nn.Module):
method __init__ (line 81) | def __init__(self, cfg):
method _freeze_backbone (line 127) | def _freeze_backbone(self, freeze_at):
method forward (line 138) | def forward(self, x):
class ResNetHead (line 148) | class ResNetHead(nn.Module):
method __init__ (line 149) | def __init__(
method forward (line 192) | def forward(self, x):
function _make_stage (line 198) | def _make_stage(
class Bottleneck (line 228) | class Bottleneck(nn.Module):
method __init__ (line 229) | def __init__(
method forward (line 295) | def forward(self, x):
class BaseStem (line 318) | class BaseStem(nn.Module):
method __init__ (line 319) | def __init__(self, cfg, norm_func):
method forward (line 332) | def forward(self, x):
class BottleneckWithFixedBatchNorm (line 340) | class BottleneckWithFixedBatchNorm(Bottleneck):
method __init__ (line 341) | def __init__(
class StemWithFixedBatchNorm (line 363) | class StemWithFixedBatchNorm(BaseStem):
method __init__ (line 364) | def __init__(self, cfg):
class BottleneckWithGN (line 370) | class BottleneckWithGN(Bottleneck):
method __init__ (line 371) | def __init__(
class StemWithGN (line 393) | class StemWithGN(BaseStem):
method __init__ (line 394) | def __init__(self, cfg):
FILE: maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py
class BalancedPositiveNegativeSampler (line 5) | class BalancedPositiveNegativeSampler(object):
method __init__ (line 10) | def __init__(self, batch_size_per_image, positive_fraction):
method __call__ (line 19) | def __call__(self, matched_idxs):
FILE: maskrcnn_benchmark/modeling/box_coder.py
class BoxCoder (line 7) | class BoxCoder(object):
method __init__ (line 13) | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
method encode (line 22) | def encode(self, reference_boxes, proposals):
method decode (line 52) | def decode(self, rel_codes, boxes):
FILE: maskrcnn_benchmark/modeling/detector/detectors.py
function build_detection_model (line 8) | def build_detection_model(cfg):
FILE: maskrcnn_benchmark/modeling/detector/generalized_rcnn.py
class GeneralizedRCNN (line 16) | class GeneralizedRCNN(nn.Module):
method __init__ (line 26) | def __init__(self, cfg):
method forward (line 33) | def forward(self, images, targets=None):
FILE: maskrcnn_benchmark/modeling/make_layers.py
function get_group_gn (line 14) | def get_group_gn(dim, dim_per_gp, num_groups):
function group_norm (line 31) | def group_norm(out_channels, affine=True, divisor=1):
function make_conv3x3 (line 44) | def make_conv3x3(
function make_fc (line 80) | def make_fc(dim_in, hidden_dim, use_gn=False):
function conv_with_kaiming_uniform (line 95) | def conv_with_kaiming_uniform(use_gn=False, use_relu=False):
FILE: maskrcnn_benchmark/modeling/matcher.py
class Matcher (line 5) | class Matcher(object):
method __init__ (line 23) | def __init__(self, high_threshold, low_threshold, allow_low_quality_ma...
method __call__ (line 42) | def __call__(self, match_quality_matrix):
method set_low_quality_matches_ (line 83) | def set_low_quality_matches_(self, matches, all_matches, match_quality...
FILE: maskrcnn_benchmark/modeling/poolers.py
class LevelMapper (line 11) | class LevelMapper(object):
method __init__ (line 16) | def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=...
method __call__ (line 31) | def __call__(self, boxlists):
class Pooler (line 45) | class Pooler(nn.Module):
method __init__ (line 55) | def __init__(self, output_size, scales, sampling_ratio):
method convert_to_roi_format (line 78) | def convert_to_roi_format(self, boxes):
method forward (line 91) | def forward(self, x, boxes):
function make_pooler (line 124) | def make_pooler(cfg, head_name):
FILE: maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py
class ROIBoxHead (line 11) | class ROIBoxHead(torch.nn.Module):
method __init__ (line 16) | def __init__(self, cfg, in_channels):
method forward (line 24) | def forward(self, features, proposals, targets=None):
function build_roi_box_head (line 65) | def build_roi_box_head(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py
class PostProcessor (line 12) | class PostProcessor(nn.Module):
method __init__ (line 19) | def __init__(
method forward (line 43) | def forward(self, x, boxes):
method prepare_boxlist (line 86) | def prepare_boxlist(self, boxes, scores, image_shape):
method filter_results (line 105) | def filter_results(self, boxlist, num_classes):
function make_roi_box_post_processor (line 149) | def make_roi_box_post_processor(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
class FastRCNNLossComputation (line 15) | class FastRCNNLossComputation(object):
method __init__ (line 21) | def __init__(
method match_targets_to_proposals (line 39) | def match_targets_to_proposals(self, proposal, target):
method prepare_targets (line 52) | def prepare_targets(self, proposals, targets):
method subsample (line 82) | def subsample(self, proposals, targets):
method __call__ (line 118) | def __call__(self, class_logits, box_regression):
function make_roi_box_loss_evaluator (line 170) | def make_roi_box_loss_evaluator(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py
class ResNet50Conv5ROIFeatureExtractor (line 14) | class ResNet50Conv5ROIFeatureExtractor(nn.Module):
method __init__ (line 15) | def __init__(self, config, in_channels):
method forward (line 43) | def forward(self, x, proposals):
class FPN2MLPFeatureExtractor (line 50) | class FPN2MLPFeatureExtractor(nn.Module):
method __init__ (line 55) | def __init__(self, cfg, in_channels):
method forward (line 74) | def forward(self, x, proposals):
class FPNXconv1fcFeatureExtractor (line 85) | class FPNXconv1fcFeatureExtractor(nn.Module):
method __init__ (line 90) | def __init__(self, cfg, in_channels):
method forward (line 139) | def forward(self, x, proposals):
function make_roi_box_feature_extractor (line 147) | def make_roi_box_feature_extractor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py
class FastRCNNPredictor (line 7) | class FastRCNNPredictor(nn.Module):
method __init__ (line 8) | def __init__(self, config, in_channels):
method forward (line 26) | def forward(self, x):
class FPNPredictor (line 35) | class FPNPredictor(nn.Module):
method __init__ (line 36) | def __init__(self, cfg, in_channels):
method forward (line 50) | def forward(self, x):
function make_roi_box_predictor (line 60) | def make_roi_box_predictor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/keypoint_head/inference.py
class KeypointPostProcessor (line 5) | class KeypointPostProcessor(nn.Module):
method __init__ (line 6) | def __init__(self, keypointer=None):
method forward (line 10) | def forward(self, x, boxes):
function heatmaps_to_keypoints (line 40) | def heatmaps_to_keypoints(maps, rois):
class Keypointer (line 101) | class Keypointer(object):
method __init__ (line 107) | def __init__(self, padding=0):
method __call__ (line 110) | def __call__(self, masks, boxes):
function make_roi_keypoint_post_processor (line 122) | def make_roi_keypoint_post_processor(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py
class ROIKeypointHead (line 9) | class ROIKeypointHead(torch.nn.Module):
method __init__ (line 10) | def __init__(self, cfg, in_channels):
method forward (line 19) | def forward(self, features, proposals, targets=None):
function build_roi_keypoint_head (line 50) | def build_roi_keypoint_head(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/keypoint_head/loss.py
function project_keypoints_to_heatmap (line 17) | def project_keypoints_to_heatmap(keypoints, proposals, discretization_si...
function cat_boxlist_with_keypoints (line 24) | def cat_boxlist_with_keypoints(boxlists):
function _within_box (line 39) | def _within_box(points, boxes):
class KeypointRCNNLossComputation (line 54) | class KeypointRCNNLossComputation(object):
method __init__ (line 55) | def __init__(self, proposal_matcher, fg_bg_sampler, discretization_size):
method match_targets_to_proposals (line 66) | def match_targets_to_proposals(self, proposal, target):
method prepare_targets (line 79) | def prepare_targets(self, proposals, targets):
method subsample (line 111) | def subsample(self, proposals, targets):
method __call__ (line 145) | def __call__(self, proposals, keypoint_logits):
function make_roi_keypoint_loss_evaluator (line 172) | def make_roi_keypoint_loss_evaluator(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py
class KeypointRCNNFeatureExtractor (line 11) | class KeypointRCNNFeatureExtractor(nn.Module):
method __init__ (line 12) | def __init__(self, cfg, in_channels):
method forward (line 39) | def forward(self, x, proposals):
function make_roi_keypoint_feature_extractor (line 46) | def make_roi_keypoint_feature_extractor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py
class KeypointRCNNPredictor (line 8) | class KeypointRCNNPredictor(nn.Module):
method __init__ (line 9) | def __init__(self, cfg, in_channels):
method forward (line 28) | def forward(self, x):
function make_roi_keypoint_predictor (line 36) | def make_roi_keypoint_predictor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py
class MaskPostProcessor (line 12) | class MaskPostProcessor(nn.Module):
method __init__ (line 23) | def __init__(self, masker=None):
method forward (line 27) | def forward(self, x, boxes):
class MaskPostProcessorCOCOFormat (line 64) | class MaskPostProcessorCOCOFormat(MaskPostProcessor):
method forward (line 71) | def forward(self, x, boxes):
function expand_boxes (line 91) | def expand_boxes(boxes, scale):
function expand_masks (line 108) | def expand_masks(mask, padding):
function paste_mask_in_image (line 118) | def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1):
class Masker (line 157) | class Masker(object):
method __init__ (line 163) | def __init__(self, threshold=0.5, padding=1):
method forward_single_image (line 167) | def forward_single_image(self, masks, boxes):
method __call__ (line 180) | def __call__(self, masks, boxes):
function make_roi_mask_post_processor (line 197) | def make_roi_mask_post_processor(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py
function project_masks_on_boxes (line 11) | def project_masks_on_boxes(segmentation_masks, proposals, discretization...
class MaskRCNNLossComputation (line 45) | class MaskRCNNLossComputation(object):
method __init__ (line 46) | def __init__(self, proposal_matcher, discretization_size):
method match_targets_to_proposals (line 55) | def match_targets_to_proposals(self, proposal, target):
method prepare_targets (line 68) | def prepare_targets(self, proposals, targets):
method __call__ (line 102) | def __call__(self, proposals, mask_logits, targets):
function make_roi_mask_loss_evaluator (line 131) | def make_roi_mask_loss_evaluator(cfg):
FILE: maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py
function keep_only_positive_boxes (line 13) | def keep_only_positive_boxes(boxes):
class ROIMaskHead (line 36) | class ROIMaskHead(torch.nn.Module):
method __init__ (line 37) | def __init__(self, cfg, in_channels):
method forward (line 46) | def forward(self, features, proposals, targets=None):
function build_roi_mask_head (line 82) | def build_roi_mask_head(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py
class MaskRCNNFPNFeatureExtractor (line 17) | class MaskRCNNFPNFeatureExtractor(nn.Module):
method __init__ (line 22) | def __init__(self, cfg, in_channels):
method forward (line 59) | def forward(self, x, proposals):
function make_roi_mask_feature_extractor (line 68) | def make_roi_mask_feature_extractor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py
class MaskRCNNC4Predictor (line 11) | class MaskRCNNC4Predictor(nn.Module):
method __init__ (line 12) | def __init__(self, cfg, in_channels):
method forward (line 29) | def forward(self, x):
class MaskRCNNConv1x1Predictor (line 35) | class MaskRCNNConv1x1Predictor(nn.Module):
method __init__ (line 36) | def __init__(self, cfg, in_channels):
method forward (line 51) | def forward(self, x):
function make_roi_mask_predictor (line 55) | def make_roi_mask_predictor(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/roi_heads/roi_heads.py
class CombinedROIHeads (line 9) | class CombinedROIHeads(torch.nn.ModuleDict):
method __init__ (line 15) | def __init__(self, cfg, heads):
method forward (line 23) | def forward(self, features, proposals, targets=None):
function build_roi_heads (line 58) | def build_roi_heads(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/rpn/anchor_generator.py
class BufferList (line 11) | class BufferList(nn.Module):
method __init__ (line 16) | def __init__(self, buffers=None):
method extend (line 21) | def extend(self, buffers):
method __len__ (line 27) | def __len__(self):
method __iter__ (line 30) | def __iter__(self):
class AnchorGenerator (line 34) | class AnchorGenerator(nn.Module):
method __init__ (line 40) | def __init__(
method num_anchors_per_location (line 70) | def num_anchors_per_location(self):
method grid_anchors (line 73) | def grid_anchors(self, grid_sizes):
method add_visibility_to (line 97) | def add_visibility_to(self, boxlist):
method forward (line 112) | def forward(self, image_list, feature_maps):
function make_anchor_generator (line 128) | def make_anchor_generator(config):
function make_anchor_generator_retinanet (line 146) | def make_anchor_generator_retinanet(config):
function generate_anchors (line 220) | def generate_anchors(
function _generate_anchors (line 234) | def _generate_anchors(base_size, scales, aspect_ratios):
function _whctrs (line 246) | def _whctrs(anchor):
function _mkanchors (line 255) | def _mkanchors(ws, hs, x_ctr, y_ctr):
function _ratio_enum (line 272) | def _ratio_enum(anchor, ratios):
function _scale_enum (line 283) | def _scale_enum(anchor, scales):
FILE: maskrcnn_benchmark/modeling/rpn/fcos/fcos.py
class FCOSHead (line 12) | class FCOSHead(torch.nn.Module):
method __init__ (line 13) | def __init__(self, cfg, in_channels):
method forward (line 80) | def forward(self, x):
class FCOSModule (line 94) | class FCOSModule(torch.nn.Module):
method __init__ (line 100) | def __init__(self, cfg, in_channels):
method forward (line 114) | def forward(self, images, features, targets=None):
method _forward_train (line 144) | def _forward_train(self, locations, box_cls, box_regression, centernes...
method _forward_test (line 155) | def _forward_test(self, locations, box_cls, box_regression, centerness...
method compute_locations (line 162) | def compute_locations(self, features):
method compute_locations_per_level (line 173) | def compute_locations_per_level(self, h, w, stride, device):
method get_dense_locations (line 189) | def get_dense_locations(self, locations, stride, device):
function build_fcos (line 211) | def build_fcos(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/rpn/fcos/inference.py
class FCOSPostProcessor (line 14) | class FCOSPostProcessor(torch.nn.Module):
method __init__ (line 19) | def __init__(self, pre_nms_thresh, pre_nms_top_n, nms_thresh,
method forward_for_single_feature_map (line 40) | def forward_for_single_feature_map(
method forward (line 107) | def forward(self, locations, box_cls, box_regression, centerness, imag...
method select_over_all_levels (line 136) | def select_over_all_levels(self, boxlists):
function make_fcos_postprocessor (line 182) | def make_fcos_postprocessor(config):
FILE: maskrcnn_benchmark/modeling/rpn/fcos/loss.py
class FCOSLossComputation (line 22) | class FCOSLossComputation(object):
method __init__ (line 27) | def __init__(self, cfg):
method get_sample_region (line 42) | def get_sample_region(self, gt, strides, num_points_per, gt_xs, gt_ys,...
method prepare_targets (line 74) | def prepare_targets(self, points, targets):
method compute_targets_for_locations (line 114) | def compute_targets_for_locations(self, locations, targets, object_siz...
method compute_centerness_targets (line 165) | def compute_centerness_targets(self, reg_targets):
method __call__ (line 172) | def __call__(self, locations, box_cls, box_regression, centerness, tar...
function make_fcos_loss_evaluator (line 235) | def make_fcos_loss_evaluator(cfg):
FILE: maskrcnn_benchmark/modeling/rpn/inference.py
class RPNPostProcessor (line 13) | class RPNPostProcessor(torch.nn.Module):
method __init__ (line 19) | def __init__(
method add_gt_proposals (line 51) | def add_gt_proposals(self, proposals, targets):
method forward_for_single_feature_map (line 74) | def forward_for_single_feature_map(self, anchors, objectness, box_regr...
method forward (line 123) | def forward(self, anchors, objectness, box_regression, targets=None):
method select_over_all_levels (line 152) | def select_over_all_levels(self, boxlists):
function make_rpn_postprocessor (line 182) | def make_rpn_postprocessor(config, rpn_box_coder, is_train):
FILE: maskrcnn_benchmark/modeling/rpn/loss.py
class RPNLossComputation (line 21) | class RPNLossComputation(object):
method __init__ (line 26) | def __init__(self, proposal_matcher, fg_bg_sampler, box_coder,
method match_targets_to_anchors (line 42) | def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
method prepare_targets (line 56) | def prepare_targets(self, anchors, targets):
method __call__ (line 92) | def __call__(self, anchors, objectness, box_regression, targets):
function generate_rpn_labels (line 134) | def generate_rpn_labels(matched_targets):
function make_rpn_loss_evaluator (line 140) | def make_rpn_loss_evaluator(cfg, box_coder):
FILE: maskrcnn_benchmark/modeling/rpn/retinanet/inference.py
class RetinaNetPostProcessor (line 14) | class RetinaNetPostProcessor(RPNPostProcessor):
method __init__ (line 19) | def __init__(
method add_gt_proposals (line 53) | def add_gt_proposals(self, proposals, targets):
method forward_for_single_feature_map (line 59) | def forward_for_single_feature_map(
method select_over_all_levels (line 131) | def select_over_all_levels(self, boxlists):
function make_retinanet_postprocessor (line 177) | def make_retinanet_postprocessor(config, rpn_box_coder, is_train):
FILE: maskrcnn_benchmark/modeling/rpn/retinanet/loss.py
class RetinaNetLossComputation (line 19) | class RetinaNetLossComputation(RPNLossComputation):
method __init__ (line 24) | def __init__(self, proposal_matcher, box_coder,
method __call__ (line 43) | def __call__(self, anchors, box_cls, box_regression, targets):
function generate_retinanet_labels (line 83) | def generate_retinanet_labels(matched_targets):
function make_retinanet_loss_evaluator (line 88) | def make_retinanet_loss_evaluator(cfg, box_coder):
FILE: maskrcnn_benchmark/modeling/rpn/retinanet/retinanet.py
class RetinaNetHead (line 13) | class RetinaNetHead(torch.nn.Module):
method __init__ (line 18) | def __init__(self, cfg, in_channels):
method forward (line 79) | def forward(self, x):
class RetinaNetModule (line 88) | class RetinaNetModule(torch.nn.Module):
method __init__ (line 94) | def __init__(self, cfg, in_channels):
method forward (line 112) | def forward(self, images, features, targets=None):
method _forward_train (line 135) | def _forward_train(self, anchors, box_cls, box_regression, targets):
method _forward_test (line 146) | def _forward_test(self, anchors, box_cls, box_regression):
function build_retinanet (line 151) | def build_retinanet(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/rpn/rpn.py
class RPNHeadConvRegressor (line 15) | class RPNHeadConvRegressor(nn.Module):
method __init__ (line 20) | def __init__(self, cfg, in_channels, num_anchors):
method forward (line 37) | def forward(self, x):
class RPNHeadFeatureSingleConv (line 45) | class RPNHeadFeatureSingleConv(nn.Module):
method __init__ (line 50) | def __init__(self, cfg, in_channels):
method forward (line 67) | def forward(self, x):
class RPNHead (line 75) | class RPNHead(nn.Module):
method __init__ (line 80) | def __init__(self, cfg, in_channels, num_anchors):
method forward (line 100) | def forward(self, x):
class RPNModule (line 110) | class RPNModule(torch.nn.Module):
method __init__ (line 116) | def __init__(self, cfg, in_channels):
method forward (line 141) | def forward(self, images, features, targets=None):
method _forward_train (line 164) | def _forward_train(self, anchors, objectness, rpn_box_regression, targ...
method _forward_test (line 187) | def _forward_test(self, anchors, objectness, rpn_box_regression):
function build_rpn (line 201) | def build_rpn(cfg, in_channels):
FILE: maskrcnn_benchmark/modeling/rpn/utils.py
function permute_and_flatten (line 10) | def permute_and_flatten(layer, N, A, C, H, W):
function concat_box_prediction_layers (line 17) | def concat_box_prediction_layers(box_cls, box_regression):
FILE: maskrcnn_benchmark/modeling/utils.py
function cat (line 9) | def cat(tensors, dim=0):
FILE: maskrcnn_benchmark/solver/build.py
function make_optimizer (line 7) | def make_optimizer(cfg, model):
function make_lr_scheduler (line 23) | def make_lr_scheduler(cfg, optimizer):
FILE: maskrcnn_benchmark/solver/lr_scheduler.py
class WarmupMultiStepLR (line 10) | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
method __init__ (line 11) | def __init__(
method get_lr (line 39) | def get_lr(self):
FILE: maskrcnn_benchmark/structures/bounding_box.py
class BoxList (line 9) | class BoxList(object):
method __init__ (line 19) | def __init__(self, bbox, image_size, mode="xyxy"):
method add_field (line 39) | def add_field(self, field, field_data):
method get_field (line 42) | def get_field(self, field):
method has_field (line 45) | def has_field(self, field):
method fields (line 48) | def fields(self):
method _copy_extra_fields (line 51) | def _copy_extra_fields(self, bbox):
method convert (line 55) | def convert(self, mode):
method _split_into_xyxy (line 75) | def _split_into_xyxy(self):
method resize (line 91) | def resize(self, size, *args, **kwargs):
method transpose (line 129) | def transpose(self, method):
method crop (line 167) | def crop(self, box):
method to (line 197) | def to(self, device):
method __getitem__ (line 205) | def __getitem__(self, item):
method __len__ (line 211) | def __len__(self):
method clip_to_image (line 214) | def clip_to_image(self, remove_empty=True):
method area (line 226) | def area(self):
method copy_with_fields (line 238) | def copy_with_fields(self, fields, skip_missing=False):
method __repr__ (line 249) | def __repr__(self):
FILE: maskrcnn_benchmark/structures/boxlist_ops.py
function boxlist_nms (line 9) | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scor...
function remove_small_boxes (line 34) | def remove_small_boxes(boxlist, min_size):
function boxlist_iou (line 53) | def boxlist_iou(boxlist1, boxlist2):
function _cat (line 92) | def _cat(tensors, dim=0):
function cat_boxlist (line 102) | def cat_boxlist(bboxes):
FILE: maskrcnn_benchmark/structures/image_list.py
class ImageList (line 7) | class ImageList(object):
method __init__ (line 15) | def __init__(self, tensors, image_sizes):
method to (line 24) | def to(self, *args, **kwargs):
function to_image_list (line 29) | def to_image_list(tensors, size_divisible=0):
FILE: maskrcnn_benchmark/structures/keypoint.py
class Keypoints (line 8) | class Keypoints(object):
method __init__ (line 9) | def __init__(self, keypoints, size, mode=None):
method crop (line 26) | def crop(self, box):
method resize (line 29) | def resize(self, size, *args, **kwargs):
method transpose (line 40) | def transpose(self, method):
method to (line 61) | def to(self, *args, **kwargs):
method __getitem__ (line 69) | def __getitem__(self, item):
method add_field (line 75) | def add_field(self, field, field_data):
method get_field (line 78) | def get_field(self, field):
method __repr__ (line 81) | def __repr__(self):
function _create_flip_indices (line 89) | def _create_flip_indices(names, flip_map):
class PersonKeypoints (line 97) | class PersonKeypoints(Keypoints):
function kp_connections (line 131) | def kp_connections(keypoints):
function keypoints_to_heat_map (line 154) | def keypoints_to_heat_map(keypoints, rois, heatmap_size):
FILE: maskrcnn_benchmark/structures/segmentation_mask.py
class BinaryMaskList (line 33) | class BinaryMaskList(object):
method __init__ (line 38) | def __init__(self, masks, size):
method transpose (line 84) | def transpose(self, method):
method crop (line 89) | def crop(self, box):
method resize (line 110) | def resize(self, size):
method convert_to_polygon (line 131) | def convert_to_polygon(self):
method to (line 135) | def to(self, *args, **kwargs):
method _findContours (line 138) | def _findContours(self):
method __len__ (line 155) | def __len__(self):
method __getitem__ (line 158) | def __getitem__(self, index):
method __iter__ (line 164) | def __iter__(self):
method __repr__ (line 167) | def __repr__(self):
class PolygonInstance (line 175) | class PolygonInstance(object):
method __init__ (line 182) | def __init__(self, polygons, size):
method transpose (line 215) | def transpose(self, method):
method crop (line 238) | def crop(self, box):
method resize (line 266) | def resize(self, size):
method convert_to_binarymask (line 290) | def convert_to_binarymask(self):
method __len__ (line 300) | def __len__(self):
method __repr__ (line 303) | def __repr__(self):
class PolygonList (line 311) | class PolygonList(object):
method __init__ (line 316) | def __init__(self, polygons, size):
method transpose (line 365) | def transpose(self, method):
method crop (line 377) | def crop(self, box):
method resize (line 386) | def resize(self, size):
method to (line 394) | def to(self, *args, **kwargs):
method convert_to_binarymask (line 397) | def convert_to_binarymask(self):
method __len__ (line 406) | def __len__(self):
method __getitem__ (line 409) | def __getitem__(self, item):
method __iter__ (line 425) | def __iter__(self):
method __repr__ (line 428) | def __repr__(self):
class SegmentationMask (line 436) | class SegmentationMask(object):
method __init__ (line 443) | def __init__(self, instances, size, mode="poly"):
method transpose (line 472) | def transpose(self, method):
method crop (line 476) | def crop(self, box):
method resize (line 481) | def resize(self, size, *args, **kwargs):
method to (line 486) | def to(self, *args, **kwargs):
method convert (line 489) | def convert(self, mode):
method get_mask_tensor (line 502) | def get_mask_tensor(self):
method __len__ (line 509) | def __len__(self):
method __getitem__ (line 512) | def __getitem__(self, item):
method __iter__ (line 516) | def __iter__(self):
method __next__ (line 520) | def __next__(self):
method __repr__ (line 527) | def __repr__(self):
FILE: maskrcnn_benchmark/utils/c2_model_loading.py
function _rename_basic_resnet_weights (line 12) | def _rename_basic_resnet_weights(layer_keys):
function _rename_fpn_weights (line 64) | def _rename_fpn_weights(layer_keys, stage_names):
function _rename_weights_for_resnet (line 84) | def _rename_weights_for_resnet(weights, stage_names):
function _load_c2_pickled_weights (line 133) | def _load_c2_pickled_weights(file_path):
function load_resnet_c2_format (line 164) | def load_resnet_c2_format(cfg, f):
function load_c2_format (line 174) | def load_c2_format(cfg, f):
FILE: maskrcnn_benchmark/utils/checkpoint.py
class Checkpointer (line 13) | class Checkpointer(object):
method __init__ (line 14) | def __init__(
method save (line 32) | def save(self, name, **kwargs):
method load (line 52) | def load(self, f=None):
method has_checkpoint (line 73) | def has_checkpoint(self):
method get_checkpoint_file (line 77) | def get_checkpoint_file(self):
method tag_last_checkpoint (line 89) | def tag_last_checkpoint(self, last_filename):
method _load_file (line 94) | def _load_file(self, f):
method _load_model (line 97) | def _load_model(self, checkpoint):
class DetectronCheckpointer (line 101) | class DetectronCheckpointer(Checkpointer):
method __init__ (line 102) | def __init__(
method _load_file (line 117) | def _load_file(self, f):
FILE: maskrcnn_benchmark/utils/collect_env.py
function get_pil_version (line 7) | def get_pil_version():
function collect_env_info (line 11) | def collect_env_info():
FILE: maskrcnn_benchmark/utils/comm.py
function get_world_size (line 13) | def get_world_size():
function get_rank (line 21) | def get_rank():
function is_main_process (line 29) | def is_main_process():
function synchronize (line 33) | def synchronize():
function all_gather (line 48) | def all_gather(data):
function reduce_dict (line 91) | def reduce_dict(input_dict, average=True):
function is_pytorch_1_1_0_or_later (line 120) | def is_pytorch_1_1_0_or_later():
FILE: maskrcnn_benchmark/utils/cv2_util.py
function findContours (line 8) | def findContours(*args, **kwargs):
FILE: maskrcnn_benchmark/utils/env.py
function setup_environment (line 7) | def setup_environment():
function setup_custom_environment (line 20) | def setup_custom_environment(custom_module_path):
FILE: maskrcnn_benchmark/utils/imports.py
function import_file (line 11) | def import_file(module_name, file_path, make_importable=False):
function import_file (line 21) | def import_file(module_name, file_path, make_importable=None):
FILE: maskrcnn_benchmark/utils/logger.py
function setup_logger (line 7) | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"):
FILE: maskrcnn_benchmark/utils/metric_logger.py
class SmoothedValue (line 8) | class SmoothedValue(object):
method __init__ (line 13) | def __init__(self, window_size=20):
method update (line 19) | def update(self, value):
method median (line 26) | def median(self):
method avg (line 31) | def avg(self):
method global_avg (line 36) | def global_avg(self):
class MetricLogger (line 40) | class MetricLogger(object):
method __init__ (line 41) | def __init__(self, delimiter="\t"):
method update (line 45) | def update(self, **kwargs):
method __getattr__ (line 52) | def __getattr__(self, attr):
method __str__ (line 60) | def __str__(self):
FILE: maskrcnn_benchmark/utils/miscellaneous.py
function mkdir (line 6) | def mkdir(path):
FILE: maskrcnn_benchmark/utils/model_serialization.py
function align_and_update_state_dicts (line 10) | def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
function strip_prefix_if_present (line 61) | def strip_prefix_if_present(state_dict, prefix):
function load_state_dict (line 71) | def load_state_dict(model, loaded_state_dict):
FILE: maskrcnn_benchmark/utils/model_zoo.py
function cache_url (line 20) | def cache_url(url, model_dir=None, progress=True):
FILE: maskrcnn_benchmark/utils/registry.py
function _register_generic (line 4) | def _register_generic(module_dict, module_name, module):
class Registry (line 9) | class Registry(dict):
method __init__ (line 31) | def __init__(self, *args, **kwargs):
method register (line 34) | def register(self, module_name, module=None):
FILE: maskrcnn_benchmark/utils/timer.py
class Timer (line 8) | class Timer(object):
method __init__ (line 9) | def __init__(self):
method average_time (line 13) | def average_time(self):
method tic (line 16) | def tic(self):
method toc (line 21) | def toc(self, average=True):
method add (line 28) | def add(self, time_diff):
method reset (line 33) | def reset(self):
method avg_time_str (line 39) | def avg_time_str(self):
function get_time_str (line 44) | def get_time_str(time_diff):
FILE: setup.py
function get_extensions (line 17) | def get_extensions():
FILE: tests/checkpoint.py
class TestCheckpointer (line 14) | class TestCheckpointer(unittest.TestCase):
method create_model (line 15) | def create_model(self):
method create_complex_model (line 18) | def create_complex_model(self):
method test_from_last_checkpoint_model (line 36) | def test_from_last_checkpoint_model(self):
method test_from_name_file_model (line 71) | def test_from_name_file_model(self):
method test_complex_model_loaded (line 103) | def test_complex_model_loaded(self):
FILE: tests/env_tests/env.py
function get_config_root_path (line 6) | def get_config_root_path():
FILE: tests/test_backbones.py
class TestBackbones (line 23) | class TestBackbones(unittest.TestCase):
method test_build_backbones (line 24) | def test_build_backbones(self):
FILE: tests/test_box_coder.py
class TestBoxCoder (line 10) | class TestBoxCoder(unittest.TestCase):
method test_box_decoder (line 11) | def test_box_decoder(self):
FILE: tests/test_configs.py
class TestConfigs (line 9) | class TestConfigs(unittest.TestCase):
method test_configs_load (line 10) | def test_configs_load(self):
FILE: tests/test_data_samplers.py
class SubsetSampler (line 15) | class SubsetSampler(Sampler):
method __init__ (line 16) | def __init__(self, indices):
method __iter__ (line 19) | def __iter__(self):
method __len__ (line 22) | def __len__(self):
class TestGroupedBatchSampler (line 26) | class TestGroupedBatchSampler(unittest.TestCase):
method test_respect_order_simple (line 27) | def test_respect_order_simple(self):
method test_respect_order (line 40) | def test_respect_order(self):
method test_respect_order_drop_uneven (line 59) | def test_respect_order_drop_uneven(self):
method test_subset_sampler (line 72) | def test_subset_sampler(self):
method test_permute_subset_sampler (line 85) | def test_permute_subset_sampler(self):
method test_permute_subset_sampler_drop_uneven (line 98) | def test_permute_subset_sampler_drop_uneven(self):
method test_len (line 111) | def test_len(self):
class TestIterationBasedBatchSampler (line 130) | class TestIterationBasedBatchSampler(unittest.TestCase):
method test_number_of_iters_and_elements (line 131) | def test_number_of_iters_and_elements(self):
FILE: tests/test_detectors.py
function get_config_files (line 52) | def get_config_files(file_list, exclude_folders):
function create_model (line 69) | def create_model(cfg, device):
function create_random_input (line 77) | def create_random_input(cfg, device):
function _test_build_detectors (line 86) | def _test_build_detectors(self, device):
function _test_run_selected_detectors (line 99) | def _test_run_selected_detectors(self, cfg_files, device):
class TestDetectors (line 116) | class TestDetectors(unittest.TestCase):
method test_build_detectors (line 117) | def test_build_detectors(self):
method test_build_detectors_cuda (line 122) | def test_build_detectors_cuda(self):
method test_run_selected_detectors (line 126) | def test_run_selected_detectors(self):
method test_run_selected_detectors_cuda (line 134) | def test_run_selected_detectors_cuda(self):
FILE: tests/test_fbnet.py
function _test_primitive (line 13) | def _test_primitive(self, device, op_name, op_func, N, C_in, C_out, expa...
class TestFBNetBuilder (line 23) | class TestFBNetBuilder(unittest.TestCase):
method test_identity (line 24) | def test_identity(self):
method test_primitives (line 35) | def test_primitives(self):
method test_primitives_cuda (line 47) | def test_primitives_cuda(self):
method test_primitives_empty_batch (line 58) | def test_primitives_empty_batch(self):
method test_primitives_cuda_empty_batch (line 71) | def test_primitives_cuda_empty_batch(self):
FILE: tests/test_feature_extractors.py
function _test_feature_extractors (line 25) | def _test_feature_extractors(
class TestFeatureExtractors (line 63) | class TestFeatureExtractors(unittest.TestCase):
method test_roi_box_feature_extractors (line 64) | def test_roi_box_feature_extractors(self):
method test_roi_keypoints_feature_extractors (line 73) | def test_roi_keypoints_feature_extractors(self):
method test_roi_mask_feature_extractors (line 82) | def test_roi_mask_feature_extractors(self):
FILE: tests/test_metric_logger.py
class TestMetricLogger (line 7) | class TestMetricLogger(unittest.TestCase):
method test_update (line 8) | def test_update(self):
method test_no_attr (line 19) | def test_no_attr(self):
FILE: tests/test_nms.py
class TestNMS (line 10) | class TestNMS(unittest.TestCase):
method test_nms_cpu (line 11) | def test_nms_cpu(self):
method test_nms1_cpu (line 60) | def test_nms1_cpu(self):
FILE: tests/test_predictors.py
function _test_predictors (line 23) | def _test_predictors(
class TestPredictors (line 52) | class TestPredictors(unittest.TestCase):
method test_roi_box_predictors (line 53) | def test_roi_box_predictors(self):
method test_roi_keypoints_predictors (line 70) | def test_roi_keypoints_predictors(self):
method test_roi_mask_predictors (line 83) | def test_roi_mask_predictors(self):
FILE: tests/test_rpn_heads.py
class TestRPNHeads (line 19) | class TestRPNHeads(unittest.TestCase):
method test_build_rpn_heads (line 20) | def test_build_rpn_heads(self):
FILE: tests/test_segmentation_mask.py
class TestSegmentationMask (line 7) | class TestSegmentationMask(unittest.TestCase):
method __init__ (line 8) | def __init__(self, method_name='runTest'):
method L1 (line 24) | def L1(self, A, B):
method test_convert (line 30) | def test_convert(self):
method test_crop (line 41) | def test_crop(self):
method test_resize (line 47) | def test_resize(self):
method test_transpose (line 59) | def test_transpose(self):
FILE: tests/utils.py
function get_config_root_path (line 14) | def get_config_root_path():
function load_config (line 18) | def load_config(rel_path):
function load_config_from_file (line 24) | def load_config_from_file(file_path):
FILE: tools/cityscapes/convert_cityscapes_to_coco.py
function parse_args (line 35) | def parse_args():
function poly_to_box (line 50) | def poly_to_box(poly):
function xyxy_to_xywh (line 60) | def xyxy_to_xywh(xyxy_box):
function convert_coco_stuff_mat (line 67) | def convert_coco_stuff_mat(data_dir, out_dir):
function getLabelID (line 108) | def getLabelID(self, instID):
function convert_cityscapes_instance_only (line 115) | def convert_cityscapes_instance_only(
FILE: tools/cityscapes/instances2dict_with_polygons.py
function instances2dict_with_polygons (line 19) | def instances2dict_with_polygons(imageFileList, verbose=False):
function main (line 72) | def main(argv):
FILE: tools/remove_solver_states.py
function main (line 9) | def main():
FILE: tools/test_net.py
function main (line 21) | def main():
FILE: tools/train_net.py
function train (line 30) | def train(cfg, local_rank, distributed):
function run_test (line 85) | def run_test(cfg, model, distributed):
function main (line 117) | def main():
Condensed preview — 240 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (689K chars).
[
{
"path": ".flake8",
"chars": 247,
"preview": "# This is an example .flake8 config, used when developing *Black* itself.\n# Keep in sync with setup.cfg which is used fo"
},
{
"path": ".gitignore",
"chars": 321,
"preview": "# compilation and distribution\n__pycache__\n_ext\n*.pyc\n*.so\nmaskrcnn_benchmark.egg-info/\nbuild/\ndist/\n\n# pytorch/python/n"
},
{
"path": "ABSTRACTIONS.md",
"chars": 2654,
"preview": "## Abstractions\nThe main abstractions introduced by `maskrcnn_benchmark` that are useful to\nhave in mind are the followi"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 244,
"preview": "# Code of Conduct\n\nFacebook has adopted a Code of Conduct that we expect project participants to adhere to.\nPlease read "
},
{
"path": "CONTRIBUTING.md",
"chars": 1641,
"preview": "# Contributing to Mask-RCNN Benchmark\nWe want to make contributing to this project as easy and transparent as\npossible.\n"
},
{
"path": "INSTALL.md",
"chars": 2407,
"preview": "## Installation\n\n### Requirements:\n- PyTorch >= 1.0. Installation instructions can be found in https://pytorch.org/get-s"
},
{
"path": "LICENSE",
"chars": 1327,
"preview": "FCOS for non-commercial purposes\n\nCopyright (c) 2019 the authors\nAll rights reserved.\n\nRedistribution and use in source "
},
{
"path": "MASKRCNN_README.md",
"chars": 11120,
"preview": "# Faster R-CNN and Mask R-CNN in PyTorch 1.0\n\nThis project aims at providing the necessary building blocks for easily\ncr"
},
{
"path": "MODEL_ZOO.md",
"chars": 6877,
"preview": "## Model Zoo and Baselines\n\n### Hardware\n- 8 NVIDIA V100 GPUs\n\n### Software\n- PyTorch version: 1.0.0a0+dd2c487\n- CUDA 9."
},
{
"path": "README.md",
"chars": 2843,
"preview": "# FCOS_PLUS\n\nThis project contains some improvements about FCOS (Fully Convolutional One-Stage Object Detection).\n\n\n## I"
},
{
"path": "TROUBLESHOOTING.md",
"chars": 2930,
"preview": "# Troubleshooting\n\nHere is a compilation if common issues that you might face\nwhile compiling / running this code:\n\n## C"
},
{
"path": "configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml",
"chars": 701,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-F"
},
{
"path": "configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
"chars": 166,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4"
},
{
"path": "configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml",
"chars": 699,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FP"
},
{
"path": "configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml",
"chars": 774,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-3"
},
{
"path": "configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml",
"chars": 1251,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-"
},
{
"path": "configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml",
"chars": 985,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN"
},
{
"path": "configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml",
"chars": 272,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1"
},
{
"path": "configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
"chars": 983,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_"
},
{
"path": "configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml",
"chars": 1066,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x"
},
{
"path": "configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml",
"chars": 1058,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x"
},
{
"path": "configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml",
"chars": 864,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml",
"chars": 1150,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/e2e_faster_rcnn_R_101_FPN_1x.yaml",
"chars": 819,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n BACKBONE:\n CONV_B"
},
{
"path": "configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
"chars": 373,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/e2e_faster_rcnn_R_50_FPN_1x.yaml",
"chars": 817,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml",
"chars": 923,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n BACKB"
},
{
"path": "configs/e2e_faster_rcnn_fbnet.yaml",
"chars": 1041,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"default\"\n BN_TYPE:"
},
{
"path": "configs/e2e_faster_rcnn_fbnet_600.yaml",
"chars": 1044,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"default\"\n BN_TYPE:"
},
{
"path": "configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml",
"chars": 1047,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"cham_v1a\"\n BN_TYPE"
},
{
"path": "configs/e2e_keypoint_rcnn_R_50_FPN_1x.yaml",
"chars": 1220,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/e2e_mask_rcnn_R_101_FPN_1x.yaml",
"chars": 1105,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n BACKBONE:\n CONV_B"
},
{
"path": "configs/e2e_mask_rcnn_R_50_C4_1x.yaml",
"chars": 481,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml",
"chars": 1103,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml",
"chars": 1209,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n BACKB"
},
{
"path": "configs/e2e_mask_rcnn_fbnet.yaml",
"chars": 1271,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"default\"\n BN_TYPE:"
},
{
"path": "configs/e2e_mask_rcnn_fbnet_600.yaml",
"chars": 1274,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"default\"\n BN_TYPE:"
},
{
"path": "configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml",
"chars": 1279,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"xirb16d_dsmask\"\n B"
},
{
"path": "configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml",
"chars": 1281,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n BACKBONE:\n CONV_BODY: FBNet\n FBNET:\n ARCH: \"xirb16d_dsmask\"\n B"
},
{
"path": "configs/fcos/fcos_R_101_FPN_2x.yaml",
"chars": 664,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n RPN_ONLY: True\n FCO"
},
{
"path": "configs/fcos/fcos_R_50_FPN_1x.yaml",
"chars": 639,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"pretrain_models/R-50.pkl\"\n RPN_ONLY: True\n FCOS_ON: True\n BA"
},
{
"path": "configs/fcos/fcos_R_50_FPN_1x_center.yaml",
"chars": 658,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"pretrain_models/R-50.pkl\"\n RPN_ONLY: True\n FCOS_ON: True\n BA"
},
{
"path": "configs/fcos/fcos_R_50_FPN_1x_center_giou.yaml",
"chars": 684,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"pretrain_models/R-50.pkl\"\n RPN_ONLY: True\n FCOS_ON: True\n BA"
},
{
"path": "configs/fcos/fcos_X_101_32x8d_FPN_2x.yaml",
"chars": 747,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n RPN_O"
},
{
"path": "configs/fcos/fcos_X_101_64x4d_FPN_2x.yaml",
"chars": 747,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-64x4d\"\n RPN_O"
},
{
"path": "configs/fcos/fcos_bn_bs16_MNV2_FPN_1x.yaml",
"chars": 747,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download"
},
{
"path": "configs/fcos/fcos_syncbn_bs32_MNV2_FPN_1x.yaml",
"chars": 746,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download"
},
{
"path": "configs/fcos/fcos_syncbn_bs32_c128_MNV2_FPN_1x.yaml",
"chars": 746,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download"
},
{
"path": "configs/fcos/fcos_syncbn_bs32_c128_ms_MNV2_FPN_1x.yaml",
"chars": 756,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download"
},
{
"path": "configs/fcos/fcos_syncbn_bs64_c128_ms_MNV2_FPN_1x.yaml",
"chars": 756,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download"
},
{
"path": "configs/gn_baselines/README.md",
"chars": 804,
"preview": "### Group Normalization\n1 [Group Normalization](https://arxiv.org/abs/1803.08494) \n2 [Rethinking ImageNet Pre-training]"
},
{
"path": "configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml",
"chars": 1230,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml",
"chars": 1282,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml",
"chars": 1594,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml",
"chars": 1646,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml",
"chars": 1263,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml",
"chars": 1315,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml",
"chars": 1627,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml",
"chars": 1679,
"preview": "INPUT:\n MIN_SIZE_TRAIN: (800,)\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MAX_SIZE_TEST: 1333\nMODEL:\n META_ARCHITEC"
},
{
"path": "configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml",
"chars": 442,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml",
"chars": 442,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml",
"chars": 1103,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml",
"chars": 503,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml",
"chars": 956,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml",
"chars": 1040,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n BACKB"
},
{
"path": "configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml",
"chars": 1316,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml",
"chars": 611,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN:\n PRE_NMS_TOP_"
},
{
"path": "configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml",
"chars": 1242,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n BACKBONE:\n CONV_BO"
},
{
"path": "configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml",
"chars": 1326,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n BACKB"
},
{
"path": "configs/quick_schedules/rpn_R_50_C4_quick.yaml",
"chars": 478,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n RPN:"
},
{
"path": "configs/quick_schedules/rpn_R_50_FPN_quick.yaml",
"chars": 683,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n BACK"
},
{
"path": "configs/retinanet/retinanet_R-101-FPN_1x.yaml",
"chars": 1203,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n RPN_ONLY: True\n RET"
},
{
"path": "configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml",
"chars": 1221,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n RPN_ONLY: True\n RET"
},
{
"path": "configs/retinanet/retinanet_R-50-FPN_1x.yaml",
"chars": 1200,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n RETI"
},
{
"path": "configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml",
"chars": 1145,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n RETI"
},
{
"path": "configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml",
"chars": 1218,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n RETI"
},
{
"path": "configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml",
"chars": 1286,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n RPN_O"
},
{
"path": "configs/rpn_R_101_FPN_1x.yaml",
"chars": 576,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-101\"\n RPN_ONLY: True\n BAC"
},
{
"path": "configs/rpn_R_50_C4_1x.yaml",
"chars": 369,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n RPN:"
},
{
"path": "configs/rpn_R_50_FPN_1x.yaml",
"chars": 574,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/MSRA/R-50\"\n RPN_ONLY: True\n BACK"
},
{
"path": "configs/rpn_X_101_32x8d_FPN_1x.yaml",
"chars": 658,
"preview": "MODEL:\n META_ARCHITECTURE: \"GeneralizedRCNN\"\n WEIGHT: \"catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d\"\n RPN_O"
},
{
"path": "demo/README.md",
"chars": 1557,
"preview": "## Webcam and Jupyter notebook demo\n\nThis folder contains a simple webcam demo that illustrates how you can use `maskrcn"
},
{
"path": "demo/fcos_demo.py",
"chars": 4263,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport argparse\nimport cv2, os\n\nfrom maskrcnn_be"
},
{
"path": "demo/predictor.py",
"chars": 15344,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport cv2\nimport torch\nfrom torchvision import "
},
{
"path": "demo/webcam.py",
"chars": 2329,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport argparse\nimport cv2\n\nfrom maskrcnn_benchm"
},
{
"path": "docker/Dockerfile",
"chars": 1683,
"preview": "ARG CUDA=\"9.0\"\nARG CUDNN=\"7\"\n\nFROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04\n\nRUN echo 'debconf debconf/fronte"
},
{
"path": "docker/docker-jupyter/Dockerfile",
"chars": 1880,
"preview": "ARG CUDA=\"9.0\"\nARG CUDNN=\"7\"\n\nFROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-devel-ubuntu16.04\n\nRUN echo 'debconf debconf/fronte"
},
{
"path": "docker/docker-jupyter/jupyter_notebook_config.py",
"chars": 498,
"preview": "import os\nfrom IPython.lib import passwd\n\n#c = c # pylint:disable=undefined-variable\nc = get_config()\nc.NotebookApp.ip "
},
{
"path": "maskrcnn_benchmark/__init__.py",
"chars": 72,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n"
},
{
"path": "maskrcnn_benchmark/config/__init__.py",
"chars": 104,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .defaults import _C as cfg\n"
},
{
"path": "maskrcnn_benchmark/config/defaults.py",
"chars": 17184,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport os\n\nfrom yacs.config import CfgNode as CN"
},
{
"path": "maskrcnn_benchmark/config/paths_catalog.py",
"chars": 7960,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"Centralized catalog of paths.\"\"\"\n\nimport os\n\n"
},
{
"path": "maskrcnn_benchmark/csrc/ROIAlign.h",
"chars": 1654,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef W"
},
{
"path": "maskrcnn_benchmark/csrc/ROIPool.h",
"chars": 1630,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef W"
},
{
"path": "maskrcnn_benchmark/csrc/SigmoidFocalLoss.h",
"chars": 1043,
"preview": "#pragma once\n\n#include \"cpu/vision.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/vision.h\"\n#endif\n\n// Interface for Python\nat::Ten"
},
{
"path": "maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp",
"chars": 7939,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include \"cpu/vision.h\"\n\n// implementation take"
},
{
"path": "maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp",
"chars": 2461,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include \"cpu/vision.h\"\n\n\ntemplate <typename sc"
},
{
"path": "maskrcnn_benchmark/csrc/cpu/vision.h",
"chars": 594,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#pragma once\n#include <torch/extension.h>\n\n\nat:"
},
{
"path": "maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu",
"chars": 12327,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDA"
},
{
"path": "maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu",
"chars": 7855,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDA"
},
{
"path": "maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu",
"chars": 5728,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n// This file is modified from https://github.c"
},
{
"path": "maskrcnn_benchmark/csrc/cuda/nms.cu",
"chars": 4850,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDA"
},
{
"path": "maskrcnn_benchmark/csrc/cuda/vision.h",
"chars": 2628,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#pragma once\n#include <torch/extension.h>\n\n\nat:"
},
{
"path": "maskrcnn_benchmark/csrc/nms.h",
"chars": 716,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#pragma once\n#include \"cpu/vision.h\"\n\n#ifdef WI"
},
{
"path": "maskrcnn_benchmark/csrc/vision.cpp",
"chars": 723,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n#include \"nms.h\"\n#include \"ROIAlign.h\"\n#include"
},
{
"path": "maskrcnn_benchmark/data/README.md",
"chars": 2763,
"preview": "# Setting Up Datasets\nThis file describes how to perform training on other datasets.\n\nOnly Pascal VOC dataset can be loa"
},
{
"path": "maskrcnn_benchmark/data/__init__.py",
"chars": 108,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .build import make_data_loader\n"
},
{
"path": "maskrcnn_benchmark/data/build.py",
"chars": 6659,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport bisect\nimport copy\nimport logging\n\nimport"
},
{
"path": "maskrcnn_benchmark/data/collate_batch.py",
"chars": 673,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom maskrcnn_benchmark.structures.image_list im"
},
{
"path": "maskrcnn_benchmark/data/datasets/__init__.py",
"chars": 242,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .coco import COCODataset\nfrom .voc import P"
},
{
"path": "maskrcnn_benchmark/data/datasets/coco.py",
"chars": 3634,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nimport torchvision\n\nfrom maskrcnn_b"
},
{
"path": "maskrcnn_benchmark/data/datasets/concat_dataset.py",
"chars": 766,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport bisect\n\nfrom torch.utils.data.dataset imp"
},
{
"path": "maskrcnn_benchmark/data/datasets/evaluation/__init__.py",
"chars": 1001,
"preview": "from maskrcnn_benchmark.data import datasets\n\nfrom .coco import coco_evaluation\nfrom .voc import voc_evaluation\n\n\ndef ev"
},
{
"path": "maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py",
"chars": 494,
"preview": "from .coco_eval import do_coco_evaluation\n\n\ndef coco_evaluation(\n dataset,\n predictions,\n output_folder,\n bo"
},
{
"path": "maskrcnn_benchmark/data/datasets/evaluation/coco/coco_eval.py",
"chars": 14825,
"preview": "import logging\nimport tempfile\nimport os\nimport torch\nfrom collections import OrderedDict\nfrom tqdm import tqdm\n\nfrom ma"
},
{
"path": "maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py",
"chars": 505,
"preview": "import logging\n\nfrom .voc_eval import do_voc_evaluation\n\n\ndef voc_evaluation(dataset, predictions, output_folder, box_on"
},
{
"path": "maskrcnn_benchmark/data/datasets/evaluation/voc/voc_eval.py",
"chars": 8153,
"preview": "# A modification version from chainercv repository.\n# (See https://github.com/chainer/chainercv/blob/master/chainercv/ev"
},
{
"path": "maskrcnn_benchmark/data/datasets/list_dataset.py",
"chars": 943,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nSimple dataset class that wraps a list of pa"
},
{
"path": "maskrcnn_benchmark/data/datasets/voc.py",
"chars": 4121,
"preview": "import os\n\nimport torch\nimport torch.utils.data\nfrom PIL import Image\nimport sys\n\nif sys.version_info[0] == 2:\n impor"
},
{
"path": "maskrcnn_benchmark/data/samplers/__init__.py",
"chars": 328,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .distributed import DistributedSampler\nfrom"
},
{
"path": "maskrcnn_benchmark/data/samplers/distributed.py",
"chars": 2569,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n# Code is copy-pasted exactly as in torch.utils."
},
{
"path": "maskrcnn_benchmark/data/samplers/grouped_batch_sampler.py",
"chars": 4845,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport itertools\n\nimport torch\nfrom torch.utils."
},
{
"path": "maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py",
"chars": 1164,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom torch.utils.data.sampler import BatchSample"
},
{
"path": "maskrcnn_benchmark/data/transforms/__init__.py",
"chars": 284,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .transforms import Compose\nfrom .transforms"
},
{
"path": "maskrcnn_benchmark/data/transforms/build.py",
"chars": 1184,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom . import transforms as T\n\n\ndef build_transf"
},
{
"path": "maskrcnn_benchmark/data/transforms/transforms.py",
"chars": 2589,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport random\n\nimport torch\nimport torchvision\nf"
},
{
"path": "maskrcnn_benchmark/engine/__init__.py",
"chars": 72,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n"
},
{
"path": "maskrcnn_benchmark/engine/inference.py",
"chars": 3813,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport logging\nimport time\nimport os\n\nimport tor"
},
{
"path": "maskrcnn_benchmark/engine/trainer.py",
"chars": 4060,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport datetime\nimport logging\nimport time\n\nimpo"
},
{
"path": "maskrcnn_benchmark/layers/__init__.py",
"chars": 781,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\nfrom .batch_norm import FrozenBatc"
},
{
"path": "maskrcnn_benchmark/layers/_utils.py",
"chars": 1165,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport glob\nimport os.path\n\nimport torch\n\ntry:\n "
},
{
"path": "maskrcnn_benchmark/layers/batch_norm.py",
"chars": 799,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\n\n\nclass Frozen"
},
{
"path": "maskrcnn_benchmark/layers/iou_loss.py",
"chars": 1862,
"preview": "import torch\nfrom torch import nn\n\n\nclass IOULoss(nn.Module):\n def __init__(self, loc_loss_type):\n super(IOULo"
},
{
"path": "maskrcnn_benchmark/layers/misc.py",
"chars": 3504,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nhelper class that supports empty tensors on "
},
{
"path": "maskrcnn_benchmark/layers/nms.py",
"chars": 216,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n# from ._utils import _C\nfrom maskrcnn_benchmark"
},
{
"path": "maskrcnn_benchmark/layers/roi_align.py",
"chars": 2110,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\nfrom torch.aut"
},
{
"path": "maskrcnn_benchmark/layers/roi_pool.py",
"chars": 1855,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\nfrom torch.aut"
},
{
"path": "maskrcnn_benchmark/layers/scale.py",
"chars": 270,
"preview": "import torch\nfrom torch import nn\n\n\nclass Scale(nn.Module):\n def __init__(self, init_value=1.0):\n super(Scale,"
},
{
"path": "maskrcnn_benchmark/layers/sigmoid_focal_loss.py",
"chars": 2342,
"preview": "import torch\nfrom torch import nn\nfrom torch.autograd import Function\nfrom torch.autograd.function import once_different"
},
{
"path": "maskrcnn_benchmark/layers/smooth_l1_loss.py",
"chars": 481,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\n\n# TODO maybe push this to nn?\ndef"
},
{
"path": "maskrcnn_benchmark/modeling/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/backbone/__init__.py",
"chars": 129,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .backbone import build_backbone\nfrom . impo"
},
{
"path": "maskrcnn_benchmark/modeling/backbone/backbone.py",
"chars": 3564,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom collections import OrderedDict\n\nfrom torch "
},
{
"path": "maskrcnn_benchmark/modeling/backbone/fbnet.py",
"chars": 7845,
"preview": "from __future__ import absolute_import, division, print_function, unicode_literals\n\nimport copy\nimport json\nimport loggi"
},
{
"path": "maskrcnn_benchmark/modeling/backbone/fbnet_builder.py",
"chars": 24964,
"preview": "\"\"\"\nFBNet model builder\n\"\"\"\n\nfrom __future__ import absolute_import, division, print_function, unicode_literals\n\nimport "
},
{
"path": "maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py",
"chars": 5985,
"preview": "from __future__ import absolute_import, division, print_function, unicode_literals\n\n\ndef add_archs(archs):\n global MO"
},
{
"path": "maskrcnn_benchmark/modeling/backbone/fpn.py",
"chars": 3939,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nimport torch.nn.functional as F\nfro"
},
{
"path": "maskrcnn_benchmark/modeling/backbone/mobilenet.py",
"chars": 4624,
"preview": "# taken from https://github.com/tonylins/pytorch-mobilenet-v2/\n# Published by Ji Lin, tonylins\n# licensed under the Apa"
},
{
"path": "maskrcnn_benchmark/modeling/backbone/resnet.py",
"chars": 12814,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nVariant of the resnet module that takes cfg "
},
{
"path": "maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py",
"chars": 2718,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\n\nclass BalancedPositiveNegativeSam"
},
{
"path": "maskrcnn_benchmark/modeling/box_coder.py",
"chars": 3367,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport math\n\nimport torch\n\n\nclass BoxCoder(objec"
},
{
"path": "maskrcnn_benchmark/modeling/detector/__init__.py",
"chars": 117,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .detectors import build_detection_model\n"
},
{
"path": "maskrcnn_benchmark/modeling/detector/detectors.py",
"chars": 324,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .generalized_rcnn import GeneralizedRCNN\n\n\n"
},
{
"path": "maskrcnn_benchmark/modeling/detector/generalized_rcnn.py",
"chars": 2231,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nImplements the Generalized R-CNN framework\n\""
},
{
"path": "maskrcnn_benchmark/modeling/make_layers.py",
"chars": 3576,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nMiscellaneous utility functions\n\"\"\"\n\nimport "
},
{
"path": "maskrcnn_benchmark/modeling/matcher.py",
"chars": 5129,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\n\nclass Matcher(object):\n \"\"\"\n "
},
{
"path": "maskrcnn_benchmark/modeling/poolers.py",
"chars": 4551,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nimport torch.nn.functional as F\nfro"
},
{
"path": "maskrcnn_benchmark/modeling/registry.py",
"chars": 400,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\nfrom maskrcnn_benchmark.utils.registry import R"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py",
"chars": 2765,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\n\nfrom .roi_box"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/inference.py",
"chars": 6445,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nimport torch.nn.functional as F\nfro"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py",
"chars": 7066,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch.nn import functional as "
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_feature_extractors.py",
"chars": 5404,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\nfrom torch.nn "
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py",
"chars": 2295,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom maskrcnn_benchmark.modeling import registry"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/inference.py",
"chars": 4468,
"preview": "import torch\nfrom torch import nn\n\n\nclass KeypointPostProcessor(nn.Module):\n def __init__(self, keypointer=None):\n "
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py",
"chars": 2057,
"preview": "import torch\n\nfrom .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor\nfrom .roi_keypoint_predic"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/loss.py",
"chars": 7104,
"preview": "import torch\nfrom torch.nn import functional as F\n\nfrom maskrcnn_benchmark.modeling.matcher import Matcher\n\nfrom maskrcn"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py",
"chars": 1892,
"preview": "from torch import nn\nfrom torch.nn import functional as F\n\nfrom maskrcnn_benchmark.modeling import registry\nfrom maskrcn"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py",
"chars": 1273,
"preview": "from torch import nn\n\nfrom maskrcnn_benchmark import layers\nfrom maskrcnn_benchmark.modeling import registry\n\n\n@registry"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py",
"chars": 6563,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport numpy as np\nimport torch\nfrom torch impor"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/loss.py",
"chars": 5367,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch.nn import functional as "
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py",
"chars": 3126,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nfrom torch import nn\n\nfrom maskrcnn"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py",
"chars": 2502,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom torch import nn\nfrom torch.nn import functi"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py",
"chars": 2229,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom torch import nn\nfrom torch.nn import functi"
},
{
"path": "maskrcnn_benchmark/modeling/roi_heads/roi_heads.py",
"chars": 3269,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\nfrom .box_head.box_head import bui"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/__init__.py",
"chars": 101,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n# from .rpn import build_rpn\n"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/anchor_generator.py",
"chars": 9948,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport math\n\nimport numpy as np\nimport torch\nfro"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/fcos/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/rpn/fcos/fcos.py",
"chars": 7652,
"preview": "import math\nimport torch\nimport torch.nn.functional as F\nfrom torch import nn\n\nfrom .inference import make_fcos_postproc"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/fcos/inference.py",
"chars": 7815,
"preview": "import torch\n\nfrom ..inference import RPNPostProcessor\nfrom ..utils import permute_and_flatten\n\nfrom maskrcnn_benchmark."
},
{
"path": "maskrcnn_benchmark/modeling/rpn/fcos/loss.py",
"chars": 9840,
"preview": "\"\"\"\nThis file contains specific functions for computing losses of FCOS\nfile\n\"\"\"\n\nimport torch\nfrom torch.nn import funct"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/inference.py",
"chars": 7466,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\nfrom maskrcnn_benchmark.modeling.b"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/loss.py",
"chars": 5768,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nThis file contains specific functions for co"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/modeling/rpn/retinanet/inference.py",
"chars": 6923,
"preview": "import torch\n\nfrom ..inference import RPNPostProcessor\nfrom ..utils import permute_and_flatten\n\nfrom maskrcnn_benchmark."
},
{
"path": "maskrcnn_benchmark/modeling/rpn/retinanet/loss.py",
"chars": 3484,
"preview": "\"\"\"\nThis file contains specific functions for computing losses on the RetinaNet\nfile\n\"\"\"\n\nimport torch\nfrom torch.nn imp"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/retinanet/retinanet.py",
"chars": 5301,
"preview": "import math\nimport torch\nimport torch.nn.functional as F\nfrom torch import nn\n\nfrom .inference import make_retinanet_po"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/rpn.py",
"chars": 7742,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\nimport torch.nn.functional as F\nfro"
},
{
"path": "maskrcnn_benchmark/modeling/rpn/utils.py",
"chars": 1679,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nUtility functions minipulating the predictio"
},
{
"path": "maskrcnn_benchmark/modeling/utils.py",
"chars": 400,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\n\"\"\"\nMiscellaneous utility functions\n\"\"\"\n\nimport "
},
{
"path": "maskrcnn_benchmark/solver/__init__.py",
"chars": 187,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom .build import make_optimizer\nfrom .build im"
},
{
"path": "maskrcnn_benchmark/solver/build.py",
"chars": 976,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\nfrom .lr_scheduler import WarmupMu"
},
{
"path": "maskrcnn_benchmark/solver/lr_scheduler.py",
"chars": 1817,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom bisect import bisect_right\n\nimport torch\n\n\n"
},
{
"path": "maskrcnn_benchmark/structures/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "maskrcnn_benchmark/structures/bounding_box.py",
"chars": 9646,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\n# transpose\nFLIP_LEFT_RIGHT = 0\nFL"
},
{
"path": "maskrcnn_benchmark/structures/boxlist_ops.py",
"chars": 3637,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nimport torch\n\nfrom .bounding_box import BoxList\n"
},
{
"path": "maskrcnn_benchmark/structures/image_list.py",
"chars": 2485,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.\nfrom __future__ import division\n\nimport torch\n\n\n"
}
]
// ... and 40 more files (download for full content)
About this extraction
This page contains the full source code of the yqyao/FCOS_PLUS GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 240 files (632.0 KB), approximately 182.0k tokens, and a symbol index with 736 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.